diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h
index 6b99a77e73b..7ee981f6544 100644
--- a/src/cudamatrix/cu-kernels-ansi.h
+++ b/src/cudamatrix/cu-kernels-ansi.h
@@ -110,6 +110,54 @@ void cudaD_add_mat_repeated(dim3 Gr, dim3 Bl, double alpha, const double *src,
                             MatrixDim src_dim, double *dst, MatrixDim dst_dim);
 void cudaF_add_mat_repeated(dim3 Gr, dim3 Bl, float alpha, const float *src,
                             MatrixDim src_dim, float *dst, MatrixDim dst_dim);
+void cudaD_max_mat_blocks(dim3 Gr, dim3 Bl,
+                          const double *src, double *dst, double *index_max_,
+                          const int32_cuda stride,
+                          const int32_cuda input_t_dim_,
+                          const int32_cuda pool_t_size_,
+                          const int32_cuda pool_t_step_,
+                          const int32_cuda input_h_dim_,
+                          const int32_cuda pool_h_size_,
+                          const int32_cuda pool_h_step_,
+                          const int32_cuda input_f_dim_,
+                          const int32_cuda pool_f_size_,
+                          const int32_cuda pool_f_step_,
+                          int A_trans);
+void cudaF_max_mat_blocks(dim3 Gr, dim3 Bl,
+                          const float *src, float *dst, float *index_max_,
+                          const int32_cuda stride,
+                          const int32_cuda input_t_dim_,
+                          const int32_cuda pool_t_size_,
+                          const int32_cuda pool_t_step_,
+                          const int32_cuda input_h_dim_,
+                          const int32_cuda pool_h_size_,
+                          const int32_cuda pool_h_step_,
+                          const int32_cuda input_f_dim_,
+                          const int32_cuda pool_f_size_,
+                          const int32_cuda pool_f_step_,
+                          int A_trans);
+void cudaD_max_mat_blocks_back(dim3 Gr, dim3 Bl,
+                               const double *src, double *dst,
+                               double *index_max_,
+                               const int32_cuda input_t_dim_,
+                               const int32_cuda pool_t_size_,
+                               const int32_cuda pool_t_step_,
+                               const int32_cuda input_h_dim_,
+                               const int32_cuda pool_h_size_,
+                               const int32_cuda pool_h_step_,
+                               const int32_cuda input_f_dim_,
+                               const int32_cuda pool_f_size_,
+                               const int32_cuda pool_f_step_);
+void cudaF_max_mat_blocks_back(dim3 Gr, dim3 Bl,
+                               const float *src, float *dst,
+                               float *index_max_,
+                               const int32_cuda input_t_dim_,
+                               const int32_cuda pool_t_size_,
+                               const int32_cuda pool_t_step_,
+                               const int32_cuda input_h_dim_,
+                               const int32_cuda pool_h_size_,
+                               const int32_cuda pool_h_step_,
+                               const int32_cuda input_f_dim_,
+                               const int32_cuda pool_f_size_,
+                               const int32_cuda pool_f_step_);
 void cudaD_add_mat_diag_vec(dim3 Gr, dim3 Bl, double alpha, double *mat,
                             MatrixDim mat_dim, const double *mat2,
                             int mat2_row_stride, int mat2_col_stride,
diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu
index 934a860a055..34f5c7652ba 100644
--- a/src/cudamatrix/cu-kernels.cu
+++ b/src/cudamatrix/cu-kernels.cu
@@ -773,6 +773,149 @@ static void _add_mat_blocks_trans(Real alpha, const Real* src,
   }
 }
 
+template<typename Real>
+__global__
+static void _max_mat_blocks(const Real *src, Real *dst, Real *index_max_,
+                            const int32_cuda stride,
+                            const int32_cuda input_t_dim_,
+                            const int32_cuda pool_t_size_,
+                            const int32_cuda pool_t_step_,
+                            const int32_cuda input_h_dim_,
+                            const int32_cuda pool_h_size_,
+                            const int32_cuda pool_h_step_,
+                            const int32_cuda input_f_dim_,
+                            const int32_cuda pool_f_size_,
+                            const int32_cuda pool_f_step_) {
+  int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x;
+  int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y;
+  int32_cuda k = blockIdx.z * blockDim.z + threadIdx.z;
+  int32_cuda num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+  int32_cuda num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+
+  // initialize the running maximum and its index to the first element of
+  // this thread's pool.  Note: the maximum is a matrix element, so it has
+  // type Real, not int32_cuda.
+  int32_cuda max_row = i * pool_t_step_;
+  int32_cuda max_col = j * pool_h_step_ * input_f_dim_ + k * pool_f_step_;
+  Real max_value = src[max_row * input_h_dim_ * input_f_dim_ + max_col];
+
+  // loop over all the elements in the pool to find the maximum one,
+  // and record its index.
+  for (int32_cuda t = 0; t < pool_t_size_; t += stride) {
+    // the row index in *src
+    int32_cuda idx_row = i * pool_t_step_ + t;
+
+    for (int32_cuda h = 0; h < pool_h_size_; h++) {
+      for (int32_cuda f = 0; f < pool_f_size_; f++) {
+        // the column index in *src
+        int32_cuda idx_col = (j * pool_h_step_ + h) * input_f_dim_
+                             + k * pool_f_step_ + f;
+
+        if (src[idx_row * input_h_dim_ * input_f_dim_ + idx_col] > max_value) {
+          max_row = idx_row;
+          max_col = idx_col;
+          max_value = src[max_row * input_h_dim_ * input_f_dim_ + max_col];
+        }
+      }
+    }
+  }
+
+  dst[i * num_pools_h * num_pools_f + j * num_pools_f + k] = max_value;
+
+  // each pool stores a (row, col) pair in 'index_max_', so the base
+  // offset of this pool's pair is twice the pool index.
+  int32_cuda idx_in_idxmax = 2 * ((i * num_pools_h + j) * num_pools_f + k);
+  index_max_[idx_in_idxmax] = max_row;
+  index_max_[idx_in_idxmax + 1] = max_col;
+}
+
+// This function is basically the same as _max_mat_blocks, except that it
+// operates on the transpose of *src, so the row and column indices are
+// exchanged.
+template<typename Real>
+__global__
+static void _max_mat_blocks_trans(const Real *src, Real *dst,
+                                  Real *index_max_,
+                                  const int32_cuda stride,
+                                  const int32_cuda input_t_dim_,
+                                  const int32_cuda pool_t_size_,
+                                  const int32_cuda pool_t_step_,
+                                  const int32_cuda input_h_dim_,
+                                  const int32_cuda pool_h_size_,
+                                  const int32_cuda pool_h_step_,
+                                  const int32_cuda input_f_dim_,
+                                  const int32_cuda pool_f_size_,
+                                  const int32_cuda pool_f_step_) {
+  int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x;
+  int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y;
+  int32_cuda k = blockIdx.z * blockDim.z + threadIdx.z;
+  int32_cuda num_pools_t = 1 + (input_t_dim_ - pool_t_size_) / pool_t_step_;
+  int32_cuda num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+  int32_cuda num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+
+  int32_cuda max_row = i * pool_t_step_;
+  int32_cuda max_col = j * pool_h_step_ * input_f_dim_ + k * pool_f_step_;
+  Real max_value = src[max_col * input_t_dim_ + max_row];
+
+  for (int32_cuda t = 0; t < pool_t_size_; t += stride) {
+    int32_cuda idx_row = i * pool_t_step_ + t;
+
+    for (int32_cuda h = 0; h < pool_h_size_; h++) {
+      for (int32_cuda f = 0; f < pool_f_size_; f++) {
+        int32_cuda idx_col = (j * pool_h_step_ + h) * input_f_dim_
+                             + k * pool_f_step_ + f;
+
+        if (src[idx_col * input_t_dim_ + idx_row] > max_value) {
+          max_row = idx_row;
+          max_col = idx_col;
+          max_value = src[max_col * input_t_dim_ + max_row];
+        }
+      }
+    }
+  }
+
+  dst[(j * num_pools_f + k) * num_pools_t + i] = max_value;
+
+  int32_cuda idx_in_idxmax = 2 * ((i * num_pools_h + j) * num_pools_f + k);
+  index_max_[idx_in_idxmax] = max_row;
+  index_max_[idx_in_idxmax + 1] = max_col;
+}
+
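+// Backward version of _max_mat_blocks: it routes each pool's derivative in
+// *src back to the element of *dst whose (row, col) index was recorded in
+// 'index_max_' during the forward pass, and zeroes the other elements.
+//
+// A small worked example of the 'index_max_' layout these kernels assume
+// (the numbers are illustrative only): with num_pools_t = 2,
+// num_pools_h = 1, num_pools_f = 1 there are two pools, and 'index_max_'
+// holds 2 * 2 = 4 values:
+//   index_max_[0], index_max_[1]:  (row, col) of the max of pool (0, 0, 0)
+//   index_max_[2], index_max_[3]:  (row, col) of the max of pool (1, 0, 0)
+//
+// Caveat: one thread handles one pool, so if pools overlap (pool step
+// smaller than pool size), several threads may touch the same element of
+// *dst concurrently and the result depends on thread scheduling; the
+// kernel is only well-defined for non-overlapping pools.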
+template<typename Real>
+__global__
+static void _max_mat_blocks_back(const Real *src, Real *dst,
+                                 Real *index_max_,
+                                 const int32_cuda input_t_dim_,
+                                 const int32_cuda pool_t_size_,
+                                 const int32_cuda pool_t_step_,
+                                 const int32_cuda input_h_dim_,
+                                 const int32_cuda pool_h_size_,
+                                 const int32_cuda pool_h_step_,
+                                 const int32_cuda input_f_dim_,
+                                 const int32_cuda pool_f_size_,
+                                 const int32_cuda pool_f_step_) {
+  int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x;
+  int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y;
+  int32_cuda k = blockIdx.z * blockDim.z + threadIdx.z;
+  int32_cuda num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+  int32_cuda num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+
+  // base offset of this pool's (row, col) pair in 'index_max_'.
+  int32_cuda idx_in_idxmax = 2 * ((i * num_pools_h + j) * num_pools_f + k);
+
+  for (int32_cuda t = 0; t < pool_t_size_; t++) {
+    int32_cuda idx_row = i * pool_t_step_ + t;
+
+    for (int32_cuda h = 0; h < pool_h_size_; h++) {
+      for (int32_cuda f = 0; f < pool_f_size_; f++) {
+        int32_cuda idx_col = (j * pool_h_step_ + h) * input_f_dim_
+                             + k * pool_f_step_ + f;
+
+        // copy the pool's derivative if (idx_row, idx_col) is the recorded
+        // argmax of this pool, or if an overlapping pool already wrote a
+        // nonzero value here; otherwise zero the element.
+        if ((idx_row == index_max_[idx_in_idxmax] &&
+             idx_col == index_max_[idx_in_idxmax + 1]) ||
+            dst[idx_row * input_h_dim_ * input_f_dim_ + idx_col] != 0) {
+          dst[idx_row * input_h_dim_ * input_f_dim_ + idx_col] =
+              src[i * num_pools_h * num_pools_f + j * num_pools_f + k];
+        } else {
+          dst[idx_row * input_h_dim_ * input_f_dim_ + idx_col] = 0;
+        }
+      }
+    }
+  }
+}
+
 template<typename Real>
 __global__
 static void _set_mat_mat_div_mat(const Real* A, const Real* B, const Real* C,
@@ -3957,6 +4100,48 @@ void cudaF_add_mat_repeated(dim3 Gr, dim3 Bl, float alpha, const float* src,
                             MatrixDim src_dim, float* dst, MatrixDim dst_dim) {
   _add_mat_repeated<<<Gr, Bl>>>(alpha, src, src_dim, dst, dst_dim);
 }
 
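+// Wrappers for the pooling kernels above.  'A_trans' selects the
+// transposed kernel, and 'stride' subsamples the rows considered inside
+// each pool (e.g. stride = 2 makes every other row of a pool a max
+// candidate; stride = 1 considers all rows).  The callers in cu-matrix.cc
+// launch these with one thread per pool.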
+void cudaF_max_mat_blocks(dim3 Gr, dim3 Bl,
+                          const float *src, float *dst, float *index_max_,
+                          const int32_cuda stride,
+                          const int32_cuda input_t_dim_,
+                          const int32_cuda pool_t_size_,
+                          const int32_cuda pool_t_step_,
+                          const int32_cuda input_h_dim_,
+                          const int32_cuda pool_h_size_,
+                          const int32_cuda pool_h_step_,
+                          const int32_cuda input_f_dim_,
+                          const int32_cuda pool_f_size_,
+                          const int32_cuda pool_f_step_,
+                          int A_trans) {
+  if (A_trans) {
+    _max_mat_blocks_trans<<<Gr, Bl>>>(src, dst, index_max_, stride,
+                                      input_t_dim_, pool_t_size_, pool_t_step_,
+                                      input_h_dim_, pool_h_size_, pool_h_step_,
+                                      input_f_dim_, pool_f_size_, pool_f_step_);
+  } else {
+    _max_mat_blocks<<<Gr, Bl>>>(src, dst, index_max_, stride,
+                                input_t_dim_, pool_t_size_, pool_t_step_,
+                                input_h_dim_, pool_h_size_, pool_h_step_,
+                                input_f_dim_, pool_f_size_, pool_f_step_);
+  }
+}
+
+void cudaF_max_mat_blocks_back(dim3 Gr, dim3 Bl,
+                               const float *src, float *dst,
+                               float *index_max_,
+                               const int32_cuda input_t_dim_,
+                               const int32_cuda pool_t_size_,
+                               const int32_cuda pool_t_step_,
+                               const int32_cuda input_h_dim_,
+                               const int32_cuda pool_h_size_,
+                               const int32_cuda pool_h_step_,
+                               const int32_cuda input_f_dim_,
+                               const int32_cuda pool_f_size_,
+                               const int32_cuda pool_f_step_) {
+  _max_mat_blocks_back<<<Gr, Bl>>>(src, dst, index_max_,
+                                   input_t_dim_, pool_t_size_, pool_t_step_,
+                                   input_h_dim_, pool_h_size_, pool_h_step_,
+                                   input_f_dim_, pool_f_size_, pool_f_step_);
+}
+
 void cudaF_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A,
                                const float *B, const float *C, float *dst,
                                MatrixDim d,
@@ -4661,6 +4846,49 @@ void cudaD_add_mat_repeated(dim3 Gr, dim3 Bl, double alpha, const double* src,
                             MatrixDim src_dim, double* dst, MatrixDim dst_dim) {
   _add_mat_repeated<<<Gr, Bl>>>(alpha, src, src_dim, dst, dst_dim);
 }
 
+void cudaD_max_mat_blocks(dim3 Gr, dim3 Bl,
+                          const double *src, double *dst, double *index_max_,
+                          const int32_cuda stride,
+                          const int32_cuda input_t_dim_,
+                          const int32_cuda pool_t_size_,
+                          const int32_cuda pool_t_step_,
+                          const int32_cuda input_h_dim_,
+                          const int32_cuda pool_h_size_,
+                          const int32_cuda pool_h_step_,
+                          const int32_cuda input_f_dim_,
+                          const int32_cuda pool_f_size_,
+                          const int32_cuda pool_f_step_,
+                          int A_trans) {
+  if (A_trans) {
+    _max_mat_blocks_trans<<<Gr, Bl>>>(src, dst, index_max_, stride,
+                                      input_t_dim_, pool_t_size_, pool_t_step_,
+                                      input_h_dim_, pool_h_size_, pool_h_step_,
+                                      input_f_dim_, pool_f_size_, pool_f_step_);
+  } else {
+    _max_mat_blocks<<<Gr, Bl>>>(src, dst, index_max_, stride,
+                                input_t_dim_, pool_t_size_, pool_t_step_,
+                                input_h_dim_, pool_h_size_, pool_h_step_,
+                                input_f_dim_, pool_f_size_, pool_f_step_);
+  }
+}
+
+void cudaD_max_mat_blocks_back(dim3 Gr, dim3 Bl,
+                               const double *src, double *dst,
+                               double *index_max_,
+                               const int32_cuda input_t_dim_,
+                               const int32_cuda pool_t_size_,
+                               const int32_cuda pool_t_step_,
+                               const int32_cuda input_h_dim_,
+                               const int32_cuda pool_h_size_,
+                               const int32_cuda pool_h_step_,
+                               const int32_cuda input_f_dim_,
+                               const int32_cuda pool_f_size_,
+                               const int32_cuda pool_f_step_) {
+  _max_mat_blocks_back<<<Gr, Bl>>>(src, dst, index_max_,
+                                   input_t_dim_, pool_t_size_, pool_t_step_,
+                                   input_h_dim_, pool_h_size_, pool_h_step_,
+                                   input_f_dim_, pool_f_size_, pool_f_step_);
+}
+
 void cudaD_set_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A,
                                const double *B, const double *C, double *dst,
                                MatrixDim d, int stride_a, int stride_b,
diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h
index 8f719a8c4a1..0712179bca8 100644
--- a/src/cudamatrix/cu-kernels.h
+++ b/src/cudamatrix/cu-kernels.h
@@ -168,6 +168,74 @@ inline void cuda_add_mat_repeated(dim3 Gr, dim3 Bl, float alpha,
                                   const float *src, MatrixDim src_dim,
                                   float *dst, MatrixDim dst_dim) {
   cudaF_add_mat_repeated(Gr, Bl, alpha, src, src_dim, dst, dst_dim);
 }
+inline void cuda_max_mat_blocks(dim3 Gr, dim3 Bl,
+                                const double *src, double *dst,
+                                double *index_max_,
+                                const int32_cuda stride,
+                                const int32_cuda input_t_dim_,
+                                const int32_cuda pool_t_size_,
+                                const int32_cuda pool_t_step_,
+                                const int32_cuda input_h_dim_,
+                                const int32_cuda pool_h_size_,
+                                const int32_cuda pool_h_step_,
+                                const int32_cuda input_f_dim_,
+                                const int32_cuda pool_f_size_,
+                                const int32_cuda pool_f_step_,
+                                int A_trans) {
+  cudaD_max_mat_blocks(Gr, Bl, src, dst, index_max_, stride,
+                       input_t_dim_, pool_t_size_, pool_t_step_,
+                       input_h_dim_, pool_h_size_, pool_h_step_,
+                       input_f_dim_, pool_f_size_, pool_f_step_, A_trans);
+}
+inline void cuda_max_mat_blocks(dim3 Gr, dim3 Bl,
+                                const float *src, float *dst,
+                                float *index_max_,
+                                const int32_cuda stride,
+                                const int32_cuda input_t_dim_,
+                                const int32_cuda pool_t_size_,
+                                const int32_cuda pool_t_step_,
+                                const int32_cuda input_h_dim_,
+                                const int32_cuda pool_h_size_,
+                                const int32_cuda pool_h_step_,
+                                const int32_cuda input_f_dim_,
+                                const int32_cuda pool_f_size_,
+                                const int32_cuda pool_f_step_,
+                                int A_trans) {
+  cudaF_max_mat_blocks(Gr, Bl, src, dst, index_max_, stride,
+                       input_t_dim_, pool_t_size_, pool_t_step_,
+                       input_h_dim_, pool_h_size_, pool_h_step_,
+                       input_f_dim_, pool_f_size_, pool_f_step_, A_trans);
+}
+inline void cuda_max_mat_blocks_back(dim3 Gr, dim3 Bl,
+                                     const double *src, double *dst,
+                                     double *index_max_,
+                                     const int32_cuda input_t_dim_,
+                                     const int32_cuda pool_t_size_,
+                                     const int32_cuda pool_t_step_,
+                                     const int32_cuda input_h_dim_,
+                                     const int32_cuda pool_h_size_,
+                                     const int32_cuda pool_h_step_,
+                                     const int32_cuda input_f_dim_,
+                                     const int32_cuda pool_f_size_,
+                                     const int32_cuda pool_f_step_) {
+  cudaD_max_mat_blocks_back(Gr, Bl, src, dst, index_max_,
+                            input_t_dim_, pool_t_size_, pool_t_step_,
+                            input_h_dim_, pool_h_size_, pool_h_step_,
+                            input_f_dim_, pool_f_size_, pool_f_step_);
+}
+inline void cuda_max_mat_blocks_back(dim3 Gr, dim3 Bl,
+                                     const float *src, float *dst,
+                                     float *index_max_,
+                                     const int32_cuda input_t_dim_,
+                                     const int32_cuda pool_t_size_,
+                                     const int32_cuda pool_t_step_,
+                                     const int32_cuda input_h_dim_,
+                                     const int32_cuda pool_h_size_,
+                                     const int32_cuda pool_h_step_,
+                                     const int32_cuda input_f_dim_,
+                                     const int32_cuda pool_f_size_,
+                                     const int32_cuda pool_f_step_) {
+  cudaF_max_mat_blocks_back(Gr, Bl, src, dst, index_max_,
+                            input_t_dim_, pool_t_size_, pool_t_step_,
+                            input_h_dim_, pool_h_size_, pool_h_step_,
+                            input_f_dim_, pool_f_size_, pool_f_step_);
+}
 inline void cuda_add_mat_diag_vec(dim3 Gr, dim3 Bl, double alpha, double *mat,
                                   MatrixDim mat_dim, const double *mat2,
                                   int mat2_row_stride, int mat2_col_stride,
diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc
index 01030bb8353..60d87ca1ca7 100644
--- a/src/cudamatrix/cu-matrix-test.cc
+++ b/src/cudamatrix/cu-matrix-test.cc
@@ -2917,6 +2917,97 @@ static void UnitTestCuMatrixEqualElementMask() {
 }
 
 
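+// Tests CuMatrixBase::MaxMatBlocks against a simple CPU reference, in both
+// the forward (pooling) and backward (derivative-routing) directions.
+// The sizes are constructed so that (a) the pools tile the input exactly,
+// i.e. input_dim = pool_size + n * pool_step, which MaxMatBlocks asserts,
+// and (b) pool_step >= pool_size, since with overlapping pools the
+// backward direction is not uniquely defined by 'index_max_'.  For example
+// pool_t_size_ = 3, pool_t_step_ = 4, n = 2 gives input_t_dim_ = 11 and
+// num_pools_t = 3.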
+template<typename Real>
+static void UnitTestCuMatrixMaxMatBlocks() {
+  for (int32 l = 0; l < 5; l++) {
+    int32 stride = RandInt(1, 5);
+    int32 pool_t_size_ = RandInt(1, 10);
+    int32 pool_t_step_ = pool_t_size_ + RandInt(0, 2);
+    int32 input_t_dim_ = pool_t_size_ + RandInt(0, 9) * pool_t_step_;
+    int32 pool_h_size_ = RandInt(1, 10);
+    int32 pool_h_step_ = pool_h_size_ + RandInt(0, 2);
+    int32 input_h_dim_ = pool_h_size_ + RandInt(0, 9) * pool_h_step_;
+    int32 pool_f_size_ = RandInt(1, 10);
+    int32 pool_f_step_ = pool_f_size_ + RandInt(0, 2);
+    int32 input_f_dim_ = pool_f_size_ + RandInt(0, 9) * pool_f_step_;
+
+    // this part tests the forward propagation.
+    CuMatrix<Real> in_value(input_t_dim_, input_h_dim_ * input_f_dim_);
+    in_value.SetRandn();
+
+    int32 num_pools_t = 1 + (input_t_dim_ - pool_t_size_) / pool_t_step_;
+    int32 num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+    int32 num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+
+    CuMatrix<Real> out_value(num_pools_t, num_pools_h * num_pools_f);
+    out_value.SetRandn();
+
+    CuVector<Real> index_max_(2 * num_pools_t * num_pools_h * num_pools_f);
+    index_max_.SetRandn();
+
+    CuMatrix<Real> out_value_copy(out_value);
+
+    out_value.MaxMatBlocks(in_value, index_max_, stride,
+                           input_t_dim_, pool_t_size_, pool_t_step_,
+                           input_h_dim_, pool_h_size_, pool_h_step_,
+                           input_f_dim_, pool_f_size_, pool_f_step_,
+                           kNoTrans);
+    int32 tmp = 0;
+    for (int32 t = 0; t < num_pools_t; t++) {
+      for (int32 h = 0; h < num_pools_h; h++) {
+        for (int32 f = 0; f < num_pools_f; f++) {
+          // initialize the maximum value, and its index, as the first
+          // element in the pool.
+          int32 max_x = t * pool_t_step_;
+          int32 max_y = h * pool_h_step_ * input_f_dim_ + f * pool_f_step_;
+          Real max_value = in_value(max_x, max_y);
+
+          // find the maximum value in the pool.
+          for (int32 x = 0; x < pool_t_size_; x += stride) {
+            int32 cur_x = t * pool_t_step_ + x;
+
+            for (int32 y = 0; y < pool_h_size_; y++) {
+              for (int32 z = 0; z < pool_f_size_; z++) {
+                int32 cur_y = (h * pool_h_step_ + y) * input_f_dim_
+                              + f * pool_f_step_ + z;
+                if (in_value(cur_x, cur_y) > max_value) {
+                  max_x = cur_x;
+                  max_y = cur_y;
+                  max_value = in_value(cur_x, cur_y);
+                }
+              }
+            }
+          }
+          out_value_copy(t, h * num_pools_f + f) = max_value;
+          index_max_(tmp) = max_x;
+          index_max_(tmp + 1) = max_y;
+          tmp += 2;
+        }
+      }
+    }
+
+    AssertEqual(out_value, out_value_copy);
+
+    // this part tests the backward propagation.
+    CuMatrix<Real> in_deriv(input_t_dim_, input_h_dim_ * input_f_dim_);
+    in_deriv.SetZero();
+    CuMatrix<Real> out_deriv(num_pools_t, num_pools_h * num_pools_f);
+    out_deriv.SetRandn();
+    CuMatrix<Real> in_deriv_copy(in_deriv);
+
+    in_deriv.MaxMatBlocks(out_deriv, index_max_, stride,
+                          input_t_dim_, pool_t_size_, pool_t_step_,
+                          input_h_dim_, pool_h_size_, pool_h_step_,
+                          input_f_dim_, pool_f_size_, pool_f_step_,
+                          kNoTrans);
+
+    for (int32 x = 0; x < 2 * num_pools_t * num_pools_h * num_pools_f;
+         x += 2) {
+      int32 row_tmp = (x / 2) / (num_pools_h * num_pools_f);
+      int32 col_tmp = (x / 2) % (num_pools_h * num_pools_f);
+      in_deriv_copy(static_cast<int32>(index_max_(x)),
+                    static_cast<int32>(index_max_(x + 1))) =
+          out_deriv(row_tmp, col_tmp);
+    }
+    AssertEqual(in_deriv, in_deriv_copy);
+  }
+}
+
 template<typename Real> void CudaMatrixUnitTest() {
   UnitTestCuMatrixApplyExpSpecial<Real>();
   UnitTestCuMatrixApplyExpLimited<Real>();
@@ -2987,6 +3078,7 @@ template<typename Real> void CudaMatrixUnitTest() {
   UnitTestCuMatrixAddToElements<Real>();
   UnitTestCuMatrixLookup<Real>();
   UnitTestCuMatrixEqualElementMask<Real>();
+  UnitTestCuMatrixMaxMatBlocks<Real>();
   // test CuVector<Real> methods
   UnitTestCuVectorAddVec<Real>();
   UnitTestCuVectorAddRowSumMat<Real>();
diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc
index beccd9dc4a5..f0c10086433 100644
--- a/src/cudamatrix/cu-matrix.cc
+++ b/src/cudamatrix/cu-matrix.cc
@@ -1187,6 +1187,140 @@ void CuMatrixBase<Real>::AddMatBlocks(Real alpha, const CuMatrixBase<Real> &A,
   }
 }
 
+
+template<typename Real>
+void CuMatrixBase<Real>::MaxMatBlocks(const CuMatrixBase<Real> &A,
+                                      CuVectorBase<Real> &index_max_,
+                                      const int32 stride,
+                                      const int32 input_t_dim_,
+                                      const int32 pool_t_size_,
+                                      const int32 pool_t_step_,
+                                      const int32 input_h_dim_,
+                                      const int32 pool_h_size_,
+                                      const int32 pool_h_step_,
+                                      const int32 input_f_dim_,
+                                      const int32 pool_f_size_,
+                                      const int32 pool_f_step_,
+                                      MatrixTransposeType transA) {
+  if (num_rows_ == 0 || num_cols_ == 0) return;
+
+  int32 num_pools_t = 1 + (input_t_dim_ - pool_t_size_) / pool_t_step_;
+  int32 num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+  int32 num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+
+  KALDI_ASSERT((input_t_dim_ - pool_t_size_) % pool_t_step_ == 0 &&
+               (input_h_dim_ - pool_h_size_) % pool_h_step_ == 0 &&
+               (input_f_dim_ - pool_f_size_) % pool_f_step_ == 0);
+  // 'index_max_' stores one (row, col) pair per pool.
+  KALDI_ASSERT(index_max_.Dim() ==
+               2 * num_pools_t * num_pools_h * num_pools_f);
+
+  if (A.NumRows() >= (transA == kNoTrans ? num_rows_ : num_cols_) &&
+      A.NumCols() >= (transA == kNoTrans ? num_cols_ : num_rows_)) {
+    // This is the "forward-propagation" version of MaxMatBlocks.
+    // It supports both regular and transposed operation.
+    if (transA == kNoTrans) {
+      KALDI_ASSERT(A.NumRows() == input_t_dim_ &&
+                   A.NumCols() == input_h_dim_ * input_f_dim_ &&
+                   num_rows_ == num_pools_t &&
+                   num_cols_ == num_pools_h * num_pools_f);
+    } else {
+      KALDI_ASSERT(A.NumCols() == input_t_dim_ &&
+                   A.NumRows() == input_h_dim_ * input_f_dim_ &&
+                   num_cols_ == num_pools_t &&
+                   num_rows_ == num_pools_h * num_pools_f);
+    }
+#if HAVE_CUDA == 1
+    if (CuDevice::Instantiate().Enabled()) {
+      CuTimer tim;
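+      // NOTE: this launches a single thread block with one thread per
+      // pool, so num_pools_t * num_pools_h * num_pools_f must not exceed
+      // the CUDA per-block limit of 1024 threads (e.g. 8 x 4 x 16 = 512
+      // pools is fine); larger layouts would need a real grid
+      // decomposition.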
+      dim3 dimBlock(num_pools_t, num_pools_h, num_pools_f);
+      dim3 dimGrid(1);
+
+      cuda_max_mat_blocks(dimGrid, dimBlock, A.data_, data_,
+                          index_max_.data_, stride,
+                          input_t_dim_, pool_t_size_, pool_t_step_,
+                          input_h_dim_, pool_h_size_, pool_h_step_,
+                          input_f_dim_, pool_f_size_, pool_f_step_,
+                          (transA == kTrans ? 1 : 0));
+      CU_SAFE_CALL(cudaGetLastError());
+
+      CuDevice::Instantiate().AccuProfile(__func__, tim);
+    } else
+#endif
+    {
+      // max-pooling without CUDA
+      int32 tmp = 0;
+      for (int32 t = 0; t < num_pools_t; t++) {
+        for (int32 h = 0; h < num_pools_h; h++) {
+          for (int32 f = 0; f < num_pools_f; f++) {
+            // initialize the maximum value, and its index, as the first
+            // element in the pool.
+            int32 max_x = t * pool_t_step_;
+            int32 max_y = h * pool_h_step_ * input_f_dim_ + f * pool_f_step_;
+            Real max_value = A(max_x, max_y);
+
+            // find the maximum value in the pool.
+            for (int32 x = 0; x < pool_t_size_; x += stride) {
+              int32 cur_x = t * pool_t_step_ + x;
+
+              for (int32 y = 0; y < pool_h_size_; y++) {
+                for (int32 z = 0; z < pool_f_size_; z++) {
+                  int32 cur_y = (h * pool_h_step_ + y) * input_f_dim_
+                                + f * pool_f_step_ + z;
+                  if (A(cur_x, cur_y) > max_value) {
+                    max_x = cur_x;
+                    max_y = cur_y;
+                    max_value = A(cur_x, cur_y);
+                  }
+                }
+              }
+            }
+            (*this)(t, h * num_pools_f + f) = max_value;
+            index_max_(tmp) = max_x;
+            index_max_(tmp + 1) = max_y;
+            tmp += 2;
+          }
+        }
+      }
+    }
+  } else {
+    // This is the "backward-propagation" version of MaxMatBlocks, where
+    // *this is larger than A.
+    if (transA == kNoTrans) {
+      KALDI_ASSERT(A.NumRows() == num_pools_t &&
+                   A.NumCols() == num_pools_h * num_pools_f &&
+                   num_rows_ == input_t_dim_ &&
+                   num_cols_ == input_h_dim_ * input_f_dim_);
+    } else {
+      KALDI_ASSERT(A.NumCols() == num_pools_t &&
+                   A.NumRows() == num_pools_h * num_pools_f &&
+                   num_cols_ == input_t_dim_ &&
+                   num_rows_ == input_h_dim_ * input_f_dim_);
+    }
+
+#if HAVE_CUDA == 1
+    if (CuDevice::Instantiate().Enabled()) {
+      CuTimer tim;
+      dim3 dimBlock(num_pools_t, num_pools_h, num_pools_f);
+      dim3 dimGrid(1);
+
+      cuda_max_mat_blocks_back(dimGrid, dimBlock, A.data_, data_,
+                               index_max_.data_,
+                               input_t_dim_, pool_t_size_, pool_t_step_,
+                               input_h_dim_, pool_h_size_, pool_h_step_,
+                               input_f_dim_, pool_f_size_, pool_f_step_);
+
+      CU_SAFE_CALL(cudaGetLastError());
+      CuDevice::Instantiate().AccuProfile(__func__, tim);
+    } else
+#endif
+    {
+      // max-pooling backward propagation without CUDA
+      this->SetZero();
+      for (int32 x = 0; x < 2 * num_pools_t * num_pools_h * num_pools_f;
+           x += 2) {
+        int32 row_tmp = (x / 2) / (num_pools_h * num_pools_f);
+        int32 col_tmp = (x / 2) % (num_pools_h * num_pools_f);
+        (*this)(static_cast<int32>(index_max_(x)),
+                static_cast<int32>(index_max_(x + 1))) = A(row_tmp, col_tmp);
+      }
+    }
+  }
+}
+
 /// dst = a * b / c (by element; when c = 0, dst = a)
 /// dst can be an alias of a, b or c safely and get expected result.
 template<typename Real>
diff --git a/src/cudamatrix/cu-matrix.h b/src/cudamatrix/cu-matrix.h
index 03e69b639d3..345d3fd980b 100644
--- a/src/cudamatrix/cu-matrix.h
+++ b/src/cudamatrix/cu-matrix.h
@@ -509,6 +509,84 @@ class CuMatrixBase {
   void AddMatBlocks(Real alpha, const CuMatrixBase<Real> &A,
                     MatrixTransposeType trans = kNoTrans);
 
+
+  /// This function does max-pooling over blocks of its input.  A detailed
+  /// description is given with the MaxPoolingOverBlock component in
+  /// nnet-convolutional-component.h.
+  ///
+  /// Note that this function has two versions, 'forward-propagate' and
+  /// 'backward-propagate':
+  ///
+  /// (1) When the input matrix A is larger than *this, it is the
+  ///     'forward-propagate' version: the function does the max-pooling
+  ///     determined by the parameters, and stores the index of the
+  ///     maximum value of each pool in the vector 'index_max_' for use in
+  ///     back-propagation.
+  ///
+  /// (2) When the input matrix A is smaller than *this, it is the
+  ///     'backward-propagate' version: guided by the vector 'index_max_',
+  ///     the function sets all values of *this (the input derivative)
+  ///     whose indices do not correspond to the maximum of some pool to
+  ///     zero, and copies the output derivative from A to the positions
+  ///     that do correspond to pool maxima.
+  ///
+  /// Parameters:
+  ///
+  /// size of the input matrix:
+  ///   input_t_dim_   size of the input along the t-axis
+  ///                  (e.g. number of time steps)
+  ///   input_h_dim_   size of the input along the h-axis
+  ///                  (e.g. number of mel-frequency bins)
+  ///   input_f_dim_   size of the input along the f-axis
+  ///                  (e.g. number of filters in the ConvolutionComponent)
+  ///
+  /// block size:
+  ///   pool_t_size_   size of the pooling window along the t-axis
+  ///   pool_h_size_   size of the pooling window along the h-axis
+  ///   pool_f_size_   size of the pooling window along the f-axis
+  ///   (so the dimension of a block is
+  ///    pool_t_size_ by pool_h_size_ * pool_f_size_)
+  ///
+  /// stride size:
+  ///   pool_t_step_   the number of steps taken along the t-axis of the
+  ///                  input before computing the next pool (i.e. the
+  ///                  stride along the t-axis)
+  ///   pool_h_step_   the number of steps taken along the h-axis of the
+  ///                  input before computing the next pool (i.e. the
+  ///                  stride along the h-axis)
+  ///   pool_f_step_   the number of steps taken along the f-axis of the
+  ///                  input before computing the next pool (i.e. the
+  ///                  stride along the f-axis)
+  ///   stride         the time stride within blocks: we take one row of
+  ///                  max-pooling candidates every 'stride' rows of the
+  ///                  input matrix.
+  ///
+  ///   index_max_     a vector that stores the index of each maximum
+  ///                  value as a (row, column) pair, used in
+  ///                  back-propagation.  The size of this vector is
+  ///                  2 * num_pools_t * num_pools_h * num_pools_f.
+  ///
+  /// So there are num_pools_t * num_pools_h * num_pools_f blocks in
+  /// total, where:
+  ///   num_pools_t = 1 + (input_t_dim_ - pool_t_size_) / pool_t_step_;
+  ///   // the number of blocks in the t dimension
+  ///   num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+  ///   // the number of blocks in the h dimension
+  ///   num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+  ///   // the number of blocks in the f dimension
+  /// Given indices idx_t, idx_h, idx_f along the three axes, the block is
+  /// located at:
+  ///   row indices:
+  ///     [start_row, start_row + pool_t_size_ - 1];
+  ///   column indices: the union of the sets
+  ///     [start_col(0), start_col(0) + pool_f_size_ - 1],
+  ///     [start_col(1), start_col(1) + pool_f_size_ - 1],
+  ///     ...,
+  ///     [start_col(pool_h_size_ - 1),
+  ///      start_col(pool_h_size_ - 1) + pool_f_size_ - 1]
+  ///   where:
+  ///     start_row = idx_t * pool_t_step_
+  ///     start_col(i) = (idx_h * pool_h_step_ + i) * input_f_dim_
+  ///                    + idx_f * pool_f_step_
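+  ///
+  /// Worked example (the numbers are illustrative only): with
+  /// input_t_dim_ = 11, pool_t_size_ = 3, pool_t_step_ = 2 we get
+  /// num_pools_t = 1 + (11 - 3) / 2 = 5, and similarly for the other two
+  /// axes.  A typical forward call, where 'out' has size
+  /// num_pools_t x (num_pools_h * num_pools_f), 'in' has size
+  /// input_t_dim_ x (input_h_dim_ * input_f_dim_), and 'index_max' has
+  /// dimension 2 * num_pools_t * num_pools_h * num_pools_f, looks like:
+  ///   out.MaxMatBlocks(in, index_max, 1 /* stride */,
+  ///                    11, 3, 2,  // t-axis: dim, pool size, pool step
+  ///                    input_h_dim, pool_h_size, pool_h_step,
+  ///                    input_f_dim, pool_f_size, pool_f_step, kNoTrans);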
+  void MaxMatBlocks(const CuMatrixBase<Real> &A,
+                    CuVectorBase<Real> &index_max_, const int32 stride,
+                    const int32 input_t_dim_, const int32 pool_t_size_,
+                    const int32 pool_t_step_,
+                    const int32 input_h_dim_, const int32 pool_h_size_,
+                    const int32 pool_h_step_,
+                    const int32 input_f_dim_, const int32 pool_f_size_,
+                    const int32 pool_f_step_,
+                    MatrixTransposeType trans = kNoTrans);
   /// (for each column c of *this), c = alpha * col + beta * c
   void AddVecToCols(Real alpha, const CuVectorBase<Real> &col,
                     Real beta = 1.0);
   /// (for each row r of *this), r = alpha * row + beta * r
diff --git a/src/nnet3/nnet-convolutional-component.cc b/src/nnet3/nnet-convolutional-component.cc
index bea3b9d31d5..7bcca4bd3dc 100644
--- a/src/nnet3/nnet-convolutional-component.cc
+++ b/src/nnet3/nnet-convolutional-component.cc
@@ -666,6 +666,171 @@ void TimeHeightConvolutionComponent::PrecomputedIndexes::Read(
   ExpectToken(is, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
 }
 
+MaxPoolingOverBlock::MaxPoolingOverBlock(
+    const MaxPoolingOverBlock &other):
+    input_t_dim_(other.input_t_dim_),
+    input_h_dim_(other.input_h_dim_),
+    input_f_dim_(other.input_f_dim_),
+    pool_t_size_(other.pool_t_size_),
+    pool_h_size_(other.pool_h_size_),
+    pool_f_size_(other.pool_f_size_),
+    pool_t_step_(other.pool_t_step_),
+    pool_h_step_(other.pool_h_step_),
+    pool_f_step_(other.pool_f_step_) { }
+
+// acquire the input dim
+int32 MaxPoolingOverBlock::InputDim() const {
+  return input_t_dim_ * input_h_dim_ * input_f_dim_;
+}
+
+// acquire the output dim
+int32 MaxPoolingOverBlock::OutputDim() const {
+  int32 num_pools_t = 1 + (input_t_dim_ - pool_t_size_) / pool_t_step_;
+  int32 num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+  int32 num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+  return num_pools_t * num_pools_h * num_pools_f;
+}
+
+// check the component parameters
+void MaxPoolingOverBlock::Check() const {
+  // sanity check of the max-pooling parameters
+  KALDI_ASSERT(input_t_dim_ > 0);
+  KALDI_ASSERT(input_h_dim_ > 0);
+  KALDI_ASSERT(input_f_dim_ > 0);
+  KALDI_ASSERT(pool_t_size_ > 0);
+  KALDI_ASSERT(pool_h_size_ > 0);
+  KALDI_ASSERT(pool_f_size_ > 0);
+  KALDI_ASSERT(pool_t_step_ > 0);
+  KALDI_ASSERT(pool_h_step_ > 0);
+  KALDI_ASSERT(pool_f_step_ > 0);
+  KALDI_ASSERT(input_t_dim_ >= pool_t_size_);
+  KALDI_ASSERT(input_h_dim_ >= pool_h_size_);
+  KALDI_ASSERT(input_f_dim_ >= pool_f_size_);
+  KALDI_ASSERT(pool_t_size_ >= pool_t_step_);
+  KALDI_ASSERT(pool_h_size_ >= pool_h_step_);
+  KALDI_ASSERT(pool_f_size_ >= pool_f_step_);
+  KALDI_ASSERT((input_t_dim_ - pool_t_size_) % pool_t_step_ == 0);
+  KALDI_ASSERT((input_h_dim_ - pool_h_size_) % pool_h_step_ == 0);
+  KALDI_ASSERT((input_f_dim_ - pool_f_size_) % pool_f_step_ == 0);
+}
+
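+// initialize the component from a config line; for example (the values
+// here are illustrative only):
+//   input-t-dim=11 input-h-dim=40 input-f-dim=32 \
+//   pool-t-size=3 pool-h-size=2 pool-f-size=2 \
+//   pool-t-step=2 pool-h-step=2 pool-f-step=2
+// (each input dim minus the corresponding pool size must be divisible by
+// the corresponding pool step; see Check()).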
+void MaxPoolingOverBlock::InitFromConfig(ConfigLine *cfl) {
+  bool ok = true;
+
+  ok = ok && cfl->GetValue("input-t-dim", &input_t_dim_);
+  ok = ok && cfl->GetValue("input-h-dim", &input_h_dim_);
+  ok = ok && cfl->GetValue("input-f-dim", &input_f_dim_);
+  ok = ok && cfl->GetValue("pool-t-size", &pool_t_size_);
+  ok = ok && cfl->GetValue("pool-h-size", &pool_h_size_);
+  ok = ok && cfl->GetValue("pool-f-size", &pool_f_size_);
+  ok = ok && cfl->GetValue("pool-t-step", &pool_t_step_);
+  ok = ok && cfl->GetValue("pool-h-step", &pool_h_step_);
+  ok = ok && cfl->GetValue("pool-f-step", &pool_f_step_);
+
+  if (cfl->HasUnusedValues())
+    KALDI_ERR << "Could not process these elements in initializer: "
+              << cfl->UnusedValues();
+  if (!ok)
+    KALDI_ERR << "Bad initializer " << cfl->WholeLine();
+
+  Check();
+}
+
+void MaxPoolingOverBlock::Read(std::istream &is, bool binary) {
+  ExpectOneOrTwoTokens(is, binary, "<MaxPoolingOverBlock>", "<InputTDim>");
+  ReadBasicType(is, binary, &input_t_dim_);
+  ExpectToken(is, binary, "<InputHDim>");
+  ReadBasicType(is, binary, &input_h_dim_);
+  ExpectToken(is, binary, "<InputFDim>");
+  ReadBasicType(is, binary, &input_f_dim_);
+  ExpectToken(is, binary, "<PoolTSize>");
+  ReadBasicType(is, binary, &pool_t_size_);
+  ExpectToken(is, binary, "<PoolHSize>");
+  ReadBasicType(is, binary, &pool_h_size_);
+  ExpectToken(is, binary, "<PoolFSize>");
+  ReadBasicType(is, binary, &pool_f_size_);
+  ExpectToken(is, binary, "<PoolTStep>");
+  ReadBasicType(is, binary, &pool_t_step_);
+  ExpectToken(is, binary, "<PoolHStep>");
+  ReadBasicType(is, binary, &pool_h_step_);
+  ExpectToken(is, binary, "<PoolFStep>");
+  ReadBasicType(is, binary, &pool_f_step_);
+  ExpectToken(is, binary, "</MaxPoolingOverBlock>");
+  Check();
+}
+
+void MaxPoolingOverBlock::Write(std::ostream &os, bool binary) const {
+  WriteToken(os, binary, "<MaxPoolingOverBlock>");
+  WriteToken(os, binary, "<InputTDim>");
+  WriteBasicType(os, binary, input_t_dim_);
+  WriteToken(os, binary, "<InputHDim>");
+  WriteBasicType(os, binary, input_h_dim_);
+  WriteToken(os, binary, "<InputFDim>");
+  WriteBasicType(os, binary, input_f_dim_);
+  WriteToken(os, binary, "<PoolTSize>");
+  WriteBasicType(os, binary, pool_t_size_);
+  WriteToken(os, binary, "<PoolHSize>");
+  WriteBasicType(os, binary, pool_h_size_);
+  WriteToken(os, binary, "<PoolFSize>");
+  WriteBasicType(os, binary, pool_f_size_);
+  WriteToken(os, binary, "<PoolTStep>");
+  WriteBasicType(os, binary, pool_t_step_);
+  WriteToken(os, binary, "<PoolHStep>");
+  WriteBasicType(os, binary, pool_h_step_);
+  WriteToken(os, binary, "<PoolFStep>");
+  WriteBasicType(os, binary, pool_f_step_);
+  WriteToken(os, binary, "</MaxPoolingOverBlock>");
+}
+
+// display information about the component
+std::string MaxPoolingOverBlock::Info() const {
+  std::ostringstream stream;
+  stream << Type()
+         << ", input-t-dim=" << input_t_dim_
+         << ", input-h-dim=" << input_h_dim_
+         << ", input-f-dim=" << input_f_dim_
+         << ", pool-t-size=" << pool_t_size_
+         << ", pool-h-size=" << pool_h_size_
+         << ", pool-f-size=" << pool_f_size_
+         << ", pool-t-step=" << pool_t_step_
+         << ", pool-h-step=" << pool_h_step_
+         << ", pool-f-step=" << pool_f_step_;
+  return stream.str();
+}
+
+void* MaxPoolingOverBlock::Propagate(const ComponentPrecomputedIndexes *indexes,
+                                     const CuMatrixBase<BaseFloat> &in_value,
+                                     CuMatrixBase<BaseFloat> *out_value) const {
+  // make sure 'index_max_' can hold one (row, col) pair per pool; it is
+  // declared mutable, since Propagate() is const.
+  if (index_max_.Dim() != 2 * OutputDim())
+    index_max_.Resize(2 * OutputDim());
+  // stride = 1: every row within a pool is a max candidate.
+  out_value->MaxMatBlocks(in_value, index_max_, 1,
+                          input_t_dim_, pool_t_size_, pool_t_step_,
+                          input_h_dim_, pool_h_size_, pool_h_step_,
+                          input_f_dim_, pool_f_size_, pool_f_step_,
+                          kNoTrans);
+  return NULL;
+}
+
+void MaxPoolingOverBlock::Backprop(
+    const std::string &debug_info,
+    const ComponentPrecomputedIndexes *indexes,
+    const CuMatrixBase<BaseFloat> &, // in_value,
+    const CuMatrixBase<BaseFloat> &, // out_value,
+    const CuMatrixBase<BaseFloat> &out_deriv,
+    void *memo,
+    Component *to_update,
+    CuMatrixBase<BaseFloat> *in_deriv) const {
+  if (in_deriv) {
+    in_deriv->MaxMatBlocks(out_deriv, index_max_, 1,
+                           input_t_dim_, pool_t_size_, pool_t_step_,
+                           input_h_dim_, pool_h_size_, pool_h_step_,
+                           input_f_dim_, pool_f_size_, pool_f_step_,
+                           kNoTrans);
+  }
+}
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-convolutional-component.h b/src/nnet3/nnet-convolutional-component.h
index 35cf0de11c9..6cd421eaf5a 100644
--- a/src/nnet3/nnet-convolutional-component.h
+++ b/src/nnet3/nnet-convolutional-component.h
@@ -370,8 +370,190 @@ class TimeHeightConvolutionComponent: public UpdatableComponent {
   OnlineNaturalGradient preconditioner_out_;
 };
 
-
-
+/** MaxPoolingOverBlock takes the maximum value over blocks of its input;
+    this component is designed to be compatible with
+    TimeHeightConvolutionComponent.
+
+    MaxPoolingOverBlock:
+    Max-pooling of this kind was first used in convolutional networks
+    (ConvNets), and is closely related to the maxout nonlinearity.  Each
+    output element of this component is the maximum of a block of input
+    elements, where the block has dimension
+    (pool_t_size_, pool_h_size_ * pool_f_size_).
+    Blocks may overlap if the step on any axis is smaller than the
+    corresponding pool size (e.g. pool_t_step_ < pool_t_size_).
+    If the steps are equal to their pool sizes, there is no overlap;
+    if they all equal 1, the blocks overlap to the greatest possible
+    extent.
+
+    This component is designed to be used after a ConvolutionComponent,
+    so that the input matrix is propagated from a 2-d convolutional
+    layer.  It performs max-pooling along all three axes.
+
+    Input: a matrix with dimensions:
+      t: (e.g. time)
+      h: (e.g. height, mel-frequency)
+      f: (e.g. channels, like the number of filters in the
+         ConvolutionComponent)
+
+    Parameters:
+
+      input_t_dim_   size of the input along the t-axis
+                     (e.g. number of time steps)
+      input_h_dim_   size of the input along the h-axis
+                     (e.g. number of mel-frequency bins)
+      input_f_dim_   size of the input along the f-axis
+                     (e.g. number of filters in the ConvolutionComponent)
+
+      pool_t_size_   size of the pooling window along the t-axis
+      pool_h_size_   size of the pooling window along the h-axis
+      pool_f_size_   size of the pooling window along the f-axis
+
+      pool_t_step_   the number of steps taken along the t-axis of the
+                     input before computing the next pool (i.e. the
+                     stride along the t-axis)
+      pool_h_step_   the number of steps taken along the h-axis of the
+                     input before computing the next pool (i.e. the
+                     stride along the h-axis)
+      pool_f_step_   the number of steps taken along the f-axis of the
+                     input before computing the next pool (i.e. the
+                     stride along the f-axis)
+
+      index_max_     a vector that stores the index of each maximum value
+                     as a (row, column) pair, used in back-propagation.
+                     The size of this vector is
+                     2 * num_pools_t * num_pools_h * num_pools_f.
+
+    So there are num_pools_t * num_pools_h * num_pools_f blocks in total,
+    where:
+      num_pools_t = 1 + (input_t_dim_ - pool_t_size_) / pool_t_step_;
+      // the number of blocks in the t dimension
+      num_pools_h = 1 + (input_h_dim_ - pool_h_size_) / pool_h_step_;
+      // the number of blocks in the h dimension
+      num_pools_f = 1 + (input_f_dim_ - pool_f_size_) / pool_f_step_;
+      // the number of blocks in the f dimension
+
+    Given indices idx_t, idx_h, idx_f along the three axes, the block is
+    located at:
+      row indices:
+        [start_row, start_row + pool_t_size_ - 1];
+      column indices: the union of the sets
+        [start_col(0), start_col(0) + pool_f_size_ - 1],
+        [start_col(1), start_col(1) + pool_f_size_ - 1],
+        ...,
+        [start_col(pool_h_size_ - 1),
+         start_col(pool_h_size_ - 1) + pool_f_size_ - 1]
+      where:
+        start_row = idx_t * pool_t_step_
+        start_col(i) = (idx_h * pool_h_step_ + i) * input_f_dim_
+                       + idx_f * pool_f_step_
+
+    Example:
+    We store the 3-d input in a 2-d matrix by concatenating, within each
+    row, the f values for each h, like this:
+
+              h = 0                     h = 1
+      |------------------------|------------------------|----...
+       f=0  f=1  f=2  ...  f=n  f=0  f=1  f=2  ...  f=n
+      |----|----|----|----|----|----|----|----|----|----|----...
+  t=0  **** **** **** **** ****|**** **** **** **** ****|****...   -
+  t=1  **** **** **** **** ****|**** **** **** **** ****|****...   | m
+  t=2  **** **** **** **** ****|**** **** **** **** ****|****...   | a
+  t=3  **** **** **** **** ****|**** **** **** **** ****|****...   | t
+  t=4  **** **** **** **** ****|**** **** **** **** ****|****...   | r
+  t=5  **** **** **** **** ****|**** **** **** **** ****|****...   | i
+  t=6  **** **** **** **** ****|**** **** **** **** ****|****...   | x
+  t=7  **** **** **** **** ****|**** **** **** **** ****|****...   -
+
+    In this case, if we set pool_t_size = 2, pool_t_step = 1,
+                            pool_h_size = 2, pool_h_step = 1,
+                            pool_f_size = 2, pool_f_step = 1,
+    then the pooling blocks look like this:
+
+         h = 0     h = 1          h = 0     h = 1                h = 1     h = 2
+      |---------|---------|    |---------|---------|         |---------|---------|
+       f=0  f=1  f=0  f=1       f=1  f=2  f=1  f=2             f=0  f=1  f=0  f=1
+      |----|----|----|----|    |----|----|----|----|   ...   |----|----|----|----|  ......
+  t=0  **** **** **** ****  t=0  **** **** **** ****     t=0  **** **** **** ****
+  t=1  **** **** **** ****  t=1  **** **** **** ****     t=1  **** **** **** ****
+
+         h = 0     h = 1          h = 0     h = 1                h = 1     h = 2
+      |---------|---------|    |---------|---------|         |---------|---------|
+       f=0  f=1  f=0  f=1       f=1  f=2  f=1  f=2             f=0  f=1  f=0  f=1
+      |----|----|----|----|    |----|----|----|----|   ...   |----|----|----|----|  ......
+  t=1  **** **** **** ****  t=1  **** **** **** ****     t=1  **** **** **** ****
+  t=2  **** **** **** ****  t=2  **** **** **** ****     t=2  **** **** **** ****
+
+      (and so on down the t-axis)
+
+    Since the filter stride (pool_f_step) is usually smaller than the
+    height stride (pool_h_step), we arrange each row of the output as:
+      (all filters for height 0)(all filters for height 1)...
+
+    Output: the output is also a 2-d matrix, with dimension
+    (num_pools_t by num_pools_h * num_pools_f), each element of which
+    corresponds to one block.
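+
+    Example config line for this component (the values are illustrative
+    only, and must satisfy the divisibility checks in Check()):
+      component name=pool1 type=MaxPoolingOverBlock input-t-dim=11 \
+        input-h-dim=40 input-f-dim=32 pool-t-size=3 pool-h-size=2 \
+        pool-f-size=2 pool-t-step=2 pool-h-step=2 pool-f-step=2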
+ */
+class MaxPoolingOverBlock: public Component {
+ public:
+  explicit MaxPoolingOverBlock(const MaxPoolingOverBlock &other);
+  MaxPoolingOverBlock(): input_t_dim_(0), input_h_dim_(0), input_f_dim_(0),
+                         pool_t_size_(0), pool_h_size_(0), pool_f_size_(0),
+                         pool_t_step_(0), pool_h_step_(0), pool_f_step_(0) { }
+  virtual std::string Type() const { return "MaxPoolingOverBlock"; }
+  virtual int32 Properties() const {
+    // note: Backprop() sets in_deriv rather than adding to it, so we do
+    // not return kBackpropAdds here.
+    return kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput;
+  }
+  virtual void InitFromConfig(ConfigLine *cfl);
+  virtual int32 InputDim() const;
+  virtual int32 OutputDim() const;
+  virtual void Read(std::istream &is, bool binary);
+  virtual void Write(std::ostream &os, bool binary) const;
+  virtual std::string Info() const;
+  virtual Component* Copy() const { return new MaxPoolingOverBlock(*this); }
+  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
+                          const CuMatrixBase<BaseFloat> &in_value,
+                          CuMatrixBase<BaseFloat> *out_value) const;
+  virtual void Backprop(const std::string &debug_info,
+                        const ComponentPrecomputedIndexes *indexes,
+                        const CuMatrixBase<BaseFloat> &, // in_value,
+                        const CuMatrixBase<BaseFloat> &, // out_value,
+                        const CuMatrixBase<BaseFloat> &out_deriv,
+                        void *memo,
+                        Component *to_update,
+                        CuMatrixBase<BaseFloat> *in_deriv) const;
+  virtual void Check() const;
+
+ protected:
+  int32 input_t_dim_;  // size of the input along the t-axis
+                       // (e.g. number of time steps)
+  int32 input_h_dim_;  // size of the input along the h-axis
+                       // (e.g. number of mel-frequency bins)
+  int32 input_f_dim_;  // size of the input along the f-axis
+                       // (e.g. number of filters in the
+                       // ConvolutionComponent)
+
+  int32 pool_t_size_;  // size of the pooling window along the t-axis
+  int32 pool_h_size_;  // size of the pooling window along the h-axis
+  int32 pool_f_size_;  // size of the pooling window along the f-axis
+
+  int32 pool_t_step_;  // the number of steps taken along the t-axis of
+                       // the input before computing the next pool
+  int32 pool_h_step_;  // the number of steps taken along the h-axis of
+                       // the input before computing the next pool
+  int32 pool_f_step_;  // the number of steps taken along the f-axis of
+                       // the input before computing the next pool
+
+  // the (row, col) indices of the maximum value of each pool; filled in
+  // by Propagate() and used by Backprop(), hence mutable.
+  mutable CuVector<BaseFloat> index_max_;
+
+  MaxPoolingOverBlock &operator = (const MaxPoolingOverBlock &other);  // Disallow.
+};
 
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc
index 4eb078c0fcb..081e61bfa28 100644
--- a/src/nnet3/nnet-simple-component.cc
+++ b/src/nnet3/nnet-simple-component.cc
@@ -5860,6 +5860,5 @@ void SumBlockComponent::Backprop(
   }
 }
 
-
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h
index 3929c253aab..55e7ff703cf 100644
--- a/src/nnet3/nnet-simple-component.h
+++ b/src/nnet3/nnet-simple-component.h
@@ -1220,7 +1220,6 @@ class SumBlockComponent: public Component {
   SumBlockComponent &operator = (const SumBlockComponent &other);  // Disallow.
 };
 
-
 /* ClipGradientComponent just duplicates its input, but clips gradients
    during backpropagation if they cross a predetermined threshold.