From 09ec7031f3c13d0a4a0c04cf2bcfa60007967098 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Mon, 26 Jan 2026 21:38:47 -0800 Subject: [PATCH 1/2] [REFINE] WaveNet: Refactor _LayerArray constructor to use LayerArrayParams - Updated the _LayerArray constructor to accept a LayerArrayParams object, simplifying parameter management and improving code readability. - Adjusted the internal logic to utilize the new params structure for layer initialization, ensuring consistency across layer configurations. - Modified relevant test files to align with the new constructor signature, maintaining comprehensive test coverage for layer array functionality. --- NAM/wavenet.cpp | 61 +++++-------------- NAM/wavenet.h | 37 +---------- tools/test/test_wavenet/test_layer_array.cpp | 40 +++++++----- .../test/test_wavenet/test_real_time_safe.cpp | 11 ++-- .../test/test_wavenet_configurable_gating.cpp | 11 ++-- 5 files changed, 55 insertions(+), 105 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 6af3d85..17d065a 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -212,44 +212,21 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma // LayerArray ================================================================= -nam::wavenet::_LayerArray::_LayerArray( - const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, - const int kernel_size, const std::vector& dilations, - const std::vector& activation_configs, const std::vector& gating_modes, - const bool head_bias, const int groups_input, const int groups_input_mixin, const int groups_1x1, - const Head1x1Params& head1x1_params, const std::vector& secondary_activation_configs, - const _FiLMParams& conv_pre_film_params, const _FiLMParams& conv_post_film_params, - const _FiLMParams& input_mixin_pre_film_params, const _FiLMParams& input_mixin_post_film_params, - const _FiLMParams& activation_pre_film_params, const _FiLMParams& activation_post_film_params, - const _FiLMParams& _1x1_post_film_params, const _FiLMParams& head1x1_post_film_params) -: _rechannel(input_size, channels, false) -, _head_rechannel(head1x1_params.active ? head1x1_params.out_channels : bottleneck, head_size, head_bias) -, _head_output_size(head1x1_params.active ? head1x1_params.out_channels : bottleneck) +nam::wavenet::_LayerArray::_LayerArray(const LayerArrayParams& params) +: _rechannel(params.input_size, params.channels, false) +, _head_rechannel(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck, + params.head_size, params.head_bias) +, _head_output_size(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck) { - const size_t num_layers = dilations.size(); - if (activation_configs.size() != num_layers) - { - throw std::invalid_argument("_LayerArray: dilations size (" + std::to_string(num_layers) - + ") must match activation_configs size (" + std::to_string(activation_configs.size()) - + ")"); - } - if (gating_modes.size() != num_layers) - { - throw std::invalid_argument("_LayerArray: dilations size (" + std::to_string(num_layers) - + ") must match gating_modes size (" + std::to_string(gating_modes.size()) + ")"); - } - if (secondary_activation_configs.size() != num_layers) - { - throw std::invalid_argument("_LayerArray: dilations size (" + std::to_string(num_layers) - + ") must match secondary_activation_configs size (" - + std::to_string(secondary_activation_configs.size()) + ")"); - } - for (size_t i = 0; i < dilations.size(); i++) + const size_t num_layers = params.dilations.size(); + for (size_t i = 0; i < num_layers; i++) this->_layers.push_back( - _Layer(condition_size, channels, bottleneck, kernel_size, dilations[i], activation_configs[i], gating_modes[i], - groups_input, groups_input_mixin, groups_1x1, head1x1_params, secondary_activation_configs[i], - conv_pre_film_params, conv_post_film_params, input_mixin_pre_film_params, input_mixin_post_film_params, - activation_pre_film_params, activation_post_film_params, _1x1_post_film_params, head1x1_post_film_params)); + _Layer(params.condition_size, params.channels, params.bottleneck, params.kernel_size, params.dilations[i], + params.activation_configs[i], params.gating_modes[i], params.groups_input, params.groups_input_mixin, + params.groups_1x1, params.head1x1_params, params.secondary_activation_configs[i], + params.conv_pre_film_params, params.conv_post_film_params, params.input_mixin_pre_film_params, + params.input_mixin_post_film_params, params.activation_pre_film_params, params.activation_post_film_params, + params._1x1_post_film_params, params.head1x1_post_film_params)); } void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize) @@ -397,17 +374,7 @@ nam::wavenet::WaveNet::WaveNet(const int in_channels, throw std::runtime_error(ss.str().c_str()); } } - this->_layer_arrays.push_back(nam::wavenet::_LayerArray( - layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size, - layer_array_params[i].channels, layer_array_params[i].bottleneck, layer_array_params[i].kernel_size, - layer_array_params[i].dilations, layer_array_params[i].activation_configs, layer_array_params[i].gating_modes, - layer_array_params[i].head_bias, layer_array_params[i].groups_input, layer_array_params[i].groups_input_mixin, - layer_array_params[i].groups_1x1, layer_array_params[i].head1x1_params, - layer_array_params[i].secondary_activation_configs, layer_array_params[i].conv_pre_film_params, - layer_array_params[i].conv_post_film_params, layer_array_params[i].input_mixin_pre_film_params, - layer_array_params[i].input_mixin_post_film_params, layer_array_params[i].activation_pre_film_params, - layer_array_params[i].activation_post_film_params, layer_array_params[i]._1x1_post_film_params, - layer_array_params[i].head1x1_post_film_params)); + this->_layer_arrays.push_back(nam::wavenet::_LayerArray(layer_array_params[i])); if (i > 0) if (layer_array_params[i].channels != layer_array_params[i - 1].head_size) { diff --git a/NAM/wavenet.h b/NAM/wavenet.h index a2ce917..06c56b2 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -427,40 +427,9 @@ class LayerArrayParams class _LayerArray { public: - /// \brief Constructor with GatingMode enum and typed ActivationConfig - /// \param input_size Input size (number of channels) to the layer array - /// \param condition_size Size of the conditioning input - /// \param head_size Size of the head output (after head rechannel) - /// \param channels Number of channels in each layer - /// \param bottleneck Bottleneck size (internal channel count) - /// \param kernel_size Kernel size for dilated convolutions - /// \param dilations Vector of dilation factors, one per layer - /// \param activation_configs Vector of primary activation configurations, one per layer - /// \param gating_modes Vector of gating modes, one per layer - /// \param head_bias Whether to use bias in the head rechannel - /// \param groups_input Number of groups for input convolutions - /// \param groups_input_mixin Number of groups for input mixin - /// \param groups_1x1 Number of groups for 1x1 convolutions - /// \param head1x1_params Parameters for optional head1x1 convolutions - /// \param secondary_activation_configs Vector of secondary activation configs for gating/blending, one per layer - /// \param conv_pre_film_params FiLM parameters before input convolutions - /// \param conv_post_film_params FiLM parameters after input convolutions - /// \param input_mixin_pre_film_params FiLM parameters before input mixin - /// \param input_mixin_post_film_params FiLM parameters after input mixin - /// \param activation_pre_film_params FiLM parameters before activation - /// \param activation_post_film_params FiLM parameters after activation - /// \param _1x1_post_film_params FiLM parameters after 1x1 convolutions - /// \param head1x1_post_film_params FiLM parameters after head1x1 convolutions - _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels, - const int bottleneck, const int kernel_size, const std::vector& dilations, - const std::vector& activation_configs, - const std::vector& gating_modes, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const Head1x1Params& head1x1_params, - const std::vector& secondary_activation_configs, - const _FiLMParams& conv_pre_film_params, const _FiLMParams& conv_post_film_params, - const _FiLMParams& input_mixin_pre_film_params, const _FiLMParams& input_mixin_post_film_params, - const _FiLMParams& activation_pre_film_params, const _FiLMParams& activation_post_film_params, - const _FiLMParams& _1x1_post_film_params, const _FiLMParams& head1x1_post_film_params); + /// \brief Constructor with LayerArrayParams + /// \param params Parameters for constructing the layer array + _LayerArray(const LayerArrayParams& params); /// \brief Resize all arrays to be able to process maxBufferSize frames /// \param maxBufferSize Maximum number of frames to process in a single call diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp index fc81896..aa6c5a5 100644 --- a/tools/test/test_wavenet/test_layer_array.cpp +++ b/tools/test/test_wavenet/test_layer_array.cpp @@ -32,10 +32,13 @@ static nam::wavenet::_LayerArray make_layer_array( std::vector gating_modes(dilations.size(), gating_mode); std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); - return nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation_configs, gating_modes, head_bias, groups_input, groups_input_mixin, - groups_1x1, head1x1_params, secondary_activation_configs, film_params, film_params, - film_params, film_params, film_params, film_params, film_params, film_params); + std::vector dilations_copy = dilations; // Make a copy since we need to move it + nam::wavenet::LayerArrayParams params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_copy), + std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, groups_1x1, + head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params); + return nam::wavenet::_LayerArray(params); } // Test layer array construction and basic processing void test_layer_array_basic() @@ -214,10 +217,12 @@ void test_layer_array_different_activations() assert(secondary_activation_configs.size() == dilations.size()); auto film_params = make_default_film_params(); - nam::wavenet::_LayerArray layer_array( - input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation_configs, - gating_modes, head_bias, groups, groups_input_mixin, groups_1x1, head1x1_params, secondary_activation_configs, - film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); + nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, + std::move(dilations), std::move(activation_configs), std::move(gating_modes), + head_bias, groups, groups_input_mixin, groups_1x1, head1x1_params, + std::move(secondary_activation_configs), film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + nam::wavenet::_LayerArray layer_array(params); const int numFrames = 4; layer_array.SetMaxBufferSize(numFrames); @@ -287,16 +292,19 @@ void test_layer_array_different_activations() // Now create a comparison LayerArray with all ReLU activations and NONE gating // This should produce different outputs since it doesn't have gating or saturating activations + std::vector dilations_all_relu{1, 2, 3}; // Copy of dilations for the second layer array std::vector all_relu_configs( - dilations.size(), nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU)); - std::vector all_none_gating_modes(dilations.size(), nam::wavenet::GatingMode::NONE); + dilations_all_relu.size(), nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU)); + std::vector all_none_gating_modes( + dilations_all_relu.size(), nam::wavenet::GatingMode::NONE); std::vector all_empty_secondary_configs( - dilations.size(), nam::activations::ActivationConfig{}); - nam::wavenet::_LayerArray layer_array_all_relu(input_size, condition_size, head_size, channels, bottleneck, - kernel_size, dilations, all_relu_configs, all_none_gating_modes, - head_bias, groups, groups_input_mixin, groups_1x1, head1x1_params, - all_empty_secondary_configs, film_params, film_params, film_params, - film_params, film_params, film_params, film_params, film_params); + dilations_all_relu.size(), nam::activations::ActivationConfig{}); + nam::wavenet::LayerArrayParams params_all_relu( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_all_relu), + std::move(all_relu_configs), std::move(all_none_gating_modes), head_bias, groups, groups_input_mixin, groups_1x1, + head1x1_params, std::move(all_empty_secondary_configs), film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params); + nam::wavenet::_LayerArray layer_array_all_relu(params_all_relu); layer_array_all_relu.SetMaxBufferSize(numFrames); // Create weights for all-NONE version (simpler, no gating) diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index 0561b17..bf4be07 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -52,10 +52,13 @@ static nam::wavenet::_LayerArray make_layer_array( std::vector gating_modes(dilations.size(), gating_mode); std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); - return nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation_configs, gating_modes, head_bias, groups_input, groups_input_mixin, - groups_1x1, head1x1_params, secondary_activation_configs, film_params, film_params, - film_params, film_params, film_params, film_params, film_params, film_params); + std::vector dilations_copy = dilations; // Make a copy since we need to move it + nam::wavenet::LayerArrayParams params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_copy), + std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, groups_1x1, + head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params); + return nam::wavenet::_LayerArray(params); } // Helper function to create LayerArrayParams with default FiLM parameters diff --git a/tools/test/test_wavenet_configurable_gating.cpp b/tools/test/test_wavenet_configurable_gating.cpp index b5b8e24..0b9997f 100644 --- a/tools/test/test_wavenet_configurable_gating.cpp +++ b/tools/test/test_wavenet_configurable_gating.cpp @@ -66,10 +66,13 @@ static nam::wavenet::_LayerArray make_layer_array( std::vector gating_modes(dilations.size(), gating_mode); std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); - return nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation_configs, gating_modes, head_bias, groups_input, groups_input_mixin, - groups_1x1, head1x1_params, secondary_activation_configs, film_params, film_params, - film_params, film_params, film_params, film_params, film_params, film_params); + std::vector dilations_copy = dilations; // Make a copy since we need to move it + nam::wavenet::LayerArrayParams params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_copy), + std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, groups_1x1, + head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params); + return nam::wavenet::_LayerArray(params); } class TestConfigurableGating From 8e564b0465cd459f614a27341991ec7b9d2a8813 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Mon, 26 Jan 2026 22:14:41 -0800 Subject: [PATCH 2/2] [REFINE] WaveNet: Introduce LayerParams struct for _Layer initialization - Refactored the _Layer constructor to accept a LayerParams struct, consolidating parameters for improved readability and maintainability. - Updated all relevant test files to utilize the new LayerParams structure, ensuring consistent layer initialization across the codebase. - Enhanced the clarity of the code by reducing the number of parameters passed directly to the _Layer constructor. --- NAM/wavenet.cpp | 17 +- NAM/wavenet.h | 194 ++++++++++++------ tools/test/test_wavenet/test_head1x1.cpp | 9 +- tools/test/test_wavenet/test_layer.cpp | 9 +- .../test/test_wavenet/test_real_time_safe.cpp | 66 +++--- .../test/test_wavenet_configurable_gating.cpp | 9 +- 6 files changed, 188 insertions(+), 116 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 17d065a..0d7fa5c 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -220,13 +220,16 @@ nam::wavenet::_LayerArray::_LayerArray(const LayerArrayParams& params) { const size_t num_layers = params.dilations.size(); for (size_t i = 0; i < num_layers; i++) - this->_layers.push_back( - _Layer(params.condition_size, params.channels, params.bottleneck, params.kernel_size, params.dilations[i], - params.activation_configs[i], params.gating_modes[i], params.groups_input, params.groups_input_mixin, - params.groups_1x1, params.head1x1_params, params.secondary_activation_configs[i], - params.conv_pre_film_params, params.conv_post_film_params, params.input_mixin_pre_film_params, - params.input_mixin_post_film_params, params.activation_pre_film_params, params.activation_post_film_params, - params._1x1_post_film_params, params.head1x1_post_film_params)); + { + LayerParams layer_params( + params.condition_size, params.channels, params.bottleneck, params.kernel_size, params.dilations[i], + params.activation_configs[i], params.gating_modes[i], params.groups_input, params.groups_input_mixin, + params.groups_1x1, params.head1x1_params, params.secondary_activation_configs[i], params.conv_pre_film_params, + params.conv_post_film_params, params.input_mixin_pre_film_params, params.input_mixin_post_film_params, + params.activation_pre_film_params, params.activation_post_film_params, params._1x1_post_film_params, + params.head1x1_post_film_params); + this->_layers.push_back(_Layer(layer_params)); + } } void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize) diff --git a/NAM/wavenet.h b/NAM/wavenet.h index 06c56b2..2cf8be7 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -76,6 +76,86 @@ struct _FiLMParams const bool shift; ///< Whether to apply shift in addition to scale }; +/// \brief Parameters for constructing a single Layer +/// +/// Contains all configuration needed to construct a _Layer +struct LayerParams +{ + /// \brief Constructor + /// \param condition_size_ Size of the conditioning input + /// \param channels_ Number of input/output channels from layer to layer + /// \param bottleneck_ Internal channel count + /// \param kernel_size_ Kernel size for the dilated convolution + /// \param dilation_ Dilation factor for the convolution + /// \param activation_config_ Primary activation function configuration + /// \param gating_mode_ Gating mode (NONE, GATED, or BLENDED) + /// \param groups_input_ Number of groups for the input convolution + /// \param groups_input_mixin_ Number of groups for the input mixin convolution + /// \param groups_1x1_ Number of groups for the 1x1 convolution + /// \param head1x1_params_ Configuration of the optional head1x1 convolution + /// \param secondary_activation_config_ Secondary activation (for gating/blending) + /// \param conv_pre_film_params_ FiLM parameters before the input convolution + /// \param conv_post_film_params_ FiLM parameters after the input convolution + /// \param input_mixin_pre_film_params_ FiLM parameters before the input mixin + /// \param input_mixin_post_film_params_ FiLM parameters after the input mixin + /// \param activation_pre_film_params_ FiLM parameters after the input/mixin summed output before activation + /// \param activation_post_film_params_ FiLM parameters after the activation output before the 1x1 convolution + /// \param _1x1_post_film_params_ FiLM parameters after the 1x1 convolution + /// \param head1x1_post_film_params_ FiLM parameters after the head1x1 convolution + LayerParams(const int condition_size_, const int channels_, const int bottleneck_, const int kernel_size_, + const int dilation_, const activations::ActivationConfig& activation_config_, + const GatingMode gating_mode_, const int groups_input_, const int groups_input_mixin_, + const int groups_1x1_, const Head1x1Params& head1x1_params_, + const activations::ActivationConfig& secondary_activation_config_, + const _FiLMParams& conv_pre_film_params_, const _FiLMParams& conv_post_film_params_, + const _FiLMParams& input_mixin_pre_film_params_, const _FiLMParams& input_mixin_post_film_params_, + const _FiLMParams& activation_pre_film_params_, const _FiLMParams& activation_post_film_params_, + const _FiLMParams& _1x1_post_film_params_, const _FiLMParams& head1x1_post_film_params_) + : condition_size(condition_size_) + , channels(channels_) + , bottleneck(bottleneck_) + , kernel_size(kernel_size_) + , dilation(dilation_) + , activation_config(activation_config_) + , gating_mode(gating_mode_) + , groups_input(groups_input_) + , groups_input_mixin(groups_input_mixin_) + , groups_1x1(groups_1x1_) + , head1x1_params(head1x1_params_) + , secondary_activation_config(secondary_activation_config_) + , conv_pre_film_params(conv_pre_film_params_) + , conv_post_film_params(conv_post_film_params_) + , input_mixin_pre_film_params(input_mixin_pre_film_params_) + , input_mixin_post_film_params(input_mixin_post_film_params_) + , activation_pre_film_params(activation_pre_film_params_) + , activation_post_film_params(activation_post_film_params_) + , _1x1_post_film_params(_1x1_post_film_params_) + , head1x1_post_film_params(head1x1_post_film_params_) + { + } + + const int condition_size; ///< Size of the conditioning input + const int channels; ///< Number of input/output channels from layer to layer + const int bottleneck; ///< Internal channel count + const int kernel_size; ///< Kernel size for the dilated convolution + const int dilation; ///< Dilation factor for the convolution + const activations::ActivationConfig activation_config; ///< Primary activation function configuration + const GatingMode gating_mode; ///< Gating mode (NONE, GATED, or BLENDED) + const int groups_input; ///< Number of groups for the input convolution + const int groups_input_mixin; ///< Number of groups for the input mixin convolution + const int groups_1x1; ///< Number of groups for the 1x1 convolution + const Head1x1Params head1x1_params; ///< Configuration of the optional head1x1 convolution + const activations::ActivationConfig secondary_activation_config; ///< Secondary activation (for gating/blending) + const _FiLMParams conv_pre_film_params; ///< FiLM parameters before the input convolution + const _FiLMParams conv_post_film_params; ///< FiLM parameters after the input convolution + const _FiLMParams input_mixin_pre_film_params; ///< FiLM parameters before the input mixin + const _FiLMParams input_mixin_post_film_params; ///< FiLM parameters after the input mixin + const _FiLMParams activation_pre_film_params; ///< FiLM parameters before activation + const _FiLMParams activation_post_film_params; ///< FiLM parameters after activation + const _FiLMParams _1x1_post_film_params; ///< FiLM parameters after the 1x1 convolution + const _FiLMParams head1x1_post_film_params; ///< FiLM parameters after the head1x1 convolution +}; + /// \brief A single WaveNet layer block /// /// A WaveNet layer performs the following operations: @@ -92,107 +172,91 @@ struct _FiLMParams class _Layer { public: - /// \brief Constructor with GatingMode enum and typed ActivationConfig - /// \param condition_size Size of the conditioning input - /// \param channels Number of input/output channels from layer to layer - /// \param bottleneck Internal channel count - /// \param kernel_size Kernel size for the dilated convolution - /// \param dilation Dilation factor for the convolution - /// \param activation_config Primary activation function configuration - /// \param gating_mode Gating mode (NONE, GATED, or BLENDED) - /// \param groups_input Number of groups for the input convolution - /// \param groups_input_mixin Number of groups for the input mixin convolution - /// \param groups_1x1 Number of groups for the 1x1 convolution - /// \param head1x1_params Configuration of the optional head1x1 convolution - /// \param secondary_activation_config Secondary activation (for gating/blending) - /// \param conv_pre_film_params FiLM parameters before the input convolution - /// \param conv_post_film_params FiLM parameters after the input convolution - /// \param input_mixin_pre_film_params FiLM parameters before the input mixin - /// \param input_mixin_post_film_params FiLM parameters after the input mixin - /// \param activation_pre_film_params FiLM parameters after the input/mixin summed output before activation - /// \param activation_post_film_params FiLM parameters after the activation output before the 1x1 convolution - /// \param _1x1_post_film_params FiLM parameters after the 1x1 convolution - /// \param head1x1_post_film_params FiLM parameters after the head1x1 convolution + /// \brief Constructor with LayerParams + /// \param params Parameters for constructing the layer /// \throws std::invalid_argument If head1x1_post_film_params is active but head1x1 is not - _Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation, - const activations::ActivationConfig& activation_config, const GatingMode gating_mode, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const Head1x1Params& head1x1_params, - const activations::ActivationConfig& secondary_activation_config, const _FiLMParams& conv_pre_film_params, - const _FiLMParams& conv_post_film_params, const _FiLMParams& input_mixin_pre_film_params, - const _FiLMParams& input_mixin_post_film_params, const _FiLMParams& activation_pre_film_params, - const _FiLMParams& activation_post_film_params, const _FiLMParams& _1x1_post_film_params, - const _FiLMParams& head1x1_post_film_params) - : _conv(channels, (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck, kernel_size, true, dilation, - groups_input) - , _input_mixin( - condition_size, (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck, false, groups_input_mixin) - , _1x1(bottleneck, channels, true, groups_1x1) - , _activation(activations::Activation::get_activation(activation_config)) - , _gating_mode(gating_mode) - , _bottleneck(bottleneck) + _Layer(const LayerParams& params) + : _conv(params.channels, (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck, + params.kernel_size, true, params.dilation, params.groups_input) + , _input_mixin(params.condition_size, + (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck, false, + params.groups_input_mixin) + , _1x1(params.bottleneck, params.channels, true, params.groups_1x1) + , _activation(activations::Activation::get_activation(params.activation_config)) + , _gating_mode(params.gating_mode) + , _bottleneck(params.bottleneck) { - if (head1x1_params.active) + if (params.head1x1_params.active) { - _head1x1 = std::make_unique(bottleneck, head1x1_params.out_channels, true, head1x1_params.groups); + _head1x1 = std::make_unique( + params.bottleneck, params.head1x1_params.out_channels, true, params.head1x1_params.groups); } else { // If there's a post-head 1x1 FiLM but no head 1x1, this is redundant--don't allow it - if (head1x1_post_film_params.active) + if (params.head1x1_post_film_params.active) { throw std::invalid_argument("Do not use post-head 1x1 FiLM if there is no head 1x1"); } } // Validate & initialize gating/blending activation - if (gating_mode == GatingMode::GATED) + if (params.gating_mode == GatingMode::GATED) { _gating_activation = std::make_unique( - _activation, activations::Activation::get_activation(secondary_activation_config), bottleneck); + _activation, activations::Activation::get_activation(params.secondary_activation_config), params.bottleneck); } - else if (gating_mode == GatingMode::BLENDED) + else if (params.gating_mode == GatingMode::BLENDED) { _blending_activation = std::make_unique( - _activation, activations::Activation::get_activation(secondary_activation_config), bottleneck); + _activation, activations::Activation::get_activation(params.secondary_activation_config), params.bottleneck); } // Initialize FiLM objects - if (conv_pre_film_params.active) + if (params.conv_pre_film_params.active) { - _conv_pre_film = std::make_unique(condition_size, channels, conv_pre_film_params.shift); + _conv_pre_film = + std::make_unique(params.condition_size, params.channels, params.conv_pre_film_params.shift); } - if (conv_post_film_params.active) + if (params.conv_post_film_params.active) { - const int conv_out_channels = (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck; - _conv_post_film = std::make_unique(condition_size, conv_out_channels, conv_post_film_params.shift); + const int conv_out_channels = + (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck; + _conv_post_film = + std::make_unique(params.condition_size, conv_out_channels, params.conv_post_film_params.shift); } - if (input_mixin_pre_film_params.active) + if (params.input_mixin_pre_film_params.active) { - _input_mixin_pre_film = std::make_unique(condition_size, condition_size, input_mixin_pre_film_params.shift); + _input_mixin_pre_film = + std::make_unique(params.condition_size, params.condition_size, params.input_mixin_pre_film_params.shift); } - if (input_mixin_post_film_params.active) + if (params.input_mixin_post_film_params.active) { - const int input_mixin_out_channels = (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck; - _input_mixin_post_film = - std::make_unique(condition_size, input_mixin_out_channels, input_mixin_post_film_params.shift); + const int input_mixin_out_channels = + (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck; + _input_mixin_post_film = std::make_unique( + params.condition_size, input_mixin_out_channels, params.input_mixin_post_film_params.shift); } - if (activation_pre_film_params.active) + if (params.activation_pre_film_params.active) { - const int z_channels = (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck; - _activation_pre_film = std::make_unique(condition_size, z_channels, activation_pre_film_params.shift); + const int z_channels = (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck; + _activation_pre_film = + std::make_unique(params.condition_size, z_channels, params.activation_pre_film_params.shift); } - if (activation_post_film_params.active) + if (params.activation_post_film_params.active) { - _activation_post_film = std::make_unique(condition_size, bottleneck, activation_post_film_params.shift); + _activation_post_film = + std::make_unique(params.condition_size, params.bottleneck, params.activation_post_film_params.shift); } - if (_1x1_post_film_params.active) + if (params._1x1_post_film_params.active) { - _1x1_post_film = std::make_unique(condition_size, channels, _1x1_post_film_params.shift); + _1x1_post_film = + std::make_unique(params.condition_size, params.channels, params._1x1_post_film_params.shift); } - if (head1x1_post_film_params.active && head1x1_params.active) + if (params.head1x1_post_film_params.active && params.head1x1_params.active) { - _head1x1_post_film = - std::make_unique(condition_size, head1x1_params.out_channels, head1x1_post_film_params.shift); + _head1x1_post_film = std::make_unique( + params.condition_size, params.head1x1_params.out_channels, params.head1x1_post_film_params.shift); } }; diff --git a/tools/test/test_wavenet/test_head1x1.cpp b/tools/test/test_wavenet/test_head1x1.cpp index 42aea33..7fd13ec 100644 --- a/tools/test/test_wavenet/test_head1x1.cpp +++ b/tools/test/test_wavenet/test_head1x1.cpp @@ -28,10 +28,11 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); - return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, - secondary_activation_config, film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, + gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + secondary_activation_config, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + return nam::wavenet::_Layer(layer_params); } void test_head1x1_inactive() diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp index 8c6cc12..cee5b51 100644 --- a/tools/test/test_wavenet/test_layer.cpp +++ b/tools/test/test_wavenet/test_layer.cpp @@ -28,10 +28,11 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); - return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, - secondary_activation_config, film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, + gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + secondary_activation_config, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + return nam::wavenet::_Layer(layer_params); } void test_gated() { diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index bf4be07..56f7ef8 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -32,10 +32,11 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); - return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, - secondary_activation_config, film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, + gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + secondary_activation_config, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + return nam::wavenet::_Layer(layer_params); } // Helper function to create a LayerArray with default FiLM parameters @@ -96,10 +97,11 @@ static nam::wavenet::_Layer make_layer_all_films(const int condition_size, const // Don't activate head1x1_post_film if head1x1 is not active (validation will fail) nam::wavenet::_FiLMParams head1x1_post_film_params = head1x1_params.active ? film_params : nam::wavenet::_FiLMParams(false, false); - return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, - secondary_activation_config, film_params, film_params, film_params, film_params, - film_params, film_params, film_params, head1x1_post_film_params); + nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, + gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + secondary_activation_config, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, head1x1_post_film_params); + return nam::wavenet::_Layer(layer_params); } // Test that pre-allocated Eigen operations with noalias() don't allocate @@ -721,18 +723,18 @@ void test_layer_post_activation_film_gated_realtime_safe() nam::wavenet::_FiLMParams inactive_film(false, false); nam::wavenet::_FiLMParams active_film(true, true); // activation_post_film will be active - auto layer = - nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, secondary_activation, - inactive_film, // conv_pre_film - inactive_film, // conv_post_film - inactive_film, // input_mixin_pre_film - inactive_film, // input_mixin_post_film - inactive_film, // activation_pre_film - active_film, // activation_post_film - THIS IS THE KEY ONE - inactive_film, // _1x1_post_film - inactive_film // head1x1_post_film - ); + nam::wavenet::LayerParams layer_params( + condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, + groups_input_mixin, groups_1x1, head1x1_params, secondary_activation, inactive_film, // conv_pre_film + inactive_film, // conv_post_film + inactive_film, // input_mixin_pre_film + inactive_film, // input_mixin_post_film + inactive_film, // activation_pre_film + active_film, // activation_post_film - THIS IS THE KEY ONE + inactive_film, // _1x1_post_film + inactive_film // head1x1_post_film + ); + auto layer = nam::wavenet::_Layer(layer_params); // Set weights - Order: conv, input_mixin, 1x1, then FiLMs // NOTE: In GATED mode, conv and input_mixin output 2*bottleneck channels! @@ -828,18 +830,18 @@ void test_layer_post_activation_film_blended_realtime_safe() nam::wavenet::_FiLMParams inactive_film(false, false); nam::wavenet::_FiLMParams active_film(true, true); // activation_post_film will be active - auto layer = - nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, secondary_activation, - inactive_film, // conv_pre_film - inactive_film, // conv_post_film - inactive_film, // input_mixin_pre_film - inactive_film, // input_mixin_post_film - inactive_film, // activation_pre_film - active_film, // activation_post_film - THIS IS THE KEY ONE - inactive_film, // _1x1_post_film - inactive_film // head1x1_post_film - ); + nam::wavenet::LayerParams layer_params( + condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, + groups_input_mixin, groups_1x1, head1x1_params, secondary_activation, inactive_film, // conv_pre_film + inactive_film, // conv_post_film + inactive_film, // input_mixin_pre_film + inactive_film, // input_mixin_post_film + inactive_film, // activation_pre_film + active_film, // activation_post_film - THIS IS THE KEY ONE + inactive_film, // _1x1_post_film + inactive_film // head1x1_post_film + ); + auto layer = nam::wavenet::_Layer(layer_params); // Set weights - Order: conv, input_mixin, 1x1, then FiLMs // NOTE: In BLENDED mode, conv and input_mixin output 2*bottleneck channels! diff --git a/tools/test/test_wavenet_configurable_gating.cpp b/tools/test/test_wavenet_configurable_gating.cpp index 0b9997f..b304248 100644 --- a/tools/test/test_wavenet_configurable_gating.cpp +++ b/tools/test/test_wavenet_configurable_gating.cpp @@ -25,10 +25,11 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); - return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, - secondary_activation_config, film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, + gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + secondary_activation_config, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + return nam::wavenet::_Layer(layer_params); } // Helper function to create LayerArrayParams with default FiLM parameters