From 88a06a1974a545e33f631d7e1e06d9de9e133bde Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Wed, 28 Jan 2026 15:51:06 -0800 Subject: [PATCH 1/5] Layer1x1 --- NAM/wavenet.cpp | 93 +++++++++++++++++++++++++++++++----------- NAM/wavenet.h | 92 ++++++++++++++++++++++++++++------------- generate_weights_a2.py | 46 +++++++++++++-------- 3 files changed, 162 insertions(+), 69 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 5d2ae99..6ea7b5e 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -17,7 +17,10 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize) _input_mixin.SetMaxBufferSize(maxBufferSize); const long z_channels = this->_conv.get_out_channels(); // This is 2*bottleneck when gated, bottleneck when not _z.resize(z_channels, maxBufferSize); - _1x1.SetMaxBufferSize(maxBufferSize); + if (this->_layer1x1) + { + this->_layer1x1->SetMaxBufferSize(maxBufferSize); + } // Pre-allocate output buffers const long channels = this->get_channels(); this->_output_next_layer.resize(channels, maxBufferSize); @@ -47,8 +50,8 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize) this->_activation_pre_film->SetMaxBufferSize(maxBufferSize); if (this->_activation_post_film) this->_activation_post_film->SetMaxBufferSize(maxBufferSize); - if (this->_1x1_post_film) - this->_1x1_post_film->SetMaxBufferSize(maxBufferSize); + if (this->_layer1x1_post_film) + this->_layer1x1_post_film->SetMaxBufferSize(maxBufferSize); if (this->_head1x1_post_film) this->_head1x1_post_film->SetMaxBufferSize(maxBufferSize); } @@ -57,7 +60,10 @@ void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) { this->_conv.set_weights_(weights); this->_input_mixin.set_weights_(weights); - this->_1x1.set_weights_(weights); + if (this->_layer1x1) + { + this->_layer1x1->set_weights_(weights); + } if (this->_head1x1) { this->_head1x1->set_weights_(weights); @@ -75,8 +81,8 @@ void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) this->_activation_pre_film->set_weights_(weights); if (this->_activation_post_film) this->_activation_post_film->set_weights_(weights); - if (this->_1x1_post_film) - this->_1x1_post_film->set_weights_(weights); + if (this->_layer1x1_post_film) + this->_layer1x1_post_film->set_weights_(weights); if (this->_head1x1_post_film) this->_head1x1_post_film->set_weights_(weights); } @@ -137,7 +143,10 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma { this->_activation_post_film->Process_(this->_z, condition, num_frames); } - _1x1.process_(_z, num_frames); + if (this->_layer1x1) + { + this->_layer1x1->process_(this->_z, num_frames); + } } else if (this->_gating_mode == GatingMode::GATED) { @@ -153,7 +162,10 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_z.topRows(bottleneck).leftCols(num_frames).noalias() = this->_activation_post_film->GetOutput().leftCols(num_frames); } - _1x1.process_(this->_z.topRows(bottleneck), num_frames); + if (this->_layer1x1) + { + this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames); + } } else if (this->_gating_mode == GatingMode::BLENDED) { @@ -169,11 +181,14 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_z.topRows(bottleneck).leftCols(num_frames).noalias() = this->_activation_post_film->GetOutput().leftCols(num_frames); } - _1x1.process_(this->_z.topRows(bottleneck), num_frames); - if (this->_1x1_post_film) + if (this->_layer1x1) { - Eigen::MatrixXf& _1x1_output = this->_1x1.GetOutput(); - this->_1x1_post_film->Process_(_1x1_output, condition, num_frames); + this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames); + if (this->_layer1x1_post_film) + { + Eigen::MatrixXf& layer1x1_output = this->_layer1x1->GetOutput(); + this->_layer1x1_post_film->Process_(layer1x1_output, condition, num_frames); + } } } @@ -205,9 +220,17 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(bottleneck).leftCols(num_frames); } - // Store output to next layer (residual connection: input + _1x1 output) - this->_output_next_layer.leftCols(num_frames).noalias() = - input.leftCols(num_frames) + _1x1.GetOutput().leftCols(num_frames); + // Store output to next layer (residual connection: input + layer1x1 output, or just input if layer1x1 inactive) + if (this->_layer1x1) + { + this->_output_next_layer.leftCols(num_frames).noalias() = + input.leftCols(num_frames) + this->_layer1x1->GetOutput().leftCols(num_frames); + } + else + { + // If layer1x1 is inactive, residual connection is just the input (identity) + this->_output_next_layer.leftCols(num_frames).noalias() = input.leftCols(num_frames); + } } // LayerArray ================================================================= @@ -224,10 +247,10 @@ nam::wavenet::_LayerArray::_LayerArray(const LayerArrayParams& params) LayerParams layer_params( params.condition_size, params.channels, params.bottleneck, params.kernel_size, params.dilations[i], params.activation_configs[i], params.gating_modes[i], params.groups_input, params.groups_input_mixin, - params.groups_1x1, params.head1x1_params, params.secondary_activation_configs[i], params.conv_pre_film_params, - params.conv_post_film_params, params.input_mixin_pre_film_params, params.input_mixin_post_film_params, - params.activation_pre_film_params, params.activation_post_film_params, params._1x1_post_film_params, - params.head1x1_post_film_params); + params.layer1x1_params, params.head1x1_params, params.secondary_activation_configs[i], + params.conv_pre_film_params, params.conv_post_film_params, params.input_mixin_pre_film_params, + params.input_mixin_post_film_params, params.activation_pre_film_params, params.activation_post_film_params, + params._layer1x1_post_film_params, params.head1x1_post_film_params); this->_layers.push_back(_Layer(layer_params)); } } @@ -570,11 +593,28 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st const int groups = layer_config.value("groups_input", 1); // defaults to 1 const int groups_input_mixin = layer_config.value("groups_input_mixin", 1); // defaults to 1 - const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1 const int channels = layer_config["channels"]; const int bottleneck = layer_config.value("bottleneck", channels); // defaults to channels if not present + // Parse layer1x1 parameters + bool layer1x1_active = true; // default to active if not present + int layer1x1_groups = 1; + if (layer_config.find("layer1x1") != layer_config.end()) + { + const auto& layer1x1_config = layer_config["layer1x1"]; + layer1x1_active = layer1x1_config["active"]; // default to active + layer1x1_groups = layer1x1_config["groups"]; + } + // Validation: if layer1x1 is inactive, bottleneck must equal channels + if (!layer1x1_active && bottleneck != channels) + { + throw std::runtime_error("Layer array " + std::to_string(i) + ": when layer1x1.active is false, bottleneck (" + + std::to_string(bottleneck) + ") must equal channels (" + std::to_string(channels) + + ")"); + } + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + const int input_size = layer_config["input_size"]; const int condition_size = layer_config["condition_size"]; const int head_size = layer_config["head_size"]; @@ -771,15 +811,22 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st nam::wavenet::_FiLMParams input_mixin_post_film_params = parse_film_params("input_mixin_post_film"); nam::wavenet::_FiLMParams activation_pre_film_params = parse_film_params("activation_pre_film"); nam::wavenet::_FiLMParams activation_post_film_params = parse_film_params("activation_post_film"); - nam::wavenet::_FiLMParams _1x1_post_film_params = parse_film_params("1x1_post_film"); + nam::wavenet::_FiLMParams _layer1x1_post_film_params = parse_film_params("layer1x1_post_film"); nam::wavenet::_FiLMParams head1x1_post_film_params = parse_film_params("head1x1_post_film"); + // Validation: if layer1x1_post_film is active, layer1x1 must also be active + if (_layer1x1_post_film_params.active && !layer1x1_active) + { + throw std::runtime_error("Layer array " + std::to_string(i) + + ": layer1x1_post_film cannot be active when layer1x1.active is false"); + } + layer_array_params.push_back(nam::wavenet::LayerArrayParams( input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, groups_1x1, + std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), conv_pre_film_params, conv_post_film_params, input_mixin_pre_film_params, input_mixin_post_film_params, activation_pre_film_params, - activation_post_film_params, _1x1_post_film_params, head1x1_post_film_params)); + activation_post_film_params, _layer1x1_post_film_params, head1x1_post_film_params)); } const bool with_head = !config["head"].is_null(); const float head_scale = config["head_scale"]; diff --git a/NAM/wavenet.h b/NAM/wavenet.h index e324849..5a95e16 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -59,6 +59,25 @@ struct Head1x1Params const int groups; ///< Number of groups for grouped convolution }; +/// \brief Parameters for layer1x1 configuration +/// +/// Configures an optional 1x1 convolution that processes the activation output +/// for the residual connection to the next layer. +struct Layer1x1Params +{ + /// \brief Constructor + /// \param active_ Whether the layer1x1 convolution is active + /// \param groups_ Number of groups for grouped convolution + Layer1x1Params(bool active_, int groups_) + : active(active_) + , groups(groups_) + { + } + + const bool active; ///< Whether the layer1x1 convolution is active + const int groups; ///< Number of groups for grouped convolution +}; + /// \brief Parameters for FiLM (Feature-wise Linear Modulation) configuration /// /// FiLM applies per-channel scaling and optional shifting based on conditioning input. @@ -94,7 +113,7 @@ struct LayerParams /// \param gating_mode_ Gating mode (NONE, GATED, or BLENDED) /// \param groups_input_ Number of groups for the input convolution /// \param groups_input_mixin_ Number of groups for the input mixin convolution - /// \param groups_1x1_ Number of groups for the 1x1 convolution + /// \param layer1x1_params_ Configuration of the optional layer1x1 convolution /// \param head1x1_params_ Configuration of the optional head1x1 convolution /// \param secondary_activation_config_ Secondary activation (for gating/blending) /// \param conv_pre_film_params_ FiLM parameters before the input convolution @@ -102,18 +121,18 @@ struct LayerParams /// \param input_mixin_pre_film_params_ FiLM parameters before the input mixin /// \param input_mixin_post_film_params_ FiLM parameters after the input mixin /// \param activation_pre_film_params_ FiLM parameters after the input/mixin summed output before activation - /// \param activation_post_film_params_ FiLM parameters after the activation output before the 1x1 convolution - /// \param _1x1_post_film_params_ FiLM parameters after the 1x1 convolution + /// \param activation_post_film_params_ FiLM parameters after the activation output before the layer1x1 convolution + /// \param _layer1x1_post_film_params_ FiLM parameters after the layer1x1 convolution /// \param head1x1_post_film_params_ FiLM parameters after the head1x1 convolution LayerParams(const int condition_size_, const int channels_, const int bottleneck_, const int kernel_size_, const int dilation_, const activations::ActivationConfig& activation_config_, const GatingMode gating_mode_, const int groups_input_, const int groups_input_mixin_, - const int groups_1x1_, const Head1x1Params& head1x1_params_, + const Layer1x1Params& layer1x1_params_, const Head1x1Params& head1x1_params_, const activations::ActivationConfig& secondary_activation_config_, const _FiLMParams& conv_pre_film_params_, const _FiLMParams& conv_post_film_params_, const _FiLMParams& input_mixin_pre_film_params_, const _FiLMParams& input_mixin_post_film_params_, const _FiLMParams& activation_pre_film_params_, const _FiLMParams& activation_post_film_params_, - const _FiLMParams& _1x1_post_film_params_, const _FiLMParams& head1x1_post_film_params_) + const _FiLMParams& _layer1x1_post_film_params_, const _FiLMParams& head1x1_post_film_params_) : condition_size(condition_size_) , channels(channels_) , bottleneck(bottleneck_) @@ -123,7 +142,7 @@ struct LayerParams , gating_mode(gating_mode_) , groups_input(groups_input_) , groups_input_mixin(groups_input_mixin_) - , groups_1x1(groups_1x1_) + , layer1x1_params(layer1x1_params_) , head1x1_params(head1x1_params_) , secondary_activation_config(secondary_activation_config_) , conv_pre_film_params(conv_pre_film_params_) @@ -132,7 +151,7 @@ struct LayerParams , input_mixin_post_film_params(input_mixin_post_film_params_) , activation_pre_film_params(activation_pre_film_params_) , activation_post_film_params(activation_post_film_params_) - , _1x1_post_film_params(_1x1_post_film_params_) + , _layer1x1_post_film_params(_layer1x1_post_film_params_) , head1x1_post_film_params(head1x1_post_film_params_) { } @@ -146,7 +165,7 @@ struct LayerParams const GatingMode gating_mode; ///< Gating mode (NONE, GATED, or BLENDED) const int groups_input; ///< Number of groups for the input convolution const int groups_input_mixin; ///< Number of groups for the input mixin convolution - const int groups_1x1; ///< Number of groups for the 1x1 convolution + const Layer1x1Params layer1x1_params; ///< Configuration of the optional layer1x1 convolution const Head1x1Params head1x1_params; ///< Configuration of the optional head1x1 convolution const activations::ActivationConfig secondary_activation_config; ///< Secondary activation (for gating/blending) const _FiLMParams conv_pre_film_params; ///< FiLM parameters before the input convolution @@ -155,7 +174,7 @@ struct LayerParams const _FiLMParams input_mixin_post_film_params; ///< FiLM parameters after the input mixin const _FiLMParams activation_pre_film_params; ///< FiLM parameters before activation const _FiLMParams activation_post_film_params; ///< FiLM parameters after activation - const _FiLMParams _1x1_post_film_params; ///< FiLM parameters after the 1x1 convolution + const _FiLMParams _layer1x1_post_film_params; ///< FiLM parameters after the layer1x1 convolution (layer1x1_post_film) const _FiLMParams head1x1_post_film_params; ///< FiLM parameters after the head1x1 convolution }; @@ -166,9 +185,10 @@ struct LayerParams /// 2. Input mixin (conditioning input processing, with optional pre/post-FiLM) /// 3. Sum of conv and input mixin outputs /// 4. Activation (with optional gating/blending and pre/post FiLM) -/// 5. 1x1 convolution for the next layer (with optional post-FiLM) +/// 5. Optional layer1x1 convolution for the next layer (with optional post-FiLM) /// 6. Optional 1x1 convolution for the head output (with optional post-FiLM) -/// 7. Residual connection (input + 1x1 output) and skip connection (to next layer) +/// 7. Residual connection (input + layer1x1 output, or just input if layer1x1 inactive) and skip connection (to next +/// layer) /// /// The layer supports multiple gating modes and FiLM at various points in the computation. /// See the walkthrough documentation for detailed step-by-step explanation. @@ -184,11 +204,23 @@ class _Layer , _input_mixin(params.condition_size, (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck, false, params.groups_input_mixin) - , _1x1(params.bottleneck, params.channels, true, params.groups_1x1) , _activation(activations::Activation::get_activation(params.activation_config)) , _gating_mode(params.gating_mode) , _bottleneck(params.bottleneck) { + if (params.layer1x1_params.active) + { + _layer1x1 = std::make_unique(params.bottleneck, params.channels, true, params.layer1x1_params.groups); + } + else + { + // If there's a post-layer1x1 FiLM but no layer1x1, this is redundant--don't allow it + if (params._layer1x1_post_film_params.active) + { + throw std::invalid_argument("layer1x1_post_film cannot be active when layer1x1 is not active"); + } + } + if (params.head1x1_params.active) { _head1x1 = std::make_unique( @@ -255,10 +287,11 @@ class _Layer std::make_unique(params.condition_size, params.bottleneck, params.activation_post_film_params.shift, params.activation_post_film_params.groups); } - if (params._1x1_post_film_params.active) + if (params._layer1x1_post_film_params.active && params.layer1x1_params.active) { - _1x1_post_film = std::make_unique(params.condition_size, params.channels, - params._1x1_post_film_params.shift, params._1x1_post_film_params.groups); + _layer1x1_post_film = + std::make_unique(params.condition_size, params.channels, params._layer1x1_post_film_params.shift, + params._layer1x1_post_film_params.groups); } if (params.head1x1_post_film_params.active && params.head1x1_params.active) { @@ -282,7 +315,7 @@ class _Layer /// 1. Input convolution (with optional pre/post-FiLM) /// 2. Input mixin processing (with optional pre/post-FiLM) /// 3. Sum and activation (with optional gating/blending and pre/post-FiLM) - /// 4. 1x1 convolution toward the skip connection for next layer (with optional post-FiLM) + /// 4. Optional layer1x1 convolution toward the skip connection for next layer (with optional post-FiLM) /// 5. Optional 1x1 convolution for the head output (with optional post-FiLM) /// 6. Store outputs for next layer and the layer array head /// @@ -306,7 +339,7 @@ class _Layer /// \return Kernel size long get_kernel_size() const { return this->_conv.get_kernel_size(); }; - /// \brief Get output to next layer (residual connection: input + _1x1 output) + /// \brief Get output to next layer (residual connection: input + layer1x1 output) /// /// Returns the full pre-allocated buffer; only the first num_frames columns /// are valid for a given processing call. Slice with .leftCols(num_frames) as needed. @@ -341,13 +374,13 @@ class _Layer Conv1D _conv; // Input mixin Conv1x1 _input_mixin; - // The post-activation 1x1 convolution - Conv1x1 _1x1; + // The post-activation layer1x1 convolution (optional) + std::unique_ptr _layer1x1; // The post-activation 1x1 convolution outputting to the head, optional std::unique_ptr _head1x1; // The internal state Eigen::MatrixXf _z; - // Output to next layer (residual connection: input + _1x1 output) + // Output to next layer (residual connection: input + layer1x1 output, or just input if layer1x1 inactive) Eigen::MatrixXf _output_next_layer; // Output to head (skip connection: activated conv output) Eigen::MatrixXf _output_head; @@ -367,7 +400,7 @@ class _Layer std::unique_ptr _input_mixin_post_film; std::unique_ptr _activation_pre_film; std::unique_ptr _activation_post_film; - std::unique_ptr _1x1_post_film; + std::unique_ptr _layer1x1_post_film; std::unique_ptr _head1x1_post_film; }; @@ -391,7 +424,7 @@ class LayerArrayParams /// \param head_bias_ Whether to use bias in the head rechannel /// \param groups_input Number of groups for input convolutions /// \param groups_input_mixin_ Number of groups for input mixin convolutions - /// \param groups_1x1_ Number of groups for 1x1 convolutions + /// \param layer1x1_params_ Parameters for optional layer1x1 convolutions /// \param head1x1_params_ Parameters for optional head1x1 convolutions /// \param secondary_activation_configs_ Vector of secondary activation configs for gating/blending, one per layer /// \param conv_pre_film_params_ FiLM parameters before input convolutions @@ -400,7 +433,7 @@ class LayerArrayParams /// \param input_mixin_post_film_params_ FiLM parameters after input mixin /// \param activation_pre_film_params_ FiLM parameters before activation /// \param activation_post_film_params_ FiLM parameters after activation - /// \param _1x1_post_film_params_ FiLM parameters after 1x1 convolutions + /// \param _layer1x1_post_film_params_ FiLM parameters after layer1x1 convolutions /// \param head1x1_post_film_params_ FiLM parameters after head1x1 convolutions /// \throws std::invalid_argument If dilations, activation_configs, gating_modes, or secondary_activation_configs /// sizes don't match @@ -408,12 +441,13 @@ class LayerArrayParams const int bottleneck_, const int kernel_size_, const std::vector&& dilations_, const std::vector&& activation_configs_, const std::vector&& gating_modes_, const bool head_bias_, const int groups_input, - const int groups_input_mixin_, const int groups_1x1_, const Head1x1Params& head1x1_params_, + const int groups_input_mixin_, const Layer1x1Params& layer1x1_params_, + const Head1x1Params& head1x1_params_, const std::vector&& secondary_activation_configs_, const _FiLMParams& conv_pre_film_params_, const _FiLMParams& conv_post_film_params_, const _FiLMParams& input_mixin_pre_film_params_, const _FiLMParams& input_mixin_post_film_params_, const _FiLMParams& activation_pre_film_params_, const _FiLMParams& activation_post_film_params_, - const _FiLMParams& _1x1_post_film_params_, const _FiLMParams& head1x1_post_film_params_) + const _FiLMParams& _layer1x1_post_film_params_, const _FiLMParams& head1x1_post_film_params_) : input_size(input_size_) , condition_size(condition_size_) , head_size(head_size_) @@ -426,7 +460,7 @@ class LayerArrayParams , head_bias(head_bias_) , groups_input(groups_input) , groups_input_mixin(groups_input_mixin_) - , groups_1x1(groups_1x1_) + , layer1x1_params(layer1x1_params_) , head1x1_params(head1x1_params_) , secondary_activation_configs(std::move(secondary_activation_configs_)) , conv_pre_film_params(conv_pre_film_params_) @@ -435,7 +469,7 @@ class LayerArrayParams , input_mixin_post_film_params(input_mixin_post_film_params_) , activation_pre_film_params(activation_pre_film_params_) , activation_post_film_params(activation_post_film_params_) - , _1x1_post_film_params(_1x1_post_film_params_) + , _layer1x1_post_film_params(_layer1x1_post_film_params_) , head1x1_post_film_params(head1x1_post_film_params_) { const size_t num_layers = dilations.size(); @@ -470,7 +504,7 @@ class LayerArrayParams const bool head_bias; ///< Whether to use bias in head rechannel const int groups_input; ///< Number of groups for input convolutions const int groups_input_mixin; ///< Number of groups for input mixin - const int groups_1x1; ///< Number of groups for 1x1 convolutions + const Layer1x1Params layer1x1_params; ///< Parameters for optional layer1x1 const Head1x1Params head1x1_params; ///< Parameters for optional head1x1 std::vector secondary_activation_configs; ///< Secondary activation configs for gating/blending, one per layer @@ -480,7 +514,7 @@ class LayerArrayParams const _FiLMParams input_mixin_post_film_params; ///< FiLM params after input mixin const _FiLMParams activation_pre_film_params; ///< FiLM params before activation const _FiLMParams activation_post_film_params; ///< FiLM params after activation - const _FiLMParams _1x1_post_film_params; ///< FiLM params after 1x1 conv + const _FiLMParams _layer1x1_post_film_params; ///< FiLM params after layer1x1 conv const _FiLMParams head1x1_post_film_params; ///< FiLM params after head1x1 conv }; diff --git a/generate_weights_a2.py b/generate_weights_a2.py index 1186928..0bb9962 100644 --- a/generate_weights_a2.py +++ b/generate_weights_a2.py @@ -85,7 +85,7 @@ def count_layer_weights(layer_config: Dict[str, Any], condition_size: int, layer A layer consists of: 1. Conv1D: (channels, bottleneck*(2 if gated/blended else 1), kernel_size, bias=True, groups_input) 2. Input mixin Conv1x1: (condition_size, bottleneck*(2 if gated/blended else 1), bias=False, groups_input_mixin) - 3. 1x1 Conv1x1: (bottleneck, channels, bias=True, groups_1x1) + 3. Optional layer1x1 Conv1x1: (bottleneck, channels, bias=True, layer1x1_groups) 4. Optional head1x1 Conv1x1: (bottleneck, head1x1_out_channels, bias=True, head1x1_groups) 5. FiLM modules (optional, various configurations) @@ -99,7 +99,14 @@ def count_layer_weights(layer_config: Dict[str, Any], condition_size: int, layer kernel_size = layer_config["kernel_size"] groups_input = layer_config.get("groups_input", 1) groups_input_mixin = layer_config.get("groups_input_mixin", 1) - groups_1x1 = layer_config.get("groups_1x1", 1) + + # Parse layer1x1 parameters + layer1x1_active = True # default to active if not present + layer1x1_groups = 1 + if "layer1x1" in layer_config: + layer1x1_config = layer_config["layer1x1"] + layer1x1_active = layer1x1_config.get("active", True) # default to active + layer1x1_groups = layer1x1_config.get("groups", 1) gating_mode = parse_gating_mode(layer_config, layer_index) @@ -120,11 +127,12 @@ def count_layer_weights(layer_config: Dict[str, Any], condition_size: int, layer has_bias=False, groups=groups_input_mixin ) - # 3. 1x1 Conv1x1 weights - weight_count += count_conv1x1_weights( - bottleneck, channels, - has_bias=True, groups=groups_1x1 - ) + # 3. layer1x1 Conv1x1 weights (only if active) + if layer1x1_active: + weight_count += count_conv1x1_weights( + bottleneck, channels, + has_bias=True, groups=layer1x1_groups + ) # 4. Optional head1x1 weights head1x1_config = layer_config.get("head_1x1") or layer_config.get("head1x1") @@ -145,7 +153,7 @@ def count_layer_weights(layer_config: Dict[str, Any], condition_size: int, layer ("input_mixin_post_film", conv_out_channels), ("activation_pre_film", conv_out_channels), ("activation_post_film", bottleneck), - ("1x1_post_film", channels), + ("layer1x1_post_film", channels if layer1x1_active else 0), # Only count if layer1x1 is active ("head1x1_post_film", head1x1_config.get("out_channels", channels) if head1x1_config and head1x1_config.get("active") else 0) ] @@ -243,15 +251,7 @@ def generate_weights(weight_count: int, seed: int = None, return [random.uniform(*weight_range) for _ in range(weight_count)] -def process_model(input_path: Path, output_path: Path, seed: int = None) -> None: - """ - Load a .nam file with empty weights and generate random weights for it. - """ - # Load the input file - with open(input_path, 'r') as f: - model_data = json.load(f) - - print(f"Processing: {input_path}") +def add_weights_to_model(model_data: Dict[str, Any], seed: int = None) -> None: print(f"Architecture: {model_data.get('architecture', 'Unknown')}") # Process condition_dsp if present @@ -299,6 +299,18 @@ def process_model(input_path: Path, output_path: Path, seed: int = None) -> None total_weights += 1 # head_scale print(f"\nTotal weights generated: {total_weights}") + + +def process_model(input_path: Path, output_path: Path, seed: int = None) -> None: + """ + Load a .nam file with empty weights and generate random weights for it. + """ + # Load the input file + with open(input_path, 'r') as f: + model_data = json.load(f) + + print(f"Processing: {input_path}") + add_weights_to_model(model_data, seed) # Write output file output_path.parent.mkdir(parents=True, exist_ok=True) From e9ff18af9d51f335ff413964ec88b67de2073810 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Wed, 28 Jan 2026 16:00:45 -0800 Subject: [PATCH 2/5] Add tests for Layer1x1 functionality in WaveNet - Introduced a new test file `test_layer1x1.cpp` to validate the behavior of the Layer1x1 component in the WaveNet architecture. - Implemented multiple test cases to check both active and inactive states of Layer1x1, ensuring correct processing of inputs and outputs. - Added validation for error handling when the bottleneck does not match channels in inactive Layer1x1 configurations. - Enhanced tests to cover scenarios with grouped Layer1x1 convolutions and post-FiLM behavior, ensuring comprehensive coverage of the new functionality. --- NAM/wavenet.cpp | 7 - NAM/wavenet.h | 9 +- tools/test/test_wavenet/test_layer1x1.cpp | 481 ++++++++++++++++++++++ 3 files changed, 489 insertions(+), 8 deletions(-) create mode 100644 tools/test/test_wavenet/test_layer1x1.cpp diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 6ea7b5e..3d56859 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -606,13 +606,6 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st layer1x1_active = layer1x1_config["active"]; // default to active layer1x1_groups = layer1x1_config["groups"]; } - // Validation: if layer1x1 is inactive, bottleneck must equal channels - if (!layer1x1_active && bottleneck != channels) - { - throw std::runtime_error("Layer array " + std::to_string(i) + ": when layer1x1.active is false, bottleneck (" - + std::to_string(bottleneck) + ") must equal channels (" + std::to_string(channels) - + ")"); - } nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); const int input_size = layer_config["input_size"]; diff --git a/NAM/wavenet.h b/NAM/wavenet.h index 5a95e16..63e1378 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -197,7 +197,8 @@ class _Layer public: /// \brief Constructor with LayerParams /// \param params Parameters for constructing the layer - /// \throws std::invalid_argument If head1x1_post_film_params is active but head1x1 is not + /// \throws std::invalid_argument If head1x1_post_film_params is active but head1x1 is not, or if layer1x1 is inactive + /// but bottleneck != channels _Layer(const LayerParams& params) : _conv(params.channels, (params.gating_mode != GatingMode::NONE) ? 2 * params.bottleneck : params.bottleneck, params.kernel_size, true, params.dilation, params.groups_input) @@ -214,6 +215,12 @@ class _Layer } else { + // Validation: if layer1x1 is inactive, bottleneck must equal channels + if (params.bottleneck != params.channels) + { + throw std::invalid_argument("When layer1x1.active is false, bottleneck (" + std::to_string(params.bottleneck) + + ") must equal channels (" + std::to_string(params.channels) + ")"); + } // If there's a post-layer1x1 FiLM but no layer1x1, this is redundant--don't allow it if (params._layer1x1_post_film_params.active) { diff --git a/tools/test/test_wavenet/test_layer1x1.cpp b/tools/test/test_wavenet/test_layer1x1.cpp new file mode 100644 index 0000000..d18f967 --- /dev/null +++ b/tools/test/test_wavenet/test_layer1x1.cpp @@ -0,0 +1,481 @@ +// Tests for WaveNet layer1x1 functionality + +#include +#include +#include +#include +#include +#include + +#include "NAM/wavenet.h" + +namespace test_wavenet +{ +namespace test_layer1x1 +{ +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create a Layer with default FiLM parameters +static nam::wavenet::_Layer make_layer(const int condition_size, const int channels, const int bottleneck, + const int kernel_size, const int dilation, + const nam::activations::ActivationConfig& activation_config, + const nam::wavenet::GatingMode gating_mode, const int groups_input, + const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, + const nam::activations::ActivationConfig& secondary_activation_config) +{ + auto film_params = make_default_film_params(); + nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, + secondary_activation_config, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + return nam::wavenet::_Layer(layer_params); +} + +void test_layer1x1_active() +{ + // Test that when layer1x1 is active (default), it processes the activation output + const int conditionSize = 1; + const int channels = 2; + const int bottleneck = channels; + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = true; + const int layer1x1_groups = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto layer = + make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, groups_input, + groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); + + // Set weights: conv, input_mixin, layer1x1 + // With bottleneck=channels=2: + // Conv: (channels, bottleneck, kernelSize) + bias = (2, 2, 1) + 2 = 4 + 2 = 6 weights + // Input mixin: (conditionSize, bottleneck) = (1, 2) = 2 weights + // layer1x1: (bottleneck, channels) + bias = (2, 2) + 2 = 4 + 2 = 6 weights + std::vector weights{ + // Conv: weights=1.0, bias=0.0 (identity) + 1.0f, 0.0f, 0.0f, 1.0f, // weights + 0.0f, 0.0f, // bias + // Input mixin: weights=1.0 + 1.0f, 1.0f, + // layer1x1: weights=1.0, bias=0.0 (identity) + 1.0f, 0.0f, 0.0f, 1.0f, // weights + 0.0f, 0.0f // bias + }; + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + + // With identity-like weights: input=1, condition=1 + // conv output = 1*1 + 0 = 1 + // input_mixin output = 1*1 = 1 + // z = 1 + 1 = 2 + // ReLU(2) = 2 + // layer1x1 output = 1*2 + 0 = 2 + // layer_output = input + layer1x1_output = 1 + 2 = 3 + const float expectedLayerOutput = 3.0f; + for (int i = 0; i < numFrames; i++) + { + assert(std::abs(layer_output(0, i) - expectedLayerOutput) < 0.01f); + assert(std::abs(layer_output(1, i) - expectedLayerOutput) < 0.01f); + } +} + +void test_layer1x1_inactive() +{ + // Test that when layer1x1 is inactive, residual connection passes through input directly + const int conditionSize = 1; + const int channels = 2; + const int bottleneck = channels; // Must equal channels when layer1x1 is inactive + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = false; + const int layer1x1_groups = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto layer = + make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, groups_input, + groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); + + // Set weights: conv, input_mixin (no layer1x1 weights needed) + // With bottleneck=channels=2: + // Conv: (channels, bottleneck, kernelSize) + bias = (2, 2, 1) + 2 = 4 + 2 = 6 weights + // Input mixin: (conditionSize, bottleneck) = (1, 2) = 2 weights + std::vector weights{ + // Conv: weights=1.0, bias=0.0 (identity) + 1.0f, 0.0f, 0.0f, 1.0f, // weights + 0.0f, 0.0f, // bias + // Input mixin: weights=1.0 + 1.0f, 1.0f + // No layer1x1 weights since it's inactive + }; + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + + // With layer1x1 inactive: + // conv output = 1*1 + 0 = 1 + // input_mixin output = 1*1 = 1 + // z = 1 + 1 = 2 + // ReLU(2) = 2 + // layer1x1 is skipped + // layer_output = input (identity residual) = 1 + const float expectedLayerOutput = 1.0f; + for (int i = 0; i < numFrames; i++) + { + assert(std::abs(layer_output(0, i) - expectedLayerOutput) < 0.01f); + assert(std::abs(layer_output(1, i) - expectedLayerOutput) < 0.01f); + } +} + +void test_layer1x1_inactive_bottleneck_mismatch() +{ + // Test that creating a layer with layer1x1 inactive but bottleneck != channels throws an error + const int conditionSize = 1; + const int channels = 2; + const int bottleneck = 4; // Different from channels - should fail + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = false; + const int layer1x1_groups = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto film_params = make_default_film_params(); + + nam::wavenet::LayerParams layer_params(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, + nam::activations::ActivationConfig{}, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); + + // This should throw an exception at construction time + bool threw_exception = false; + try + { + auto layer = nam::wavenet::_Layer(layer_params); + } + catch (const std::invalid_argument& e) + { + threw_exception = true; + // Verify the error message mentions bottleneck and channels + std::string error_msg = e.what(); + assert(error_msg.find("bottleneck") != std::string::npos); + assert(error_msg.find("channels") != std::string::npos); + } + assert(threw_exception); +} + +void test_layer1x1_post_film_active() +{ + // Test that layer1x1_post_film works when layer1x1 is active + const int conditionSize = 1; + const int channels = 2; + const int bottleneck = channels; + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = true; + const int layer1x1_groups = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto film_params = make_default_film_params(); + nam::wavenet::_FiLMParams layer1x1_post_film_params(true, true, 1); // Active FiLM + + nam::wavenet::LayerParams layer_params(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, + nam::activations::ActivationConfig{}, film_params, film_params, film_params, + film_params, film_params, film_params, layer1x1_post_film_params, film_params); + + auto layer = nam::wavenet::_Layer(layer_params); + + // Set weights: conv, input_mixin, layer1x1, layer1x1_post_film + // With bottleneck=channels=2: + // Conv: (channels, bottleneck, kernelSize) + bias = (2, 2, 1) + 2 = 4 + 2 = 6 weights + // Input mixin: (conditionSize, bottleneck) = (1, 2) = 2 weights + // layer1x1: (bottleneck, channels) + bias = (2, 2) + 2 = 4 + 2 = 6 weights + // layer1x1_post_film: (conditionSize, 2*channels) + bias = (1, 4) + 4 = 4 + 4 = 8 weights (with shift) + std::vector weights{ + // Conv: weights=1.0, bias=0.0 (identity) + 1.0f, 0.0f, 0.0f, 1.0f, // weights + 0.0f, 0.0f, // bias + // Input mixin: weights=1.0 + 1.0f, 1.0f, + // layer1x1: weights=1.0, bias=0.0 (identity) + 1.0f, 0.0f, 0.0f, 1.0f, // weights + 0.0f, 0.0f, // bias + // layer1x1_post_film: (conditionSize, 2*channels) + bias (with shift) + 1.0f, 1.0f, 1.0f, 1.0f, // scale weights + 0.0f, 0.0f, 0.0f, 0.0f, // shift weights + 0.0f, 0.0f, 0.0f, 0.0f // bias + }; + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + + // Verify outputs are reasonable (not NaN, not infinite) + for (int i = 0; i < numFrames; i++) + { + assert(!std::isnan(layer_output(0, i))); + assert(!std::isinf(layer_output(0, i))); + assert(!std::isnan(layer_output(1, i))); + assert(!std::isinf(layer_output(1, i))); + } +} + +void test_layer1x1_post_film_inactive_with_layer1x1_inactive() +{ + // Test that layer1x1_post_film cannot be active when layer1x1 is inactive + const int conditionSize = 1; + const int channels = 2; + const int bottleneck = channels; + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = false; + const int layer1x1_groups = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto film_params = make_default_film_params(); + nam::wavenet::_FiLMParams layer1x1_post_film_params(true, true, 1); // Active FiLM - should fail + + nam::wavenet::LayerParams layer_params(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, + nam::activations::ActivationConfig{}, film_params, film_params, film_params, + film_params, film_params, film_params, layer1x1_post_film_params, film_params); + + // This should throw an exception + bool threw_exception = false; + try + { + auto layer = nam::wavenet::_Layer(layer_params); + } + catch (const std::invalid_argument& e) + { + threw_exception = true; + // Verify the error message mentions layer1x1_post_film + std::string error_msg = e.what(); + assert(error_msg.find("layer1x1_post_film") != std::string::npos); + } + assert(threw_exception); +} + +void test_layer1x1_gated() +{ + // Test layer1x1 with gated activation + const int conditionSize = 1; + const int channels = 2; + const int bottleneck = channels; + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::GATED; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = true; + const int layer1x1_groups = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto sigmoid_config = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Sigmoid); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_input_mixin, layer1x1_params, head1x1_params, sigmoid_config); + + // With gated: conv outputs 2*bottleneck, input_mixin outputs 2*bottleneck, layer1x1 outputs channels + // With gated=true, bottleneck=channels=2: + // Conv: (channels, 2*bottleneck, kernelSize) + bias = (2, 4, 1) + 4 = 8 + 4 = 12 weights + // Input mixin: (conditionSize, 2*bottleneck) = (1, 4) = 4 weights + // layer1x1: (bottleneck, channels) + bias = (2, 2) + 2 = 4 + 2 = 6 weights + std::vector weights; + // Conv weights: (2, 4, 1) + bias(4) + weights.push_back(1.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + weights.push_back(1.0f); + weights.push_back(1.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + weights.push_back(1.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + // Input mixin: (1, 4) + weights.push_back(1.0f); + weights.push_back(1.0f); + weights.push_back(1.0f); + weights.push_back(1.0f); + // layer1x1: (2, 2) + bias(2) + weights.push_back(1.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + weights.push_back(1.0f); + weights.push_back(0.0f); + weights.push_back(0.0f); + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + + // Verify outputs are reasonable + for (int i = 0; i < numFrames; i++) + { + assert(!std::isnan(layer_output(0, i))); + assert(!std::isinf(layer_output(0, i))); + assert(!std::isnan(layer_output(1, i))); + assert(!std::isinf(layer_output(1, i))); + } +} + +void test_layer1x1_groups() +{ + // Test layer1x1 with groups + const int conditionSize = 1; + const int channels = 4; + const int bottleneck = channels; + const int kernelSize = 1; + const int dilation = 1; + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_input_mixin = 1; + const bool layer1x1_active = true; + const int layer1x1_groups = 2; // Grouped layer1x1 + + nam::wavenet::Layer1x1Params layer1x1_params(layer1x1_active, layer1x1_groups); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto layer = + make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, groups_input, + groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); + + // With grouped layer1x1, we need to provide weights for each group + // For groups=2, channels=4, bottleneck=4: each group has 2 in_channels and 2 out_channels + // With bottleneck=channels=4: + // Conv: (channels, bottleneck, kernelSize) + bias = (4, 4, 1) + 4 = 16 + 4 = 20 weights + // Input mixin: (conditionSize, bottleneck) = (1, 4) = 4 weights + // layer1x1: grouped with groups=2, (bottleneck, channels) + bias = (4, 4) + 4 = 16 + 4 = 20 weights + // For grouped conv1x1: weights are organized per group + // Each group: (out_channels_per_group, in_channels_per_group) + bias_per_group = (2, 2) + 2 = 6 weights per group + std::vector weights{ + // Conv: (channels, bottleneck, kernelSize=1) + bias (identity weights) + 1.0f, 0.0f, 0.0f, 0.0f, // output channel 0 + 0.0f, 1.0f, 0.0f, 0.0f, // output channel 1 + 0.0f, 0.0f, 1.0f, 0.0f, // output channel 2 + 0.0f, 0.0f, 0.0f, 1.0f, // output channel 3 + // Conv bias: bottleneck values + 0.0f, 0.0f, 0.0f, 0.0f, + // Input mixin: (conditionSize, bottleneck) weights + 1.0f, 1.0f, 1.0f, 1.0f, + // layer1x1: for each group, (out_channels_per_group, in_channels_per_group) + bias_per_group + // Group 1: (2,2) weights + 2 bias + 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, + // Group 2: (2,2) weights + 2 bias + 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f}; + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + + // Verify outputs are reasonable + for (int i = 0; i < numFrames; i++) + { + for (int c = 0; c < channels; c++) + { + assert(!std::isnan(layer_output(c, i))); + assert(!std::isinf(layer_output(c, i))); + } + } +} + +}; // namespace test_layer1x1 +} // namespace test_wavenet From 4a8af7e3090e67fb2806dce1b453dccd9134f763 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Wed, 28 Jan 2026 16:18:30 -0800 Subject: [PATCH 3/5] Fix up tests --- example_models/wavenet_a2_max.nam | 105 +++--------------- tools/run_tests.cpp | 8 ++ .../test_condition_processing.cpp | 9 +- tools/test/test_wavenet/test_full.cpp | 27 ++--- tools/test/test_wavenet/test_head1x1.cpp | 4 +- tools/test/test_wavenet/test_layer.cpp | 4 +- tools/test/test_wavenet/test_layer_array.cpp | 43 +++---- .../test/test_wavenet/test_real_time_safe.cpp | 87 ++++++++------- .../test/test_wavenet_configurable_gating.cpp | 57 +++++----- 9 files changed, 149 insertions(+), 195 deletions(-) diff --git a/example_models/wavenet_a2_max.nam b/example_models/wavenet_a2_max.nam index e820526..42d0a0a 100644 --- a/example_models/wavenet_a2_max.nam +++ b/example_models/wavenet_a2_max.nam @@ -57,7 +57,6 @@ "head_bias": false, "groups_input": 3, "groups_input_mixin": 1, - "groups_1x1": 3, "head_1x1": { "active": true, "out_channels": 6, @@ -94,7 +93,7 @@ "shift": true, "groups": 1 }, - "1x1_post_film": { + "layer1x1_post_film": { "active": true, "shift": true, "groups": 1 @@ -103,6 +102,10 @@ "active": true, "shift": true, "groups": 1 + }, + "layer1x1": { + "active": true, + "groups": 3 } }, { @@ -144,7 +147,6 @@ "head_bias": false, "groups_input": 1, "groups_input_mixin": 1, - "groups_1x1": 1, "head_1x1": { "active": true, "out_channels": 4, @@ -191,7 +193,7 @@ "shift": false, "groups": 1 }, - "1x1_post_film": { + "layer1x1_post_film": { "active": true, "shift": false, "groups": 1 @@ -200,6 +202,10 @@ "active": true, "shift": false, "groups": 1 + }, + "layer1x1": { + "active": true, + "groups": 1 } } ], @@ -1210,55 +1216,7 @@ -0.8012907507877394, 0.3713605309625707, 0.08893172296428986, - 0.9556850589040935, - -0.28265231757536413, - -0.20372071451125384, - -0.6203828756778409, - -0.7556805618254725, - 0.6960663769273621, - -0.09056526285896571, - 0.325537476123956, - 0.2834089344664352, - 0.19429191903909016, - -0.9572850905272587, - 0.5735891809092335, - -0.5128622056719527, - -0.7481522293839142, - 0.12915595181592665, - -0.8627796943512882, - 0.5303147517771689, - -0.585685259306683, - -0.5680972961626505, - 0.7393908535390894, - -0.34288089313553916, - -0.7048916401171126, - 0.8010620712635164, - -0.9943288970399673, - 0.7168122527603527, - -0.710624039358984, - -0.7400157371131264, - -0.49869160654375233, - -0.6510057581972131, - 0.3221152851946336, - -0.9484397004276033, - -0.9702793455385823, - 0.5799693284695078, - -0.5241367878190739, - -0.3524570760759551, - -0.6515075971876456, - -0.895201964277647, - 0.483436113908299, - 0.05217105319573423, - 0.4913305500679914, - -0.04750806915349948, - 0.5560340786284055, - 0.026475915218383328, - -0.781891979992309, - 0.007677379571642717, - 0.8908312859402125, - -0.9132699262016566, - 0.5664539919607592, - 0.7339618155196765 + 0.9556850589040935 ], "sample_rate": 48000 }, @@ -1281,7 +1239,6 @@ "head_bias": true, "groups_input": 1, "groups_input_mixin": 4, - "groups_1x1": 2, "head_1x1": { "active": true, "out_channels": 4, @@ -1318,7 +1275,7 @@ "shift": true, "groups": 2 }, - "1x1_post_film": { + "layer1x1_post_film": { "active": true, "shift": true, "groups": 8 @@ -1327,6 +1284,10 @@ "active": true, "shift": true, "groups": 4 + }, + "layer1x1": { + "active": true, + "groups": 2 } } ], @@ -2119,39 +2080,7 @@ 0.4332066519835822, 0.01774403013902348, -0.4531502065734516, - 0.6694478911533888, - 0.9604892652067509, - -0.5125381878497233, - 0.10253015376093932, - -0.23282797350570017, - 0.8437362998631237, - 0.016481783185787968, - 0.7586525102929522, - 0.7280538688571763, - -0.44750519435591807, - 0.5800123640062269, - -0.17011515289730972, - 0.8684967873650402, - 0.015475351619312905, - 0.6410989463711567, - -0.4343220334343467, - -0.4028883004564656, - 0.17387544482832218, - 0.9978046664586775, - -0.020719306688598005, - -0.7028091632344218, - 0.07716115540774737, - -0.30975211661398494, - 0.10383483414162198, - 0.08686012591734604, - -0.08931076626699785, - -0.35644529954192516, - -0.6226952525857892, - 0.3949968552411427, - 0.14359528396980226, - -0.5328751078842138, - 0.5510889501985177, - -0.9127054018053962 + 0.6694478911533888 ], "sample_rate": 48000 } \ No newline at end of file diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index be65760..9b3bdec 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -18,6 +18,7 @@ #include "test/test_wavenet/test_real_time_safe.cpp" #include "test/test_wavenet/test_condition_processing.cpp" #include "test/test_wavenet/test_head1x1.cpp" +#include "test/test_wavenet/test_layer1x1.cpp" #include "test/test_gating_activations.cpp" #include "test/test_wavenet_gating_compatibility.cpp" #include "test/test_blending_detailed.cpp" @@ -160,6 +161,13 @@ int main() test_wavenet::test_head1x1::test_head1x1_gated(); test_wavenet::test_head1x1::test_head1x1_groups(); test_wavenet::test_head1x1::test_head1x1_different_out_channels(); + test_wavenet::test_layer1x1::test_layer1x1_active(); + test_wavenet::test_layer1x1::test_layer1x1_inactive(); + test_wavenet::test_layer1x1::test_layer1x1_inactive_bottleneck_mismatch(); + test_wavenet::test_layer1x1::test_layer1x1_post_film_active(); + test_wavenet::test_layer1x1::test_layer1x1_post_film_inactive_with_layer1x1_inactive(); + test_wavenet::test_layer1x1::test_layer1x1_gated(); + test_wavenet::test_layer1x1::test_layer1x1_groups(); test_wavenet::test_allocation_tracking_pass(); test_wavenet::test_allocation_tracking_fail(); test_wavenet::test_conv1d_process_realtime_safe(); diff --git a/tools/test/test_wavenet/test_condition_processing.cpp b/tools/test/test_wavenet/test_condition_processing.cpp index 250f943..455fd9d 100644 --- a/tools/test/test_wavenet/test_condition_processing.cpp +++ b/tools/test/test_wavenet/test_condition_processing.cpp @@ -24,7 +24,8 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, const int kernel_size, std::vector&& dilations, const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const int groups_input_mixin, const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); @@ -35,7 +36,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), std::move(activation_configs), std::move(gating_modes), - head_bias, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); } @@ -59,7 +60,7 @@ std::unique_ptr create_simple_wavenet( const bool with_head = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); const bool head1x1_active = false; const int head1x1_groups = 1; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, head1x1_groups); @@ -67,7 +68,7 @@ std::unique_ptr create_simple_wavenet( nam::wavenet::LayerArrayParams params = make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index 9ac7372..e383cc7 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -23,7 +23,8 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, const int kernel_size, std::vector&& dilations, const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const int groups_input_mixin, const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); @@ -34,7 +35,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), std::move(activation_configs), std::move(gating_modes), - head_bias, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); } @@ -55,14 +56,14 @@ void test_wavenet_model() const bool with_head = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); nam::activations::ActivationConfig empty_config{}; nam::wavenet::LayerArrayParams params = make_layer_array_params( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, - gating_mode, head_bias, groups, groups_input_mixin, groups_1x1, head1x1_params, empty_config); + gating_mode, head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, empty_config); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -123,19 +124,19 @@ void test_wavenet_multiple_arrays() // First array std::vector dilations1{1}; const int bottleneck = channels; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); layer_array_params.push_back(make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations1), activation, gating_mode, - head_bias, groups, groups_input_mixin, groups_1x1, + head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{})); // Second array (head_size of first must match channels of second) std::vector dilations2{1}; layer_array_params.push_back(make_layer_array_params(head_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations2), activation, gating_mode, - head_bias, groups, groups_input_mixin, groups_1x1, + head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{})); std::vector weights; @@ -184,14 +185,14 @@ void test_wavenet_zero_input() const bool with_head = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); nam::wavenet::LayerArrayParams params = make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -235,14 +236,14 @@ void test_wavenet_different_buffer_sizes() const bool with_head = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); nam::wavenet::LayerArrayParams params = make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -289,7 +290,7 @@ void test_wavenet_prewarm() const bool with_head = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); @@ -297,7 +298,7 @@ void test_wavenet_prewarm() nam::wavenet::LayerArrayParams params = make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); diff --git a/tools/test/test_wavenet/test_head1x1.cpp b/tools/test/test_wavenet/test_head1x1.cpp index 7fd13ec..8bb31b3 100644 --- a/tools/test/test_wavenet/test_head1x1.cpp +++ b/tools/test/test_wavenet/test_head1x1.cpp @@ -28,8 +28,10 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); + // Create layer1x1_params with active=true and groups=groups_1x1 for backward compatibility + nam::wavenet::Layer1x1Params layer1x1_params(true, groups_1x1); nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation_config, film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_Layer(layer_params); diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp index cee5b51..4494781 100644 --- a/tools/test/test_wavenet/test_layer.cpp +++ b/tools/test/test_wavenet/test_layer.cpp @@ -28,8 +28,10 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); + // Create layer1x1_params with active=true and groups=groups_1x1 for backward compatibility + nam::wavenet::Layer1x1Params layer1x1_params(true, groups_1x1); nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation_config, film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_Layer(layer_params); diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp index aa6c5a5..b8445c9 100644 --- a/tools/test/test_wavenet/test_layer_array.cpp +++ b/tools/test/test_wavenet/test_layer_array.cpp @@ -19,12 +19,15 @@ static nam::wavenet::_FiLMParams make_default_film_params() } // Helper function to create a LayerArray with default FiLM parameters -static nam::wavenet::_LayerArray make_layer_array( - const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, - const int kernel_size, const std::vector& dilations, const nam::activations::ActivationConfig& activation_config, - const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, - const nam::activations::ActivationConfig& secondary_activation_config) +static nam::wavenet::_LayerArray make_layer_array(const int input_size, const int condition_size, const int head_size, + const int channels, const int bottleneck, const int kernel_size, + const std::vector& dilations, + const nam::activations::ActivationConfig& activation_config, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, + const int groups_input, const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, + const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); // Duplicate activation_config, gating_mode, and secondary_activation_config for each layer (based on dilations size) @@ -35,9 +38,9 @@ static nam::wavenet::_LayerArray make_layer_array( std::vector dilations_copy = dilations; // Make a copy since we need to move it nam::wavenet::LayerArrayParams params( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_copy), - std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, groups_1x1, - head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, + layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_LayerArray(params); } // Test layer array construction and basic processing @@ -55,12 +58,12 @@ void test_layer_array_basic() const bool head_bias = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); const int numFrames = 4; layer_array.SetMaxBufferSize(numFrames); @@ -115,12 +118,12 @@ void test_layer_array_receptive_field() const bool head_bias = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); long rf = layer_array.get_receptive_field(); // Expected: sum of dilation * (kernel_size - 1) for each layer @@ -147,12 +150,12 @@ void test_layer_array_with_head_input() const bool head_bias = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); const int numFrames = 2; layer_array.SetMaxBufferSize(numFrames); @@ -188,7 +191,7 @@ void test_layer_array_different_activations() const bool head_bias = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Create different activation configs for each layer @@ -219,7 +222,7 @@ void test_layer_array_different_activations() auto film_params = make_default_film_params(); nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), std::move(activation_configs), std::move(gating_modes), - head_bias, groups, groups_input_mixin, groups_1x1, head1x1_params, + head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); nam::wavenet::_LayerArray layer_array(params); @@ -301,9 +304,9 @@ void test_layer_array_different_activations() dilations_all_relu.size(), nam::activations::ActivationConfig{}); nam::wavenet::LayerArrayParams params_all_relu( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_all_relu), - std::move(all_relu_configs), std::move(all_none_gating_modes), head_bias, groups, groups_input_mixin, groups_1x1, - head1x1_params, std::move(all_empty_secondary_configs), film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + std::move(all_relu_configs), std::move(all_none_gating_modes), head_bias, groups, groups_input_mixin, + layer1x1_params, head1x1_params, std::move(all_empty_secondary_configs), film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); nam::wavenet::_LayerArray layer_array_all_relu(params_all_relu); layer_array_all_relu.SetMaxBufferSize(numFrames); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index 56f7ef8..7fbd5a6 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -27,25 +27,29 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const int kernel_size, const int dilation, const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, const int groups_input, - const int groups_input_mixin, const int groups_1x1, + const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, const nam::wavenet::Head1x1Params& head1x1_params, const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation_config, film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_Layer(layer_params); } // Helper function to create a LayerArray with default FiLM parameters -static nam::wavenet::_LayerArray make_layer_array( - const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, - const int kernel_size, const std::vector& dilations, const nam::activations::ActivationConfig& activation_config, - const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, - const nam::activations::ActivationConfig& secondary_activation_config) +static nam::wavenet::_LayerArray make_layer_array(const int input_size, const int condition_size, const int head_size, + const int channels, const int bottleneck, const int kernel_size, + const std::vector& dilations, + const nam::activations::ActivationConfig& activation_config, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, + const int groups_input, const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, + const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); // Duplicate activation_config, gating_mode, and secondary_activation_config for each layer (based on dilations size) @@ -56,9 +60,9 @@ static nam::wavenet::_LayerArray make_layer_array( std::vector dilations_copy = dilations; // Make a copy since we need to move it nam::wavenet::LayerArrayParams params( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_copy), - std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, groups_1x1, - head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, + layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_LayerArray(params); } @@ -67,7 +71,8 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, const int kernel_size, std::vector&& dilations, const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const int groups_input_mixin, const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); @@ -78,27 +83,25 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), std::move(activation_configs), std::move(gating_modes), - head_bias, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); } // Helper function to create a Layer with all FiLMs active -static nam::wavenet::_Layer make_layer_all_films(const int condition_size, const int channels, const int bottleneck, - const int kernel_size, const int dilation, - const nam::activations::ActivationConfig& activation_config, - const nam::wavenet::GatingMode gating_mode, const int groups_input, - const int groups_input_mixin, const int groups_1x1, - const nam::wavenet::Head1x1Params& head1x1_params, - const nam::activations::ActivationConfig& secondary_activation_config, - const bool shift) +static nam::wavenet::_Layer make_layer_all_films( + const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation, + const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, + const int groups_input, const int groups_input_mixin, const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, + const nam::activations::ActivationConfig& secondary_activation_config, const bool shift) { nam::wavenet::_FiLMParams film_params(true, shift); // Don't activate head1x1_post_film if head1x1 is not active (validation will fail) nam::wavenet::_FiLMParams head1x1_post_film_params = head1x1_params.active ? film_params : nam::wavenet::_FiLMParams(false, false); nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation_config, film_params, film_params, film_params, film_params, film_params, film_params, film_params, head1x1_post_film_params); return nam::wavenet::_Layer(layer_params); @@ -351,12 +354,12 @@ void test_layer_process_realtime_safe() const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer = make_layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, - groups_input_mixin, groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); // Set weights std::vector weights{1.0f, 0.0f, // Conv (weight, bias) @@ -409,12 +412,12 @@ void test_layer_bottleneck_process_realtime_safe() const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer = make_layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, - groups_input_mixin, groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); // Set weights for bottleneck != channels // Conv: (channels, bottleneck, kernelSize=1) = (4, 2, 1) + bias @@ -497,12 +500,12 @@ void test_layer_grouped_process_realtime_safe() const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; const int groups_input = 2; // groups_input > 1 const int groups_input_mixin = 1; - const int groups_1x1 = 2; // 1x1 is also grouped + nam::wavenet::Layer1x1Params layer1x1_params(true, 2); // layer1x1 is grouped nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer = make_layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, - groups_input_mixin, groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); // Set weights for grouped convolution // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels @@ -540,7 +543,7 @@ void test_layer_grouped_process_realtime_safe() weights.push_back(1.0f); weights.push_back(1.0f); weights.push_back(1.0f); - // 1x1: grouped with groups_1x1=2, channels=4 + // layer1x1: grouped with groups=2, channels=4 // Each group processes 2 channels: Group 0 (channels 0-1), Group 1 (channels 2-3) // Weight layout: for each group g, for each (out_ch, in_ch) in that group // Group 0: identity matrix for channels 0-1 (2x2) @@ -608,11 +611,11 @@ static void test_layer_all_films_realtime_safe_impl(const bool shift) const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer = make_layer_all_films(condition_size, channels, bottleneck, kernel_size, dilation, activation, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}, shift); // Set weights @@ -716,7 +719,7 @@ void test_layer_post_activation_film_gated_realtime_safe() const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::GATED; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Create FiLM params with activation_post_film enabled @@ -725,7 +728,7 @@ void test_layer_post_activation_film_gated_realtime_safe() nam::wavenet::LayerParams layer_params( condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, - groups_input_mixin, groups_1x1, head1x1_params, secondary_activation, inactive_film, // conv_pre_film + groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation, inactive_film, // conv_pre_film inactive_film, // conv_post_film inactive_film, // input_mixin_pre_film inactive_film, // input_mixin_post_film @@ -823,7 +826,7 @@ void test_layer_post_activation_film_blended_realtime_safe() const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::BLENDED; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Create FiLM params with activation_post_film enabled @@ -832,7 +835,7 @@ void test_layer_post_activation_film_blended_realtime_safe() nam::wavenet::LayerParams layer_params( condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, groups_input, - groups_input_mixin, groups_1x1, head1x1_params, secondary_activation, inactive_film, // conv_pre_film + groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation, inactive_film, // conv_pre_film inactive_film, // conv_post_film inactive_film, // input_mixin_pre_film inactive_film, // input_mixin_post_film @@ -929,12 +932,12 @@ void test_layer_array_process_realtime_safe() const bool head_bias = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, gating_mode, head_bias, groups, groups_input_mixin, - groups_1x1, head1x1_params, nam::activations::ActivationConfig{}); + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{}); // Set weights: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) std::vector weights{1.0f, // Rechannel @@ -1001,17 +1004,17 @@ void test_process_realtime_safe() // First layer array std::vector dilations1{1}; const int bottleneck = channels; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); layer_array_params.push_back(make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations1), activation, gating_mode, - head_bias, groups, groups_input_mixin, groups_1x1, + head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{})); // Second layer array (head_size of first must match channels of second) std::vector dilations2{1}; layer_array_params.push_back(make_layer_array_params(head_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations2), activation, gating_mode, - head_bias, groups, groups_input_mixin, groups_1x1, + head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{})); // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) @@ -1077,7 +1080,7 @@ void test_process_3in_2out_realtime_safe() const bool with_head = false; const int groups = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); @@ -1085,7 +1088,7 @@ void test_process_3in_2out_realtime_safe() std::vector dilations1{1}; layer_array_params.push_back(make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations1), activation, gating_mode, - head_bias, groups, groups_input_mixin, groups_1x1, + head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig{})); // Calculate weights: diff --git a/tools/test/test_wavenet_configurable_gating.cpp b/tools/test/test_wavenet_configurable_gating.cpp index b304248..77870d7 100644 --- a/tools/test/test_wavenet_configurable_gating.cpp +++ b/tools/test/test_wavenet_configurable_gating.cpp @@ -20,13 +20,14 @@ static nam::wavenet::_Layer make_layer(const int condition_size, const int chann const int kernel_size, const int dilation, const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, const int groups_input, - const int groups_input_mixin, const int groups_1x1, + const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, const nam::wavenet::Head1x1Params& head1x1_params, const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); nam::wavenet::LayerParams layer_params(condition_size, channels, bottleneck, kernel_size, dilation, activation_config, - gating_mode, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + gating_mode, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, secondary_activation_config, film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_Layer(layer_params); @@ -37,7 +38,8 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, const int kernel_size, std::vector&& dilations, const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const int groups_input_mixin, const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); @@ -48,18 +50,21 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), std::move(activation_configs), std::move(gating_modes), - head_bias, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); } // Helper function to create a LayerArray with default FiLM parameters -static nam::wavenet::_LayerArray make_layer_array( - const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, - const int kernel_size, const std::vector& dilations, const nam::activations::ActivationConfig& activation_config, - const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_input_mixin, const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, - const nam::activations::ActivationConfig& secondary_activation_config) +static nam::wavenet::_LayerArray make_layer_array(const int input_size, const int condition_size, const int head_size, + const int channels, const int bottleneck, const int kernel_size, + const std::vector& dilations, + const nam::activations::ActivationConfig& activation_config, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, + const int groups_input, const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, + const nam::wavenet::Head1x1Params& head1x1_params, + const nam::activations::ActivationConfig& secondary_activation_config) { auto film_params = make_default_film_params(); // Duplicate activation_config, gating_mode, and secondary_activation_config for each layer (based on dilations size) @@ -70,9 +75,9 @@ static nam::wavenet::_LayerArray make_layer_array( std::vector dilations_copy = dilations; // Make a copy since we need to move it nam::wavenet::LayerArrayParams params( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations_copy), - std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, groups_1x1, - head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, - film_params, film_params, film_params, film_params); + std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, + layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); return nam::wavenet::_LayerArray(params); } @@ -90,7 +95,7 @@ class TestConfigurableGating const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Tanh); const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Test different gating activation configurations @@ -102,7 +107,7 @@ class TestConfigurableGating for (const auto& gating_act : gating_activations) { auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::GATED, groups_input, groups_input_mixin, groups_1x1, + nam::wavenet::GatingMode::GATED, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, gating_act); // Verify that the layer was created successfully and has correct dimensions @@ -121,7 +126,7 @@ class TestConfigurableGating const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Tanh); const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Test different blending activation configurations @@ -133,7 +138,7 @@ class TestConfigurableGating for (const auto& blending_act : blending_activations) { auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::BLENDED, groups_input, groups_input_mixin, groups_1x1, + nam::wavenet::GatingMode::BLENDED, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, blending_act); // Verify that the layer was created successfully and has correct dimensions @@ -156,7 +161,7 @@ class TestConfigurableGating const bool head_bias = false; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Test with different gating activations @@ -164,7 +169,7 @@ class TestConfigurableGating auto params_gated = make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::vector{1, 2}, activation, nam::wavenet::GatingMode::GATED, head_bias, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, tanh_config); + groups_input, groups_input_mixin, layer1x1_params, head1x1_params, tanh_config); assert(params_gated.gating_modes.size() == 2); // Two layers (dilations = {1, 2}) assert(params_gated.gating_modes[0] == nam::wavenet::GatingMode::GATED); @@ -178,7 +183,7 @@ class TestConfigurableGating auto params_blended = make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::vector{1, 2}, activation, nam::wavenet::GatingMode::BLENDED, head_bias, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, relu_config); + groups_input, groups_input_mixin, layer1x1_params, head1x1_params, relu_config); assert(params_blended.gating_modes.size() == 2); // Two layers (dilations = {1, 2}) assert(params_blended.gating_modes[0] == nam::wavenet::GatingMode::BLENDED); @@ -202,12 +207,12 @@ class TestConfigurableGating const bool head_bias = false; const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = make_layer_array( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::vector{1}, activation, - nam::wavenet::GatingMode::GATED, head_bias, groups_input, groups_input_mixin, groups_1x1, head1x1_params, + nam::wavenet::GatingMode::GATED, head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU)); // Verify that layers were created correctly by checking receptive field @@ -266,23 +271,23 @@ class TestConfigurableGating const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Tanh); const int groups_input = 1; const int groups_input_mixin = 1; - const int groups_1x1 = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Create layers with different gating activations auto layer_sigmoid = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, nam::wavenet::GatingMode::GATED, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, + groups_input, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Sigmoid)); auto layer_tanh = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, nam::wavenet::GatingMode::GATED, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, + groups_input, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Tanh)); auto layer_relu = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, nam::wavenet::GatingMode::GATED, - groups_input, groups_input_mixin, groups_1x1, head1x1_params, + groups_input, groups_input_mixin, layer1x1_params, head1x1_params, nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU)); // Set max buffer size for all layers From 78d57c5a84a2527ddbf392e937ba1bae94aa69fe Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Wed, 28 Jan 2026 16:33:40 -0800 Subject: [PATCH 4/5] Fix bug --- NAM/wavenet.cpp | 1 - example_models/wavenet_a2_max.nam | 84 ++++++++++++++++++++++- tools/test/test_wavenet/test_layer1x1.cpp | 4 +- 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 3d56859..c07bba1 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -202,7 +202,6 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma { this->_head1x1->process_(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); } - this->_head1x1->process(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); if (this->_head1x1_post_film) { Eigen::MatrixXf& head1x1_output = this->_head1x1->GetOutput(); diff --git a/example_models/wavenet_a2_max.nam b/example_models/wavenet_a2_max.nam index 42d0a0a..70983a1 100644 --- a/example_models/wavenet_a2_max.nam +++ b/example_models/wavenet_a2_max.nam @@ -1216,7 +1216,55 @@ -0.8012907507877394, 0.3713605309625707, 0.08893172296428986, - 0.9556850589040935 + 0.9556850589040935, + -0.28265231757536413, + -0.20372071451125384, + -0.6203828756778409, + -0.7556805618254725, + 0.6960663769273621, + -0.09056526285896571, + 0.325537476123956, + 0.2834089344664352, + 0.19429191903909016, + -0.9572850905272587, + 0.5735891809092335, + -0.5128622056719527, + -0.7481522293839142, + 0.12915595181592665, + -0.8627796943512882, + 0.5303147517771689, + -0.585685259306683, + -0.5680972961626505, + 0.7393908535390894, + -0.34288089313553916, + -0.7048916401171126, + 0.8010620712635164, + -0.9943288970399673, + 0.7168122527603527, + -0.710624039358984, + -0.7400157371131264, + -0.49869160654375233, + -0.6510057581972131, + 0.3221152851946336, + -0.9484397004276033, + -0.9702793455385823, + 0.5799693284695078, + -0.5241367878190739, + -0.3524570760759551, + -0.6515075971876456, + -0.895201964277647, + 0.483436113908299, + 0.05217105319573423, + 0.4913305500679914, + -0.04750806915349948, + 0.5560340786284055, + 0.026475915218383328, + -0.781891979992309, + 0.007677379571642717, + 0.8908312859402125, + -0.9132699262016566, + 0.5664539919607592, + 0.7339618155196765 ], "sample_rate": 48000 }, @@ -2080,7 +2128,39 @@ 0.4332066519835822, 0.01774403013902348, -0.4531502065734516, - 0.6694478911533888 + 0.6694478911533888, + 0.9604892652067509, + -0.5125381878497233, + 0.10253015376093932, + -0.23282797350570017, + 0.8437362998631237, + 0.016481783185787968, + 0.7586525102929522, + 0.7280538688571763, + -0.44750519435591807, + 0.5800123640062269, + -0.17011515289730972, + 0.8684967873650402, + 0.015475351619312905, + 0.6410989463711567, + -0.4343220334343467, + -0.4028883004564656, + 0.17387544482832218, + 0.9978046664586775, + -0.020719306688598005, + -0.7028091632344218, + 0.07716115540774737, + -0.30975211661398494, + 0.10383483414162198, + 0.08686012591734604, + -0.08931076626699785, + -0.35644529954192516, + -0.6226952525857892, + 0.3949968552411427, + 0.14359528396980226, + -0.5328751078842138, + 0.5510889501985177, + -0.9127054018053962 ], "sample_rate": 48000 } \ No newline at end of file diff --git a/tools/test/test_wavenet/test_layer1x1.cpp b/tools/test/test_wavenet/test_layer1x1.cpp index d18f967..889f210 100644 --- a/tools/test/test_wavenet/test_layer1x1.cpp +++ b/tools/test/test_wavenet/test_layer1x1.cpp @@ -254,8 +254,8 @@ void test_layer1x1_post_film_active() 1.0f, 0.0f, 0.0f, 1.0f, // weights 0.0f, 0.0f, // bias // layer1x1_post_film: (conditionSize, 2*channels) + bias (with shift) - 1.0f, 1.0f, 1.0f, 1.0f, // scale weights - 0.0f, 0.0f, 0.0f, 0.0f, // shift weights + 1.0f, 1.0f, // scale weights + 0.0f, 0.0f, // shift weights 0.0f, 0.0f, 0.0f, 0.0f // bias }; From bbf6a8da99f0af645e0feafa4ce012a8de0f1185 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Wed, 28 Jan 2026 16:54:14 -0800 Subject: [PATCH 5/5] Change .nam key from head_1x1 to head1x1 --- NAM/wavenet.cpp | 4 ++-- example_models/wavenet_a2_max.nam | 6 +++--- generate_weights_a2.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index c07bba1..7d9b5d0 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -774,9 +774,9 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st bool head1x1_active = false; int head1x1_out_channels = channels; int head1x1_groups = 1; - if (layer_config.find("head_1x1") != layer_config.end()) + if (layer_config.find("head1x1") != layer_config.end()) { - const auto& head1x1_config = layer_config["head_1x1"]; + const auto& head1x1_config = layer_config["head1x1"]; head1x1_active = head1x1_config["active"]; head1x1_out_channels = head1x1_config["out_channels"]; head1x1_groups = head1x1_config["groups"]; diff --git a/example_models/wavenet_a2_max.nam b/example_models/wavenet_a2_max.nam index 70983a1..6ee3b0f 100644 --- a/example_models/wavenet_a2_max.nam +++ b/example_models/wavenet_a2_max.nam @@ -57,7 +57,7 @@ "head_bias": false, "groups_input": 3, "groups_input_mixin": 1, - "head_1x1": { + "head1x1": { "active": true, "out_channels": 6, "groups": 3 @@ -147,7 +147,7 @@ "head_bias": false, "groups_input": 1, "groups_input_mixin": 1, - "head_1x1": { + "head1x1": { "active": true, "out_channels": 4, "groups": 2 @@ -1287,7 +1287,7 @@ "head_bias": true, "groups_input": 1, "groups_input_mixin": 4, - "head_1x1": { + "head1x1": { "active": true, "out_channels": 4, "groups": 2 diff --git a/generate_weights_a2.py b/generate_weights_a2.py index 0bb9962..bf091c4 100644 --- a/generate_weights_a2.py +++ b/generate_weights_a2.py @@ -135,7 +135,7 @@ def count_layer_weights(layer_config: Dict[str, Any], condition_size: int, layer ) # 4. Optional head1x1 weights - head1x1_config = layer_config.get("head_1x1") or layer_config.get("head1x1") + head1x1_config = layer_config.get("head1x1") if head1x1_config and head1x1_config.get("active", False): head1x1_out_channels = head1x1_config.get("out_channels", channels) head1x1_groups = head1x1_config.get("groups", 1) @@ -177,7 +177,7 @@ def count_layer_array_weights(layer_config: Dict[str, Any]) -> int: 1. Rechannel Conv1x1: (input_size, channels, bias=False) 2. Layers (one per dilation) 3. Head rechannel Conv1x1: (head_output_size, head_size, bias=head_bias) - where head_output_size = head_1x1.out_channels if head_1x1 active, else bottleneck + where head_output_size = head1x1.out_channels if head1x1 active, else bottleneck """ input_size = layer_config["input_size"] condition_size = layer_config["condition_size"] @@ -187,8 +187,8 @@ def count_layer_array_weights(layer_config: Dict[str, Any]) -> int: dilations = layer_config["dilations"] head_bias = layer_config.get("head_bias", False) - # Determine head output size: head_1x1.out_channels if active, else bottleneck - head1x1_config = layer_config.get("head_1x1") or layer_config.get("head1x1") + # Determine head output size: head1x1.out_channels if active, else bottleneck + head1x1_config = layer_config.get("head1x1") if head1x1_config and head1x1_config.get("active", False): head_output_size = head1x1_config.get("out_channels", channels) else: