diff --git a/pyproject.toml b/pyproject.toml
index 1de0410b..afbbe709 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,7 +69,7 @@ dependencies = [
 
 [tool.ruff]
 src = ["src"]
-select = ["E", "F", "W", "C9", "N8", "I"]
+lint.select = ["E", "F", "W", "C9", "N8", "I"]
 target-version = "py39"
 line-length = 120
diff --git a/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py b/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py
index f6ad39a4..988a8b8a 100644
--- a/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py
+++ b/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py
@@ -95,11 +95,12 @@ class SdDirectPreferenceOptimizationLoraConfig(BasePipelineConfig):
 
     text_encoder_learning_rate: float | None = None
     """The learning rate to use for the text encoder model. If set, this overrides the optimizer's default learning
-    rate.
+    rate. Set to null or 0 to use the optimizer's default learning rate.
     """
 
     unet_learning_rate: float | None = None
     """The learning rate to use for the UNet model. If set, this overrides the optimizer's default learning rate.
+    Set to null or 0 to use the optimizer's default learning rate.
     """
 
     lr_scheduler: Literal[
@@ -194,7 +195,7 @@ class SdDirectPreferenceOptimizationLoraConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Max gradient norm for clipping. Set to None for no clipping.
+    """Max gradient norm for clipping. Set to null or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/pipelines/flux/lora/config.py b/src/invoke_training/pipelines/flux/lora/config.py
index e631fb71..eaa67405 100644
--- a/src/invoke_training/pipelines/flux/lora/config.py
+++ b/src/invoke_training/pipelines/flux/lora/config.py
@@ -52,12 +52,12 @@ class FluxLoraConfig(BasePipelineConfig):
 
     text_encoder_learning_rate: float | None = 1e-4
     """The learning rate to use for the text encoder model. If set, this overrides the optimizer's default learning
-    rate.
+    rate. Set to null or 0 to use the optimizer's default learning rate.
     """
 
     transformer_learning_rate: float | None = 1e-4
     """The learning rate to use for the transformer model. If set, this overrides the optimizer's default learning
-    rate.
+    rate. Set to null or 0 to use the optimizer's default learning rate.
     """
 
     lr_scheduler: Literal[
@@ -176,7 +176,7 @@ class FluxLoraConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Max gradient norm for clipping. Set to None for no clipping.
+    """Max gradient norm for clipping. Set to null or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/pipelines/stable_diffusion/lora/config.py b/src/invoke_training/pipelines/stable_diffusion/lora/config.py
index 2a472506..0e9c08ce 100644
--- a/src/invoke_training/pipelines/stable_diffusion/lora/config.py
+++ b/src/invoke_training/pipelines/stable_diffusion/lora/config.py
@@ -59,11 +59,12 @@ class SdLoraConfig(BasePipelineConfig):
 
     text_encoder_learning_rate: float | None = None
     """The learning rate to use for the text encoder model. If set, this overrides the optimizer's default learning
-    rate.
+    rate. Set to null or 0 to use the optimizer's default learning rate.
     """
 
     unet_learning_rate: float | None = None
     """The learning rate to use for the UNet model. If set, this overrides the optimizer's default learning rate.
+    Set to null or 0 to use the optimizer's default learning rate.
     """
 
     lr_scheduler: Literal[
@@ -188,7 +189,7 @@ class SdLoraConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Max gradient norm for clipping. Set to None for no clipping.
+    """Max gradient norm for clipping. Set to null or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py b/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py
index 4b763b84..69295d5a 100644
--- a/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py
+++ b/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py
@@ -160,7 +160,7 @@ class SdTextualInversionConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Maximum gradient norm for gradient clipping. Set to `None` for no clipping.
+    """Maximum gradient norm for gradient clipping. Set to `null` or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/pipelines/stable_diffusion_xl/finetune/config.py b/src/invoke_training/pipelines/stable_diffusion_xl/finetune/config.py
index 08dbfce8..6230ca82 100644
--- a/src/invoke_training/pipelines/stable_diffusion_xl/finetune/config.py
+++ b/src/invoke_training/pipelines/stable_diffusion_xl/finetune/config.py
@@ -125,7 +125,7 @@ class SdxlFinetuneConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Max gradient norm for clipping. Set to None for no clipping.
+    """Max gradient norm for clipping. Set to null or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py b/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py
index 125db57a..390e8f90 100644
--- a/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py
+++ b/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py
@@ -59,11 +59,12 @@ class SdxlLoraConfig(BasePipelineConfig):
 
     text_encoder_learning_rate: float | None = None
     """The learning rate to use for the text encoder model. If set, this overrides the optimizer's default learning
-    rate.
+    rate. Set to null or 0 to use the optimizer's default learning rate.
     """
 
     unet_learning_rate: float | None = None
     """The learning rate to use for the UNet model. If set, this overrides the optimizer's default learning rate.
+    Set to null or 0 to use the optimizer's default learning rate.
     """
 
     lr_scheduler: Literal[
@@ -188,7 +189,7 @@ class SdxlLoraConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Max gradient norm for clipping. Set to None for no clipping.
+    """Max gradient norm for clipping. Set to null or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py b/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py
index d348e493..a2cc6876 100644
--- a/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py
+++ b/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py
@@ -84,16 +84,18 @@ class SdxlLoraAndTextualInversionConfig(BasePipelineConfig):
 
     optimizer: AdamOptimizerConfig | ProdigyOptimizerConfig = AdamOptimizerConfig()
 
-    text_encoder_learning_rate: float = 1e-5
-    """The learning rate to use for the text encoder model.
+    text_encoder_learning_rate: float | None = 1e-5
+    """The learning rate to use for the text encoder model. Set to null or 0 to use the optimizer's default learning
+    rate.
     """
 
-    unet_learning_rate: float = 1e-4
-    """The learning rate to use for the UNet model.
+    unet_learning_rate: float | None = 1e-4
+    """The learning rate to use for the UNet model. Set to null or 0 to use the optimizer's default learning rate.
     """
 
-    textual_inversion_learning_rate: float = 1e-3
-    """The learning rate to use for textual inversion training of the embeddings.
+    textual_inversion_learning_rate: float | None = 1e-3
+    """The learning rate to use for textual inversion training of the embeddings. Set to null or 0 to use the
+    optimizer's default learning rate.
     """
 
     lr_scheduler: Literal[
@@ -187,7 +189,7 @@ class SdxlLoraAndTextualInversionConfig(BasePipelineConfig):
     """
 
     max_grad_norm: float | None = None
-    """Max gradient norm for clipping. Set to None for no clipping.
+    """Max gradient norm for clipping. Set to null or 0 for no clipping.
     """
 
     validation_prompts: list[str] = []
diff --git a/src/invoke_training/ui/config_groups/flux_lora_config_group.py b/src/invoke_training/ui/config_groups/flux_lora_config_group.py
index 69b6c93f..b9b49819 100644
--- a/src/invoke_training/ui/config_groups/flux_lora_config_group.py
+++ b/src/invoke_training/ui/config_groups/flux_lora_config_group.py
@@ -67,7 +67,7 @@ def __init__(self):
         with gr.Row():
             self.transformer_learning_rate = gr.Number(
                 label="Transformer Learning Rate",
-                info="The transformer learning rate. If None, then it is inherited from the base optimizer "
+                info="The transformer learning rate. Set to 0 or leave empty to inherit from the base optimizer "
                 "learning rate.",
                 interactive=True,
             )
@@ -103,7 +103,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -313,13 +313,9 @@ def safe_pop(component, default=None):
         # Set basic properties
        new_config.model = safe_pop(self.model, new_config.model)
         new_config.train_transformer = safe_pop(self.train_transformer, new_config.train_transformer)
-        new_config.train_text_encoder = safe_pop(self.train_text_encoder, new_config.train_text_encoder)
-        new_config.transformer_learning_rate = safe_pop(
-            self.transformer_learning_rate, new_config.transformer_learning_rate
-        )
-        new_config.text_encoder_learning_rate = safe_pop(
-            self.text_encoder_learning_rate, new_config.text_encoder_learning_rate
-        )
+        # Note: train_text_encoder and text_encoder_learning_rate are not supported for Flux LoRA
+        transformer_lr_value = safe_pop(self.transformer_learning_rate, new_config.transformer_learning_rate)
+        new_config.transformer_learning_rate = None if transformer_lr_value == 0 else transformer_lr_value
         new_config.gradient_accumulation_steps = safe_pop(
             self.gradient_accumulation_steps, new_config.gradient_accumulation_steps
         )
@@ -330,7 +326,8 @@ def safe_pop(component, default=None):
         new_config.lora_rank_dim = safe_pop(self.lora_rank_dim, new_config.lora_rank_dim)
         new_config.min_snr_gamma = safe_pop(self.min_snr_gamma, new_config.min_snr_gamma)
-        new_config.max_grad_norm = safe_pop(self.max_grad_norm, new_config.max_grad_norm)
+        max_grad_norm_value = safe_pop(self.max_grad_norm, new_config.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = safe_pop(self.train_batch_size, new_config.train_batch_size)
         new_config.weight_dtype = safe_pop(self.weight_dtype, new_config.weight_dtype)
         new_config.mixed_precision = safe_pop(self.mixed_precision, new_config.mixed_precision)
diff --git a/src/invoke_training/ui/config_groups/sd_lora_config_group.py b/src/invoke_training/ui/config_groups/sd_lora_config_group.py
index e77a4048..e3ad87cd 100644
--- a/src/invoke_training/ui/config_groups/sd_lora_config_group.py
+++ b/src/invoke_training/ui/config_groups/sd_lora_config_group.py
@@ -109,12 +109,13 @@ def __init__(self):
         with gr.Row():
             self.unet_learning_rate = gr.Number(
                 label="UNet Learning Rate",
-                info="The UNet learning rate. If None, then it is inherited from the base optimizer learning rate.",
+                info="The UNet learning rate. Set to 0 or leave empty to inherit from the base optimizer "
+                "learning rate.",
                 interactive=True,
             )
             self.text_encoder_learning_rate = gr.Number(
                 label="Text Encoder Learning Rate",
-                info="The text encoder learning rate. If None, then it is inherited from the base optimizer "
+                info="The text encoder learning rate. Set to 0 or leave empty to inherit from the base optimizer "
                 "learning rate.",
                 interactive=True,
             )
@@ -153,7 +154,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -223,13 +224,16 @@ def update_config_with_ui_component_data(
         new_config.hf_variant = ui_data.pop(self.hf_variant) or None
         new_config.max_checkpoints = ui_data.pop(self.max_checkpoints)
         new_config.train_unet = ui_data.pop(self.train_unet)
-        new_config.unet_learning_rate = ui_data.pop(self.unet_learning_rate)
+        unet_lr_value = ui_data.pop(self.unet_learning_rate)
+        new_config.unet_learning_rate = None if unet_lr_value == 0 else unet_lr_value
         new_config.train_text_encoder = ui_data.pop(self.train_text_encoder)
-        new_config.text_encoder_learning_rate = ui_data.pop(self.text_encoder_learning_rate)
+        text_encoder_lr_value = ui_data.pop(self.text_encoder_learning_rate)
+        new_config.text_encoder_learning_rate = None if text_encoder_lr_value == 0 else text_encoder_lr_value
         new_config.lr_scheduler = ui_data.pop(self.lr_scheduler)
         new_config.lr_warmup_steps = ui_data.pop(self.lr_warmup_steps)
         new_config.use_masks = ui_data.pop(self.use_masks)
-        new_config.max_grad_norm = ui_data.pop(self.max_grad_norm)
+        max_grad_norm_value = ui_data.pop(self.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = ui_data.pop(self.train_batch_size)
         new_config.cache_text_encoder_outputs = ui_data.pop(self.cache_text_encoder_outputs)
         new_config.cache_vae_outputs = ui_data.pop(self.cache_vae_outputs)
diff --git a/src/invoke_training/ui/config_groups/sd_textual_inversion_config_group.py b/src/invoke_training/ui/config_groups/sd_textual_inversion_config_group.py
index 55fa9dd8..484159d6 100644
--- a/src/invoke_training/ui/config_groups/sd_textual_inversion_config_group.py
+++ b/src/invoke_training/ui/config_groups/sd_textual_inversion_config_group.py
@@ -154,7 +154,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -230,7 +230,8 @@ def update_config_with_ui_component_data(
         new_config.lr_scheduler = ui_data.pop(self.lr_scheduler)
         new_config.lr_warmup_steps = ui_data.pop(self.lr_warmup_steps)
         new_config.use_masks = ui_data.pop(self.use_masks)
-        new_config.max_grad_norm = ui_data.pop(self.max_grad_norm)
+        max_grad_norm_value = ui_data.pop(self.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = ui_data.pop(self.train_batch_size)
         new_config.cache_vae_outputs = ui_data.pop(self.cache_vae_outputs)
         new_config.enable_cpu_offload_during_validation = ui_data.pop(self.enable_cpu_offload_during_validation)
diff --git a/src/invoke_training/ui/config_groups/sdxl_finetune_config_group.py b/src/invoke_training/ui/config_groups/sdxl_finetune_config_group.py
index 363c318e..572151c2 100644
--- a/src/invoke_training/ui/config_groups/sdxl_finetune_config_group.py
+++ b/src/invoke_training/ui/config_groups/sdxl_finetune_config_group.py
@@ -151,7 +151,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -227,7 +227,8 @@ def update_config_with_ui_component_data(
         new_config.lr_warmup_steps = ui_data.pop(self.lr_warmup_steps)
         new_config.use_masks = ui_data.pop(self.use_masks)
         new_config.min_snr_gamma = ui_data.pop(self.min_snr_gamma)
-        new_config.max_grad_norm = ui_data.pop(self.max_grad_norm)
+        max_grad_norm_value = ui_data.pop(self.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = ui_data.pop(self.train_batch_size)
         new_config.cache_text_encoder_outputs = ui_data.pop(self.cache_text_encoder_outputs)
         new_config.cache_vae_outputs = ui_data.pop(self.cache_vae_outputs)
diff --git a/src/invoke_training/ui/config_groups/sdxl_lora_and_textual_inversion_config_group.py b/src/invoke_training/ui/config_groups/sdxl_lora_and_textual_inversion_config_group.py
index 657c8ebb..2bb041c0 100644
--- a/src/invoke_training/ui/config_groups/sdxl_lora_and_textual_inversion_config_group.py
+++ b/src/invoke_training/ui/config_groups/sdxl_lora_and_textual_inversion_config_group.py
@@ -145,10 +145,23 @@ def __init__(self):
             self.train_text_encoder = gr.Checkbox(label="Train Text Encoder", interactive=True)
             self.train_ti = gr.Checkbox(label="Train Textual Inversion Token", scale=2, interactive=True)
         with gr.Row():
-            self.unet_learning_rate = gr.Number(label="UNet Learning Rate", interactive=True)
-            self.text_encoder_learning_rate = gr.Number(label="Text Encoder Learning Rate", interactive=True)
+            self.unet_learning_rate = gr.Number(
+                label="UNet Learning Rate",
+                info="The UNet learning rate. Set to 0 or leave empty to inherit from the base optimizer "
+                "learning rate.",
+                interactive=True,
+            )
+            self.text_encoder_learning_rate = gr.Number(
+                label="Text Encoder Learning Rate",
+                info="The text encoder learning rate. Set to 0 or leave empty to inherit from the base optimizer "
+                "learning rate.",
+                interactive=True,
+            )
             self.textual_inversion_learning_rate = gr.Number(
-                label="Textual Inversion Learning Rate", interactive=True
+                label="Textual Inversion Learning Rate",
+                info="The textual inversion learning rate. Set to 0 or leave empty to inherit from the base "
+                "optimizer learning rate.",
+                interactive=True,
             )
             self.ti_train_steps_ratio = gr.Number(label="Textual Inversion Train Steps Ratio", interactive=True)
         with gr.Row():
@@ -186,7 +199,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -273,14 +286,18 @@ def update_config_with_ui_component_data(
         new_config.train_unet = ui_data.pop(self.train_unet)
         new_config.train_text_encoder = ui_data.pop(self.train_text_encoder)
         new_config.train_ti = ui_data.pop(self.train_ti)
-        new_config.unet_learning_rate = ui_data.pop(self.unet_learning_rate)
-        new_config.text_encoder_learning_rate = ui_data.pop(self.text_encoder_learning_rate)
-        new_config.textual_inversion_learning_rate = ui_data.pop(self.textual_inversion_learning_rate)
+        unet_lr_value = ui_data.pop(self.unet_learning_rate)
+        new_config.unet_learning_rate = None if unet_lr_value == 0 else unet_lr_value
+        text_encoder_lr_value = ui_data.pop(self.text_encoder_learning_rate)
+        new_config.text_encoder_learning_rate = None if text_encoder_lr_value == 0 else text_encoder_lr_value
+        ti_lr_value = ui_data.pop(self.textual_inversion_learning_rate)
+        new_config.textual_inversion_learning_rate = None if ti_lr_value == 0 else ti_lr_value
         new_config.ti_train_steps_ratio = ui_data.pop(self.ti_train_steps_ratio)
         new_config.lr_scheduler = ui_data.pop(self.lr_scheduler)
         new_config.lr_warmup_steps = ui_data.pop(self.lr_warmup_steps)
         new_config.use_masks = ui_data.pop(self.use_masks)
-        new_config.max_grad_norm = ui_data.pop(self.max_grad_norm)
+        max_grad_norm_value = ui_data.pop(self.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = ui_data.pop(self.train_batch_size)
         new_config.cache_text_encoder_outputs = ui_data.pop(self.cache_text_encoder_outputs)
         new_config.cache_vae_outputs = ui_data.pop(self.cache_vae_outputs)
diff --git a/src/invoke_training/ui/config_groups/sdxl_lora_config_group.py b/src/invoke_training/ui/config_groups/sdxl_lora_config_group.py
index 70ad8bf2..be111193 100644
--- a/src/invoke_training/ui/config_groups/sdxl_lora_config_group.py
+++ b/src/invoke_training/ui/config_groups/sdxl_lora_config_group.py
@@ -115,12 +115,13 @@ def __init__(self):
         with gr.Row():
             self.unet_learning_rate = gr.Number(
                 label="UNet Learning Rate",
-                info="The UNet learning rate. If None, then it is inherited from the base optimizer learning rate.",
+                info="The UNet learning rate. Set to 0 or leave empty to inherit from the base optimizer "
+                "learning rate.",
                 interactive=True,
             )
             self.text_encoder_learning_rate = gr.Number(
                 label="Text Encoder Learning Rate",
-                info="The text encoder learning rate. If None, then it is inherited from the base optimizer "
+                info="The text encoder learning rate. Set to 0 or leave empty to inherit from the base optimizer "
                 "learning rate.",
                 interactive=True,
             )
@@ -159,7 +160,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -233,13 +234,16 @@ def update_config_with_ui_component_data(
         new_config.vae_model = ui_data.pop(self.vae_model) or None
         new_config.max_checkpoints = ui_data.pop(self.max_checkpoints)
         new_config.train_unet = ui_data.pop(self.train_unet)
-        new_config.unet_learning_rate = ui_data.pop(self.unet_learning_rate)
+        unet_lr_value = ui_data.pop(self.unet_learning_rate)
+        new_config.unet_learning_rate = None if unet_lr_value == 0 else unet_lr_value
         new_config.train_text_encoder = ui_data.pop(self.train_text_encoder)
-        new_config.text_encoder_learning_rate = ui_data.pop(self.text_encoder_learning_rate)
+        text_encoder_lr_value = ui_data.pop(self.text_encoder_learning_rate)
+        new_config.text_encoder_learning_rate = None if text_encoder_lr_value == 0 else text_encoder_lr_value
         new_config.lr_scheduler = ui_data.pop(self.lr_scheduler)
         new_config.lr_warmup_steps = ui_data.pop(self.lr_warmup_steps)
         new_config.use_masks = ui_data.pop(self.use_masks)
-        new_config.max_grad_norm = ui_data.pop(self.max_grad_norm)
+        max_grad_norm_value = ui_data.pop(self.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = ui_data.pop(self.train_batch_size)
         new_config.cache_text_encoder_outputs = ui_data.pop(self.cache_text_encoder_outputs)
         new_config.cache_vae_outputs = ui_data.pop(self.cache_vae_outputs)
diff --git a/src/invoke_training/ui/config_groups/sdxl_textual_inversion_config_group.py b/src/invoke_training/ui/config_groups/sdxl_textual_inversion_config_group.py
index 14610610..dcc477b3 100644
--- a/src/invoke_training/ui/config_groups/sdxl_textual_inversion_config_group.py
+++ b/src/invoke_training/ui/config_groups/sdxl_textual_inversion_config_group.py
@@ -160,7 +160,7 @@ def __init__(self):
             )
             self.max_grad_norm = gr.Number(
                 label="Max Gradient Norm",
-                info="Max gradient norm for clipping. Set to None for no clipping.",
+                info="Max gradient norm for clipping. Set to 0 or leave empty for no clipping (null).",
                 interactive=True,
             )
             self.train_batch_size = gr.Number(
@@ -238,7 +238,8 @@ def update_config_with_ui_component_data(
         new_config.lr_scheduler = ui_data.pop(self.lr_scheduler)
         new_config.lr_warmup_steps = ui_data.pop(self.lr_warmup_steps)
         new_config.use_masks = ui_data.pop(self.use_masks)
-        new_config.max_grad_norm = ui_data.pop(self.max_grad_norm)
+        max_grad_norm_value = ui_data.pop(self.max_grad_norm)
+        new_config.max_grad_norm = None if max_grad_norm_value == 0 else max_grad_norm_value
         new_config.train_batch_size = ui_data.pop(self.train_batch_size)
         new_config.cache_vae_outputs = ui_data.pop(self.cache_vae_outputs)
         new_config.enable_cpu_offload_during_validation = ui_data.pop(self.enable_cpu_offload_during_validation)
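
The UI config groups above repeat the same `None if value == 0 else value` normalization for every optional learning rate and for max_grad_norm. A minimal sketch of how that repetition could be captured in one place; the zero_to_none helper below is illustrative only and is not part of this diff:

def zero_to_none(value: float | None) -> float | None:
    # A 0 (or an empty Gradio Number field) means "not set": return None so the
    # config falls back to the optimizer's default learning rate / disables clipping.
    return None if value == 0 else value


if __name__ == "__main__":
    assert zero_to_none(0) is None      # empty/zero UI field -> unset
    assert zero_to_none(1e-4) == 1e-4   # explicit value passes through unchanged
    assert zero_to_none(None) is None   # already-unset value stays unset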