From 63696c71d37f8ed50404e82ceaf407ac12e01890 Mon Sep 17 00:00:00 2001
From: wangwl
Date: Mon, 22 Dec 2025 02:02:58 +0000
Subject: [PATCH 1/3] add SigLIP

---
 .../Classification/SigLIP/SigLIP/README.md    |  19 +
 .../SigLIP/SigLIP/modeling_gemma.py           | 170 +++++
 .../SigLIP/SigLIP/modeling_siglip.py          | 192 +++++
 .../SigLIP/SigLIP/processing_paligemma.py     | 123 ++++
 .../Classification/SigLIP/coverage.txt        |   3 +
 PyTorch/build-in/Classification/SigLIP/run    |   1 +
 .../build-in/Classification/SigLIP/siglip.py  | 334 +++++++++
 .../Classification/SigLIP/siglip_loss.jpg     | Bin 0 -> 36621 bytes
 .../Classification/SigLIP/siglip_loss.txt     |  29 +
 .../Classification/SigLIP/weloTrainStep.py    | 692 ++++++++++++++++++
 10 files changed, 1563 insertions(+)
 create mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/README.md
 create mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py
 create mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py
 create mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py
 create mode 100644 PyTorch/build-in/Classification/SigLIP/coverage.txt
 create mode 100644 PyTorch/build-in/Classification/SigLIP/run
 create mode 100644 PyTorch/build-in/Classification/SigLIP/siglip.py
 create mode 100644 PyTorch/build-in/Classification/SigLIP/siglip_loss.jpg
 create mode 100644 PyTorch/build-in/Classification/SigLIP/siglip_loss.txt
 create mode 100644 PyTorch/build-in/Classification/SigLIP/weloTrainStep.py

diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/README.md b/PyTorch/build-in/Classification/SigLIP/SigLIP/README.md
new file mode 100644
index 000000000..1bd26c877
--- /dev/null
+++ b/PyTorch/build-in/Classification/SigLIP/SigLIP/README.md
@@ -0,0 +1,19 @@
+# PaliGemma
+
+It combines the SigLIP vision encoder with the Gemma language model to create a vision-language model.
+
+
+SigLIP vision encoder
+https://huggingface.co/docs/transformers/en/model_doc/siglip
+
+Sigmoid Loss for Language Image Pre-Training
+https://arxiv.org/abs/2303.15343
+
+Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer
+
+
+It took around 20 hours to understand the model and follow the tutorial, but it was worth it: I can now implement multimodal language models from a research paper.
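+
+For reference, the pre-training objective from the paper above is a pairwise sigmoid loss over all
+image-text pairs in a batch, rather than the softmax contrastive loss used in CLIP. A minimal
+sketch of that loss (the function and argument names here are illustrative, not part of this repo;
+the paper additionally parameterizes the temperature as t = exp(t')):
+
+```python
+import torch
+import torch.nn.functional as F
+
+def pairwise_sigmoid_loss(img_emb, txt_emb, t, b):
+    """img_emb, txt_emb: (N, D) embeddings; t, b: learnable temperature and bias."""
+    n = img_emb.size(0)
+    img_emb = F.normalize(img_emb, dim=-1)
+    txt_emb = F.normalize(txt_emb, dim=-1)
+    logits = t * img_emb @ txt_emb.t() + b                    # (N, N) pair logits
+    labels = 2.0 * torch.eye(n, device=logits.device) - 1.0   # +1 on the diagonal (matching pairs), -1 elsewhere
+    return -F.logsigmoid(labels * logits).sum() / n           # normalized by batch size, as in the paper
+```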
+ +Credits to Umar Jamil -> https://www.youtube.com/watch?v=vAmKB7iPkWw \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py b/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py new file mode 100644 index 000000000..781cdd4b3 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py @@ -0,0 +1,170 @@ +import torch +from torch import nn +from typing import Optional, Tuple, List +from torch.nn import CrossEntropyLoss +import math +from modeling_siglip import SiglipVisionConfig, SiglipVisionModel + + +class GemmaConfig(): + def __init__( + self, + vocab_size, + hidden_size, + intermediate_size, + num_hidden_layers, + num_attention_heads, + num_key_value_heads, + head_dim=256, + max_position_embeddings=8192, + rms_norm_eps=1e-6, + rope_theta=10000.0, + attention_bias=False, + attention_dropout=0.0, + pad_token_id=None, + **kwargs, + ): + super().__init__() + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.head_dim = head_dim + self.num_key_value_heads = num_key_value_heads + self.rms_norm_eps = rms_norm_eps + self.rope_theta = rope_theta + self.attention_bias = attention_bias + self.attention_dropout = attention_dropout + self.pad_token_id = pad_token_id + + +class PaliGemmaConfig(): + def __init__( + self, + vision_config=None, + text_config=None, + ignore_index=-100, + image_token_index=256000, + vocab_size=257152, + projection_dim=2048, + hidden_size=2048, + pad_token_id=None, + **kwargs, + ): + super().__init__() + self.ignore_index = ignore_index + self.image_token_index = image_token_index + self.vocab_size = vocab_size + self.projection_dim = projection_dim + self.hidden_size = hidden_size + self.vision_config = vision_config + self.is_encoder_decoder = False + self.pad_token_id = pad_token_id + + self.vision_config = SiglipVisionConfig(**vision_config) + self.text_config = text_config + + self.text_config = GemmaConfig(**text_config, pad_token_id=pad_token_id) + self.vocab_size = self.text_config.vocab_size + + self.text_config.num_image_tokens = (self.vision_config.image_size // self.vision_config.patch_size)**2 + self.vision_config.projection_dim = projection_dim + + +class PaliGemmaForConditionalGeneration(nn.Module): + def __init__(self, config: PaliGemmaConfig): + super().__init__() + self.config = config + self.vision_tower = SiglipVisionModel(config.vision_config) + self.multi_modal_projector = PaliGemmaMultiModalProjector(config) + self.vocab_size = config.vocab_size + + language_model = GemmaForCausalLM(config.text_config) + self.language_model = language_model + + self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 + + def tie_weights(self): + return self.language_model.tie_weights() + + def _merge_input_ids_with_image_features( + self, image_features: torch.Tensor, inputs_embeds: torch.Tensor, input_ids: torch.Tensor, attention_mask: torch.Tensor, kv_cache: Optional[KVCache] = None + ): + + _, _, embed_dim = image_features.shape + batch_size, sequence_length = input_ids.shape + dtype, device = inputs_embeds.dtype, inputs_embeds.device + scaled_image_features = image_features / (self.config.hidden_size**0.5) + + final_embedding = torch.zeros(batch_size, sequence_length, embed_dim, dtype=inputs_embeds.dtype, 
device=inputs_embeds.device) + + text_mask = (input_ids != self.config.image_token_index) & (input_ids != self.pad_token_id) + image_mask = input_ids == self.config.image_token_index + pad_mask = input_ids == self.pad_token_id + + text_mask_expanded = text_mask.unsqueeze(-1).expand(-1, -1, embed_dim) + pad_mask_expanded = pad_mask.unsqueeze(-1).expand(-1, -1, embed_dim) + image_mask_expanded = image_mask.unsqueeze(-1).expand(-1, -1, embed_dim) + + final_embedding = torch.where(text_mask_expanded, inputs_embeds, final_embedding) + final_embedding = final_embedding.masked_scatter(image_mask_expanded, scaled_image_features) + final_embedding = torch.where(pad_mask_expanded, torch.zeros_like(final_embedding), final_embedding) + + + + dtype, device = inputs_embeds.dtype, inputs_embeds.device + min_dtype = torch.finfo(dtype).min + q_len = inputs_embeds.shape[1] + + if kv_cache is None or kv_cache.num_items() == 0: + + causal_mask = torch.full( + (batch_size, q_len, q_len), fill_value=0, dtype=dtype, device=device + ) + else: + assert q_len == 1 + kv_len = kv_cache.num_items() + q_len + + causal_mask = torch.full( + (batch_size, q_len, kv_len), fill_value=0, dtype=dtype, device=device + ) + + causal_mask = causal_mask.unsqueeze(1) + + + if kv_cache is not None and kv_cache.num_items() > 0: + position_ids = attention_mask.cumsum(-1)[:, -1] + if position_ids.dim() == 1: + position_ids = position_ids.unsqueeze(0) + else: + position_ids = (attention_mask.cumsum(-1)).masked_fill_((attention_mask == 0), 1).to(device) + + return final_embedding, causal_mask, position_ids + + + def forward( + self, + input_ids: torch.LongTensor = None, + pixel_values: torch.FloatTensor = None, + attention_mask: Optional[torch.Tensor] = None, + kv_cache: Optional[KVCache] = None, + ) -> Tuple: + assert torch.all(attention_mask == 1), "The input cannot be padded" + + inputs_embeds = self.language_model.get_input_embeddings()(input_ids) + + selected_image_feature = self.vision_tower(pixel_values.to(inputs_embeds.dtype)) + image_features = self.multi_modal_projector(selected_image_feature) + input_embeds, attention_mask, position_ids = self._merge_input_ids_with_image_features(image_features, input_embeds, input_ids, attention_mask, kv_cache) + + outputs = self.language_model( + attention_mask=attention_mask, + position_ids=position_ids, + input_embeds=input_embeds, + kv_cache=kv_cache, + ) + + return outputs + \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py b/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py new file mode 100644 index 000000000..71f3c2ce5 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py @@ -0,0 +1,192 @@ +import torch +import torch.nn as nn +from typing import Tuple, Optional +class SiglipVisionConfig: + + def __init__( + self, + hidden_size=768, + intermediate_size=3072, + num_hidden_layers=12, + num_hidden_attention_heads=12, + num_channels=3, + image_size=224, + patch_size=16, + layer_norm_eps=1e-6, + attention_dropout=0.0, + num_image_tokens: int = None, + **kwargs + ): + super().__init__() + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_hidden_attention_heads = num_hidden_attention_heads + self.num_channels = num_channels + self.image_size = image_size + self.patch_size = patch_size + self.layer_norm_eps = layer_norm_eps + self.attention_dropout = attention_dropout + self.num_image_tokens = num_image_tokens + 
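+# Note: with the default image_size=224 and patch_size=16 above, the vision tower produces
+# (224 // 16) ** 2 = 196 patch embeddings per image; PaliGemmaConfig in modeling_gemma.py
+# derives text_config.num_image_tokens from the same (image_size // patch_size) ** 2 formula.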
+class SiglipVisionEmbeddings(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super.__init__() + self.config = config + self.embed_dim = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.patch_embeddings = nn.Convo2d( + in_channels=config.num_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + stride=self.patch_size, + padding="valid", + ) + + self.num_patches = (self.image_size // self.patch_size)**2 + self.num_positions = self.num_patches + self.position_embeddings = nn.Embedding(self.num_positions, self.embed_dim) + self.register_buffer( + "position_ids", + torch.arange(self.num_positions).expand((1, -1)), + persistent=False, + ) + + def forward(self, pixel_values: torch.FloatTesor) -> torch.Tensor: + _, _, height, width = pixel_values.shape + patch_embeds = self.patch_embedding(pixel_values) + embeddings = patch_embeds.flatten(2) + embeddings = embeddings.transpose(1, 2) + embeddings = embeddings + self.position_embedding(self.position_ids) + return embeddings + +class SiglipAttention(nn.Module): + + def __init__(self, config): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.embed_dim // self.num_heads + self.scale = self.head_dim**-0.5 + self.dropout = config.attention_dropout + + self.k_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.v_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.q_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.out_proj = nn.Linear(self.embed_dim, self.embed_dim) + + def forward( + self, + hidden_states:torch.Tensor, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + batch_size, seq_len, _ = hidden_states.size() + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) + key_states = key_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) + value_states = value_states.query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) + attn_weights = (torch.matmul(query_states, key_states.transpose(2,3))*self.scale) + + if attn_weights.size() != (batch_size, self.num_heads, seq_len, seq_len): + raise ValueError( + f"Attention weights should be of size {(batch_size, self.num_heads, seq_len, seq_len)} but is" + f"{attn_weights.size()}" + ) + + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + + if attn_output.size() != (batch_size, self.num_heads, seq_len, seq_len): + raise ValueError( + f"attn output' should be of size {(batch_size, self.num_heads, seq_len, seq_len)}, but is" + f" {attn_output.size()}" + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(batch_size, seq_len, self.embed_dim) + attn_output = self.out_proj(attn_output) + return attn_output, attn_weights + + +class SiglipMLP(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + 
hidden_states = self.fc1(hidden_states) + hidden_states = nn.functional.gelu(hidden_states, approximate="tanh") + hidden_states = self.fc2(hidden_states) + return hidden_states + +class SiglipEncoderLayer(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.embed_dim = config.hidden_size + self.self_attn = SiglipAttention(config) + self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + self.mlp = SiglipMLP(config) + self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor + ) -> torch.Tensor: + residual = hidden_states + hidden_states = self.layer_norm1(hidden_states) + hidden_states, _ = self.self_attn(hidden_states=hidden_states) + hidden_states = residual + hidden_states + residual = hidden_states + hidden_states = self.layer_norm2(hidden_states) + hidden_states = self.mlp(hidden_states) + +class SiglipEncoder(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + self.layers = nn.ModuleList( + [SiglipEncoderLayer(config) for _ in range(config.num_hidden_layers)] + ) + def forward( + self, + inputs_embeds: torch.Tensor + ) -> torch.Tensor: + hidden_states = inputs_embeds + for encoder_layer in self.layers: + hidden_states = encoder_layer(hidden_states) + + return hidden_states + +class SiglipVisionTransformer(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + embed_dim = config.hidden_size + + self.embeddings = SiglipVisionEmbeddings(config) + self.encoder = SiglipEncoder(config) + self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) + + def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: + hidden_states = self.embeddings(pixel_values) + last_hidden_state = self.encoder(input_embeds=hidden_states) + last_hidden_state = self.post_layernorm(last_hidden_state) + return last_hidden_state + +class SiglipVisionModel(nn.Module): + + def __init__(self, config:SiglipVisionConfig): + super().__init__() + self.config = config + self.vision_model = SiglipVisionTransformer(config) + + def forward(self, pixel_values) -> tuple: + return self.vision_model(pixel_values=pixel_values) \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py b/PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py new file mode 100644 index 000000000..39a98f6e9 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py @@ -0,0 +1,123 @@ +from typing import Dict, List, Optional, Union, Tuple, Iterable +import numpy as np +from PIL import Image +import torch + +IMAGENET_STANDARD_MEAN = [0.5, 0.5, 0.5] +IMAGENET_STANDARD_STD = [0.5, 0.5, 0.5] + +def add_image_tokens_to_prompt(prefix_prompt, bos_token, image_seq_len, image_token): + return f"{image_token*image_seq_len}{bos_token}{prefix_prompt}\n" + +def resize( + image: Image, + size: Tuple[int, int], + resample: Image.Resampling = None, + reducing_gap: Optional[int] = None, +) -> np.ndarray: + height, width = size + resized_image = image.resize( + (width, height), resample=resample, reducing_gap=reducing_gap + ) + return resized_image + +def rescale( + image: np.ndarray, scale: float, dtype: np.dtype = np.float32 +) -> np.ndarray: + rescaled_image = image*scale + rescaled_image = rescaled_image.astype(dtype) + return rescaled_image + +def normalize( + image: np.ndarray, + mean: Union[float, Iterable[float]], 
+ std: Union[float, Iterable[float]], +) -> np.ndarray: + mean = np.array(mean, dtype=image.dtype) + std = np.array(std, dtype=image.dtype) + image = (image-mean)/std + return image + +def process_images( + images: List[Image.Image], + size: Dict[str, int] = None, + resample: Image.Resampling = None, + rescale_factor: float = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, +) -> List[np.ndarray]: + height, width = size[0], size[1] + images = [ + resize(image=image, size=(height, width), resample = resample) for image in images + ] + images = [np.array(image) for image in images] + images = [rescale(image, scale=rescale_factor) for image in images] + images = [normalize(image, mean=image_mean, std=image_std) for image in images] + images = [image.transpose(2, 0, 1) for image in images] + return images + + +class PaliGemmaProcessor: + + IMAGE_TOKEN = "" + + def __init__(self, tokenizer, num_image_tokens: int, image_size: int): + super().__init__() + + self.image_seq_length = num_image_tokens + self.image_size = image_size + + tokens_to_add = {"additional_special_tokens": {self.IMAGE_TOKEN}} + tokenizer.add_special_tokens(tokens_to_add) + EXTRA_TOKENS = [ + f"" for i in range(1024) + ] + EXTRA_TOKENS += [ + f"" for i in range(128) + ] + tokenizer.add_tokens(EXTRA_TOKENS) + self.image_token_id = tokenizer.convert_tokens_to_ids(self.IMAGE_TOKEN) + tokenizer.add_bos_token = False + tokenizer.add_eos_token = False + + self.tokenizer = tokenizer + + def __call__( + self, + text: List[str], + images: List[Image.Image], + padding: str = "longest", + truncation: bool = True, + ) -> dict: + assert len(images) == 1 and len(text) == 1, f"Recieved {len(images)} images for {len(text)} prompts." 
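+        # Single image/prompt pair only: the image is resized to image_size, rescaled to [0, 1],
+        # normalized with mean/std 0.5 and stacked into a tensor, while the prompt is prefixed
+        # with image_seq_length image tokens plus the BOS token before tokenization.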
+ + pixel_values = process_images( + images, + size=(self.image_size, self.image_size), + resample=Image.Resampling.BICUBIC, + rescale_factor = 1/255.0, + image_mean=IMAGENET_STANDARD_MEAN, + image_std=IMAGENET_STANDARD_STD, + ) + + pixel_values = np.stack(pixel_values, axis=0) + pixel_values = torch.tensor(pixel_values) + + input_strings = [ + add_image_tokens_to_prompt( + prefix_prompt=prompt, + bos_token=self.tokenizer.bos_token, + image_seq_len=self.image_seq_length, + iamge_token=self.IMAGE_TOKEN, + ) + for prompt in text + ] + + inputs = self.tokenizer( + input_strings, + return_tensors="pt", + padding=padding, + truncation=truncation, + ) + return_data = {"pixel_values": pixel_values, **inputs} + return return_data \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/coverage.txt b/PyTorch/build-in/Classification/SigLIP/coverage.txt new file mode 100644 index 000000000..ec3e34ba9 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/coverage.txt @@ -0,0 +1,3 @@ +all api: ['_amp_foreach_non_finite_check_and_unscale_', '_amp_update_scale_', '_copy_from', '_has_compatible_shallow_copy_type', '_local_scalar_dense', '_log_softmax', '_log_softmax_backward_data', '_pin_memory', '_reshape_alias', '_softmax', '_softmax_backward_data', 'add', 'add_', 'addmm', 'as_strided', 'bmm', 'bmm_backward', 'bmm_forward', 'contiguous', 'convolution', 'convolution_backward', 'copy_stride', 'div', 'dropout', 'embedding', 'embedding_dense_backward', 'eq', 'fill_', 'fused_sgd', 'gelu', 'gelu_backward', 'is_pinned', 'linear', 'matmul', 'mean', 'mm', 'mul', 'mul_', 'native_layer_norm', 'native_layer_norm_backward', 'nll_loss_backward', 'nll_loss_forward', 'reciprocal', 'set_', 'sum', 'topk_out', 'view', 'zero_'], total: 48 +fallback op: [], total: 0 +coverage rate: 100.00% diff --git a/PyTorch/build-in/Classification/SigLIP/run b/PyTorch/build-in/Classification/SigLIP/run new file mode 100644 index 000000000..2e83eab02 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/run @@ -0,0 +1 @@ +bash ../sdaaTest.sh diff --git a/PyTorch/build-in/Classification/SigLIP/siglip.py b/PyTorch/build-in/Classification/SigLIP/siglip.py new file mode 100644 index 000000000..d2aeadb9d --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/siglip.py @@ -0,0 +1,334 @@ +import torch +import torch.nn as nn +from typing import Tuple, Optional + +class SiglipVisionConfig: + def __init__( + self, + hidden_size=768, + intermediate_size=3072, + num_hidden_layers=12, + num_hidden_attention_heads=12, # 注意:代码里要统一用这个名字 + num_channels=3, + image_size=224, + patch_size=16, + layer_norm_eps=1e-6, + attention_dropout=0.0, + num_image_tokens: int = None, + **kwargs + ): + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_hidden_attention_heads = num_hidden_attention_heads + self.num_channels = num_channels + self.image_size = image_size + self.patch_size = patch_size + self.layer_norm_eps = layer_norm_eps + self.attention_dropout = attention_dropout + self.num_image_tokens = num_image_tokens + +class SiglipVisionEmbeddings(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() # 修复:加括号 + self.config = config + self.embed_dim = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + # 修复:Convo2d -> Conv2d + self.patch_embeddings = nn.Conv2d( + in_channels=config.num_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + 
stride=self.patch_size, + padding="valid", + ) + + self.num_patches = (self.image_size // self.patch_size)**2 + self.num_positions = self.num_patches + self.position_embeddings = nn.Embedding(self.num_positions, self.embed_dim) + self.register_buffer( + "position_ids", + torch.arange(self.num_positions).expand((1, -1)), + persistent=False, + ) + + # 修复:FloatTesor -> FloatTensor + def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: + _, _, height, width = pixel_values.shape + # 修复:变量名一致性 patch_embedding -> patch_embeddings + patch_embeds = self.patch_embeddings(pixel_values) + embeddings = patch_embeds.flatten(2) + embeddings = embeddings.transpose(1, 2) + # 修复:变量名一致性 position_embedding -> position_embeddings + embeddings = embeddings + self.position_embeddings(self.position_ids) + return embeddings + +class SiglipAttention(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + # 修复:使用 Config 中定义的正确字段名 + self.num_heads = config.num_hidden_attention_heads + self.head_dim = self.embed_dim // self.num_heads + self.scale = self.head_dim**-0.5 + self.dropout = config.attention_dropout + + self.k_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.v_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.q_proj = nn.Linear(self.embed_dim, self.embed_dim) + self.out_proj = nn.Linear(self.embed_dim, self.embed_dim) + + def forward( + self, + hidden_states: torch.Tensor, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + batch_size, seq_len, _ = hidden_states.size() + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) + key_states = key_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) + + # 修复:value_states.query_states 是错误的写法,直接 view 即可 + value_states = value_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) + + attn_weights = (torch.matmul(query_states, key_states.transpose(2,3)) * self.scale) + + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(batch_size, seq_len, self.embed_dim) + attn_output = self.out_proj(attn_output) + return attn_output, attn_weights + +class SiglipMLP(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.fc1(hidden_states) + hidden_states = nn.functional.gelu(hidden_states, approximate="tanh") + hidden_states = self.fc2(hidden_states) + return hidden_states + +class SiglipEncoderLayer(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.embed_dim = config.hidden_size + self.self_attn = SiglipAttention(config) + self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + self.mlp = SiglipMLP(config) + self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + 
# Self Attention Block + residual = hidden_states + hidden_states = self.layer_norm1(hidden_states) + hidden_states, _ = self.self_attn(hidden_states=hidden_states) + hidden_states = residual + hidden_states + + # MLP Block + residual = hidden_states + hidden_states = self.layer_norm2(hidden_states) + hidden_states = self.mlp(hidden_states) + # 修复:加上残差连接,否则深层网络无法训练 + hidden_states = residual + hidden_states + + return hidden_states + +class SiglipEncoder(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + self.layers = nn.ModuleList( + [SiglipEncoderLayer(config) for _ in range(config.num_hidden_layers)] + ) + + def forward(self, inputs_embeds: torch.Tensor) -> torch.Tensor: + hidden_states = inputs_embeds + for encoder_layer in self.layers: + hidden_states = encoder_layer(hidden_states) + return hidden_states + +class SiglipVisionTransformer(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + embed_dim = config.hidden_size + + self.embeddings = SiglipVisionEmbeddings(config) + self.encoder = SiglipEncoder(config) + self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) + + def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: + hidden_states = self.embeddings(pixel_values) + # 修复:参数名对应 inputs_embeds + last_hidden_state = self.encoder(inputs_embeds=hidden_states) + last_hidden_state = self.post_layernorm(last_hidden_state) + return last_hidden_state + +class SiglipVisionModel(nn.Module): + def __init__(self, config: SiglipVisionConfig): + super().__init__() + self.config = config + self.vision_model = SiglipVisionTransformer(config) + + def forward(self, pixel_values) -> torch.Tensor: + return self.vision_model(pixel_values=pixel_values) + +import torch +import torch.nn as nn + +# 假设之前的类定义 (SiglipVisionConfig, SiglipVisionModel 等) 已经在上面定义好了 +# 如果是在同一个文件中,直接接在后面即可。 + +class SiglipForImageClassification(nn.Module): + + def __init__(self, config: SiglipVisionConfig, num_classes: int = 1000): + super().__init__() + self.config = config + self.num_classes = num_classes + + self.vision_model = SiglipVisionModel(config) + + self.classifier = nn.Linear(config.hidden_size, num_classes) + + self._init_weights(self.classifier) + + def _init_weights(self, module): + if isinstance(module, nn.Linear): + nn.init.normal_(module.weight, std=0.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + + def forward(self, pixel_values: torch.Tensor, labels: torch.Tensor = None): + + backbone_output = self.vision_model(pixel_values) + + pooled_output = backbone_output.mean(dim=1) + + logits = self.classifier(pooled_output) + + loss = None + if labels is not None: + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct(logits, labels) + + if loss is not None: + return logits, loss + return logits + +def Model(num_classes=1000, model_size='base'): + + if model_size == 'base': + config = SiglipVisionConfig( + hidden_size=768, + intermediate_size=3072, + num_hidden_layers=12, + num_hidden_attention_heads=12, + image_size=224, + patch_size=16 + ) + elif model_size == 'large': + config = SiglipVisionConfig( + hidden_size=1024, + intermediate_size=4096, + num_hidden_layers=24, + num_hidden_attention_heads=16, + image_size=224, + patch_size=16 + ) + else: + raise ValueError(f"Unknown model_size: {model_size}") + + # 2. 实例化分类模型 + model = SiglipForImageClassification(config, num_classes=num_classes) + + return model + +if __name__ == "__main__": + import torch + + # 1. 
Set test parameters
+    MODEL_SIZE = 'base'
+    NUM_CLASSES = 1000
+    IMAGE_SIZE = 224
+    BATCH_SIZE = 2
+
+    print(f"--- Testing SigLIP (Generation 1) image classification ---")
+    print(f"Config: Size={MODEL_SIZE}, Classes={NUM_CLASSES}, Input={IMAGE_SIZE}x{IMAGE_SIZE}")
+
+    # 2. Initialize the model
+    try:
+        model = Model(num_classes=NUM_CLASSES, model_size=MODEL_SIZE)
+        print("✅ Model initialized successfully")
+    except Exception as e:
+        print(f"❌ Model initialization failed: {e}")
+        exit(1)
+
+    # 3. Build dummy input data
+    # Input shape: [Batch_Size, Channels, Height, Width]
+    pixel_values = torch.randn(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE)
+    # Label shape: [Batch_Size] (values in 0 ~ NUM_CLASSES-1)
+    labels = torch.randint(0, NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long)
+
+    print(f"Input data shape: {pixel_values.shape}")
+    print(f"Label data shape: {labels.shape}")
+
+    # 4. Forward pass test
+    print("\n--- Running forward pass ---")
+    # Enable training mode (activates Dropout, etc.)
+    model.train()
+
+    try:
+        # Note: SiglipForImageClassification defined above returns (logits, loss) in that order
+        logits, loss = model(pixel_values, labels=labels)
+
+        print(f"✅ Forward pass succeeded")
+        print(f"Loss value: {loss.item():.6f}")
+        print(f"Logits shape: {logits.shape} (expected: [{BATCH_SIZE}, {NUM_CLASSES}])")
+
+        assert logits.shape == (BATCH_SIZE, NUM_CLASSES), "Output logits shape mismatch!"
+        assert not torch.isnan(loss), "Loss is NaN! Check initialization or input data."
+
+    except Exception as e:
+        print(f"❌ Forward pass error: {e}")
+        exit(1)
+
+    # 5. Backward pass test
+    print("\n--- Running backward pass ---")
+    try:
+        # Clear gradients
+        model.zero_grad()
+
+        # Backpropagate
+        loss.backward()
+
+        # Check that the classifier head has a gradient and that it is non-zero.
+        # If the graph is broken somewhere (e.g. a forgotten residual), gradients may not flow back.
+        grad_norm = model.classifier.weight.grad.norm().item()
+
+        print(f"✅ Backward pass succeeded")
+        print(f"Classifier layer gradient norm (Grad Norm): {grad_norm:.6f}")
+
+        if grad_norm == 0.0:
+            print("⚠️ Warning: gradient is 0; the model structure may be broken or frozen.")
+        else:
+            print("🎉 Test passed: gradients propagate normally.")
+
+    except Exception as e:
+        print(f"❌ Backward pass error: {e}")
+        exit(1)
+
+    # 6. (Optional) print the model's parameter count
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f"\nTotal model parameters: {total_params / 1e6:.2f} M")
\ No newline at end of file
diff --git a/PyTorch/build-in/Classification/SigLIP/siglip_loss.jpg b/PyTorch/build-in/Classification/SigLIP/siglip_loss.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..be3ed9b64f872b373ce2c50a4fa5511217afceb8
GIT binary patch
literal 36621
zn^BbGd`kA&2Nn|1_w4FmV_V(R=OG5**FoM~dB{20!KS4#Vv~If*AkH+u_!7Gu%z;X zFq6QD5FsEu`i~iAkem6-%W1T3zZOCE^|qtG0>tW;2$Y{x_D9^~9@%K^Fp>1M%;V3j z&M1U?S0<&!U+WO?6`dyr$YcyGl>AsQ(cm=36NweS&vLLs!|c$sso9$R+6mBsNlZ!0 zoiHZG1LMO#@=QPO;ty`^|K&1I{gLUF-IE2du9plJA2>u87oY+c_}_MtVeag;Y!_wZ zUdtAc|1wh`V0EsyLZ)5uL556p;IaSE!6^XI&OVwYlE9~0xP1kXd8HmZpolNO_+V#D zWkb{FE1(^d`3(*JYgz7JO{$YWPx|7oq(7GewEwoQ5>q=4xhgEdA4V;&t_EfeI5V+W zGHXhB2n0~Ls(*9czdlKD4kI{=;Ut(}BhQVzQ&=jNDUXNy=@k`@rYoFF$AX6@$A`Go zeg1~8fcYE9*Us6<{-Ro9-d&-eAZ@|LMAG+zO!L=Mj0XmOF7sVqzonZ7pvt>-D@j$B za$yQ4BvTcw4rov!Z!=N{ z!JO*AoMFb%j~pm@wkV~se+<_3Jv95TBXhs^3<0OVQLtsId#46%Cz^1FM8c$_OYbZ`J{gEzxQ-P`&H9$r=}1AuK3cD*&L?B#o+!Ia!{!3S{3P zlq1}nBnGXq!!$42S<%)Kg44^Y0Cu*QS=NCQ)zZaOV&z3Jhk4WCwIWeIiPs4P9gRn^ ziPQqlkNG2_T7k6*MAi+4Rfb;_(XY~h>6Yx4!+dt5mTZ8Q(B4O!rz2}R$*h=2D1w~D ziroD$e%+62V_p*$JI%p+3tAs}b%6M4K*~oG@C^`xZ|vF-e@Qv5)zzMU{`)>$&&w5n zYd2#c*xP?We4hBdMyz}X-}oX__OydZnUCjaDX6SUDuh%htXk_WyLJijI?;C&5nc0L zPQ^RD`r-^Q^J$%Qr4ej|Y#s>u`J_{BKGo^f04av91C*{+S$S3M#%u)0Ny)?P2#3%f zL{?CDhbHcoI`XOaPxmd7Yp_PYahlpU(`gtHb1veYx$mg=sN@-$6N5kspilpUR7yMx zYEHq4?~C}YuK8v1cx>(pCQ^p7Y(wFMkib+~*p6BBU>~5MCG3 znC1Rbt?Maeyw-ijy%el3jGray|H|tkgVBbYQ#u zm`AonReI+5tvW||xc7pv0*ism;iydEdkhV1cipR@sVeG#MFHn$f3?Fi{}Sl}m|Og< zcD%p?c&6IT0YfI?+-8GIfHd;ILb60Cv2OMrv+y8a zOZ2n2VZT+dRi1+hS|&EDF&jaJ9kVS@N-+{EOjnCBr0yDf$e{5aLemPYoWkAW4mM5c zTf%#JnpD{whhsDiRyhIPp->}Q3Nm0&9`ZFa@h%o?#A7^bqt^ssdiN6ukxA^8AM%uV z;a}4((Jpa?!5eB8?gVe+5J>c5h3+z59o;hTw2n5?YGo4haio8suTQyHhwdH#i6G91 zpv3&blf(1;ap9N*8iD?Rv=;yX$(3(V2@#|JU^D$&ygKs)UR4lGQ9ipd9qA;Eoz4(8 zo2syzTepNxv7T*&@~6lZy)NB;(f8T6v#(1c!za6x^K7tx0N>^)o0`HFZ@7 z>7UBw7otETI)W>9D~I?>QgFJlx=&i*O3AtI5drM9J9nD#My_-`zzV=D+03-X@^y8e z2YXUCXU)eE>4a?zQ$bkor;5`@!4_PG1KY_KbUKeA<#ZoAYSyExrOMCr^!Sxpf|Xwt z|5FHg{wToe56v@2hAaAThd7@L(o@z6NOl$tjZ#Z;R z+H8EyDM8KY0$)T$Sp9-oeBwPJeqP`0Jt8l$GppV1uIHGPj(gB@bu3kBYCj_9w0~iW zEd?jOgMaw?eZ_loM`m#9r_z9dRg&JEc*aiK$;g(gc?zKq*QLgyXx!=+qlrMI>Bmo0 z%B1h4TKjq~}Ugt*=3q9>Tec!nXb3)VKq5wDjzgtO!aDelh%N4_k19m7N0s zEzV{SJp?8rd~&lE55OUXjtf2et48$f&7?(cQ9B@l#5>Wo0 bXt2SQDD3hK1QZyP zc3#Mf%&N#6_r?_s3@P^Z9BVb12gAqLw!0j~3g%O!V;u%gU5 z;#7RB;N|ATX17Cqgv$am6;`K|F{=)FQU^YhU#bLn!l1=gh2zw{%19ZmwMTKpGVo>L zx6RBvyF48A#LP@6Xc8+w-$vuXnotcZNTl~~UXSQtqX_O0o&v(t{(JZJdv5Kg{HN~z L4}1f3U&sFqpv1Kq literal 0 HcmV?d00001 diff --git a/PyTorch/build-in/Classification/SigLIP/siglip_loss.txt b/PyTorch/build-in/Classification/SigLIP/siglip_loss.txt new file mode 100644 index 000000000..899e082e1 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/siglip_loss.txt @@ -0,0 +1,29 @@ +=== CUDA === +4.598600 5.078500 4.634700 5.369500 5.446500 5.547900 5.462300 6.285000 5.508400 5.709800 +5.490900 6.323200 5.549800 5.892800 5.058500 5.731100 5.398200 5.147800 5.467100 5.150100 +5.114700 6.221800 5.323600 4.941700 5.687600 5.310300 5.684600 5.102700 5.503000 4.995000 +6.201700 5.872000 5.899000 6.706300 5.223900 5.587600 5.982100 6.127200 6.364800 5.207300 +5.855800 6.675700 6.006200 6.277900 5.877400 5.242900 5.683500 5.629500 5.107200 5.035200 +5.179700 5.259000 5.067500 5.007000 4.620100 5.062000 4.798700 4.054500 4.564700 4.695700 +4.838800 4.922400 4.416900 4.693200 4.663500 4.669700 4.929600 4.415700 4.412000 4.732400 +4.601800 4.719100 4.985100 4.437400 4.501600 4.763700 4.748200 4.589100 4.821000 4.584100 +4.371500 4.702600 4.652700 4.818600 4.928400 4.789900 4.389700 4.694300 4.635200 4.764100 +4.243000 4.635700 4.610700 4.616800 4.396400 4.577500 4.482200 4.629400 4.617600 4.642600 + +=== SDAA === +4.598600 5.078700 4.634800 5.369400 5.446300 5.548800 5.463400 6.285500 5.507600 5.711500 
+5.493200 6.324900 5.548200 5.892900 5.073400 5.740500 5.404200 5.139800 5.458100 5.157100 +5.136300 6.253800 5.330500 4.916400 5.672700 5.399100 6.077100 5.811800 5.643800 4.976600 +6.019600 5.382200 5.546100 5.684400 5.265100 5.979200 5.989300 5.777400 5.813900 5.651500 +5.529600 6.738700 6.192100 6.600300 5.850900 5.733400 5.734200 5.733800 5.473700 5.849100 +5.292800 5.802500 5.141200 5.480600 5.032100 5.049200 5.664400 4.978500 4.691300 4.774900 +4.748200 4.611400 4.443100 4.548200 5.300900 4.813600 4.814600 4.574500 4.550600 4.657500 +4.580500 4.620500 4.630300 4.521800 4.271700 4.718300 4.309400 4.577000 4.657000 4.375800 +4.317900 4.590800 4.487900 4.486900 4.594500 4.764500 4.328100 4.515700 4.669400 4.501500 +4.408400 4.643500 4.416800 4.520200 4.382700 4.311300 4.288000 4.411600 4.533400 4.435400 + +=== RESULT === +MeanRelativeError: 0.0033615679983984457 +MeanAbsoluteError: 0.012283999999999988 +Rule,mean_relative_error 0.0033615679983984457 +pass mean_relative_error=0.0033615679983984457 <= 0.05 or mean_absolute_error=0.012283999999999988 <= 0.0002 diff --git a/PyTorch/build-in/Classification/SigLIP/weloTrainStep.py b/PyTorch/build-in/Classification/SigLIP/weloTrainStep.py new file mode 100644 index 000000000..13297c11b --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/weloTrainStep.py @@ -0,0 +1,692 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +import os +import random +import sys +import time +import json +import argparse +from collections import OrderedDict +from pathlib import Path +import numpy as np +import pandas as pd +from tqdm import tqdm +import importlib + +os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # 强烈推荐在 shell/最顶端设置 +os.environ["PYTHONHASHSEED"] = "12345" +os.environ["OMP_NUM_THREADS"] = "1" +os.environ["MKL_NUM_THREADS"] = "1" + +def ensure_cublas_workspace(config=":4096:8"): + """ + 尝试为 cuBLAS 设置可复现 workspace。强烈建议在主脚本入口处(import torch 之前) + 通过 export 设置该 env。此函数会在运行时设置,但如果 torch 已经被 import, + 则可能为时已晚——函数会打印提醒。 + """ + already = os.environ.get("CUBLAS_WORKSPACE_CONFIG") + if already: + print(f"[seed_utils] CUBLAS_WORKSPACE_CONFIG 已存在:{already}") + else: + os.environ["CUBLAS_WORKSPACE_CONFIG"] = config + print(f"[seed_utils] 已设置 CUBLAS_WORKSPACE_CONFIG={config} (注意:请在 import torch 前设置以保证生效)") + +def set_global_seed(seed: int = 42, set_threads: bool = True): + """ + 统一随机性设置。注意:若希望完全发挥效果,请在主脚本入口(import torch 之前) + 先调用 ensure_cublas_workspace(...) 
或在 shell 中 export CUBLAS_WORKSPACE_CONFIG。 + """ + ensure_cublas_workspace() # 会设置 env 并提醒 + os.environ["PYTHONHASHSEED"] = str(seed) + + if set_threads: + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + + random.seed(seed) + np.random.seed(seed) + + # 现在导入 torch(晚导入以便前面 env 生效) + import torch + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + # 强制确定性(如果存在不确定性算子,PyTorch 会报错并提示) + try: + torch.use_deterministic_algorithms(True) + except Exception as e: + print("[seed_utils] 设置 deterministic 模式时出错:", e) + print("[seed_utils] 请确认 CUBLAS_WORKSPACE_CONFIG 已在 import torch 之前设置。") + + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + if set_threads: + torch.set_num_threads(1) + torch.set_num_interop_threads(1) + + print(f"[seed_utils] 全局 seed 已设置为 {seed}") + +set_global_seed(2025) + +""" +通用训练模版(优先从本地导入 Model -> 支持 DDP / 单卡,AMP,resume,日志,checkpoint) +保存为 train_template_localmodel.py +""" +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as tv_models + +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler + +from torch.sdaa import amp +# from torch.cuda import amp + + +# ---------------------------- +# Helper utilities (self-contained) +# ---------------------------- +class AverageMeter(object): + def __init__(self, name='Meter', fmt=':.4f'): + self.name = name + self.fmt = fmt + self.reset() + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / max(1, self.count) + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} (avg {avg' + self.fmt + '})' + return fmtstr.format(name=self.name, val=self.val, avg=self.avg) + +def accuracy(output, target, topk=(1,)): + """Computes the precision@k for the specified values of k + 返回一个 list,每个元素是 tensor(百分比形式) + """ + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + # output: (N, C) -> pred: (maxk, N) + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() # (maxk, N) + correct = pred.eq(target.view(1, -1).expand_as(pred)) # (maxk, N) bool + + res = [] + for k in topk: + # 把前 k 行展平后求和(返回 0-dim tensor),随后换算为百分比 + correct_k = correct[:k].reshape(-1).float().sum() # 注意:不传 keepdim + # 乘以 100.0 / batch_size,保持返回 tensor(和之前代码兼容) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + +def save_checkpoint(state, is_best, save_dir, filename='checkpoint.pth'): + save_path = os.path.join(save_dir, filename) + torch.save(state, save_path) + if is_best: + best_path = os.path.join(save_dir, 'model_best.pth') + torch.save(state, best_path) + +def set_seed(seed, deterministic=False): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + if deterministic: + cudnn.deterministic = True + cudnn.benchmark = False + else: + cudnn.deterministic = False + cudnn.benchmark = True + +# ---------------------------- +# Argument parser +# ---------------------------- +def parse_args(): + parser = argparse.ArgumentParser(description='Generic PyTorch training template (DDP/AMP) with LocalModel priority') + 
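+    # --arch selects the model module by name: build_model_with_local_priority() imports the
+    # module named by --arch via importlib (e.g. "siglip" for siglip.py in this directory) and
+    # instantiates its Model class with the dataset's class count.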
parser.add_argument('--name', default='run', type=str, help='experiment name (log/checkpoints dir)') + parser.add_argument('--seed', default=42, type=int, help='random seed') + parser.add_argument('--arch', default='None', type=str, help='model name') + parser.add_argument('--deterministic', action='store_true', help='set cudnn deterministic (may be slower)') + parser.add_argument('--dataset', default='cifar10', choices=['cifar10','cifar100','imagenet','custom'], help='which dataset') + parser.add_argument('--datapath', default='./data', type=str, help='dataset root / imagenet root / custom root') + parser.add_argument('--imagenet_dir', default='./imagenet', type=str, help='if dataset=imagenet, path to imagenet root') + parser.add_argument('--custom_eval_dir', default=None, help='if dataset=custom, provide val dir') + parser.add_argument('--num_workers', default=4, type=int, help='dataloader workers per process') + parser.add_argument('--epochs', default=200, type=int) + parser.add_argument('--steps', default=0, type=int, help='max steps to run (if >0, training will stop when global_step reaches this).') + parser.add_argument('--batch_size', default=128, type=int) + parser.add_argument('--model_name', default='resnet18', help='torchvision model name or python path e.g. mypkg.mymodule.Model (used if no local Model)') + parser.add_argument('--num_classes', default=None, type=int, help='override num classes (auto-detect for common sets)') + parser.add_argument('--pretrained', action='store_true', help='use torchvision pretrained weights when available') + parser.add_argument('--optimizer', default='sgd', choices=['sgd','adam','adamw'], help='optimizer') + parser.add_argument('--lr', '--learning_rate', default=0.1, type=float) + parser.add_argument('--momentum', default=0.9, type=float) + parser.add_argument('--weight_decay', default=5e-4, type=float) + parser.add_argument('--nesterov', action='store_true') + parser.add_argument('--scheduler', default='multistep', choices=['multistep','step','cosine','none'], help='lr scheduler') + parser.add_argument('--milestones', default='100,150', type=str, help='milestones for multistep (comma sep)') + parser.add_argument('--step_size', default=30, type=int, help='step size for StepLR or cosine max epochs') + parser.add_argument('--gamma', default=0.1, type=float) + parser.add_argument('--scheduler_step_per_batch', action='store_true', help='call scheduler.step() per batch (for some schedulers)') + parser.add_argument('--resume', default='', type=str, help='path to checkpoint to resume from') + parser.add_argument('--start_epoch', default=0, type=int) + parser.add_argument('--print_freq', default=100, type=int) + parser.add_argument('--save_freq', default=10, type=int, help='save checkpoint every N epochs (rank0 only)') + parser.add_argument('--amp', action='store_true', default = True,help='use automatic mixed precision (AMP)') + parser.add_argument('--grad_accum_steps', default=1, type=int, help='gradient accumulation steps') + parser.add_argument('--local_rank', default=None, type=int, help='local rank passed by torchrun (if any). 
Use -1 or None for non-distributed') + parser.add_argument('--cutmix_prob', default=0.0, type=float) + parser.add_argument('--beta', default=1.0, type=float) + parser.add_argument('--seed_sampler', default=False, action='store_true', help='set sampler epoch seeds to make deterministic distributed shuffling') + args = parser.parse_args() + args.milestones = [int(x) for x in args.milestones.split(',')] if args.milestones else [] + return args + +# ---------------------------- +# build model (优先 LocalModel) +# ---------------------------- +def build_model_with_local_priority(args, device=None): + """ + 用参数 args.arch 作为模块名导入 Model() + 如果模块不存在或没有 Model 类,则报错停止。 + """ + try: + # 动态导入模块,比如 args.arch = "rexnet" + mod = importlib.import_module(args.arch) + Model = getattr(mod, "Model") # 从模块中获取 Model 类 + except Exception as e: + raise RuntimeError( + f"无法导入模型模块 '{args.arch}' 或未找到类 Model。" + f"\n错误信息:{e}" + ) + + # 解析数据集类别数 + if args.dataset == 'cifar10': + num_classes = 10 + elif args.dataset == 'cifar100': + num_classes = 100 + else: + print(f"[ERROR] 不支持的数据集类型:{args.dataset},无法确定类别数。程序终止。") + sys.exit(1) + + + # 实例化 + try: + model = Model(num_classes) + except Exception as e: + raise RuntimeError( + f"Model() 实例化失败,请检查模型构造函数。\n错误信息:{e}" + ) + + return model + +# ---------------------------- +# Data loader factory +# ---------------------------- +def build_dataloaders(args, rank, world_size): + if args.dataset == 'cifar10' or args.dataset == 'cifar100': + mean = (0.4914, 0.4822, 0.4465) + std = (0.2470, 0.2435, 0.2616) if args.dataset == 'cifar10' else (0.2023, 0.1994, 0.2010) + # train_transform = transforms.Compose([ + # transforms.RandomCrop(32, padding=4), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # transforms.Normalize(mean, std), + # ]) + # test_transform = transforms.Compose([ + # transforms.ToTensor(), + # transforms.Normalize(mean, std), + # ]) + + train_transform = transforms.Compose([ # 2025/12/3 从visformer模型开始 + transforms.Resize(256), # 先放大到 256 + transforms.RandomCrop(224), # 再随机裁剪为 224(更符合 ImageNet 风格增强) + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean, std), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean, std), + ]) + root = args.datapath + if args.dataset == 'cifar10': + train_set = datasets.CIFAR10(root=root, train=True, download=False, transform=train_transform) + val_set = datasets.CIFAR10(root=root, train=False, download=False, transform=test_transform) + num_classes = 10 + else: + train_set = datasets.CIFAR100(root=root, train=True, download=False, transform=train_transform) + val_set = datasets.CIFAR100(root=root, train=False, download=False, transform=test_transform) + num_classes = 100 + + elif args.dataset == 'imagenet': + train_dir = os.path.join(args.imagenet_dir, 'train') + val_dir = os.path.join(args.imagenet_dir, 'val') + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), + ]) + train_set = datasets.ImageFolder(train_dir, train_transform) + val_set = datasets.ImageFolder(val_dir, test_transform) + num_classes = args.num_classes or 1000 + + elif args.dataset 
== 'custom': + train_dir = os.path.join(args.datapath, 'train') + val_dir = args.custom_eval_dir or os.path.join(args.datapath, 'val') + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + ]) + train_set = datasets.ImageFolder(train_dir, train_transform) + val_set = datasets.ImageFolder(val_dir, test_transform) + num_classes = len(train_set.classes) + else: + raise ValueError("Unknown dataset") + + if dist.is_initialized() and world_size > 1: + train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True) + else: + train_sampler = None + + train_loader = DataLoader(train_set, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.num_workers, + pin_memory=True, + sampler=train_sampler, + drop_last=False) + val_loader = DataLoader(val_set, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + pin_memory=True) + + return train_loader, val_loader, num_classes, train_sampler + +# ---------------------------- +# Train & validate +# ---------------------------- +def train_one_epoch(args, epoch, model, criterion, optimizer, train_loader, device, scaler, scheduler=None, train_sampler=None, global_step_start=0, max_global_steps=None): + """ + 现在支持:若 max_global_steps 非 None,则当 global_step 达到该值时提前退出 + 返回: epoch_summary_dict, step_logs_list, global_step_end + step_logs_list: list of dicts with per-step info (for logging to CSV if需要) + """ + batch_time = AverageMeter('Time') + data_time = AverageMeter('Data') + losses = AverageMeter('Loss') + top1 = AverageMeter('Acc@1') + top5 = AverageMeter('Acc@5') + + model.train() + end = time.time() + optimizer.zero_grad() + + iters = len(train_loader) + step_logs = [] + global_step = global_step_start + + for i, (images, targets) in enumerate(train_loader): + # check global steps limit + if (max_global_steps is not None) and (global_step >= max_global_steps): + break + + data_time.update(time.time() - end) + images = images.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + + if args.amp: + with amp.autocast(): + outputs = model(images) + loss = criterion(outputs, targets) / args.grad_accum_steps + else: + outputs = model(images) + loss = criterion(outputs, targets) / args.grad_accum_steps + + if args.amp: + scaler.scale(loss).backward() + else: + loss.backward() + + # 每当累积步满足 grad_accum_steps 就 step + if (i + 1) % args.grad_accum_steps == 0: + if args.amp: + scaler.step(optimizer) + scaler.update() + else: + optimizer.step() + optimizer.zero_grad() + if scheduler is not None and args.scheduler_step_per_batch: + scheduler.step() + + with torch.no_grad(): + acc1, acc5 = accuracy(outputs, targets, topk=(1,5)) + losses.update(loss.item() * args.grad_accum_steps, images.size(0)) + top1.update(acc1.item(), images.size(0)) + top5.update(acc5.item(), images.size(0)) + + batch_time.update(time.time() - end) + end = time.time() + + # increment global step AFTER processing this batch + global_step += 1 + + # per-step print (controlled by print_freq) + if ((global_step % args.print_freq == 0) or (i == iters - 1)) and ((dist.get_rank() if dist.is_initialized() else 0) == 0): + lr = optimizer.param_groups[0]['lr'] + print(f"Epoch[{epoch}]:step[{i+1}/{iters}] step_train_loss {losses.val:.4f} acc1 {top1.val:.2f} acc5 {top5.val:.2f}") + + # collect 
per-step log + step_logs.append({ + 'epoch': epoch, + 'batch_idx': i, + 'global_step': global_step, + 'lr': optimizer.param_groups[0]['lr'], + 'loss': losses.val, + 'loss_avg': losses.avg, + 'acc1': top1.val, + 'acc1_avg': top1.avg, + 'acc5': top5.val, + 'acc5_avg': top5.avg, + 'time': batch_time.val + }) + + # if reached max_global_steps inside epoch, break (handled at loop start next iter) + if (max_global_steps is not None) and (global_step >= max_global_steps): + if (dist.get_rank() if dist.is_initialized() else 0) == 0: + print(f"[Info] 达到 max_global_steps={max_global_steps},将在 epoch 内提前停止。") + break + + # --- flush remaining grads if needed (handle gradient accumulation leftovers) --- + processed_batches = global_step - global_step_start # 实际处理的 batch 数 + if args.grad_accum_steps > 1 and (processed_batches % args.grad_accum_steps) != 0: + # only step if there are gradients + grads_present = any((p.grad is not None and p.requires_grad) for p in model.parameters()) + if grads_present: + if args.amp: + try: + scaler.step(optimizer) + scaler.update() + except Exception as e: + # 防御性:若 scaler.step 因某些原因失败,尝试普通 step(只在极端情况下) + print("[Warning] scaler.step 失败,尝试普通 optimizer.step():", e) + optimizer.step() + else: + optimizer.step() + optimizer.zero_grad() + if scheduler is not None and args.scheduler_step_per_batch: + scheduler.step() + if (dist.get_rank() if dist.is_initialized() else 0) == 0: + print(f"[Info] flushed remaining gradients after early stop (processed_batches={processed_batches}, grad_accum={args.grad_accum_steps}).") + + if scheduler is not None and not args.scheduler_step_per_batch: + scheduler.step() + + return OrderedDict([('loss', losses.avg), ('acc1', top1.avg), ('acc5', top5.avg)]), step_logs, global_step + +def validate(args, model, val_loader, criterion, device, max_batches=None): + """ + Validate on the val_loader. + If max_batches is not None, only process up to that many batches (useful for quick checks). + Returns an OrderedDict with loss/acc1/acc5 (averaged over processed samples). 
+ """ + losses = AverageMeter('Loss') + top1 = AverageMeter('Acc@1') + top5 = AverageMeter('Acc@5') + + model.eval() + processed_batches = 0 + processed_samples = 0 + with torch.no_grad(): + for i, (images, targets) in enumerate(tqdm(val_loader)): + images = images.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + outputs = model(images) + loss = criterion(outputs, targets) + acc1, acc5 = accuracy(outputs, targets, topk=(1,5)) + batch_n = images.size(0) + losses.update(loss.item(), batch_n) + top1.update(acc1.item(), batch_n) + top5.update(acc5.item(), batch_n) + + processed_batches += 1 + processed_samples += batch_n + + if (max_batches is not None) and (processed_batches >= max_batches): + break + + # 如果没处理任何样本,避免除0(不太可能,但防御性) + if processed_samples == 0: + return OrderedDict([('loss', 0.0), ('acc1', 0.0), ('acc5', 0.0)]) + return OrderedDict([('loss', losses.avg), ('acc1', top1.avg), ('acc5', top5.avg)]) + +# ---------------------------- +# Main +# ---------------------------- +def main(): + args = parse_args() + + # handle local_rank from env if not provided + local_rank_env = os.environ.get('LOCAL_RANK', None) + if args.local_rank is None and local_rank_env is not None: + args.local_rank = int(local_rank_env) + + distributed = (args.local_rank is not None and args.local_rank != -1) + if distributed: + dist.init_process_group(backend='nccl', init_method='env://') + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + + if distributed: + torch.cuda.set_device(args.local_rank) + device = torch.device('cuda', args.local_rank) + else: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + set_seed(args.seed + (rank if distributed else 0), deterministic=args.deterministic) + + save_dir = os.path.join('models', args.name) + if rank == 0: + os.makedirs(save_dir, exist_ok=True) + with open(os.path.join(save_dir, 'args.json'), 'w') as f: + json.dump(vars(args), f, indent=2) + if distributed: + dist.barrier() + + train_loader, val_loader, auto_num_classes, train_sampler = build_dataloaders(args, rank, world_size) + if args.num_classes is None: + args.num_classes = auto_num_classes + + # 使用本地 Model 优先(LocalModel 已在文件顶部尝试导入) + model = build_model_with_local_priority(args, device) + model.to(device) + + if distributed: + model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True) + + criterion = nn.CrossEntropyLoss().to(device) + params = [p for p in model.parameters() if p.requires_grad] + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay, nesterov=args.nesterov) + elif args.optimizer == 'adam': + optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'adamw': + optimizer = optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay) + else: + raise ValueError('Unknown optimizer') + + scheduler = None + if args.scheduler == 'multistep': + scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=args.gamma) + elif args.scheduler == 'step': + scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) + elif args.scheduler == 'cosine': + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + elif args.scheduler == 'none': + scheduler = None + + scaler = amp.GradScaler() if args.amp else None + + start_epoch = args.start_epoch + best_acc = 0.0 
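# --- Hedged illustration (not from the original patch): launching this template with DDP ---
# main() reads LOCAL_RANK from the environment and calls
# dist.init_process_group(backend='nccl', init_method='env://'), so a multi-card run can be
# started with torchrun. This sketch assumes a CUDA host with 4 visible devices; the flag
# values mirror the single-card command in the readme added later in this series and are
# otherwise placeholders.
#
#   torchrun --standalone --nproc_per_node=4 weloTrainStep.py \
#       --name ddp_run --arch Siglip --dataset cifar100 --datapath ../data \
#       --batch_size 32 --epochs 100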
+ if args.resume: + if os.path.isfile(args.resume): + ckpt = torch.load(args.resume, map_location='cpu') + model_state = ckpt.get('state_dict', ckpt) + if isinstance(model, DDP): + model.module.load_state_dict(model_state) + else: + model.load_state_dict(model_state) + if 'optimizer' in ckpt: + optimizer.load_state_dict(ckpt['optimizer']) + start_epoch = ckpt.get('epoch', start_epoch) + best_acc = ckpt.get('best_acc', best_acc) + print(f"=> resumed from {args.resume}, start_epoch={start_epoch}") + else: + print(f"=> resume path {args.resume} not found") + + log_columns = ['epoch', 'lr', 'loss', 'acc1', 'acc5', 'val_loss', 'val_acc1', 'val_acc5'] + log_df = pd.DataFrame(columns=log_columns) + # step-level log + step_log_columns = ['epoch', 'batch_idx', 'global_step', 'lr', 'loss', 'loss_avg', 'acc1', 'acc1_avg', 'acc5', 'acc5_avg', 'time'] + step_log_df = pd.DataFrame(columns=step_log_columns) + + total_epochs = args.epochs + # global_step计数器(训练过程中跨epoch持续) + global_step = 0 + + epoch = start_epoch + # loop until either epoch criteria or step criteria met + while True: + if train_sampler is not None: + if args.seed_sampler: + train_sampler.set_epoch(epoch + args.seed) + else: + train_sampler.set_epoch(epoch) + + if rank == 0: + print(f"==== Epoch {epoch}/{total_epochs - 1} ====") + + # 如果传入了 args.steps (>0),则把剩余允许的 step 数传给 train_one_epoch, + # 否则 max_global_steps=None(按整 epoch 执行完) + if args.steps and args.steps > 0: + max_global_steps = args.steps + else: + max_global_steps = None + + train_log, step_logs, global_step = train_one_epoch( + args, epoch, model, criterion, optimizer, train_loader, device, scaler, + scheduler, train_sampler, global_step_start=global_step, max_global_steps=max_global_steps + ) + + # 如果启用了按 steps 的模式且已经达到上限,标记需要在做一次验证后退出 + if max_global_steps is not None and global_step >= max_global_steps: + if rank == 0: + print(f"[Main] 达到 max_global_steps={max_global_steps}(global_step={global_step}),将在完成验证后退出训练。") + # 我们不 return 立刻退出;后面的 validate / 保存逻辑会执行一次,然后 main 返回/结束 + end_due_to_steps = True + else: + end_due_to_steps = False + + # 验证并记录 epoch 级别日志(如果在 step 模式下很可能在中间某个 epoch 提前结束,但我们仍做一次 validate) + val_log = validate(args, model, val_loader, criterion, device, args.batch_size) + current_lr = optimizer.param_groups[0]['lr'] + + if rank == 0: + # epoch summary print, 格式与示例对齐 + print(f"Epoch[{epoch}]: epoch_train_loss {train_log['loss']:.4f} acc1 {train_log['acc1']:.2f} acc5 {train_log['acc5']:.2f} | " + f"val_loss {val_log['loss']:.4f} acc1 {val_log['acc1']:.2f} acc5 {val_log['acc5']:.2f} lr {current_lr:.6f}") + row = { + 'epoch': epoch, + 'lr': current_lr, + 'loss': train_log['loss'], + 'acc1': train_log['acc1'], + 'acc5': train_log['acc5'], + 'val_loss': val_log['loss'], + 'val_acc1': val_log['acc1'], + 'val_acc5': val_log['acc5'], + } + new_row_df = pd.DataFrame([row]) + log_df = pd.concat([log_df, new_row_df], ignore_index=True) + log_df.to_csv(os.path.join(save_dir, 'log.csv'), index=False) + + is_best = val_log['acc1'] > best_acc + if is_best: + best_acc = val_log['acc1'] + if (epoch % args.save_freq == 0) or is_best or ( (max_global_steps is None) and (epoch == total_epochs - 1) ) : + state = { + 'epoch': epoch, + 'state_dict': model.module.state_dict() if isinstance(model, DDP) else model.state_dict(), + 'best_acc': best_acc, + 'optimizer': optimizer.state_dict(), + 'args': vars(args) + } + save_checkpoint(state, is_best, save_dir, filename=f'checkpoint_epoch_{epoch}.pth') + + # 如果是因为 steps 模式达到上限,则在完成 validation / 保存后退出训练 + if end_due_to_steps: + if rank == 0: 
+ print(f"[Main] 已在 steps 模式下完成最后一次验证并保存,训练结束(global_step={global_step})。") + break + + # increment epoch + epoch += 1 + + # stopping conditions: + # 1) if steps mode enabled and reached steps -> stop + if args.steps and args.steps > 0: + if global_step >= args.steps: + if rank == 0: + print(f"[Main] 已达到指定 steps={args.steps}(global_step={global_step}),训练结束。") + break + + # 2) if steps not used, stop when epoch >= epochs + else: + if epoch >= total_epochs: + if rank == 0: + print(f"[Main] 已达到指定 epochs={total_epochs}(epoch={epoch}),训练结束。") + break + + if dist.is_initialized(): + dist.barrier() + if rank == 0: + print("Training finished. Best val acc1: {:.2f}".format(best_acc)) + +if __name__ == '__main__': + main() \ No newline at end of file From fa704705acac0b4ea8a58f1bb6b651906a6354cc Mon Sep 17 00:00:00 2001 From: wangwl Date: Wed, 7 Jan 2026 05:50:26 +0000 Subject: [PATCH 2/3] fix: cleanup code and update --- .../Classification/SigLIP/SigLIP/README.md | 19 -- .../SigLIP/SigLIP/modeling_gemma.py | 170 ---------------- .../SigLIP/SigLIP/modeling_siglip.py | 192 ------------------ .../SigLIP/SigLIP/processing_paligemma.py | 123 ----------- .../Classification/SigLIP/coverage.txt | 3 - PyTorch/build-in/Classification/SigLIP/readme | 65 ++++++ .../SigLIP/requirements_exact.txt | 89 ++++++++ PyTorch/build-in/Classification/SigLIP/run | 1 - .../Classification/SigLIP/siglip_loss.jpg | Bin 36621 -> 0 bytes .../Classification/SigLIP/siglip_loss.txt | 29 --- 10 files changed, 154 insertions(+), 537 deletions(-) delete mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/README.md delete mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py delete mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py delete mode 100644 PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py delete mode 100644 PyTorch/build-in/Classification/SigLIP/coverage.txt create mode 100644 PyTorch/build-in/Classification/SigLIP/readme create mode 100644 PyTorch/build-in/Classification/SigLIP/requirements_exact.txt delete mode 100644 PyTorch/build-in/Classification/SigLIP/run delete mode 100644 PyTorch/build-in/Classification/SigLIP/siglip_loss.jpg delete mode 100644 PyTorch/build-in/Classification/SigLIP/siglip_loss.txt diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/README.md b/PyTorch/build-in/Classification/SigLIP/SigLIP/README.md deleted file mode 100644 index 1bd26c877..000000000 --- a/PyTorch/build-in/Classification/SigLIP/SigLIP/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# PaliGemma - -It combines SigLip Visual Encoder, with Gemma Language Model to create a Vision language model. - - -SigLip Visual Encoder -https://huggingface.co/docs/transformers/en/model_doc/siglip - -Sigmoid Loss for Language Image Pre-Training -https://arxiv.org/abs/2303.15343 - -Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer - - - - -Took around 20 hours to understand the model and follow the tutorial, but it was worth it, can implement Multimodal Language Models from Research paper now. 
- -Credits to Umar Jamil -> https://www.youtube.com/watch?v=vAmKB7iPkWw \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py b/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py deleted file mode 100644 index 781cdd4b3..000000000 --- a/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_gemma.py +++ /dev/null @@ -1,170 +0,0 @@ -import torch -from torch import nn -from typing import Optional, Tuple, List -from torch.nn import CrossEntropyLoss -import math -from modeling_siglip import SiglipVisionConfig, SiglipVisionModel - - -class GemmaConfig(): - def __init__( - self, - vocab_size, - hidden_size, - intermediate_size, - num_hidden_layers, - num_attention_heads, - num_key_value_heads, - head_dim=256, - max_position_embeddings=8192, - rms_norm_eps=1e-6, - rope_theta=10000.0, - attention_bias=False, - attention_dropout=0.0, - pad_token_id=None, - **kwargs, - ): - super().__init__() - self.vocab_size = vocab_size - self.max_position_embeddings = max_position_embeddings - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.head_dim = head_dim - self.num_key_value_heads = num_key_value_heads - self.rms_norm_eps = rms_norm_eps - self.rope_theta = rope_theta - self.attention_bias = attention_bias - self.attention_dropout = attention_dropout - self.pad_token_id = pad_token_id - - -class PaliGemmaConfig(): - def __init__( - self, - vision_config=None, - text_config=None, - ignore_index=-100, - image_token_index=256000, - vocab_size=257152, - projection_dim=2048, - hidden_size=2048, - pad_token_id=None, - **kwargs, - ): - super().__init__() - self.ignore_index = ignore_index - self.image_token_index = image_token_index - self.vocab_size = vocab_size - self.projection_dim = projection_dim - self.hidden_size = hidden_size - self.vision_config = vision_config - self.is_encoder_decoder = False - self.pad_token_id = pad_token_id - - self.vision_config = SiglipVisionConfig(**vision_config) - self.text_config = text_config - - self.text_config = GemmaConfig(**text_config, pad_token_id=pad_token_id) - self.vocab_size = self.text_config.vocab_size - - self.text_config.num_image_tokens = (self.vision_config.image_size // self.vision_config.patch_size)**2 - self.vision_config.projection_dim = projection_dim - - -class PaliGemmaForConditionalGeneration(nn.Module): - def __init__(self, config: PaliGemmaConfig): - super().__init__() - self.config = config - self.vision_tower = SiglipVisionModel(config.vision_config) - self.multi_modal_projector = PaliGemmaMultiModalProjector(config) - self.vocab_size = config.vocab_size - - language_model = GemmaForCausalLM(config.text_config) - self.language_model = language_model - - self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 - - def tie_weights(self): - return self.language_model.tie_weights() - - def _merge_input_ids_with_image_features( - self, image_features: torch.Tensor, inputs_embeds: torch.Tensor, input_ids: torch.Tensor, attention_mask: torch.Tensor, kv_cache: Optional[KVCache] = None - ): - - _, _, embed_dim = image_features.shape - batch_size, sequence_length = input_ids.shape - dtype, device = inputs_embeds.dtype, inputs_embeds.device - scaled_image_features = image_features / (self.config.hidden_size**0.5) - - final_embedding = torch.zeros(batch_size, sequence_length, embed_dim, dtype=inputs_embeds.dtype, 
device=inputs_embeds.device) - - text_mask = (input_ids != self.config.image_token_index) & (input_ids != self.pad_token_id) - image_mask = input_ids == self.config.image_token_index - pad_mask = input_ids == self.pad_token_id - - text_mask_expanded = text_mask.unsqueeze(-1).expand(-1, -1, embed_dim) - pad_mask_expanded = pad_mask.unsqueeze(-1).expand(-1, -1, embed_dim) - image_mask_expanded = image_mask.unsqueeze(-1).expand(-1, -1, embed_dim) - - final_embedding = torch.where(text_mask_expanded, inputs_embeds, final_embedding) - final_embedding = final_embedding.masked_scatter(image_mask_expanded, scaled_image_features) - final_embedding = torch.where(pad_mask_expanded, torch.zeros_like(final_embedding), final_embedding) - - - - dtype, device = inputs_embeds.dtype, inputs_embeds.device - min_dtype = torch.finfo(dtype).min - q_len = inputs_embeds.shape[1] - - if kv_cache is None or kv_cache.num_items() == 0: - - causal_mask = torch.full( - (batch_size, q_len, q_len), fill_value=0, dtype=dtype, device=device - ) - else: - assert q_len == 1 - kv_len = kv_cache.num_items() + q_len - - causal_mask = torch.full( - (batch_size, q_len, kv_len), fill_value=0, dtype=dtype, device=device - ) - - causal_mask = causal_mask.unsqueeze(1) - - - if kv_cache is not None and kv_cache.num_items() > 0: - position_ids = attention_mask.cumsum(-1)[:, -1] - if position_ids.dim() == 1: - position_ids = position_ids.unsqueeze(0) - else: - position_ids = (attention_mask.cumsum(-1)).masked_fill_((attention_mask == 0), 1).to(device) - - return final_embedding, causal_mask, position_ids - - - def forward( - self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - attention_mask: Optional[torch.Tensor] = None, - kv_cache: Optional[KVCache] = None, - ) -> Tuple: - assert torch.all(attention_mask == 1), "The input cannot be padded" - - inputs_embeds = self.language_model.get_input_embeddings()(input_ids) - - selected_image_feature = self.vision_tower(pixel_values.to(inputs_embeds.dtype)) - image_features = self.multi_modal_projector(selected_image_feature) - input_embeds, attention_mask, position_ids = self._merge_input_ids_with_image_features(image_features, input_embeds, input_ids, attention_mask, kv_cache) - - outputs = self.language_model( - attention_mask=attention_mask, - position_ids=position_ids, - input_embeds=input_embeds, - kv_cache=kv_cache, - ) - - return outputs - \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py b/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py deleted file mode 100644 index 71f3c2ce5..000000000 --- a/PyTorch/build-in/Classification/SigLIP/SigLIP/modeling_siglip.py +++ /dev/null @@ -1,192 +0,0 @@ -import torch -import torch.nn as nn -from typing import Tuple, Optional -class SiglipVisionConfig: - - def __init__( - self, - hidden_size=768, - intermediate_size=3072, - num_hidden_layers=12, - num_hidden_attention_heads=12, - num_channels=3, - image_size=224, - patch_size=16, - layer_norm_eps=1e-6, - attention_dropout=0.0, - num_image_tokens: int = None, - **kwargs - ): - super().__init__() - - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_hidden_attention_heads = num_hidden_attention_heads - self.num_channels = num_channels - self.image_size = image_size - self.patch_size = patch_size - self.layer_norm_eps = layer_norm_eps - self.attention_dropout = attention_dropout - self.num_image_tokens = 
num_image_tokens - -class SiglipVisionEmbeddings(nn.Module): - def __init__(self, config: SiglipVisionConfig): - super.__init__() - self.config = config - self.embed_dim = config.hidden_size - self.image_size = config.image_size - self.patch_size = config.patch_size - - self.patch_embeddings = nn.Convo2d( - in_channels=config.num_channels, - out_channels=self.embed_dim, - kernel_size=self.patch_size, - stride=self.patch_size, - padding="valid", - ) - - self.num_patches = (self.image_size // self.patch_size)**2 - self.num_positions = self.num_patches - self.position_embeddings = nn.Embedding(self.num_positions, self.embed_dim) - self.register_buffer( - "position_ids", - torch.arange(self.num_positions).expand((1, -1)), - persistent=False, - ) - - def forward(self, pixel_values: torch.FloatTesor) -> torch.Tensor: - _, _, height, width = pixel_values.shape - patch_embeds = self.patch_embedding(pixel_values) - embeddings = patch_embeds.flatten(2) - embeddings = embeddings.transpose(1, 2) - embeddings = embeddings + self.position_embedding(self.position_ids) - return embeddings - -class SiglipAttention(nn.Module): - - def __init__(self, config): - super().__init__() - self.config = config - self.embed_dim = config.hidden_size - self.num_heads = config.num_attention_heads - self.head_dim = self.embed_dim // self.num_heads - self.scale = self.head_dim**-0.5 - self.dropout = config.attention_dropout - - self.k_proj = nn.Linear(self.embed_dim, self.embed_dim) - self.v_proj = nn.Linear(self.embed_dim, self.embed_dim) - self.q_proj = nn.Linear(self.embed_dim, self.embed_dim) - self.out_proj = nn.Linear(self.embed_dim, self.embed_dim) - - def forward( - self, - hidden_states:torch.Tensor, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: - batch_size, seq_len, _ = hidden_states.size() - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) - query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) - key_states = key_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) - value_states = value_states.query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1,2) - attn_weights = (torch.matmul(query_states, key_states.transpose(2,3))*self.scale) - - if attn_weights.size() != (batch_size, self.num_heads, seq_len, seq_len): - raise ValueError( - f"Attention weights should be of size {(batch_size, self.num_heads, seq_len, seq_len)} but is" - f"{attn_weights.size()}" - ) - - attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) - attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) - attn_output = torch.matmul(attn_weights, value_states) - - if attn_output.size() != (batch_size, self.num_heads, seq_len, seq_len): - raise ValueError( - f"attn output' should be of size {(batch_size, self.num_heads, seq_len, seq_len)}, but is" - f" {attn_output.size()}" - ) - - attn_output = attn_output.transpose(1, 2).contiguous() - attn_output = attn_output.reshape(batch_size, seq_len, self.embed_dim) - attn_output = self.out_proj(attn_output) - return attn_output, attn_weights - - -class SiglipMLP(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) - self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) - - def forward(self, hidden_states: torch.Tensor) -> 
torch.Tensor: - hidden_states = self.fc1(hidden_states) - hidden_states = nn.functional.gelu(hidden_states, approximate="tanh") - hidden_states = self.fc2(hidden_states) - return hidden_states - -class SiglipEncoderLayer(nn.Module): - def __init__(self, config: SiglipVisionConfig): - super().__init__() - self.embed_dim = config.hidden_size - self.self_attn = SiglipAttention(config) - self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) - self.mlp = SiglipMLP(config) - self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) - - def forward( - self, - hidden_states: torch.Tensor - ) -> torch.Tensor: - residual = hidden_states - hidden_states = self.layer_norm1(hidden_states) - hidden_states, _ = self.self_attn(hidden_states=hidden_states) - hidden_states = residual + hidden_states - residual = hidden_states - hidden_states = self.layer_norm2(hidden_states) - hidden_states = self.mlp(hidden_states) - -class SiglipEncoder(nn.Module): - def __init__(self, config: SiglipVisionConfig): - super().__init__() - self.config = config - self.layers = nn.ModuleList( - [SiglipEncoderLayer(config) for _ in range(config.num_hidden_layers)] - ) - def forward( - self, - inputs_embeds: torch.Tensor - ) -> torch.Tensor: - hidden_states = inputs_embeds - for encoder_layer in self.layers: - hidden_states = encoder_layer(hidden_states) - - return hidden_states - -class SiglipVisionTransformer(nn.Module): - def __init__(self, config: SiglipVisionConfig): - super().__init__() - self.config = config - embed_dim = config.hidden_size - - self.embeddings = SiglipVisionEmbeddings(config) - self.encoder = SiglipEncoder(config) - self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps) - - def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: - hidden_states = self.embeddings(pixel_values) - last_hidden_state = self.encoder(input_embeds=hidden_states) - last_hidden_state = self.post_layernorm(last_hidden_state) - return last_hidden_state - -class SiglipVisionModel(nn.Module): - - def __init__(self, config:SiglipVisionConfig): - super().__init__() - self.config = config - self.vision_model = SiglipVisionTransformer(config) - - def forward(self, pixel_values) -> tuple: - return self.vision_model(pixel_values=pixel_values) \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py b/PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py deleted file mode 100644 index 39a98f6e9..000000000 --- a/PyTorch/build-in/Classification/SigLIP/SigLIP/processing_paligemma.py +++ /dev/null @@ -1,123 +0,0 @@ -from typing import Dict, List, Optional, Union, Tuple, Iterable -import numpy as np -from PIL import Image -import torch - -IMAGENET_STANDARD_MEAN = [0.5, 0.5, 0.5] -IMAGENET_STANDARD_STD = [0.5, 0.5, 0.5] - -def add_image_tokens_to_prompt(prefix_prompt, bos_token, image_seq_len, image_token): - return f"{image_token*image_seq_len}{bos_token}{prefix_prompt}\n" - -def resize( - image: Image, - size: Tuple[int, int], - resample: Image.Resampling = None, - reducing_gap: Optional[int] = None, -) -> np.ndarray: - height, width = size - resized_image = image.resize( - (width, height), resample=resample, reducing_gap=reducing_gap - ) - return resized_image - -def rescale( - image: np.ndarray, scale: float, dtype: np.dtype = np.float32 -) -> np.ndarray: - rescaled_image = image*scale - rescaled_image = rescaled_image.astype(dtype) - return rescaled_image - -def normalize( - image: np.ndarray, - mean: 
Union[float, Iterable[float]], - std: Union[float, Iterable[float]], -) -> np.ndarray: - mean = np.array(mean, dtype=image.dtype) - std = np.array(std, dtype=image.dtype) - image = (image-mean)/std - return image - -def process_images( - images: List[Image.Image], - size: Dict[str, int] = None, - resample: Image.Resampling = None, - rescale_factor: float = None, - image_mean: Optional[Union[float, List[float]]] = None, - image_std: Optional[Union[float, List[float]]] = None, -) -> List[np.ndarray]: - height, width = size[0], size[1] - images = [ - resize(image=image, size=(height, width), resample = resample) for image in images - ] - images = [np.array(image) for image in images] - images = [rescale(image, scale=rescale_factor) for image in images] - images = [normalize(image, mean=image_mean, std=image_std) for image in images] - images = [image.transpose(2, 0, 1) for image in images] - return images - - -class PaliGemmaProcessor: - - IMAGE_TOKEN = "" - - def __init__(self, tokenizer, num_image_tokens: int, image_size: int): - super().__init__() - - self.image_seq_length = num_image_tokens - self.image_size = image_size - - tokens_to_add = {"additional_special_tokens": {self.IMAGE_TOKEN}} - tokenizer.add_special_tokens(tokens_to_add) - EXTRA_TOKENS = [ - f"" for i in range(1024) - ] - EXTRA_TOKENS += [ - f"" for i in range(128) - ] - tokenizer.add_tokens(EXTRA_TOKENS) - self.image_token_id = tokenizer.convert_tokens_to_ids(self.IMAGE_TOKEN) - tokenizer.add_bos_token = False - tokenizer.add_eos_token = False - - self.tokenizer = tokenizer - - def __call__( - self, - text: List[str], - images: List[Image.Image], - padding: str = "longest", - truncation: bool = True, - ) -> dict: - assert len(images) == 1 and len(text) == 1, f"Recieved {len(images)} images for {len(text)} prompts." 
- - pixel_values = process_images( - images, - size=(self.image_size, self.image_size), - resample=Image.Resampling.BICUBIC, - rescale_factor = 1/255.0, - image_mean=IMAGENET_STANDARD_MEAN, - image_std=IMAGENET_STANDARD_STD, - ) - - pixel_values = np.stack(pixel_values, axis=0) - pixel_values = torch.tensor(pixel_values) - - input_strings = [ - add_image_tokens_to_prompt( - prefix_prompt=prompt, - bos_token=self.tokenizer.bos_token, - image_seq_len=self.image_seq_length, - iamge_token=self.IMAGE_TOKEN, - ) - for prompt in text - ] - - inputs = self.tokenizer( - input_strings, - return_tensors="pt", - padding=padding, - truncation=truncation, - ) - return_data = {"pixel_values": pixel_values, **inputs} - return return_data \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SigLIP/coverage.txt b/PyTorch/build-in/Classification/SigLIP/coverage.txt deleted file mode 100644 index ec3e34ba9..000000000 --- a/PyTorch/build-in/Classification/SigLIP/coverage.txt +++ /dev/null @@ -1,3 +0,0 @@ -all api: ['_amp_foreach_non_finite_check_and_unscale_', '_amp_update_scale_', '_copy_from', '_has_compatible_shallow_copy_type', '_local_scalar_dense', '_log_softmax', '_log_softmax_backward_data', '_pin_memory', '_reshape_alias', '_softmax', '_softmax_backward_data', 'add', 'add_', 'addmm', 'as_strided', 'bmm', 'bmm_backward', 'bmm_forward', 'contiguous', 'convolution', 'convolution_backward', 'copy_stride', 'div', 'dropout', 'embedding', 'embedding_dense_backward', 'eq', 'fill_', 'fused_sgd', 'gelu', 'gelu_backward', 'is_pinned', 'linear', 'matmul', 'mean', 'mm', 'mul', 'mul_', 'native_layer_norm', 'native_layer_norm_backward', 'nll_loss_backward', 'nll_loss_forward', 'reciprocal', 'set_', 'sum', 'topk_out', 'view', 'zero_'], total: 48 -fallback op: [], total: 0 -coverage rate: 100.00% diff --git a/PyTorch/build-in/Classification/SigLIP/readme b/PyTorch/build-in/Classification/SigLIP/readme new file mode 100644 index 000000000..96b259aa3 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/readme @@ -0,0 +1,65 @@ +```markdown +## 1. 模型链接 +- 原始仓库链接: +https://github.com/huggingface/pytorch-image-models?tab=readme-ov-file#models + +## 2. 快速开始 + +使用本模型执行训练的主要流程如下: + +1. **基础环境安装**:介绍训练前需要完成的基础环境检查和安装。 +2. **获取数据集**:介绍如何获取训练所需的数据集。 +3. **构建环境**:介绍如何构建模型运行所需要的环境。 +4. **启动训练**:介绍如何运行训练。 + +### 2.1 基础环境安装 + +请参考主仓库的基础环境安装章节,完成训练前的基础环境检查和安装(如驱动、固件等)。 + +### 2.2 准备数据集 + +#### 2.2.1 获取数据集 + +训练使用 **CIFAR-100** 数据集。该数据集为开源数据集,包含 100 个类别的 60000 张彩色图像。 + +#### 2.2.2 处理数据集 + +请确保数据集已下载并解压。根据训练脚本的默认配置,建议将数据集存放在模型目录的上级 `data` 目录中(即 `../data`),或者根据实际路径修改训练命令中的 `--datapath` 参数。 + +### 2.3 构建环境 + +所使用的环境下需包含 PyTorch 框架虚拟环境。 + +1. 执行以下命令,启动虚拟环境(根据实际环境名称修改): + + ```bash + conda activate torch_env_py310 + +``` + +2. 安装 Python 依赖。确保已安装项目所需的依赖包: +```bash +pip install -r requirements_exact.txt + +``` + + + +### 2.4 启动训练 + +1. 在构建好的环境中,进入模型训练脚本所在目录。 + +2. 
运行训练。该模型支持单机单卡训练。 +执行以下命令启动训练(使用 CIFAR-100 数据集,Batch Size 为 128): +```bash +python weloTrainStep.py \ + --name train \ + --arch Siglip \ + --print_freq 1 \ + --steps 100 \ + --dataset cifar100 \ + --datapath ../data \ + --batch_size 32 \ + --epochs 100 + +``` diff --git a/PyTorch/build-in/Classification/SigLIP/requirements_exact.txt b/PyTorch/build-in/Classification/SigLIP/requirements_exact.txt new file mode 100644 index 000000000..7394b3319 --- /dev/null +++ b/PyTorch/build-in/Classification/SigLIP/requirements_exact.txt @@ -0,0 +1,89 @@ +addict==2.4.0 +aliyun-python-sdk-core==2.16.0 +aliyun-python-sdk-kms==2.16.5 +anyio==4.11.0 +astunparse==1.6.3 +certifi==2024.12.14 +cffi==2.0.0 +charset-normalizer==3.4.1 +click==8.3.1 +colorama==0.4.6 +contourpy==1.3.2 +crcmod==1.7 +cryptography==46.0.3 +cycler==0.12.1 +einops==0.8.1 +exceptiongroup==1.3.1 +filelock==3.14.0 +fonttools==4.60.1 +fsspec==2024.12.0 +future @ file:///croot/future_1730902796226/work +git-filter-repo==2.47.0 +h11==0.16.0 +hf-xet==1.2.0 +httpcore==1.0.9 +httpx==0.28.1 +huggingface_hub==1.1.5 +idna==3.10 +inplace-abn @ git+https://github.com/mapillary/inplace_abn.git@b50bfe9c7cd7116a3ab091a352b48d6ba5ee701c +Jinja2==3.1.5 +jmespath==0.10.0 +joblib==1.5.2 +kiwisolver==1.4.9 +Markdown==3.10 +markdown-it-py==4.0.0 +MarkupSafe==3.0.2 +matplotlib==3.10.7 +mdurl==0.1.2 +mmdet==3.3.0 +mmengine==0.10.7 +model-index==0.1.11 +mpmath==1.3.0 +networkx==3.4.2 +numpy==1.23.5 +opencv-python==4.12.0.88 +opendatalab==0.0.10 +openmim==0.3.9 +openxlab==0.1.3 +ordered-set==4.1.0 +oss2==2.17.0 +packaging @ file:///croot/packaging_1734472117206/work +pandas==2.3.3 +pillow==11.1.0 +platformdirs==4.5.1 +pycocotools==2.0.11 +pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work +pycryptodome==3.23.0 +Pygments==2.19.2 +pyparsing==3.2.5 +python-dateutil==2.9.0.post0 +pytz==2023.4 +PyYAML @ file:///croot/pyyaml_1728657952215/work +requests==2.28.2 +rich==13.4.2 +safetensors==0.7.0 +scikit-learn==1.7.2 +scipy==1.15.3 +shapely==2.1.2 +shellingham==1.5.4 +six @ file:///tmp/build/80754af9/six_1644875935023/work +sniffio==1.3.1 +sympy==1.13.3 +tabulate==0.9.0 +termcolor==3.2.0 +terminaltables==3.1.10 +threadpoolctl==3.6.0 +timm==1.0.22 +tomli==2.3.0 +torch @ file:///apps/torch-2.4.0a0%2Bgit4451b0e-cp310-cp310-linux_x86_64.whl#sha256=2e472c916044cac5a1a0e0d8b0e12bb943d8522b24ff826c8014dd444dccd378 +torch_sdaa @ file:///apps/torch_sdaa-2.0.0-cp310-cp310-linux_x86_64.whl#sha256=5aa57889b002e1231fbf806642e1353bfa016297bc25178396e89adc2b1f92e7 +torchaudio @ file:///apps/torchaudio-2.0.2%2Bda3eb8d-cp310-cp310-linux_x86_64.whl#sha256=46525c02fb7eaa8dafea860428de3d01e437ba8d6ff2cc228d7c71975ac4054b +torchdata @ file:///apps/torchdata-0.6.1%2Be1feeb2-py3-none-any.whl#sha256=aa2dc1a7732ea68adfad186978049bf68cc1afdbbdd1e17a8024227ab770e433 +torchtext @ file:///apps/torchtext-0.15.2a0%2B4571036-cp310-cp310-linux_x86_64.whl#sha256=7e42c684ba366f97b59ec37488bf95e416cce3892b6589200d2b3ad159ee5788 +torchvision @ file:///apps/torchvision-0.15.1a0%2B42759b1-cp310-cp310-linux_x86_64.whl#sha256=4b904db2d50102415536bc764bbc31c669b90b1b014f90964e9eccaadb2fd9eb +tqdm==4.65.2 +typer-slim==0.20.0 +typing_extensions==4.15.0 +tzdata==2025.2 +urllib3==1.26.20 +yapf==0.43.0 diff --git a/PyTorch/build-in/Classification/SigLIP/run b/PyTorch/build-in/Classification/SigLIP/run deleted file mode 100644 index 2e83eab02..000000000 --- a/PyTorch/build-in/Classification/SigLIP/run +++ /dev/null @@ -1 +0,0 @@ -bash ../sdaaTest.sh diff --git 
a/PyTorch/build-in/Classification/SigLIP/siglip_loss.jpg b/PyTorch/build-in/Classification/SigLIP/siglip_loss.jpg deleted file mode 100644 index be3ed9b64f872b373ce2c50a4fa5511217afceb8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 36621 [base85-encoded JPEG payload for siglip_loss.jpg (36621 bytes) omitted]
diff --git a/PyTorch/build-in/Classification/SigLIP/siglip_loss.txt b/PyTorch/build-in/Classification/SigLIP/siglip_loss.txt
deleted file mode 100644
index 899e082e1..000000000
--- a/PyTorch/build-in/Classification/SigLIP/siglip_loss.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-=== CUDA ===
-4.598600 5.078500 4.634700 5.369500 5.446500 5.547900 5.462300 6.285000 5.508400 5.709800
-5.490900 6.323200 5.549800 5.892800 5.058500 5.731100 5.398200 5.147800 5.467100 5.150100
-5.114700 6.221800 5.323600 4.941700 5.687600 5.310300 5.684600 5.102700 5.503000 4.995000
-6.201700 5.872000 5.899000 6.706300 5.223900 5.587600 5.982100 6.127200 6.364800 5.207300
-5.855800 6.675700 6.006200 6.277900 5.877400 5.242900 5.683500 5.629500 5.107200 5.035200
-5.179700 5.259000 5.067500 5.007000 4.620100 5.062000 4.798700 4.054500 4.564700 4.695700
-4.838800 4.922400 4.416900 4.693200 4.663500 4.669700 4.929600 4.415700 4.412000 4.732400
-4.601800 4.719100 4.985100 4.437400 4.501600 4.763700 4.748200 4.589100 4.821000 4.584100
-4.371500 4.702600 4.652700 4.818600 4.928400 4.789900 4.389700 4.694300 4.635200 4.764100
-4.243000 4.635700 4.610700 4.616800 4.396400 4.577500 4.482200 4.629400 4.617600 4.642600
-
-=== SDAA ===
-4.598600 5.078700 4.634800 5.369400 5.446300 5.548800 5.463400 6.285500 5.507600 5.711500
-5.493200 6.324900 5.548200 5.892900 5.073400 5.740500 5.404200 5.139800 5.458100 5.157100
-5.136300 6.253800 5.330500 4.916400 5.672700 5.399100 6.077100 5.811800 5.643800 4.976600
-6.019600 5.382200 5.546100 5.684400 5.265100 5.979200 5.989300 5.777400 5.813900 5.651500
-5.529600 6.738700 6.192100 6.600300 5.850900 5.733400 5.734200 5.733800 5.473700 5.849100
-5.292800 5.802500 5.141200 5.480600 5.032100 5.049200 5.664400 4.978500 4.691300 4.774900
-4.748200 4.611400 4.443100 4.548200 5.300900 4.813600 4.814600 4.574500 4.550600 4.657500
-4.580500 4.620500 4.630300 4.521800 4.271700 4.718300 4.309400 4.577000 4.657000 4.375800
-4.317900 4.590800 4.487900 4.486900 4.594500 4.764500 4.328100 4.515700 4.669400 4.501500
-4.408400 4.643500 4.416800 4.520200 4.382700 4.311300 4.288000 4.411600 4.533400 4.435400
-
-=== RESULT ===
-MeanRelativeError: 0.0033615679983984457
-MeanAbsoluteError: 0.012283999999999988
-Rule,mean_relative_error 0.0033615679983984457
-pass mean_relative_error=0.0033615679983984457 <= 0.05 or mean_absolute_error=0.012283999999999988 <= 0.0002

From a5e1a75957e0d0600c76f7889e6068aadfcb258c Mon Sep 17 00:00:00 2001
From: root
Date: Thu, 8 Jan 2026 10:33:49 +0000
Subject: [PATCH 3/3] fix: rename files and update code

---
 PyTorch/build-in/Classification/SigLIP/{readme => readme.md}       | 0
 .../SigLIP/{requirements_exact.txt => requirements.txt}            | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename PyTorch/build-in/Classification/SigLIP/{readme => readme.md} (100%)
 rename PyTorch/build-in/Classification/SigLIP/{requirements_exact.txt => requirements.txt} (100%)

diff --git a/PyTorch/build-in/Classification/SigLIP/readme b/PyTorch/build-in/Classification/SigLIP/readme.md
similarity index 100%
rename from PyTorch/build-in/Classification/SigLIP/readme
rename to PyTorch/build-in/Classification/SigLIP/readme.md
diff --git a/PyTorch/build-in/Classification/SigLIP/requirements_exact.txt b/PyTorch/build-in/Classification/SigLIP/requirements.txt
similarity index 100%
rename from PyTorch/build-in/Classification/SigLIP/requirements_exact.txt
rename to PyTorch/build-in/Classification/SigLIP/requirements.txt
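For reference, the deleted siglip_loss.txt above records a CUDA-vs-SDAA precision check: it pairs the per-step training losses from both backends, reports MeanRelativeError and MeanAbsoluteError, and passes when mean_relative_error <= 0.05 or mean_absolute_error <= 0.0002. The following is a minimal sketch, assuming NumPy and a hypothetical compare_losses helper, of how those figures and the pass rule can be computed; it is not code from this repository, and the exact averaging used by the original tooling may differ.

import numpy as np

def compare_losses(cuda_losses, sdaa_losses, rel_tol=0.05, abs_tol=0.0002):
    # Pair the per-step losses from the CUDA reference run and the SDAA run.
    cuda = np.asarray(cuda_losses, dtype=np.float64)
    sdaa = np.asarray(sdaa_losses, dtype=np.float64)
    abs_err = np.abs(sdaa - cuda)
    mean_absolute_error = float(abs_err.mean())
    # Relative error is measured against the CUDA reference values.
    mean_relative_error = float((abs_err / np.abs(cuda)).mean())
    passed = mean_relative_error <= rel_tol or mean_absolute_error <= abs_tol
    return mean_relative_error, mean_absolute_error, passed

# Example using the first few steps of each trace from siglip_loss.txt:
cuda = [4.598600, 5.078500, 4.634700, 5.369500]
sdaa = [4.598600, 5.078700, 4.634800, 5.369400]
rel_err, abs_err, ok = compare_losses(cuda, sdaa)
print(f"mean_relative_error={rel_err} mean_absolute_error={abs_err} {'pass' if ok else 'fail'}")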