diff --git a/excuter/cpp-common/src/stdutil/fs.cpp b/excuter/cpp-common/src/stdutil/fs.cpp
index 6b30d551..fbe4fbde 100644
--- a/excuter/cpp-common/src/stdutil/fs.cpp
+++ b/excuter/cpp-common/src/stdutil/fs.cpp
@@ -17,9 +17,15 @@ namespace stdutil
     void save(const byte *data, size_t size, const string &path)
     {
         ofstream ofs(path, ios::binary | ios::out | ios::trunc);
+        if (!ofs.is_open()) {
+            throw std::runtime_error("Failed to open file for writing: " + path);
+        }
         ofs.write(reinterpret_cast<const char *>(data), size);
+        if (!ofs) {
+            throw std::runtime_error("Failed to write data to file: " + path);
+        }
         ofs.close();
     }
diff --git a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp
index b1d9ef8b..c595e23a 100644
--- a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp
+++ b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp
@@ -54,6 +54,12 @@ namespace deepx::tf
             case Precision::Float32:
                 reshape(*mem->gettensor<float>(this->args[0].textvalue), shape, *mem->gettensor<float>(this->returns[0].textvalue));
                 break;
+            case Precision::Float16:
+                reshape(*mem->gettensor<half>(this->args[0].textvalue), shape, *mem->gettensor<half>(this->returns[0].textvalue));
+                break;
+            case Precision::BFloat16:
+                reshape(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), shape, *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
+                break;
             case Precision::Int64:
                 reshape(*mem->gettensor<int64_t>(this->args[0].textvalue), shape, *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp b/excuter/op-mem-cuda/src/deepx/tf/io.hpp
index d676c743..b15feb73 100644
--- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp
+++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp
@@ -24,24 +24,68 @@ namespace deepx::tf
         int run(shared_ptr<MemBase> mem, string &error) override
         {
             string name = this->args[0].textvalue;
-            if (mem->existstensor(name))
-            {
-                auto t = mem->gettensor(name);
-                if (this->args.size() == 1)
-                {
-                    tensorfunc::print(*t);
-                }
-                else
-                {
-                    tensorfunc::print(*t, this->args[1].textvalue);
-                }
-            }
-            else
-            {
+            if (!mem->existstensor(name))
+            {
                 std::cerr << "print " << name << " not found" << std::endl;
                 error = "print " + name + " not found";
                 return 1;
             }
+            string format = "";
+            if (this->args.size() > 1)
+            {
+                format = this->args[1].textvalue;
+            }
+
+            Precision dtype = mem->gettensor(name)->shape.dtype;
+            switch (dtype)
+            {
+            case Precision::Float64: {
+                auto t = mem->gettensor<double>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Float32: {
+                auto t = mem->gettensor<float>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Float16: {
+                auto t = mem->gettensor<half>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::BFloat16: {
+                auto t = mem->gettensor<nv_bfloat16>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Int64: {
+                auto t = mem->gettensor<int64_t>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Int32: {
+                auto t = mem->gettensor<int32_t>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Int16: {
+                auto t = mem->gettensor<int16_t>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Int8: {
+                auto t = mem->gettensor<int8_t>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            case Precision::Bool: {
+                auto t = mem->gettensor<bool>(name);
+                tensorfunc::print(*t, format);
+                break;
+            }
+            default:
+                break;
+            }
             return 0;
         }
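With `print` and `reshape` now dispatching over Float16/BFloat16, a minimal front-end smoke test (a sketch mirroring `front/py/examples/1_tensor/2_saveload.py` further down in this patch) would be:

```python
# Minimal smoke test for the new half-precision print path (a sketch,
# mirroring front/py/examples/1_tensor/2_saveload.py).
from deepx.nn.functional import full

t = full((3, 4, 5), 2, 'bfloat16')
t.print()          # exercises the Precision::BFloat16 branch in io.hpp
t.float().print()  # compare against the Float32 branch
```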
diff --git a/front/py/deepx/__init__.py b/front/py/deepx/__init__.py
index 37f47669..9547c60a 100644
--- a/front/py/deepx/__init__.py
+++ b/front/py/deepx/__init__.py
@@ -1,11 +1,12 @@
 from .tensor import Tensor,Shape,Number
 from deepx.nn.functional import *  # import all functional functions
 from deepx.nn.functional import __all__ as _func_all  # functional's export list
-
+from deepx.utils import __all__ as _utils_all  # utils' export list
 __all__ = [
     #tensor
     'Tensor','Shape','Number',
-    *_func_all
+    *_func_all,
+    *_utils_all,
 ]

 # to support `import deepx as dx` usage
diff --git a/front/py/deepx/nn/__init__.py b/front/py/deepx/nn/__init__.py
index a879f49e..f20124f8 100644
--- a/front/py/deepx/nn/__init__.py
+++ b/front/py/deepx/nn/__init__.py
@@ -1,5 +1,6 @@
 from .deepxir import *
-
+from .modules import __all__ as _modules_all
 __all__ = [
-    "DeepxIR","DeepxIRResp"
+    "DeepxIR","DeepxIRResp",
+    *_modules_all
 ]
\ No newline at end of file
diff --git a/front/py/deepx/transformer/modeling_rope_utils.py b/front/py/deepx/transformer/modeling_rope_utils.py
index ca27caa7..af693b91 100644
--- a/front/py/deepx/transformer/modeling_rope_utils.py
+++ b/front/py/deepx/transformer/modeling_rope_utils.py
@@ -1,42 +1,37 @@
-from typing import Tuple
+from typing import Tuple,Optional
 import math
+from deepx.utils import Config
 from deepx import arange,Tensor,where

-def _compute_default_rope_parameters(config:dict={
-    "rope_theta":10000.0,
-    "head_dim":0,
-    "partial_rotary_factor":1.0,
-}) -> Tuple[Tensor, float]:
-    partial_rotary_factor = config.get("partial_rotary_factor", 1.0)
-    dim = config["head_dim"]* partial_rotary_factor
-    # compute the inverse frequencies
-    base=config["rope_theta"]
-    inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype='float64')/ dim))
-    return inv_freq, 1.0
+def _compute_default_rope_parameters(config: Optional[Config] = None, seq_len: Optional[int] = None, **rope_kwargs) -> Tuple[Tensor, float]:
+    if len(rope_kwargs) > 0:
+        base = rope_kwargs["base"]
+        dim = rope_kwargs["dim"]
+    elif config is not None:
+        base = config.rope_theta
+        partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
+        head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
+        dim = int(head_dim * partial_rotary_factor)
+
+    attention_factor = 1.0  # Unused in this type of RoPE
+
+    # Compute the inverse frequencies
+    inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype="int64").float() / dim))
+    return inv_freq, attention_factor

-def _compute_llama3_parameters(config:dict={
-    "rope_theta":10000.0,
-    "head_dim":0,
-    "partial_rotary_factor":1.0,
-    "factor":8,
-    "low_freq_factor":1,
-    "high_freq_factor":4,
-    "old_context_len":8192,
-    "seq_len":None
-}) -> Tuple[Tensor, float]:
+def _compute_llama3_parameters(config: Config, seq_len: Optional[int] = None, **rope_kwargs) -> Tuple[Tensor, float]:
     # Gets the default RoPE parameters
-    inv_freq, attention_factor = _compute_default_rope_parameters(config)
+    inv_freq, attention_factor = _compute_default_rope_parameters(config, seq_len, **rope_kwargs)

-    factor = config["rope_scaling"]["factor"]  # `8` in the original implementation
-    low_freq_factor = config["rope_scaling"]["low_freq_factor"]  # `1` in the original implementation
-    high_freq_factor = config["rope_scaling"]["high_freq_factor"]  # `4` in the original implementation
-    old_context_len = config["rope_scaling"]["original_max_position_embeddings"]  # `8192` in the original implementation
+    factor = config.rope_scaling["factor"]  # `8` in the original implementation
+    low_freq_factor = config.rope_scaling["low_freq_factor"]  # `1` in the original implementation
+    high_freq_factor = config.rope_scaling["high_freq_factor"]  # `4` in the original implementation
+    old_context_len = config.rope_scaling["original_max_position_embeddings"]  # `8192` in the original implementation

     low_freq_wavelen = old_context_len / low_freq_factor
     high_freq_wavelen = old_context_len / high_freq_factor
     wavelen = 2 * math.pi / inv_freq
-    wavelen.print()
     # wavelen < high_freq_wavelen: do nothing
     # wavelen > low_freq_wavelen: divide by factor
     inv_freq_llama = where(wavelen > low_freq_wavelen, inv_freq / factor, inv_freq)
@@ -44,11 +39,8 @@ def _compute_llama3_parameters(config:dict={
     smooth_factor = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
     smoothed_inv_freq = (1 - smooth_factor) * inv_freq_llama / factor + smooth_factor * inv_freq_llama
     is_medium_freq = ~(wavelen < high_freq_wavelen) * ~(wavelen > low_freq_wavelen)
-    is_medium_freq.print()
-    # TODO after this step, "an illegal memory access was encountered" occurs
     inv_freq_llama = where(is_medium_freq, smoothed_inv_freq, inv_freq_llama)
-    is_medium_freq.print()
-    inv_freq_llama.print()
+
     return inv_freq_llama, attention_factor

 ROPE_INIT_FUNCTIONS = {
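The llama3 variant rescales the default inverse frequencies by wavelength band: long wavelengths are divided by `factor`, short ones are kept, and a medium band is interpolated. A NumPy re-derivation of the math above, using the default constants quoted in the comments (NumPy stands in for deepx tensors here):

```python
# NumPy sketch of the inverse-frequency formula and the llama3 band-split
# (base=10000, factor=8, low_freq_factor=1, high_freq_factor=4,
# old_context_len=8192, as quoted in the comments above).
import numpy as np

base, dim = 10000.0, 8
inv_freq = 1.0 / (base ** (np.arange(0, dim, 2, dtype=np.int64) / dim))

factor, low_ff, high_ff, old_ctx = 8, 1, 4, 8192
wavelen = 2 * np.pi / inv_freq
low_wl, high_wl = old_ctx / low_ff, old_ctx / high_ff

# long wavelengths are divided by `factor`, short ones kept as-is
inv_freq_llama = np.where(wavelen > low_wl, inv_freq / factor, inv_freq)

# medium band: linear interpolation between scaled and unscaled frequencies
smooth = (old_ctx / wavelen - low_ff) / (high_ff - low_ff)
smoothed = (1 - smooth) * inv_freq_llama / factor + smooth * inv_freq_llama
is_medium = ~(wavelen < high_wl) & ~(wavelen > low_wl)
inv_freq_llama = np.where(is_medium, smoothed, inv_freq_llama)
print(inv_freq_llama)  # per-channel frequencies after llama3 rescaling
```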
diff --git a/front/py/deepx/transformer/models/llama/attention.py b/front/py/deepx/transformer/models/llama/attention.py
index 6100c828..a028ce49 100644
--- a/front/py/deepx/transformer/models/llama/attention.py
+++ b/front/py/deepx/transformer/models/llama/attention.py
@@ -1,6 +1,8 @@
 from typing import Optional,Tuple
-from deepx.nn.modules import Module,Linear
+from deepx import nn
 from deepx import Tensor,matmul,softmax,cat,dropout as dropout_func
+from deepx.nn.modules import Module
+from deepx.utils import Config

@@ -52,7 +54,9 @@ def eager_attention_forward(
     return attn_output, attn_weights

 class LlamaAttention(Module):
-    def __init__(self, config:dict, layer_idx: int):
+    """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+    def __init__(self, config: Config, layer_idx: int):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
@@ -62,19 +66,20 @@ def __init__(self, config:dict, layer_idx: int):
         self.attention_dropout = config.attention_dropout
         self.is_causal = True

-        self.q_proj = Linear(
+        self.q_proj = nn.Linear(
             config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.attention_bias
         )
-        self.k_proj = Linear(
+        self.k_proj = nn.Linear(
             config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
         )
-        self.v_proj = Linear(
+        self.v_proj = nn.Linear(
             config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
         )
-        self.o_proj = Linear(
+        self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
+
     def forward(
         self,
         hidden_states: Tensor,
@@ -90,17 +95,16 @@ def forward(
         cos, sin = position_embeddings
         query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
-
-
-        attn_output, attn_weights = attention_interface(
+
+
+        attn_output, attn_weights = eager_attention_forward(
             self,
             query_states,
             key_states,
             value_states,
             attention_mask,
-            dropout=0.0 if not self.training else self.attention_dropout,
             scaling=self.scaling,
-            **kwargs,
+            dropout=0.0 if not self.training else self.attention_dropout
         )

         attn_output = attn_output.reshape(*input_shape, -1)
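For reference, the core computation `eager_attention_forward` is expected to perform (scaled QK^T, softmax, weighted sum), as a NumPy sketch; the signature is assumed from the call site above, and the GQA key/value repetition (the deleted `repeat_kv`) is omitted:

```python
# NumPy sketch of an eager attention step; shapes are (B, H, L, D).
import numpy as np

def eager_attention_sketch(q, k, v, mask, scaling, dropout=0.0):
    attn_weights = q @ k.transpose(0, 1, 3, 2) * scaling       # (B, H, L, L)
    if mask is not None:
        attn_weights = attn_weights + mask                     # additive mask
    attn_weights = np.exp(attn_weights - attn_weights.max(-1, keepdims=True))
    attn_weights = attn_weights / attn_weights.sum(-1, keepdims=True)  # softmax
    # dropout omitted in this sketch (dropout=0.0 at inference anyway)
    return attn_weights @ v, attn_weights                      # output, weights

B, H, L, D = 1, 2, 4, 8
q = np.random.randn(B, H, L, D)
k = np.random.randn(B, H, L, D)
v = np.random.randn(B, H, L, D)
out, w = eager_attention_sketch(q, k, v, None, scaling=D ** -0.5)
```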
diff --git a/front/py/deepx/transformer/models/llama/embedding.py b/front/py/deepx/transformer/models/llama/embedding.py
index 2de28f54..0ca7ab10 100644
--- a/front/py/deepx/transformer/models/llama/embedding.py
+++ b/front/py/deepx/transformer/models/llama/embedding.py
@@ -1,17 +1,17 @@
 from deepx.nn.modules import Module
-from deepx import Tensor,concat
+from deepx import cat
 from deepx.transformer.modeling_rope_utils import ROPE_INIT_FUNCTIONS
-
+from deepx.utils import Config
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
 class LlamaRotaryEmbedding(Module):
-    def __init__(self,config:dict):
+    def __init__(self,config:Config):
         super().__init__()
         # maximum sequence length
-        self.max_seq_len_cached = config["max_position_embeddings"]
+        self.max_seq_len_cached = config.max_position_embeddings
         # original maximum sequence length
-        self.original_max_seq_len = config["max_position_embeddings"]
+        self.original_max_seq_len = config.max_position_embeddings
         # rope type
-        self.rope_type=config["rope_scaling"]["rope_type"]
+        self.rope_type = config.rope_scaling.rope_type
         # rope init function
         self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
         # rope init function
@@ -39,7 +39,7 @@ def __init__(self,config:dict):

     def forward(self, x, position_ids):
         # expand the inverse frequencies
-        inv_freq_expanded = self.inv_freq[None, :, None].todtype('float32').expand((position_ids.shape[0], -1, 1))
+        inv_freq_expanded = self.inv_freq[None, :, None].float().expand((position_ids.shape[0], -1, 1))
         # use torch.unsqueeze-style expansion and a type cast instead of index ops
         position_ids_expanded = position_ids[:, None, :].float()

@@ -48,7 +48,7 @@
         # compute the frequencies
         freqs = (inv_freq_expanded @ position_ids_expanded).T
         # concatenate the frequencies
-        emb = concat((freqs, freqs), dim=-1)
+        emb = cat((freqs, freqs), dim=-1)
         # compute cosine and sine
         cos = emb.cos()
         sin = emb.sin()
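A NumPy walk-through of what `forward` computes: an outer product of positions and inverse frequencies, duplicated along the last dimension, then cosine and sine tables. Shapes are simplified relative to the module's expand/transpose plumbing (a sketch, not the module itself):

```python
# Rotary cos/sin tables, simplified; numpy stands in for deepx tensors.
import numpy as np

inv_freq = 1.0 / (10000.0 ** (np.arange(0, 8, 2) / 8))        # (dim/2,)
position_ids = np.arange(6)[None, :]                          # (1, seq_len)

freqs = position_ids[:, :, None] * inv_freq[None, None, :]    # (1, seq, dim/2)
emb = np.concatenate((freqs, freqs), axis=-1)                 # (1, seq, dim)
cos, sin = np.cos(emb), np.sin(emb)                           # rotary tables
```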
diff --git a/front/py/deepx/transformer/models/llama/groupedquery_attention.py b/front/py/deepx/transformer/models/llama/groupedquery_attention.py
deleted file mode 100644
index 58f56a00..00000000
--- a/front/py/deepx/transformer/models/llama/groupedquery_attention.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from typing import Optional,Tuple
-from deepx.nn.modules import Module,Linear
-from deepx import Tensor,matmul,softmax,concat,arange,dropout as dropout_func
-
-
-def repeat_kv(hidden_states: Tensor, n_rep: int) -> Tensor:
-    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
-    if n_rep == 1:
-        return hidden_states
-    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
-    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
-
diff --git a/front/py/deepx/utils/__init__.py b/front/py/deepx/utils/__init__.py
index d2525ce0..63af3a8b 100644
--- a/front/py/deepx/utils/__init__.py
+++ b/front/py/deepx/utils/__init__.py
@@ -1,19 +1,5 @@
-from .tensor import Tensor
-from .creation import zeros, ones, arange
-from .elementwise import add, sub, mul, div
-from .matmul import matmul, dot
-from .reduction import sum, mean, max, min
-from .shape import reshape, transpose
-from .comparison import lt, gt, eq
-from .trigonometric import sin, cos, tan
+from .config import Config

 __all__ = [
-    'Tensor',
-    'zeros', 'ones', 'arange',
-    'add', 'sub', 'mul', 'div',
-    'matmul', 'dot',
-    'sum', 'mean', 'max', 'min',
-    'reshape', 'transpose',
-    'lt', 'gt', 'eq',
-    'sin', 'cos', 'tan'
-]
\ No newline at end of file
+    'Config',
+    ]
\ No newline at end of file
diff --git a/front/py/deepx/utils/config.py b/front/py/deepx/utils/config.py
new file mode 100644
index 00000000..e11624cf
--- /dev/null
+++ b/front/py/deepx/utils/config.py
@@ -0,0 +1,96 @@
+import copy,os,json
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+class Config:
+    """
+    Generic config class supporting dot access and recursively nested structures.
+    Can be initialized from a dict or an object, and provides dict-style access methods.
+    """
+    def __init__(self, obj: Optional[Union[Dict, object]] = None) -> None:
+        """
+        Initialize the config object.
+
+        Args:
+            obj: a dict, an object, or None (creates an empty config by default).
+        """
+        if obj is None:
+            obj = {}
+        if isinstance(obj, dict):
+            # deep-copy the dict to guard against external mutation
+            for key, value in copy.deepcopy(obj).items():
+                setattr(self, key, self._process_value(value))
+        else:
+            # copy instance attributes only (vars() is equivalent to obj.__dict__)
+            for key, value in vars(obj).items():
+                setattr(self, key, self._process_value(value))
+
+    def _process_value(self, value: Any) -> Any:
+        """Process a value recursively: dicts become Config, lists/tuples recurse."""
+        if isinstance(value, dict):
+            return Config(value)
+        elif isinstance(value, list):
+            return [self._process_value(item) for item in value]
+        elif isinstance(value, tuple):
+            # optional: convert tuples to a list type that supports dot access
+            # return ConfigList([self._process_value(item) for item in value])
+            return tuple(self._process_value(item) for item in value)
+        else:
+            return value
+
+    def __getitem__(self, key: str) -> Any:
+        return getattr(self, key)
+
+    def __setitem__(self, key: str, value: Any) -> None:
+        setattr(self, key, value)
+
+    def __delitem__(self, key: str) -> None:
+        delattr(self, key)
+
+    def __contains__(self, key: str) -> bool:
+        return hasattr(self, key)
+
+    def __len__(self) -> int:
+        return len(self.__dict__)
+
+    def __iter__(self) -> Any:
+        return iter(self.__dict__)
+
+    def __repr__(self) -> str:
+        return f"Config({self.__dict__})"
+
+    def __str__(self) -> str:
+        return str(self.to_dict())
+
+    def get(self, key: str, default: Any = None) -> Any:
+        return getattr(self, key, default)
+
+    def to_dict(self) -> Dict:
+        """Convert the config back to a dict, recursively."""
+        result = {}
+        for key, value in self.__dict__.items():
+            if isinstance(value, Config):
+                result[key] = value.to_dict()
+            elif isinstance(value, list):
+                result[key] = [item.to_dict() if isinstance(item, Config) else item for item in value]
+            elif isinstance(value, tuple):
+                result[key] = tuple(item.to_dict() if isinstance(item, Config) else item for item in value)
+            else:
+                result[key] = value
+        return result
+
+    @classmethod
+    def from_file(cls, filepath: str) -> "Config":
+        """
+        Load a config from a local JSON or YAML file and return a Config instance.
+        Supports .json, .yaml and .yml files.
+        """
+        ext = os.path.splitext(filepath)[-1].lower()
+        with open(filepath, "r", encoding="utf-8") as f:
+            if ext == ".json":
+                data = json.load(f)
+            elif ext in (".yaml", ".yml"):
+                import yaml
+                data = yaml.safe_load(f)
+            else:
+                raise ValueError(f"Unsupported config file format: {ext}")
+        return cls(data)
\ No newline at end of file
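A quick usage sketch for the new `Config` class (the values here are made up):

```python
from deepx.utils import Config

cfg = Config({"hidden_size": 4096,
              "rope_scaling": {"rope_type": "llama3", "factor": 8}})
print(cfg.hidden_size)                 # 4096, dot access
print(cfg.rope_scaling.rope_type)      # nested dicts become Config too
print(cfg["rope_scaling"]["factor"])   # dict-style access still works
print(cfg.get("missing", "default"))   # dict-like get with a default
print(cfg.to_dict())                   # back to a plain dict
# cfg = Config.from_file(model_path + "config.json")  # as in the examples below
```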
diff --git a/front/py/deepxutil/safetensors/__init__.py b/front/py/deepxutil/safetensors/__init__.py
new file mode 100644
index 00000000..3bc44bf0
--- /dev/null
+++ b/front/py/deepxutil/safetensors/__init__.py
@@ -0,0 +1,4 @@
+from .io import *
+__all__ = [
+    'save_numpy',
+]
diff --git a/front/py/deepxutil/safetensors/todeepx.py b/front/py/deepxutil/safetensors/todeepx.py
new file mode 100644
index 00000000..030d9349
--- /dev/null
+++ b/front/py/deepxutil/safetensors/todeepx.py
@@ -0,0 +1,114 @@
+from safetensors import safe_open
+import numpy as np
+import os
+import json
+import yaml
+import argparse
+import shutil
+import glob
+import re
+
+class TensorInfo:
+    def __init__(self, dtype, ndim, shape, size, strides=None):
+        self.dtype = dtype      # data precision type, e.g. "float32"
+        self.ndim = ndim        # number of dimensions
+        self.shape = shape      # shape tuple
+        self.size = size        # total number of elements
+        self.strides = strides  # strides array (optional)
+
+
+class Tensor:
+    def __init__(self, data, tensorinfo: TensorInfo):
+        assert isinstance(tensorinfo, TensorInfo), "tensorinfo must be a TensorInfo instance"
+        self.data = data
+        self.tensorinfo = tensorinfo
+
+class SafeTensorExporter:
+    def __init__(self, model_dir, output_dir):
+        self.model_dir = model_dir
+        self.output_dir = output_dir
+        self.config = self._load_config()
+
+    def _load_config(self):
+        """Load the model config"""
+        config_path = os.path.join(self.model_dir, "config.json")
+        if os.path.exists(config_path):
+            with open(config_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        return {}
+
+    def _find_model_files(self) -> list:
+        """Find all sharded model files"""
+        single_file = os.path.join(self.model_dir, "model.safetensors")
+        shard_files = glob.glob(os.path.join(self.model_dir, "model-*-of-*.safetensors"))
+
+        # extract the shard index with a regular expression
+        pattern = re.compile(r"model-(\d+)-of-(\d+)\.safetensors")
+        filtered_shards = []
+        for f in shard_files:
+            match = pattern.search(os.path.basename(f))
+            if match:
+                filtered_shards.append( (int(match.group(1)), f) )
+
+        if os.path.exists(single_file):
+            return [single_file]
+        elif filtered_shards:
+            # sort by shard index, then return the paths
+            filtered_shards.sort(key=lambda x: x[0])
+            return [f[1] for f in filtered_shards]
+        raise FileNotFoundError(f"No model files found in {self.model_dir}")
+
+    def export(self):
+        """Export a safetensors model to the output directory"""
+        model_files = self._find_model_files()
+
+        from deepxutil.numpy import save_numpy
+
+        for model_path in model_files:
+            with safe_open(model_path, framework="numpy") as f:
+                for key in f.keys():
+                    t = f.get_tensor(key)
+                    path = os.path.join(self.output_dir, key)
+                    save_numpy(t, path)
+
+        self.mvothers()
+
+    def mvothers(self):
+        """Copy tokenizer, config.json and other related files to the output directory"""
+        required_files = [
+            "config.json",
+            "tokenizer.json",
+            "tokenizer_config.json",
+            "special_tokens_map.json",
+            "vocab.json",
+            "merges.txt",
+            "added_tokens.json"
+        ]
+
+        for filename in required_files:
+            src = os.path.join(self.model_dir, filename)
+            if os.path.exists(src):
+                shutil.copy2(src, os.path.join(self.output_dir, filename))
+
+
+if __name__ == "__main__":
+    print()
+    parser = argparse.ArgumentParser(description='Safetensors model conversion tool')
+    parser.add_argument('--model', type=str, required=True,
+                        help='input directory containing model.safetensors and config.json')
+    parser.add_argument('--output', type=str, required=True,
+                        help='output directory where the converted DeepX-format data is saved')
+
+    args = parser.parse_args()
+
+    exporter = SafeTensorExporter(
+        model_dir=args.model,
+        output_dir=args.output
+    )
+    try:
+        exporter.export()
+        print(f"Conversion succeeded! Output directory: {args.output}")
+    except Exception as e:
+        print(f"Conversion failed: {str(e)}")
+        exit(1)
\ No newline at end of file
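The shard-discovery logic in `_find_model_files` in brief: shard names like `model-00002-of-00004.safetensors` are matched, indexed, and sorted numerically (the file names here are made up):

```python
import re

pattern = re.compile(r"model-(\d+)-of-(\d+)\.safetensors")
names = ["model-00003-of-00004.safetensors",
         "model-00001-of-00004.safetensors",
         "model-00002-of-00004.safetensors"]
shards = sorted((int(pattern.search(n).group(1)), n) for n in names)
print([n for _, n in shards])  # shards in 00001, 00002, 00003 order
```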
diff --git a/front/py/deepxutil/safetensors/tosafetensors.py b/front/py/deepxutil/safetensors/tosafetensors.py
new file mode 100644
index 00000000..77eb0a24
--- /dev/null
+++ b/front/py/deepxutil/safetensors/tosafetensors.py
@@ -0,0 +1,19 @@
+from .todeepx import Tensor
+
+class SafeTensorSaver:
+    def __init__(self, tensors, metadata=None):
+        self.tensors = tensors
+        self.metadata = metadata or {}
+
+    def save(self, save_path):
+        """Save the model in safetensors format"""
+        tensor_dict = {}
+        for name, tensor in self.tensors.items():
+            if isinstance(tensor, Tensor):
+                tensor_dict[name] = tensor.data
+            else:
+                tensor_dict[name] = tensor
+
+        from safetensors.numpy import save_file
+        save_file(tensor_dict, save_path, metadata=self.metadata)
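A usage sketch for `SafeTensorSaver` (tensor names, values, and the output path are made up):

```python
import numpy as np
from deepxutil.safetensors.tosafetensors import SafeTensorSaver

tensors = {"embed_tokens.weight": np.zeros((8, 4), dtype=np.float32)}
saver = SafeTensorSaver(tensors, metadata={"format": "deepx"})
saver.save("/tmp/model.safetensors")  # plain numpy arrays pass straight through
```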
diff --git a/front/py/examples/1_tensor/2_saveload.py b/front/py/examples/1_tensor/2_saveload.py
index ec2225d7..c5a45f89 100644
--- a/front/py/examples/1_tensor/2_saveload.py
+++ b/front/py/examples/1_tensor/2_saveload.py
@@ -1,22 +1,20 @@
-from deepx.nn.functional import arange,save,load
+from deepx.nn.functional import arange,save,load,full

-def saveloadfloat32():
-    t1=arange(start=0,end=60 ,dtype='float32').reshape_((3,4,5))
-    dir='/home/lipeng/model/deepxmodel/tester/'
-    t1.save(dir+'t1')
+dir = '/home/lipeng/model/deepx/tester/'

-    t2=load(dir+'t1')
-    t2.print()
+def saveload(dtype:str='float32'):
+    print()

-def saveloadint8():
-    t=arange(start=0,end=60 ,dtype='int8').reshape_((3,4,5))
-    dir='/home/lipeng/model/deepxmodel/tester/'
-    t.save(dir+'tint8')
-    t2=load(dir+"tint8")
+    t1=full((3,4,5),2,dtype)
+    # t1=arange(start=0,end=60 ,dtype=dtype)
+    # t1=t.reshape_((3,4,5))
+    t1.float().print()
+    t1.print()
+    name='t_'+dtype
+    t1.save(dir+name)
+    t2=load(dir+name)
     t2.print()

-
 if __name__ == "__main__":
-    #saveloadfloat32()
-    saveloadint8()
\ No newline at end of file
+    saveload("bfloat16")
\ No newline at end of file
diff --git a/front/py/examples/4_transformer/llama/llama_attention_torch.py b/front/py/examples/4_transformer/llama/llama_attention_torch.py
index 3cb8aca8..57b418f6 100644
--- a/front/py/examples/4_transformer/llama/llama_attention_torch.py
+++ b/front/py/examples/4_transformer/llama/llama_attention_torch.py
@@ -1,8 +1,46 @@
 from token_text import torch_input
 print()
 ############-------TORCH-------################
-from transformers.models.llama.modeling_llama import rotate_half
+import torch
+from transformers.models.llama.modeling_llama import LlamaAttention, LlamaRotaryEmbedding
+
+model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/"
+from deepx.utils import Config
+config = Config.from_file(model_path + "config.json")
+config._attn_implementation = "eager"
+
+class NetTorch(torch.nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.padding_idx = config.get("pad_token_id", None)
+        self.config = config
+        self.embed_tokens = torch.nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+        self.rotary_emb = LlamaRotaryEmbedding(config=config)
+        self.attn = LlamaAttention(config, layer_idx=0)
+
+    def forward(self, x):
+        # 1. token embedding
+        inputs_embeds = self.embed_tokens(x)
+        hidden_states = inputs_embeds
+        # 2. positional encoding
+        position_ids = torch.arange(hidden_states.shape[1], device=hidden_states.device).unsqueeze(0)
+        # 3. RoPE
+        cos, sin = self.rotary_emb(hidden_states, position_ids)
+        position_embeddings = (cos, sin)
+        # 4. Attention
+        attn_output, attn_weights = self.attn(
+            hidden_states=hidden_states,
+            position_embeddings=position_embeddings,
+            attention_mask=None
+        )
+        return attn_output, attn_weights
+
+if __name__ == "__main__":
+    torch_net = NetTorch(config)
+    attn_output, attn_weights = torch_net(torch_input)
+    print("attn_output shape:", attn_output.shape)
+    print("attn_output:", attn_output)
+    print("attn_weights shape:", attn_weights.shape)

-print(torch_input)
-r=rotate_half(torch_input)
-print(r)
diff --git a/front/py/examples/4_transformer/llama/llama_rope.py b/front/py/examples/4_transformer/llama/llama_rope.py
index 4f40b390..33f17daa 100644
--- a/front/py/examples/4_transformer/llama/llama_rope.py
+++ b/front/py/examples/4_transformer/llama/llama_rope.py
@@ -1,3 +1,4 @@
+from deepx.utils import Config
 from token_text import dir,config

 ############-------DEEPX-------################
@@ -10,12 +11,10 @@
 embed_tokens_weight=load(dir+'weight')

 class NetDeepx(Module):
-    def __init__(self,configdict:dict):
+    def __init__(self,config:Config):
         super().__init__()
-        self.embed_tokens = Embedding(configdict["vocab_size"], configdict["hidden_size"],weight=embed_tokens_weight)
-        self.rotary_emb = LlamaRotaryEmbedding(config=configdict)
-        print("rotary_emb.inv_freq")
-        self.rotary_emb.inv_freq.print()
+        self.embed_tokens = Embedding(config.vocab_size, config.hidden_size,weight=embed_tokens_weight)
+        self.rotary_emb = LlamaRotaryEmbedding(config=config)
     def forward(self,x):
         inputs_embeds = self.embed_tokens(x)
         hidden_states = inputs_embeds
@@ -23,7 +22,7 @@ def forward(self,x):
         return self.rotary_emb(hidden_states, position_ids)

 if __name__ == "__main__":
-    net = NetDeepx(configdict=config.to_dict())
+    net = NetDeepx(config=config)
     out=net.forward(input)
     out[0].print()
     out[1].print()
diff --git a/front/py/examples/4_transformer/llama/token_text.py b/front/py/examples/4_transformer/llama/token_text.py
index 84a4a59d..34734970 100644
--- a/front/py/examples/4_transformer/llama/token_text.py
+++ b/front/py/examples/4_transformer/llama/token_text.py
@@ -1,7 +1,7 @@
 hidden_size = 8
 eps = 1e-6
 dir = '/home/lipeng/model/deepxmodel/llama/'
-model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/"
 print()

 from transformers import AutoTokenizer, AutoConfig
@@ -15,7 +15,8 @@ def init_tokenizer(model_path):

 tokenizer = init_tokenizer(model_path)
 config = AutoConfig.from_pretrained(model_path)
-
+# from deepx.utils import Config
+# config=Config.from_file(model_path+"config.json")

 def tokenize_text(text, tokenizer):
     tokens = tokenizer(text, return_tensors="pt").input_ids
diff --git a/model/safetensor_deepx/safetensor_deepx.egg-info/PKG-INFO b/model/safetensor_deepx/safetensor_deepx.egg-info/PKG-INFO
deleted file mode 100644
index 7a25ed77..00000000
--- a/model/safetensor_deepx/safetensor_deepx.egg-info/PKG-INFO
+++ /dev/null
@@ -1,12 +0,0 @@
-Metadata-Version: 2.1
-Name: safetensor-deepx
-Version: 0.1.0
-Summary: SafeTensor support for DeepX
-Home-page: UNKNOWN
-Author: igor.li
-License: UNKNOWN
-Platform: UNKNOWN
-Requires-Python: >=3.7
-
-UNKNOWN
-
diff --git a/model/safetensor_deepx/safetensor_deepx.egg-info/SOURCES.txt b/model/safetensor_deepx/safetensor_deepx.egg-info/SOURCES.txt
deleted file mode 100644
index 14baa316..00000000
--- a/model/safetensor_deepx/safetensor_deepx.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-README.md
-setup.py
-safetensor_deepx/__init__.py
-safetensor_deepx/graph.py
-safetensor_deepx/loader.py
-safetensor_deepx.egg-info/PKG-INFO
-safetensor_deepx.egg-info/SOURCES.txt
-safetensor_deepx.egg-info/dependency_links.txt
-safetensor_deepx.egg-info/requires.txt
-safetensor_deepx.egg-info/top_level.txt
\ No newline at end of file
diff --git a/model/safetensor_deepx/safetensor_deepx.egg-info/dependency_links.txt b/model/safetensor_deepx/safetensor_deepx.egg-info/dependency_links.txt
deleted file mode 100644
index 8b137891..00000000
--- a/model/safetensor_deepx/safetensor_deepx.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/model/safetensor_deepx/safetensor_deepx.egg-info/requires.txt b/model/safetensor_deepx/safetensor_deepx.egg-info/requires.txt
deleted file mode 100644
index 5d91d60b..00000000
--- a/model/safetensor_deepx/safetensor_deepx.egg-info/requires.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-numpy>=1.19.0
-safetensors>=0.3.0
diff --git a/model/safetensor_deepx/safetensor_deepx.egg-info/top_level.txt b/model/safetensor_deepx/safetensor_deepx.egg-info/top_level.txt
deleted file mode 100644
index 14b5ff6f..00000000
--- a/model/safetensor_deepx/safetensor_deepx.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-safetensor_deepx
diff --git a/model/safetensor_deepx/safetensor_deepx/__init__.py b/model/safetensor_deepx/safetensor_deepx/__init__.py
deleted file mode 100644
index 80140701..00000000
--- a/model/safetensor_deepx/safetensor_deepx/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from .loader import SafeTensorLoader, SafeTensorSaver
-from .graph import SafeTensorGraphBuilder
-
-__all__ = [
-    'SafeTensorLoader',
-    'SafeTensorSaver',
-    'SafeTensorGraphBuilder'
-]
\ No newline at end of file
re.compile(r"model-(\d+)-of-(\d+)\.safetensors") - filtered_shards = [] - for f in shard_files: - match = pattern.search(os.path.basename(f)) - if match: - filtered_shards.append( (int(match.group(1)), f) ) - - if os.path.exists(single_file): - return [single_file] - elif filtered_shards: - # 按分片编号排序后返回路径 - filtered_shards.sort(key=lambda x: x[0]) - return [f[1] for f in filtered_shards] - raise FileNotFoundError(f"No model files found in {self.model_dir}") - - def export(self): - """导出safetensor模型到指定目录""" - model_files = self._find_model_files() - - for model_path in model_files: - with safe_open(model_path, framework="pt") as f: - for key in f.keys(): - tensor = f.get_tensor(key) - self._save_tensor(key, tensor) - - self._save_config() - self._copy_tokenizer_files() - - def _save_tensor(self, name, tensor): - """保存单个张量的元数据和二进制数据""" - # 将名称中的点转换为下划线,并创建统一路径 - base_path = os.path.join(self.output_dir, "tensors", name) - os.makedirs(os.path.dirname(base_path), exist_ok=True) - - # 处理bfloat16类型 - dtype_str = str(tensor.dtype).replace("torch.", "") - if dtype_str == "bfloat16": - tensor = tensor.float() - dtype_str = "float32" - - # 更新后的类型处理 - shape_info = { - 'dtype': self.dtype_map.get(dtype_str.upper(), dtype_str), - 'shape': list(tensor.shape) - } - - # 保存为numpy格式 - np_tensor = tensor.numpy().astype(shape_info['dtype']) - with open(f"{base_path}.data", 'wb') as f: - f.write(np_tensor.tobytes()) - - with open(f"{base_path}.shape", 'w') as f: - yaml.dump(shape_info, f, default_flow_style=False) - - def _save_config(self): - """保存模型全局配置""" - config_path = os.path.join(self.output_dir, "config.yaml") - with open(config_path, 'w') as f: - yaml.dump({ - 'model_config': self.config, - 'format_version': 'deepx' - }, f, default_flow_style=False) - - def _copy_tokenizer_files(self): - """复制tokenizer相关文件到输出目录""" - required_files = [ - "tokenizer.json", - "tokenizer_config.json", - "special_tokens_map.json", - "vocab.json", - "merges.txt", - "added_tokens.json" - ] - - for filename in required_files: - src = os.path.join(self.model_dir, filename) - if os.path.exists(src): - shutil.copy2(src, os.path.join(self.output_dir, filename)) - - -class SafeTensorLoader: - def __init__(self, model_dir): - self.model_dir = model_dir - self.config = self._load_config() - - def _load_config(self): - """加载模型配置""" - config_path = os.path.join(self.model_dir, "config.json") - if os.path.exists(config_path): - with open(config_path, 'r', encoding='utf-8') as f: - return json.load(f) - return {} - - def _find_model_files(self): - """查找所有分片模型文件""" - single_file = os.path.join(self.model_dir, "model.safetensors") - shard_files = glob.glob(os.path.join(self.model_dir, "model-*-of-*.safetensors")) - - # 统一使用正则表达式匹配 - pattern = re.compile(r"model-(\d+)-of-(\d+)\.safetensors") - filtered_shards = [] - for f in shard_files: - match = pattern.search(os.path.basename(f)) - if match: - filtered_shards.append( (int(match.group(1)), f) ) - - if os.path.exists(single_file): - return [single_file] - elif filtered_shards: - filtered_shards.sort(key=lambda x: x[0]) - return [f[1] for f in filtered_shards] - else: - raise FileNotFoundError(f"No model files found in {self.model_dir}") - - def load(self): - """加载safetensor模型文件""" - tensors = {} - metadata = {} - - model_files = self._find_model_files() - - for model_path in model_files: - with safe_open(model_path, framework="pt") as f: - # 合并metadata - file_metadata = f.metadata() if hasattr(f, 'metadata') else {} - metadata.update(file_metadata) - - for key in f.keys(): - pt_tensor = 
-class SafeTensorLoader:
-    def __init__(self, model_dir):
-        self.model_dir = model_dir
-        self.config = self._load_config()
-
-    def _load_config(self):
-        """Load the model config"""
-        config_path = os.path.join(self.model_dir, "config.json")
-        if os.path.exists(config_path):
-            with open(config_path, 'r', encoding='utf-8') as f:
-                return json.load(f)
-        return {}
-
-    def _find_model_files(self):
-        """Find all sharded model files"""
-        single_file = os.path.join(self.model_dir, "model.safetensors")
-        shard_files = glob.glob(os.path.join(self.model_dir, "model-*-of-*.safetensors"))
-
-        # match uniformly with a regular expression
-        pattern = re.compile(r"model-(\d+)-of-(\d+)\.safetensors")
-        filtered_shards = []
-        for f in shard_files:
-            match = pattern.search(os.path.basename(f))
-            if match:
-                filtered_shards.append( (int(match.group(1)), f) )
-
-        if os.path.exists(single_file):
-            return [single_file]
-        elif filtered_shards:
-            filtered_shards.sort(key=lambda x: x[0])
-            return [f[1] for f in filtered_shards]
-        else:
-            raise FileNotFoundError(f"No model files found in {self.model_dir}")
-
-    def load(self):
-        """Load safetensors model files"""
-        tensors = {}
-        metadata = {}
-
-        model_files = self._find_model_files()
-
-        for model_path in model_files:
-            with safe_open(model_path, framework="pt") as f:
-                # merge metadata
-                file_metadata = f.metadata() if hasattr(f, 'metadata') else {}
-                metadata.update(file_metadata)
-
-                for key in f.keys():
-                    pt_tensor = f.get_tensor(key).cpu().detach()
-
-                    # build the TensorInfo
-                    tensor_info = TensorInfo(
-                        dtype=str(pt_tensor.dtype).replace("torch.", ""),
-                        ndim=pt_tensor.ndim,
-                        shape=tuple(pt_tensor.shape),
-                        size=pt_tensor.numel(),
-                        strides=pt_tensor.stride() if pt_tensor.is_contiguous() else None
-                    )
-
-                    # convert to a byte stream (preserving memory alignment)
-                    byte_buffer = pt_tensor.numpy().tobytes() if pt_tensor.device == "cpu" \
-                        else pt_tensor.cpu().numpy().tobytes()
-
-                    tensors[key] = Tensor(byte_buffer, tensor_info)
-
-        metadata["model_config"] = self.config
-        return tensors, metadata
-
-
-class SafeTensorSaver:
-    def __init__(self, tensors, metadata=None):
-        self.tensors = tensors
-        self.metadata = metadata or {}
-
-    def save(self, save_path):
-        """Save the model in safetensors format"""
-        tensor_dict = {}
-        for name, tensor in self.tensors.items():
-            if isinstance(tensor, Tensor):
-                tensor_dict[name] = tensor.data
-            else:
-                tensor_dict[name] = tensor
-
-        from safetensors.numpy import save_file
-        save_file(tensor_dict, save_path, metadata=self.metadata)
-
-# usage example
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Safetensors model conversion tool')
-    parser.add_argument('--model_dir', type=str, required=True,
-                        help='input directory containing model.safetensors and config.json')
-    parser.add_argument('--output_dir', type=str, required=True,
-                        help='output directory where the converted DeepX-format data is saved')
-
-    args = parser.parse_args()
-
-    exporter = SafeTensorExporter(
-        model_dir=args.model_dir,
-        output_dir=args.output_dir
-    )
-    try:
-        exporter.export()
-        print(f"Conversion succeeded! Output directory: {args.output_dir}")
-    except Exception as e:
-        print(f"Conversion failed: {str(e)}")
-        exit(1)
\ No newline at end of file