From 75063c12cab7273e8539783a69812d8d5ebb26c5 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Thu, 24 Apr 2025 00:09:34 +0800 Subject: [PATCH 1/7] front/py deepxutil --- front/py/deepx/nn/functional/__init__.py | 2 +- front/py/deepx/nn/functional/activite.py | 1 - front/py/deepx/nn/functional/elementwise.py | 3 +- front/py/deepx/nn/functional/leaffunc_init.py | 1 - front/py/deepx/nn/functional/leaffunc_io.py | 27 +--- front/py/deepx/nn/functional/normalization.py | 4 - front/py/deepx/nn/functional/reduce.py | 13 +- front/py/deepx/nn/functional/rtf_matmul.py | 1 - front/py/deepx/nn/functional/rtf_reduce.py | 3 - front/py/deepx/nn/modules/module.py | 4 +- front/py/deepx/nn/modules/sparse.py | 2 +- front/py/deepx/tensor/reduce.py | 25 +++- front/py/deepx/tensor/tensor.py | 6 +- .../models/llama/modeling_llama.py | 136 ++++++++---------- front/py/deepxutil/numpy/__init__.py | 4 + front/py/deepxutil/numpy/io.py | 17 +++ front/py/deepxutil/torch/__init__.py | 4 + front/py/deepxutil/torch/io.py | 10 ++ front/py/examples/3_module/1_embedding.py | 3 +- .../4_transformer/llama/1_llamarmsnorm.py | 25 ---- .../llama/1_llamarmsnorm_torch.py | 35 ++++- 21 files changed, 161 insertions(+), 165 deletions(-) create mode 100644 front/py/deepxutil/numpy/__init__.py create mode 100644 front/py/deepxutil/numpy/io.py create mode 100644 front/py/deepxutil/torch/__init__.py create mode 100644 front/py/deepxutil/torch/io.py delete mode 100644 front/py/examples/4_transformer/llama/1_llamarmsnorm.py diff --git a/front/py/deepx/nn/functional/__init__.py b/front/py/deepx/nn/functional/__init__.py index ff85363e..93825af3 100644 --- a/front/py/deepx/nn/functional/__init__.py +++ b/front/py/deepx/nn/functional/__init__.py @@ -19,7 +19,7 @@ #leaffunc "newtensor","printtensor","load", #life - "printtensor","save","save_npy","save_torch",#io + "printtensor","save",#io "constant","constant_","full","zeros","ones","uniform","uniform_","arange","arange_","kaiming_uniform","kaiming_uniform_","calculate_fan_in_and_fan_out", "add","sub","mul","div","sqrt","pow","exp","log", "matmul", diff --git a/front/py/deepx/nn/functional/activite.py b/front/py/deepx/nn/functional/activite.py index 97be29f3..8510a052 100644 --- a/front/py/deepx/nn/functional/activite.py +++ b/front/py/deepx/nn/functional/activite.py @@ -1,4 +1,3 @@ -from typing import Union from deepx.tensor import Tensor from deepx.nn.functional import newtensor diff --git a/front/py/deepx/nn/functional/elementwise.py b/front/py/deepx/nn/functional/elementwise.py index 28e5b199..d6a4e6ea 100644 --- a/front/py/deepx/nn/functional/elementwise.py +++ b/front/py/deepx/nn/functional/elementwise.py @@ -1,5 +1,4 @@ -from typing import Union -from deepx.tensor import Tensor,Number +from deepx.tensor import Tensor from deepx.nn.functional import newtensor def rsqrt(input:Tensor)->Tensor: diff --git a/front/py/deepx/nn/functional/leaffunc_init.py b/front/py/deepx/nn/functional/leaffunc_init.py index d6188592..48160e3d 100644 --- a/front/py/deepx/nn/functional/leaffunc_init.py +++ b/front/py/deepx/nn/functional/leaffunc_init.py @@ -1,4 +1,3 @@ -from typing import Union import math import time import os diff --git a/front/py/deepx/nn/functional/leaffunc_io.py b/front/py/deepx/nn/functional/leaffunc_io.py index d9551c6c..9797eea9 100644 --- a/front/py/deepx/nn/functional/leaffunc_io.py +++ b/front/py/deepx/nn/functional/leaffunc_io.py @@ -1,4 +1,4 @@ -from deepx.tensor import Tensor,Shape,saveShape +from deepx.tensor import Tensor from .authormap import defaultauthor def printtensor(t:Tensor,format=''): @@ -10,28 +10,3 @@ def save(t:Tensor,path:str): from .rtf_io import rtf_save rtf_save(t,path) return t - -def save_npy(t,path:str): - r''' - 保存numpy.ndarray为deepx.tensor格式 - ''' - from numpy import ascontiguousarray - shape=Shape(t.shape) - shape._dtype=str(t.dtype) - saveShape(shape,path+".shape") - - array = ascontiguousarray(t) - array.tofile(path+'.data') - return t - -def save_torch(t,path:str): - r''' - 保存torch.Tensor为deepx.tensor格式 - ''' - from torch import Tensor as torch_Tensor - if isinstance(t,torch_Tensor): - t=t.detach().cpu().numpy() - else: - raise ValueError("t must be a torch.Tensor") - save_npy(t,path) - \ No newline at end of file diff --git a/front/py/deepx/nn/functional/normalization.py b/front/py/deepx/nn/functional/normalization.py index 87eacbbe..8a9d6030 100644 --- a/front/py/deepx/nn/functional/normalization.py +++ b/front/py/deepx/nn/functional/normalization.py @@ -1,8 +1,4 @@ - -from typing import Union from deepx import Tensor -from deepx.nn.functional import sub -from deepx.nn.functional import newtensor # 数学公式:softmax(x_i) = e^{x_i} / sum(e^{x_j}) def softmax(t: Tensor,dim:int=-1)->Tensor: diff --git a/front/py/deepx/nn/functional/reduce.py b/front/py/deepx/nn/functional/reduce.py index f2731fd4..474e98b1 100644 --- a/front/py/deepx/nn/functional/reduce.py +++ b/front/py/deepx/nn/functional/reduce.py @@ -1,18 +1,19 @@ from deepx.tensor import Tensor,Shape -from typing import Optional,Union from .leaffunc_reduce import sum from .leaffunc_life import newtensor #mean def mean(a:Tensor,dim:tuple[int,...]=None,keepdim:bool=False)->Tensor: - # 如果dim为None,则对所有维度求平均 + assert isinstance(a,Tensor) if dim is None: - dim = list(range(a.ndim)) - dim=list(dim) + dim = list(range(a.ndim)) + else: + dim=list(dim) + for i in dim: + if i < 0: + dim[i] = i + a.dim() total = 1 for i in dim: - if i < 0: - dim[i] = i + a.dim() total *= a.shape[i] reduceshape=Shape.reduceshape(a.shape,dim,keepdim) out=newtensor(reduceshape,dtype=a.dtype) diff --git a/front/py/deepx/nn/functional/rtf_matmul.py b/front/py/deepx/nn/functional/rtf_matmul.py index bfe23cee..93f4d5c8 100644 --- a/front/py/deepx/nn/functional/rtf_matmul.py +++ b/front/py/deepx/nn/functional/rtf_matmul.py @@ -1,7 +1,6 @@ from deepx.tensor import Tensor from deepx.nn import DeepxIR,Param from deepx.scheduler import send -from .rtf import A_B_op_C def rtf_matmul(a:Tensor,b:Tensor,out: Tensor ,author='cublas',bench:int=None): args=[Param.tensor(a),Param.tensor(b)] diff --git a/front/py/deepx/nn/functional/rtf_reduce.py b/front/py/deepx/nn/functional/rtf_reduce.py index 61da95b1..f089bfab 100644 --- a/front/py/deepx/nn/functional/rtf_reduce.py +++ b/front/py/deepx/nn/functional/rtf_reduce.py @@ -1,9 +1,6 @@ from deepx.tensor import Tensor -from deepx.nn.deepxir import DeepxIR,Param -from deepx.scheduler import send from .rtf import A_b1_b2_op_C - def rtf_sum(a:Tensor,dim:tuple[int],keepdim:bool,out: Tensor, author:str='miaobyte')->Tensor: A_b1_b2_op_C("sum",a,dim,keepdim,out,author) diff --git a/front/py/deepx/nn/modules/module.py b/front/py/deepx/nn/modules/module.py index 003223ad..7e55c5f3 100644 --- a/front/py/deepx/nn/modules/module.py +++ b/front/py/deepx/nn/modules/module.py @@ -17,7 +17,6 @@ def _generate_default_name(self) -> str: self.__class__._instance_counter = 0 count = self.__class__._instance_counter self.__class__._instance_counter += 1 - return count return f"{base_name}_{count}" @property @@ -40,9 +39,10 @@ def register_parameter(self, name: str, param: Optional[Tensor]) -> None: self._parameters.pop(name, None) else: self._parameters[name] = param - param.name=self.full_name + '.' + name + param._name=self.full_name + '.' + name from deepx.nn.functional.leaffunc_life import rnewtensor rnewtensor(param) + def parameters(self, recurse: bool = True) -> Iterator[Tensor]: for name, param in self.named_parameters(recurse=recurse): diff --git a/front/py/deepx/nn/modules/sparse.py b/front/py/deepx/nn/modules/sparse.py index ca00f28c..1cdae141 100644 --- a/front/py/deepx/nn/modules/sparse.py +++ b/front/py/deepx/nn/modules/sparse.py @@ -128,7 +128,7 @@ def __init__(self, embedding_dim, ], "权重形状与num_embeddings和embedding_dim不匹配" self.weight = weight - + self.weight.rtf_rename('embedding_0.weight') # self.sparse = sparse # if padding_idx is not None: diff --git a/front/py/deepx/tensor/reduce.py b/front/py/deepx/tensor/reduce.py index cdba12f8..654c8e4b 100644 --- a/front/py/deepx/tensor/reduce.py +++ b/front/py/deepx/tensor/reduce.py @@ -4,28 +4,43 @@ from deepx.tensor import Tensor,tensor_method @tensor_method -def reducemax(self, dim:tuple,keepdim:bool=False,out:Union[Tensor,str]=''): +def reducemax(self, dim:tuple[int,...],keepdim:bool=False,out:Union[Tensor,str]='')->Tensor: + assert isinstance(dim,tuple) + for i in dim: + assert isinstance(i,int) from deepx.nn.functional import reducemax as reduce_max_func return reduce_max_func(self,dim,keepdim,out) @tensor_method -def reducemin(self, dim:tuple,keepdim:bool=False,out:Union[Tensor,str]=''): +def reducemin(self, dim:tuple[int,...],keepdim:bool=False,out:Union[Tensor,str]='')->Tensor: + assert isinstance(dim,tuple) + for i in dim: + assert isinstance(i,int) from deepx.nn.functional import reducemin as reduce_min_func return reduce_min_func(self,dim,keepdim,out) @tensor_method -def sum(self, dim:tuple,keepdim:bool=False,out:Union[Tensor,str]=''): +def sum(self, dim:tuple[int,...],keepdim:bool=False,out:Union[Tensor,str]='')->Tensor: + assert isinstance(dim,tuple) + for i in dim: + assert isinstance(i,int) from deepx.nn.functional import sum as sum_func return sum_func(self,dim,keepdim,out) @tensor_method -def prod(self, dim:tuple,keepdim:bool=False,out:Union[Tensor,str]=''): +def prod(self, dim:tuple[int,...],keepdim:bool=False,out:Union[Tensor,str]='')->Tensor: + assert isinstance(dim,tuple) + for i in dim: + assert isinstance(i,int) from deepx.nn.functional import prod as prod_func return prod_func(self,dim,keepdim,out) @tensor_method -def mean(self,dim:tuple,keepdim:bool=False,out:Union[Tensor,str]=''): +def mean(self,dim:tuple[int,...],keepdim:bool=False)->Tensor: + assert isinstance(dim,tuple) + for i in dim: + assert isinstance(i,int) from deepx.nn.functional import mean as mean_func return mean_func(self,dim,keepdim) \ No newline at end of file diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index efe2255d..661bd4de 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -22,7 +22,7 @@ def __init__(self,shape:tuple[int,...],dtype:str='float32',name:str=None): self.__class__._instance_counter = 0 count = self.__class__._instance_counter self.__class__._instance_counter += 1 - self._name = count + self._name = str(count) # dtype self._dtype = dtype @@ -49,8 +49,8 @@ def clone(self,name:str=None): @property def name(self): return self._name - @name.setter - def name(self,name:str): + + def rtf_rename(self,name:str): assert isinstance(name,str) and name != '' assert self.name is not None and self.name != '' diff --git a/front/py/deepx/transformer/models/llama/modeling_llama.py b/front/py/deepx/transformer/models/llama/modeling_llama.py index c60f34f5..c8d9c403 100644 --- a/front/py/deepx/transformer/models/llama/modeling_llama.py +++ b/front/py/deepx/transformer/models/llama/modeling_llama.py @@ -10,83 +10,69 @@ def __init__(self, hidden_size, eps=1e-6): LlamaRMSNorm is equivalent to T5LayerNorm """ super().__init__() - self.weight = ones(hidden_size) + self.weight=ones(hidden_size) + self.register_parameter("weight",self.weight) self.variance_epsilon = eps - - # 和官方实现相比,尽可能inplace化 def forward(self, hidden_states:Tensor): - input_clone = hidden_states.clone() - input_clone.pow_(2) - variance = input_clone.mean([-1], keepdim=True) - - variance.add_(self.variance_epsilon) - variance = rsqrt(variance) - - hidden_states.mul_(variance) - hidden_states.mul_(self.weight) - return hidden_states - + variance = hidden_states.pow(2).mean((-1,), keepdim=True) + hidden_states = hidden_states * rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states + def extra_repr(self): return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" - -class LlamaRotaryEmbedding(Module): - from transformers.models.llama.configuration_llama import LlamaConfig - def __init__(self, config: LlamaConfig, device=None): - super().__init__() - # BC: "rope_type" was originally "type" - if hasattr(config, "rope_scaling") and config.rope_scaling is not None: - self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type")) - else: - self.rope_type = "default" - self.max_seq_len_cached = config.max_position_embeddings - self.original_max_seq_len = config.max_position_embeddings - - self.config = config - self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] - - inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device) - self.register_buffer("inv_freq", inv_freq, persistent=False) - self.original_inv_freq = self.inv_freq - - def _dynamic_frequency_update(self, position_ids, device): - """ - dynamic RoPE layers should recompute `inv_freq` in the following situations: - 1 - growing beyond the cached sequence length (allow scaling) - 2 - the current sequence length is in the original scale (avoid losing precision with small sequences) - """ - seq_len = torch.max(position_ids) + 1 - if seq_len > self.max_seq_len_cached: # growth - inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, seq_len=seq_len) - self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation - self.max_seq_len_cached = seq_len - - if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len: # reset - # This .to() is needed if the model has been moved to a device after being initialized (because - # the buffer is automatically moved, but not the original copy) - self.original_inv_freq = self.original_inv_freq.to(device) - self.register_buffer("inv_freq", self.original_inv_freq, persistent=False) - self.max_seq_len_cached = self.original_max_seq_len - - @torch.no_grad() - def forward(self, x, position_ids): - if "dynamic" in self.rope_type: - self._dynamic_frequency_update(position_ids, device=x.device) - - # Core RoPE block - inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) - position_ids_expanded = position_ids[:, None, :].float() - # Force float32 (see https://github.com/huggingface/transformers/pull/29285) - device_type = x.device.type - device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" - with torch.autocast(device_type=device_type, enabled=False): - freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) - emb = torch.cat((freqs, freqs), dim=-1) - cos = emb.cos() - sin = emb.sin() - - # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention - cos = cos * self.attention_scaling - sin = sin * self.attention_scaling - - return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) +# +# class LlamaRotaryEmbedding(Module): +# def __init__(self,rope_type:str="default",max_seq_len:int=1024,device=None): +# super().__init__() +# self.max_seq_len_cached = config.max_position_embeddings +# self.original_max_seq_len = config.max_position_embeddings +# +# self.config = config +# self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] +# +# inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device) +# self.register_buffer("inv_freq", inv_freq, persistent=False) +# self.original_inv_freq = self.inv_freq +# +# def _dynamic_frequency_update(self, position_ids, device): +# """ +# dynamic RoPE layers should recompute `inv_freq` in the following situations: +# 1 - growing beyond the cached sequence length (allow scaling) +# 2 - the current sequence length is in the original scale (avoid losing precision with small sequences) +# """ +# seq_len = torch.max(position_ids) + 1 +# if seq_len > self.max_seq_len_cached: # growth +# inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, seq_len=seq_len) +# self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation +# self.max_seq_len_cached = seq_len +# +# if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len: # reset +# # This .to() is needed if the model has been moved to a device after being initialized (because +# # the buffer is automatically moved, but not the original copy) +# self.original_inv_freq = self.original_inv_freq.to(device) +# self.register_buffer("inv_freq", self.original_inv_freq, persistent=False) +# self.max_seq_len_cached = self.original_max_seq_len +# +# @torch.no_grad() +# def forward(self, x, position_ids): +# if "dynamic" in self.rope_type: +# self._dynamic_frequency_update(position_ids, device=x.device) +# +# # Core RoPE block +# inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) +# position_ids_expanded = position_ids[:, None, :].float() +# # Force float32 (see https://github.com/huggingface/transformers/pull/29285) +# device_type = x.device.type +# device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" +# with torch.autocast(device_type=device_type, enabled=False): +# freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) +# emb = torch.cat((freqs, freqs), dim=-1) +# cos = emb.cos() +# sin = emb.sin() +# +# # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention +# cos = cos * self.attention_scaling +# sin = sin * self.attention_scaling +# +# return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) diff --git a/front/py/deepxutil/numpy/__init__.py b/front/py/deepxutil/numpy/__init__.py new file mode 100644 index 00000000..3bc44bf0 --- /dev/null +++ b/front/py/deepxutil/numpy/__init__.py @@ -0,0 +1,4 @@ +from .io import * +__all__ = [ + 'save_numpy', +] diff --git a/front/py/deepxutil/numpy/io.py b/front/py/deepxutil/numpy/io.py new file mode 100644 index 00000000..ae95a17a --- /dev/null +++ b/front/py/deepxutil/numpy/io.py @@ -0,0 +1,17 @@ +from deepx.tensor import Shape,saveShape + +def save_numpy(t,tensorpath:str): + r''' + 保存numpy.ndarray为deepx.tensor格式 + t:numpy.ndarray + tensorpath:str, + ''' + from numpy import ascontiguousarray,ndarray + assert isinstance(t,ndarray) + shape=Shape(t.shape) + shape._dtype=str(t.dtype) + saveShape(shape,tensorpath+".shape") + + array = ascontiguousarray(t) + array.tofile(tensorpath+'.data') + return t diff --git a/front/py/deepxutil/torch/__init__.py b/front/py/deepxutil/torch/__init__.py new file mode 100644 index 00000000..6780053c --- /dev/null +++ b/front/py/deepxutil/torch/__init__.py @@ -0,0 +1,4 @@ +from .io import * +__all__ = [ + 'save_torch', +] diff --git a/front/py/deepxutil/torch/io.py b/front/py/deepxutil/torch/io.py new file mode 100644 index 00000000..388e0bd9 --- /dev/null +++ b/front/py/deepxutil/torch/io.py @@ -0,0 +1,10 @@ +def save_torch(t,path:str): + r''' + 保存torch.Tensor为deepx.tensor格式 + ''' + from torch import Tensor as torch_Tensor + assert isinstance(t,torch_Tensor) + t=t.detach().cpu().numpy() + from deepxutil.numpy.io import save_numpy + save_numpy(t,path) + \ No newline at end of file diff --git a/front/py/examples/3_module/1_embedding.py b/front/py/examples/3_module/1_embedding.py index ddf52f44..3204fc00 100644 --- a/front/py/examples/3_module/1_embedding.py +++ b/front/py/examples/3_module/1_embedding.py @@ -26,7 +26,7 @@ def tokenize_text(text, tokenizer): # 创建输入 text = "这是一个测试文本,用于演示嵌入层的使用。" torch_input = tokenize_text(text, tokenizer) -from deepx.nn.functional import save_torch +from deepxutil.torch import save_torch save_torch(torch_input,dir+'input') print(torch_input) # 创建网络 @@ -47,7 +47,6 @@ def tokenize_text(text, tokenizer): input.print() weight=load(dir+'weight') -weight.name='embedding_0.weight' net = Embedding(tokenizer.vocab_size, 4096,weight=weight) out=net.forward(input) out.print() diff --git a/front/py/examples/4_transformer/llama/1_llamarmsnorm.py b/front/py/examples/4_transformer/llama/1_llamarmsnorm.py deleted file mode 100644 index 8dfacfa1..00000000 --- a/front/py/examples/4_transformer/llama/1_llamarmsnorm.py +++ /dev/null @@ -1,25 +0,0 @@ - -# 使用小规模数据以便打印完整结果 -hidden_size = 8 -eps = 1e-6 - - -############### DeepX 实现部分 ############### -from deepx import arange, constant_ -from deepx.transformer.models.llama.modeling_llama import LlamaRMSNorm - -# 使用相同的数据 -input = arange(2, 3, hidden_size, dtype="float32") -input.div_(10.0) -input.sub_(2.0) -eps = 1e-6 - -input.print() - -# DeepX计算流程 -norm = LlamaRMSNorm(hidden_size=hidden_size, eps=eps) -# 设置相同的权重 -constant_(norm.weight, 0.5) -# 前向计算 -output = norm(input) -output.print() diff --git a/front/py/examples/4_transformer/llama/1_llamarmsnorm_torch.py b/front/py/examples/4_transformer/llama/1_llamarmsnorm_torch.py index 85ef6ced..0e350582 100644 --- a/front/py/examples/4_transformer/llama/1_llamarmsnorm_torch.py +++ b/front/py/examples/4_transformer/llama/1_llamarmsnorm_torch.py @@ -1,16 +1,21 @@ +hidden_size = 8 +eps = 1e-6 +dir='/home/lipeng/model/deepxmodel/llama/' + + + ############### PyTorch 实现部分 ############### import torch -from transformers.models.llama.modeling_llama import LlamaRMSNorm - # 使用小规模数据以便打印完整结果 -hidden_size = 8 -pt_input_data = torch.arange(48, dtype=torch.float32).reshape(2, 3, hidden_size) / 10.0 - 2.0 -pt_input = pt_input_data.clone() -eps = 1e-6 +pt_input = torch.arange(48, dtype=torch.float32).reshape(2, 3, hidden_size) / 10.0 - 2.0 print("PyTorch 输入:") print(pt_input) + +from transformers.models.llama.modeling_llama import LlamaRMSNorm as TransformersLlamaRMSNorm +from deepxutil.torch import save_torch +save_torch(pt_input,dir+'rmsnorm_input') # 使用transformers库中的官方LlamaRMSNorm实现 -pt_norm = LlamaRMSNorm(hidden_size, eps=eps) +pt_norm = TransformersLlamaRMSNorm(hidden_size, eps=eps) # 设置权重为固定值0.5 with torch.no_grad(): pt_norm.weight.fill_(0.5) @@ -19,4 +24,20 @@ print("\nPyTorch RMSNorm 结果:") +print(pt_output.shape) print(pt_output) + + +############### DeepX 实现部分 ############### +from deepx import constant_,load +from deepx.transformer.models.llama.modeling_llama import LlamaRMSNorm + +input=load(dir+'rmsnorm_input') + +# DeepX计算流程 +norm = LlamaRMSNorm(hidden_size=hidden_size, eps=eps) +# 设置相同的权重 +constant_(norm.weight, 0.5) +# 前向计算 +output = norm(input) +output.print() From 70a181f5b53eec2e217bac9414238005168c9369 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Thu, 24 Apr 2025 22:46:49 +0800 Subject: [PATCH 2/7] save,load,loadtensordata,loadshape --- excuter/cpp-common/src/deepx/tensor.hpp | 21 ++- .../cpp-common/src/deepx/tensorfunc/io.hpp | 11 +- excuter/cpp-common/src/stdutil/fs.cpp | 64 ++++++- excuter/cpp-common/src/stdutil/fs.hpp | 10 + excuter/cpp-common/test/2_saveload.cpp | 31 ++++ excuter/cpp-common/test/CMakeLists.txt | 4 +- .../op-mem-cuda/src/deepx/tensorfunc/cuda.hpp | 21 ++- .../src/deepx/tensorfunc/io_miaobyte.hpp | 149 +++------------ .../deepx/tensorfunc/tensorlife_miaobyte.hpp | 65 +++++-- .../src/deepx/tensorfunc/vector_cuda.cuh | 3 +- excuter/op-mem-cuda/src/deepx/tf/arg.hpp | 6 +- .../op-mem-cuda/src/deepx/tf/changeshape.hpp | 4 +- excuter/op-mem-cuda/src/deepx/tf/init.hpp | 174 +++++++++--------- excuter/op-mem-cuda/src/deepx/tf/io.hpp | 121 ++++++++++++ .../deepx/tensorfunc/tensorlife_miaobyte.hpp | 6 +- excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp | 6 +- .../src/deepx/tf/changeshape.hpp | 4 +- excuter/op-mem-ompsimd/src/deepx/tf/init.hpp | 115 ++++++------ excuter/op-mem-ompsimd/src/deepx/tf/io.hpp | 35 ++++ 19 files changed, 550 insertions(+), 300 deletions(-) create mode 100644 excuter/cpp-common/test/2_saveload.cpp diff --git a/excuter/cpp-common/src/deepx/tensor.hpp b/excuter/cpp-common/src/deepx/tensor.hpp index 463e42cc..fc33ecde 100644 --- a/excuter/cpp-common/src/deepx/tensor.hpp +++ b/excuter/cpp-common/src/deepx/tensor.hpp @@ -28,6 +28,12 @@ namespace deepx using CopyFn = void (*)(T *, T *, int); CopyFn copyer; // 拷贝内存 + using SaveFn = void (*)(T *,size_t,const std::string &); + SaveFn saver; // 保存内存 + + using LoadFn = int (*)(const std::string &, T *,int); + LoadFn loader; // 加载内存 + Tensor() = default; Tensor(const vector &s) { @@ -57,6 +63,8 @@ namespace deepx newer = tensor.newer; deleter = tensor.deleter; copyer = tensor.copyer; + loader = tensor.loader; + saver = tensor.saver; data = newer(shape.size); copyer(tensor.data, data, tensor.shape.size); @@ -76,6 +84,8 @@ namespace deepx deleter = other.deleter; copyer = other.copyer; newer = other.newer; + loader = other.loader; + saver = other.saver; data = other.data; @@ -84,6 +94,8 @@ namespace deepx other.deleter = nullptr; other.copyer = nullptr; other.newer = nullptr; + other.loader = nullptr; + other.saver = nullptr; } /** @@ -102,7 +114,8 @@ namespace deepx deleter = tensor.deleter; copyer = tensor.copyer; newer = tensor.newer; - + loader = tensor.loader; + saver = tensor.saver; data = newer(shape.size); if (data != nullptr) { @@ -126,6 +139,10 @@ namespace deepx newer = tensor.newer; deleter = tensor.deleter; copyer = tensor.copyer; + loader = tensor.loader; + saver = tensor.saver; + + if (data != nullptr) { deleter(data); @@ -135,6 +152,8 @@ namespace deepx tensor.deleter = nullptr; tensor.copyer = nullptr; tensor.newer = nullptr; + tensor.loader = nullptr; + tensor.saver = nullptr; return *this; } }; diff --git a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp index 86a1c396..ae4876bf 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp @@ -15,10 +15,13 @@ namespace deepx::tensorfunc{ void print(const Tensor &t, const std::string &f=""){ printDispatcher::print(t, f); } - + template - void save(Tensor &tensor,const std::string &path); + void save(const Tensor &tensor,const std::string &path); + + + //load template pair>> load(const std::string &path); @@ -33,7 +36,9 @@ namespace deepx::tensorfunc{ std::string tensor_name = filename.substr(0, filename.find_last_of('.')); return std::make_pair(tensor_name, shape); } - + //对loaddata,不同excuter的实现不同。gpu具有显存,可以直接从文件到显存。 + template + void loadData(const std::string &path,Tensor &tensor); } #endif // DEEPX_TENSORFUNC_IO_HPP diff --git a/excuter/cpp-common/src/stdutil/fs.cpp b/excuter/cpp-common/src/stdutil/fs.cpp index 63d297e0..7f7a57fe 100644 --- a/excuter/cpp-common/src/stdutil/fs.cpp +++ b/excuter/cpp-common/src/stdutil/fs.cpp @@ -1,7 +1,65 @@ #include "fs.hpp" - -namespace stdutil{ - string filename(const string &path){ +#include + +namespace stdutil +{ + string filename(const string &path) + { return path.substr(path.find_last_of('/') + 1); } + + /* + std::ios::binary 二进制打开 + std::ios::in 读 + std::ios::out 写,如果文件不存在,则创建文件 + std::ios::trunc 如果文件存在,则清空文件 + */ + + void save(const byte *data, size_t size, const string &path) + { + + ofstream ofs(path, ios::binary | ios::out | ios::trunc); + ofs.write(reinterpret_cast(data), size); + ofs.close(); + } + + void load(const string &path,byte *data,size_t target_size){ + ifstream ifs(path, ios::binary | ios::in); + if (!ifs.is_open()) + { + throw std::runtime_error("Failed to open file: " + path); + } + ifs.seekg(0, ios::end); + size_t size = ifs.tellg(); + ifs.seekg(0, ios::beg); + if(size!=target_size){ + throw std::runtime_error("file size mismatch: " + path); + } + ifs.read(reinterpret_cast(data), size); + if (ifs.fail()) + { + throw std::runtime_error("Failed to read file: " + path); + } + ifs.close(); + } + + std::pair> load(const string &path) + { + ifstream ifs(path, ios::binary | ios::in); + if (!ifs.is_open()) + { + throw std::runtime_error("Failed to open file: " + path); + } + ifs.seekg(0, ios::end); + size_t size = ifs.tellg(); + ifs.seekg(0, ios::beg); + shared_ptr data(new byte[size]); + ifs.read(reinterpret_cast(data.get()), size); + if (ifs.fail()) + { + throw std::runtime_error("Failed to read file: " + path); + } + ifs.close(); + return std::make_pair(size, data); + } } \ No newline at end of file diff --git a/excuter/cpp-common/src/stdutil/fs.hpp b/excuter/cpp-common/src/stdutil/fs.hpp index 26826d9b..9917e09d 100644 --- a/excuter/cpp-common/src/stdutil/fs.hpp +++ b/excuter/cpp-common/src/stdutil/fs.hpp @@ -2,10 +2,20 @@ #define DEEPX_STDUTIL_FS_HPP #include +#include namespace stdutil{ + + + using namespace std; string filename(const string &path); + + using byte = unsigned char; + + void save(const byte *data,size_t size,const string &path); + void load(const string &path,byte *data,size_t target_size); + pair> load(const string &path); } #endif // DEEPX_STDUTIL_FS_HPP \ No newline at end of file diff --git a/excuter/cpp-common/test/2_saveload.cpp b/excuter/cpp-common/test/2_saveload.cpp new file mode 100644 index 00000000..9e6f4b31 --- /dev/null +++ b/excuter/cpp-common/test/2_saveload.cpp @@ -0,0 +1,31 @@ +#include "stdutil/fs.hpp" +#include +using namespace stdutil; +void test_save(int total_size){ + stdutil::byte *data = new stdutil::byte[total_size]; + for(int i=0;i #include #include +#include + +#include + + namespace deepx::tensorfunc { + class CublasHandle { public: @@ -61,6 +66,20 @@ namespace deepx::tensorfunc return {blocks, blocksize}; }; + using std::shared_ptr; + + inline std::pair> device_offload(unsigned char *data,int size) + { + shared_ptr host_data(new unsigned char[size]); + cudaMemcpy(host_data.get(), data, size, cudaMemcpyDeviceToHost); + cudaError_t err=cudaGetLastError(); + if(err!=cudaSuccess){ + throw std::runtime_error("Failed to copy data from device to host"); + + } + return {size, host_data}; + } + } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 00d338fe..5c606393 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -13,7 +13,7 @@ #include #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/io.hpp" - +#include "deepx/tensorfunc/cuda.hpp" namespace deepx::tensorfunc { template @@ -24,20 +24,9 @@ namespace deepx::tensorfunc int64_t total_bytes = t.shape.bytes(); // 统一分配CPU内存 - unsigned char *host_data = new unsigned char[total_bytes]; - if (host_data == nullptr) - { - throw std::runtime_error("Failed to allocate host memory"); - } - cudaError_t err = cudaMemcpy(host_data, t.data, total_bytes, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) - { - delete[] host_data; - throw std::runtime_error("Failed to copy data from device to host"); - } - - stdutil::print(t.shape.shape, host_data, t.shape.dtype, f); - delete[] host_data; + unsigned char* device_data=reinterpret_cast(t.data); + auto [_,host_data]= device_offload(device_data,total_bytes); + stdutil::print(t.shape.shape, host_data.get(), t.shape.dtype, f); }; }; @@ -50,36 +39,17 @@ namespace deepx::tensorfunc int64_t total_bytes = t.shape.bytes(); // 统一分配CPU内存 - unsigned char *host_data = new unsigned char[total_bytes]; - if (host_data == nullptr) - { - throw std::runtime_error("Failed to allocate host memory"); - } - - // 统一复制数据到CPU - cudaError_t err = cudaMemcpy(host_data, t.data, total_bytes, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) - { - delete[] host_data; - throw std::runtime_error("Failed to copy data from device to host"); - } - - float *host_float = new float[t.shape.size]; - if (host_float == nullptr) - { - delete[] host_data; - throw std::runtime_error("Failed to allocate host memory for float conversion"); - } - + unsigned char* device_data=reinterpret_cast(t.data); + auto [_,host_data_]= device_offload(device_data,total_bytes); + half* host_data=reinterpret_cast(host_data_.get()); + shared_ptr host_float(new float[t.shape.size]); for (size_t i = 0; i < t.shape.size; i++) { - host_float[i] = __half2float(((half *)host_data)[i]); + host_float[i] = __half2float(host_data[i]); } - delete[] host_data; // 打印转换后的float数据 - stdutil::print(t.shape.shape, host_float, Precision::Float32, f); - delete[] host_float; + stdutil::print(t.shape.shape, host_float.get(), Precision::Float32, f); } }; @@ -91,79 +61,27 @@ namespace deepx::tensorfunc int64_t total_bytes = t.shape.bytes(); // 统一分配CPU内存 - unsigned char *host_data = new unsigned char[total_bytes]; - if (host_data == nullptr) - { - throw std::runtime_error("Failed to allocate host memory"); - } - - // 统一复制数据到CPU - cudaError_t err = cudaMemcpy(host_data, t.data, total_bytes, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) - { - delete[] host_data; - throw std::runtime_error("Failed to copy data from device to host"); - } - - float *host_float = new float[t.shape.size]; - if (host_float == nullptr) - { - delete[] host_data; - throw std::runtime_error("Failed to allocate host memory for float conversion"); - } + unsigned char* device_data=reinterpret_cast(t.data); + auto [_,host_data_]= device_offload(device_data,total_bytes); + nv_bfloat16* host_data=reinterpret_cast(host_data_.get()); + shared_ptr host_float(new float[t.shape.size]); for (size_t i = 0; i < t.shape.size; i++) { - host_float[i] = __bfloat162float(((nv_bfloat16 *)host_data)[i]); - } - delete[] host_data; + host_float[i] = __bfloat162float(host_data[i]); + } // 打印转换后的float数据 - stdutil::print(t.shape.shape, host_float, Precision::Float32, f); - delete[] host_float; + stdutil::print(t.shape.shape, host_float.get(), Precision::Float32, f); } }; template void save(Tensor &tensor, const std::string &path) { - // 保存shape - std::string shapepath = path + ".shape"; - std::string shapedata = tensor.shape.toYaml(); - std::ofstream shape_fs(shapepath, std::ios::binary); - shape_fs.write(shapedata.c_str(), shapedata.size()); - shape_fs.close(); - - // 保存data - int64_t total_bytes = tensor.shape.bytes(); - // 统一分配CPU内存 - unsigned char *host_data = new unsigned char[total_bytes]; - if (host_data == nullptr) - { - throw std::runtime_error("Failed to allocate host memory"); - } - - // 统一复制数据到CPU - cudaError_t err = cudaMemcpy(host_data, tensor.data, total_bytes, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) - { - delete[] host_data; - throw std::runtime_error("Failed to copy data from device to host"); - } - - std::string datapath = path + ".data"; - std::ofstream data_fs(datapath, std::ios::binary | std::ios::in | std::ios::out); - - if (!data_fs.is_open()) - { - // 如果文件不存在,则创建新文件 - data_fs.open(datapath, std::ios::binary | std::ios::out); - } - data_fs.seekp(0); - data_fs.write(reinterpret_cast(host_data), total_bytes); - data_fs.close(); - - delete[] host_data; + unsigned char* device_data=reinterpret_cast(tensor.data); + auto [size,host_data]= device_offload(device_data,tensor.shape.bytes()); + stdutil::save(host_data.get(),size,path); }; template @@ -183,36 +101,19 @@ namespace deepx::tensorfunc // 检查file.size,是否是tensor.size*sizeof(T) std::string datapath = path + ".data"; - std::ifstream data_fs(datapath, std::ios::binary); - data_fs.seekg(0, std::ios::end); - std::streamsize fileSize = data_fs.tellg(); - std::streamsize expectedSize = shape.bytes(); - - if (fileSize != expectedSize) - { - throw std::runtime_error("数据文件大小不足: 需要 " + std::to_string(expectedSize) + + auto [fileSize,hostdata]=stdutil::load(datapath); + if(fileSize!=shape.bytes()){ + throw std::runtime_error("数据文件大小不足: 需要 " + std::to_string(shape.bytes()) + " 字节,但文件只有 " + std::to_string(fileSize) + " 字节"); } - data_fs.seekg(0); - - // TODO 从文件,到cuda内存(可能是显存) - + T *host_data=reinterpret_cast(hostdata.get()); shared_ptr> tensor = make_shared>(New(shape.shape)); - unsigned char *host_data = new unsigned char[fileSize]; - if (host_data == nullptr) - { - throw std::runtime_error("Failed to allocate host memory"); - } - data_fs.read(reinterpret_cast(host_data), fileSize); - data_fs.close(); - + cudaError_t err = cudaMemcpy(tensor->data, host_data, fileSize, cudaMemcpyHostToDevice); if (err != cudaSuccess) { - delete[] host_data; throw std::runtime_error("Failed to copy data from host to device"); } - delete[] host_data; return std::make_pair(tensor_name, tensor); } } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp index 8e776a14..5e0c96c0 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp @@ -3,57 +3,92 @@ #include #include + +#include "stdutil/fs.hpp" #include "deepx/tensor.hpp" #include "deepx/dtype_cuda.hpp" #include "deepx/tensorfunc/tensorlife.hpp" +#include "deepx/tensorfunc/cuda.hpp" // 具体的张量类 namespace deepx::tensorfunc { + // NewFn template - static T* dataNew(int size) + static T *newFn(int size) { - T* data; + T *data; cudaError_t err = cudaMalloc(&data, size * sizeof(T)); - if (err != cudaSuccess) { + if (err != cudaSuccess) + { throw std::runtime_error("Failed to allocate Unified Memory"); } return data; } template - static void dataFree(T *data) + static void freeFn(T *data) { cudaFree(data); } template - static void dataCopy(T *data, T *data2, int size) + static void copyFn(T *src, T *dest, int size) { - cudaMemcpy(data2, data, size * sizeof(T), cudaMemcpyDefault); + cudaMemcpy(dest, src, size * sizeof(T), cudaMemcpyDeviceToDevice); + } + + template + static void saveFn(T *tensorData, size_t size, const std::string &path) + { + // 保存data + int64_t total_bytes = size * sizeof(T); + + // 统一分配CPU内存 + auto [host_data, err] = device_offload(tensorData, total_bytes); + stdutil::save(host_data.get(), total_bytes, path); + } + + // 不做任何转换,直接从内存到文件,或从文件到内存 + template + static int loadFn(const std::string &path, T *data, int count) + { + auto [file_size, hostdata] = stdutil::load(path); + if (file_size != count * sizeof(T)) + { + Precision p = precision(); + throw std::runtime_error("file_size!=count*" + precision_str(p)); + } + cudaMemcpy(data, hostdata.get(), file_size, cudaMemcpyHostToDevice); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + throw std::runtime_error("Failed to copy data from host to device"); + } + return count; } template Tensor New(const std::vector &shapedata) { Shape shape(shapedata); - shape.dtype=precision(); + shape.dtype = precision(); Tensor tensor(shape); - tensor.deleter = dataFree; - tensor.copyer = dataCopy; - tensor.newer = dataNew; + tensor.deleter = freeFn; + tensor.copyer = copyFn; + tensor.newer = newFn; - tensor.data = dataNew(shape.size); + tensor.data = newFn(shape.size); return tensor; } - + template - void copy(const Tensor &src,Tensor &dst) + void copy(const Tensor &src, Tensor &dst) { - dst.shape=src.shape; + dst.shape = src.shape; dst.copyer(src.data, dst.data, src.shape.size); } - //rename + // rename } #endif // DEEPX_TENSORFUNC_TENSORLIFE_MIAOBYTE_HPP diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/vector_cuda.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/vector_cuda.cuh index 4fe17030..042543d8 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/vector_cuda.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/vector_cuda.cuh @@ -3,6 +3,7 @@ namespace deepx::tensorfunc { + //TODO 待验证 template __device__ void GridStrideLoopCopy(const T* src, T* dst, int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -12,7 +13,7 @@ namespace deepx::tensorfunc dst[i] = src[i]; } } - + //TODO 待验证 // 全局复制函数,可从主机调用 template __global__ void GridStrideLoopCopyKernel(const T* src, T* dst, int size) { diff --git a/excuter/op-mem-cuda/src/deepx/tf/arg.hpp b/excuter/op-mem-cuda/src/deepx/tf/arg.hpp index dcc845fd..4b33c457 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/arg.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/arg.hpp @@ -21,11 +21,11 @@ namespace deepx::tf string math_formula() const override { - return "var argname = argvalue"; + return "argvalue->argname"; } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; if (this->args.size() != 1) { error = "argset(int32) must have 1 argument"; @@ -82,7 +82,7 @@ namespace deepx::tf string math_formula() const override { - return "shape = [3 4 5]"; + return "[3 4 5]->shape"; } int run(shared_ptr mem, string &error) override { diff --git a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp index 8b4604f9..db447136 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp @@ -39,7 +39,7 @@ namespace deepx::tf int run(shared_ptr mem, string &error) override { Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - vector shape = this->getvector(1, -1); + vector shape = this->getvector(1, true); Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; if (input_type != output_type) { @@ -101,7 +101,7 @@ namespace deepx::tf int run(shared_ptr mem, string &error) override { Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - vector dim_order = this->getvector(1, -1); + vector dim_order = this->getvector(1, true); Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; if (input_type != output_type) diff --git a/excuter/op-mem-cuda/src/deepx/tf/init.hpp b/excuter/op-mem-cuda/src/deepx/tf/init.hpp index 839c83bf..055bb65c 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/init.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/init.hpp @@ -18,60 +18,67 @@ namespace deepx::tf Constant(const vector &args, const vector &returns) { this->name = "constant"; - this->metadata.author= Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; } - + string math_formula() const override + { + return "constant(value)->T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; switch (type) { case Precision::Float64: - - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; - + case Precision::Float32: - - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; - + case Precision::Float16: - - tensorfunc::constant(*mem->gettensor<__half>(name).get(), this->getvar<__half>(1, mem)); + + tensorfunc::constant(*mem->gettensor<__half>(name).get(), this->getvar<__half>(0, mem)); break; - + case Precision::BFloat16: - - tensorfunc::constant(*mem->gettensor<__nv_bfloat16>(name).get(), this->getvar<__nv_bfloat16>(1, mem)); + + tensorfunc::constant(*mem->gettensor<__nv_bfloat16>(name).get(), this->getvar<__nv_bfloat16>(0, mem)); break; - + case Precision::Int64: - - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; - + case Precision::Int32: - - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; - + case Precision::Int16: - - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; - + case Precision::Int8: - - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; case Precision::Bool: - tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(0, mem)); break; default: { @@ -81,14 +88,6 @@ namespace deepx::tf } return 0; }; - string math_formula() const override - { - return "constant(T1)"; - } - shared_ptr clone() const override - { - return make_shared>(*this); - } }; template @@ -98,14 +97,22 @@ namespace deepx::tf Arange(const vector &args, const vector &returns) { this->name = "arange"; - this->metadata.author= Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; } + string math_formula() const override + { + return "arange(start,step)->T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; switch (type) @@ -113,49 +120,49 @@ namespace deepx::tf case Precision::Float64: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Float32: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Float16: { auto output = mem->gettensor<__half>(name).get(); - tensorfunc::arange(*output, this->getvar<__half>(1, mem), this->getvar<__half>(2, mem)); + tensorfunc::arange(*output, this->getvar<__half>(0, mem), this->getvar<__half>(1, mem)); break; } case Precision::BFloat16: { auto output = mem->gettensor<__nv_bfloat16>(name).get(); - tensorfunc::arange(*output, this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem)); + tensorfunc::arange(*output, this->getvar<__nv_bfloat16>(0, mem), this->getvar<__nv_bfloat16>(1, mem)); break; } case Precision::Int64: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int32: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int16: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int8: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } default: @@ -166,14 +173,7 @@ namespace deepx::tf } return 0; } - string math_formula() const override - { - return "arange(T1,start,step)"; - } - shared_ptr clone() const override - { - return make_shared>(*this); - } + }; template @@ -183,65 +183,75 @@ namespace deepx::tf Uniform(const vector &args, const vector &returns) { this->name = "uniform"; - this->metadata.author= Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; } + string math_formula() const override + { + return "uniform(low,high,seed)->T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; - unsigned int seed = static_cast( this->getvar(3, mem)); + int low_pos=0; + int high_pos=1; + unsigned int seed = static_cast(this->getvar(2, mem)); switch (type) { case Precision::Float64: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Float32: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Float16: { auto output = mem->gettensor<__half>(name).get(); - tensorfunc::uniform(*output, this->getvar<__half>(1, mem), this->getvar<__half>(2, mem), seed); + tensorfunc::uniform(*output, this->getvar<__half>(low_pos, mem), this->getvar<__half>(high_pos, mem), seed); break; } case Precision::BFloat16: { auto output = mem->gettensor<__nv_bfloat16>(name).get(); - tensorfunc::uniform(*output, this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem), seed); + tensorfunc::uniform(*output, this->getvar<__nv_bfloat16>(low_pos, mem), this->getvar<__nv_bfloat16>(high_pos, mem), seed); break; } case Precision::Int64: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int32: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int16: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int8: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } default: @@ -252,16 +262,10 @@ namespace deepx::tf } return 0; } - string math_formula() const override - { - return "uniform(T1,low,high,seed)"; - } - shared_ptr clone() const override - { - return make_shared>(*this); - } + }; + // template class Normal : public TF { @@ -269,7 +273,7 @@ namespace deepx::tf Normal(const vector &args, const vector &returns) { this->name = "normal"; - this->metadata.author= Author::name(); + this->metadata.author = Author::name(); this->tftype = "init"; this->args = args; this->returns = returns; @@ -277,7 +281,7 @@ namespace deepx::tf string math_formula() const override { - return "normal(T1,mean,stddev,seed)"; + return "normal(mean,stddev,seed)->T1"; } shared_ptr clone() const override { @@ -285,41 +289,43 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; - unsigned int seed = static_cast( this->getvar(3, mem)); + int mean_pos=0; + int stddev_pos=1; + unsigned int seed = static_cast(this->getvar(2, mem)); switch (type) { case Precision::Float64: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Float32: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Float16: - tensorfunc::normal(*mem->gettensor<__half>(name).get(), this->getvar<__half>(1, mem), this->getvar<__half>(2, mem), seed); + tensorfunc::normal(*mem->gettensor<__half>(name).get(), this->getvar<__half>(mean_pos, mem), this->getvar<__half>(stddev_pos, mem), seed); break; case Precision::BFloat16: - tensorfunc::normal(*mem->gettensor<__nv_bfloat16>(name).get(), this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem), seed); + tensorfunc::normal(*mem->gettensor<__nv_bfloat16>(name).get(), this->getvar<__nv_bfloat16>(mean_pos, mem), this->getvar<__nv_bfloat16>(stddev_pos, mem), seed); break; case Precision::Int64: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int32: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int16: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int8: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; default: diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp b/excuter/op-mem-cuda/src/deepx/tf/io.hpp index 8049fc81..64466b15 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -178,5 +178,126 @@ namespace deepx::tf return 0; } }; + + //loadtensor + class LoadTensor : public TF + { + public: + LoadTensor(vector args, vector returns) + { + this->name = "loadtensor"; + this->tftype = "io"; + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "loadtensor(path)->tensor"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + string path = this->args[0].textvalue; + string tensorname = this->returns[0].textvalue; + if(!mem->existstensor(tensorname)) + { + error = "loadtensor " + tensorname + " not exists"; + return 1; + } + pair shape_name=tensorfunc::loadShape(path); + std::string tensor_name=shape_name.first; + Shape shape=shape_name.second; + switch (shape.dtype) + { + case Precision::Float64:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Float32:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Float16:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::BFloat16:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Int64:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Int32:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Int16:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Int8:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + case Precision::Bool:{ + pair>> t = tensorfunc::load(path); + mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + break; + } + default: + break; + } + + + return 0; + } + }; + + //loadtensordata + class LoadTensorData : public TF + { + public: + LoadTensorData(vector args, vector returns) + { + this->name = "loadtensordata"; + this->tftype = "io"; + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "loadtensordata(path)->tensor"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + string path = this->args[0].textvalue; + string tensorname = this->returns[0].textvalue; + if(!mem->existstensor(tensorname)) + { + error = "loadtensordata " + tensorname + " not found"; + return 1; + } + auto t = *mem->gettensor(tensorname); + t.loader(path,t.data,t.shape.size); + return 0; + } + }; } #endif diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp index c514faa8..b1bc509e 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp @@ -12,7 +12,7 @@ namespace deepx::tensorfunc { template - static T *dataNew(int size) + static T *newFn(int size) { return static_cast(MemoryPool::Malloc(size * sizeof(T))); } @@ -39,8 +39,8 @@ namespace deepx::tensorfunc Tensor tensor(shape); tensor.deleter = dataFree; tensor.copyer = dataCopy; - tensor.newer = dataNew; - tensor.data = dataNew(shape.size); + tensor.newer = newFn; + tensor.data = newFn(shape.size); return tensor; }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp index 13733fa1..2418cd32 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp @@ -23,11 +23,11 @@ namespace deepx::tf string math_formula() const override { - return "var argname = argvalue"; + return "argvalue->argname"; } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; if (this->args.size() != 1) { error = "argset(int32) must have 1 argument"; @@ -84,7 +84,7 @@ namespace deepx::tf string math_formula() const override { - return "shape = [3 4 5]"; + return "[3 4 5]->shape"; } int run(shared_ptr mem, string &error) override { diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp index e0d96019..0f64432c 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp @@ -43,7 +43,7 @@ namespace deepx::tf return 1; } Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - vector shape = this->getvector(1, -1); + vector shape = this->getvector(1, true); Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; if (input_type != output_type) { @@ -109,7 +109,7 @@ namespace deepx::tf return 1; } Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - vector dim_order = this->getvector(1, -1); + vector dim_order = this->getvector(1,true); Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; if (input_type != output_type) { diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp index 54410653..756f880d 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp @@ -20,10 +20,17 @@ namespace deepx::tf this->args = args; this->returns = returns; } - + string math_formula() const override + { + return "constant(value)->T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); if (tensor == nullptr) { @@ -36,38 +43,38 @@ namespace deepx::tf case Precision::Float64: { auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + tensorfunc::constant(*output, this->getvar(0, mem)); break; } case Precision::Float32: { auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + tensorfunc::constant(*output, this->getvar(0, mem)); break; } case Precision::Int64: { auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + tensorfunc::constant(*output, this->getvar(0, mem)); break; } case Precision::Int32: { auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + tensorfunc::constant(*output, this->getvar(0, mem)); break; } case Precision::Int16: { auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + tensorfunc::constant(*output, this->getvar(0, mem)); break; } case Precision::Int8: { auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + tensorfunc::constant(*output, this->getvar(0, mem)); break; } default: @@ -78,14 +85,6 @@ namespace deepx::tf } return 0; }; - string math_formula() const override - { - return "constant(T1,value)"; - } - shared_ptr clone() const override - { - return make_shared>(*this); - } }; // arange @@ -100,10 +99,18 @@ namespace deepx::tf this->tftype = "init"; this->args = args; this->returns = returns; + } + string math_formula() const override + { + return "arange(start,step)->T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; switch (type) @@ -111,38 +118,38 @@ namespace deepx::tf case Precision::Float64: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Float32: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int64: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int32: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int16: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } case Precision::Int8: { auto output = mem->gettensor(name).get(); - tensorfunc::arange(*output, this->getvar(1, mem), this->getvar(2, mem)); + tensorfunc::arange(*output, this->getvar(0, mem), this->getvar(1, mem)); break; } default: @@ -153,14 +160,7 @@ namespace deepx::tf } return 0; } - string math_formula() const override - { - return "arange(T1,start,step)"; - } - shared_ptr clone() const override - { - return make_shared>(*this); - } + }; // uniform @@ -176,48 +176,59 @@ namespace deepx::tf this->args = args; this->returns = returns; } + string math_formula() const override + { + return "uniform(low,high,seed)->T1"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; + int low_pos=0; + int high_pos=1; + unsigned int seed = static_cast(this->getvar(2, mem)); switch (type) { case Precision::Float64: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Float32: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int64: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int32: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int16: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } case Precision::Int8: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(low_pos, mem), this->getvar(high_pos, mem), seed); break; } default: @@ -228,14 +239,7 @@ namespace deepx::tf } return 0; } - string math_formula() const override - { - return "uniform(T1,low,high,seed)"; - } - shared_ptr clone() const override - { - return make_shared>(*this); - } + }; // normal @@ -254,7 +258,7 @@ namespace deepx::tf string math_formula() const override { - return "normal(T1,mean,stddev,seed)"; + return "normal(mean,stddev,seed)->T1"; } shared_ptr clone() const override { @@ -262,28 +266,31 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; auto tensor = mem->gettensor(name).get(); auto type = tensor->shape.dtype; + int mean_pos=0; + int stddev_pos=1; + unsigned int seed = static_cast(this->getvar(2, mem)); switch (type) { case Precision::Float64: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Float32: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int64: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int32: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int16: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; case Precision::Int8: - tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(mean_pos, mem), this->getvar(stddev_pos, mem), seed); break; default: { diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index 8c6fdc56..eb540f32 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -168,5 +168,40 @@ namespace deepx::tf return 0; } }; + + //loadtensordata + class LoadTensorData : public TF + { + public: + LoadTensorData(vector args, vector returns) + { + this->name = "loadtensordata"; + this->tftype = "io"; + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "loadtensordata(path)->tensor.data"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + string path = this->args[0].textvalue; + string tensorname = this->returns[0].textvalue; + if(!mem->existstensor(tensorname)) + { + error = "loadtensordata " + tensorname + " not found"; + return 1; + } + auto t = *mem->gettensor(tensorname); + t.loader(path,t.data,t.shape.size); + return 0; + } + }; + } #endif // DEEPX_TF_IO_HPP From fc7d2c500792a5ef913238a9bb99941badba9cc6 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Fri, 25 Apr 2025 00:48:56 +0800 Subject: [PATCH 3/7] =?UTF-8?q?save,load=EF=BC=8C=E4=BB=94=E7=BB=86?= =?UTF-8?q?=E9=AA=8C=E8=AF=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/excuter/op-mem-cuda/list.md | 35 +- doc/excuter/op-mem-ompsimd/list.md | 39 +- excuter/cpp-common/src/deepx/tensor.hpp | 2 +- .../cpp-common/src/deepx/tensorfunc/io.hpp | 25 +- excuter/op-mem-cuda/src/client/tfs.cpp | 391 +++++++++--------- .../src/deepx/tensorfunc/io_miaobyte.hpp | 39 +- .../deepx/tensorfunc/tensorlife_miaobyte.hpp | 9 +- excuter/op-mem-cuda/src/deepx/tf/io.hpp | 91 +++- .../op-mem-cuda/src/deepx/tf/tensorlife.hpp | 48 +-- excuter/op-mem-ompsimd/src/client/tfs.cpp | 60 ++- .../src/deepx/tensorfunc/io_miaobyte.hpp | 46 +-- .../deepx/tensorfunc/tensorlife_miaobyte.hpp | 29 +- excuter/op-mem-ompsimd/src/deepx/tf/io.hpp | 51 ++- .../src/deepx/tf/tensorlife.hpp | 52 ++- front/py/deepx/nn/functional/__init__.py | 2 +- front/py/deepx/nn/functional/leaffunc_io.py | 4 + front/py/deepx/nn/functional/rtf_init.py | 17 +- front/py/deepx/nn/functional/rtf_io.py | 7 + front/py/deepx/nn/functional/rtf_life.py | 12 +- front/py/deepx/tensor/io.py | 19 +- front/py/deepx/tensor/shape.py | 9 +- front/py/examples/1_tensor/1_clone.py | 6 +- front/py/examples/1_tensor/1_copy.py | 3 +- front/py/examples/1_tensor/1_print.py | 2 +- front/py/examples/1_tensor/2_saveload.py | 13 +- 25 files changed, 579 insertions(+), 432 deletions(-) diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index aef5e0a8..f281979d 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -6,36 +6,43 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | -| argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | +| vecset | none | vecset(vector value)->(vector name) | [3 4 5]->shape | vecset(vector value)->(vector name) | +| argset | none | argset(var value)->(var name) | argvalue->argname | argset(var value)->(var name) | ### tensorlife | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| renametensor | none | renametensor(tensor t, var new_name)->() | rename T1 to T2 | renametensor(tensor t, var new_name)->() | +| renametensor | none | renametensor(var new_name)->(tensor t) | rename(newname)->T1 | renametensor(var new_name)->(tensor t) | | newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | | newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(var shape)->(tensor tensor1) | -| deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | -| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | +| deltensor | none | deltensor()->(tensor t) | del->T1 | deltensor()->(tensor t) | +| copytensor | none | copytensor(tensor src)->(tensor dst) | T2.data = T1.data | copytensor(tensor src)->(tensor dst) | ### io | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| +| loadtensordata | none | loadtensordata(var path)->(tensor t) | loadtensordata(path)->tensor | loadtensordata(var path)->(tensor t) | | save | none | save(tensor t, var path)->() | save(T1,path) | save(tensor t, var path)->() | | print | miaobyte | print(tensor t)->() | print(T1) | print(tensor t)->() | | print | miaobyte | print(tensor t, var format)->() | print(T1) | print(tensor t, var format)->() | | load | none | load(var path)->() | load(path) | load(var path)->() | +### matmul + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | + ### init | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| normal | miaobyte | normal(tensor t, var mean, var stddev, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var stddev, var seed)->() | -| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | -| constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | +| normal | miaobyte | normal(var mean, var stddev, var seed)->(tensor t) | normal(mean,stddev,seed)->T1 | normal(var mean, var stddev, var seed)->(tensor t) | +| uniform | miaobyte | uniform(var low, var high, var seed)->(tensor t) | uniform(low,high,seed)->T1 | uniform(var low, var high, var seed)->(tensor t) | +| arange | miaobyte | arange(var start, var step)->(tensor t) | arange(start,step)->T1 | arange(var start, var step)->(tensor t) | +| constant | miaobyte | constant(var value)->(tensor t) | constant(value)->T1 | constant(var value)->(tensor t) | ### elementwise @@ -64,20 +71,14 @@ | sub | miaobyte | sub(tensor A, tensor B)->(tensor C) | T3=T1-T2 | sub(tensor A, tensor B)->(tensor C) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | +| exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | +| mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | | equal | miaobyte | equal(tensor A, tensor B, var epsilon)->(tensor mask) | mask=compare(T1, T2) | equal(tensor A, tensor B, var epsilon)->(tensor mask) | | mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor C) | | div | miaobyte | div(tensor A, tensor B)->(tensor C) | T3=T1/T2 | div(tensor A, tensor B)->(tensor C) | | invert | miaobyte | invert(tensor A)->(tensor C) | T3=~T1 | invert(tensor A)->(tensor C) | | max | miaobyte | max(tensor A, tensor B)->(tensor C) | T3=max(T1, T2) | max(tensor A, tensor B)->(tensor C) | | pow | miaobyte | pow(tensor A, tensor B)->(tensor C) | T3=pow(T1, T2) | pow(tensor A, tensor B)->(tensor C) | -| mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | -| exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | - -### matmul - -| Operation | Author | Func Def | Math Formula | IR Instruction | -|-----------|--------|------------|--------------|----------------| -| matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | ### reduce diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md index 9f43ccf4..8bcfcdb0 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -6,36 +6,44 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | -| argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | +| vecset | none | vecset(vector value)->(vector name) | [3 4 5]->shape | vecset(vector value)->(vector name) | +| argset | none | argset(var value)->(var name) | argvalue->argname | argset(var value)->(var name) | ### tensorlife | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| renametensor | none | renametensor(tensor t, var new_name)->() | rename T1 to T2 | renametensor(tensor t, var new_name)->() | -| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | +| renametensor | none | renametensor(var new_name)->(tensor t) | rename(newname)->T1 | renametensor(var new_name)->(tensor t) | +| newtensor | none | newtensor(vector shape)->(tensor t) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor t) | | newtensor | none | newtensor(var shape)->(tensor t) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor t) | -| deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | -| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | +| deltensor | none | deltensor()->(tensor t) | del->T1 | deltensor()->(tensor t) | +| copytensor | none | copytensor(tensor src)->(tensor dst) | T1.data->T2.data | copytensor(tensor src)->(tensor dst) | ### io | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| +| loadtensordata | none | loadtensordata(var path)->(tensor t) | loadtensordata(path)->tensor.data | loadtensordata(var path)->(tensor t) | | save | none | save(tensor t, var path)->() | save(T1,path) | save(tensor t, var path)->() | | print | miaobyte | print(tensor t)->() | print(T1) | print(tensor t)->() | | print | miaobyte | print(tensor t, var format)->() | print(T1) | print(tensor t, var format)->() | | load | none | load(var path)->() | mem.load(path) | load(var path)->() | +### matmul + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +| matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | + ### init | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| normal | miaobyte | normal(tensor t, var mean, var std, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var std, var seed)->() | -| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | -| constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | +| normal | miaobyte | normal(var mean, var std, var seed)->(tensor t) | normal(mean,stddev,seed)->T1 | normal(var mean, var std, var seed)->(tensor t) | +| uniform | miaobyte | uniform(var low, var high, var seed)->(tensor t) | uniform(low,high,seed)->T1 | uniform(var low, var high, var seed)->(tensor t) | +| arange | miaobyte | arange(var start, var step)->(tensor t) | arange(start,step)->T1 | arange(var start, var step)->(tensor t) | +| constant | miaobyte | constant(var value)->(tensor t) | constant(value)->T1 | constant(var value)->(tensor t) | ### elementwise @@ -61,21 +69,14 @@ | sub | miaobyte | sub(tensor a, tensor b)->(tensor c) | T3=T1-T2 | sub(tensor a, tensor b)->(tensor c) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | +| exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | +| mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | | equal | miaobyte | equal(tensor A, tensor B)->(tensor mask) | mask=equal(T1,T2) | equal(tensor A, tensor B)->(tensor mask) | | mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor C) | | div | miaobyte | div(tensor A, tensor B)->(tensor C) | T3=T1/T2 | div(tensor A, tensor B)->(tensor C) | | invert | miaobyte | invert(tensor A)->(tensor C) | T3=~T1 | invert(tensor A)->(tensor C) | | max | miaobyte | max(tensor A, tensor B)->(tensor C) | T3=max(T1,T2) | max(tensor A, tensor B)->(tensor C) | | pow | miaobyte | pow(tensor A, tensor B)->(tensor C) | T3=T1^T2 | pow(tensor A, tensor B)->(tensor C) | -| mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | -| exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | - -### matmul - -| Operation | Author | Func Def | Math Formula | IR Instruction | -|-----------|--------|------------|--------------|----------------| -| matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | -| matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | ### reduce diff --git a/excuter/cpp-common/src/deepx/tensor.hpp b/excuter/cpp-common/src/deepx/tensor.hpp index fc33ecde..c5a6ca55 100644 --- a/excuter/cpp-common/src/deepx/tensor.hpp +++ b/excuter/cpp-common/src/deepx/tensor.hpp @@ -31,7 +31,7 @@ namespace deepx using SaveFn = void (*)(T *,size_t,const std::string &); SaveFn saver; // 保存内存 - using LoadFn = int (*)(const std::string &, T *,int); + using LoadFn = void (*)(const std::string &, T *,int); LoadFn loader; // 加载内存 Tensor() = default; diff --git a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp index ae4876bf..73542616 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp @@ -16,19 +16,24 @@ namespace deepx::tensorfunc{ printDispatcher::print(t, f); } - template - void save(const Tensor &tensor,const std::string &path); - - + + inline void saveShape(const Shape &shape,const std::string &tensorPath){ + std::string shapepath = tensorPath + ".shape"; + std::string shapedata = shape.toYaml(); + std::ofstream shape_fs(shapepath, std::ios::binary); + shape_fs.write(shapedata.c_str(), shapedata.size()); + shape_fs.close(); + } - //load - template - pair>> load(const std::string &path); - + inline pair loadShape(const std::string &path) { std::string shapepath = path + ".shape"; std::ifstream shape_fs(shapepath, std::ios::binary); + if (!shape_fs.is_open()) + { + throw std::runtime_error("Failed to open shape file: " + shapepath); + } std::string shapedata((std::istreambuf_iterator(shape_fs)), std::istreambuf_iterator()); Shape shape; shape.fromYaml(shapedata); @@ -36,9 +41,7 @@ namespace deepx::tensorfunc{ std::string tensor_name = filename.substr(0, filename.find_last_of('.')); return std::make_pair(tensor_name, shape); } - //对loaddata,不同excuter的实现不同。gpu具有显存,可以直接从文件到显存。 - template - void loadData(const std::string &path,Tensor &tensor); + } #endif // DEEPX_TENSORFUNC_IO_HPP diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index 4e9095ca..c98748e2 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -52,26 +52,31 @@ namespace deepx::tf { Param("tensor1", DataCategory::Tensor, Precision::Any), }))); - //copytensor + // copytensor tffactory.add_tf(std::make_shared(vector( - { - Param("src", DataCategory::Tensor, Precision::Any), - Param("dst", DataCategory::Tensor, Precision::Any), - }), - vector())); - //deltensor - tffactory.add_tf(std::make_shared(vector( + { + Param("src", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("dst", DataCategory::Tensor, Precision::Any), + }))); + // deltensor + tffactory.add_tf(std::make_shared(vector(), + vector( { Param("t", DataCategory::Tensor, Precision::Any), - }), - vector())); - //renametensor + }))); + // renametensor tffactory.add_tf(std::make_shared(vector( - { - Param("t", DataCategory::Tensor, Precision::Any), - Param("new_name", DataCategory::Var, Precision::String), - }), - vector())); + { + + Param("new_name", DataCategory::Var, Precision::String), + }), + vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + }))); } // init @@ -80,34 +85,42 @@ namespace deepx::tf tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("value", DataCategory::Var, Precision::Any), }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("start", DataCategory::Var, Precision::Any), Param("step", DataCategory::Var, Precision::Any), }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("low", DataCategory::Var, Precision::Any), Param("high", DataCategory::Var, Precision::Any), Param("seed", DataCategory::Var, Precision::Int32), }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); tffactory.add_tf(std::make_shared>(vector( - { - Param("t", DataCategory::Tensor, Precision::Any), - Param("mean", DataCategory::Var, Precision::Any), - Param("stddev", DataCategory::Var, Precision::Any), - Param("seed", DataCategory::Var, Precision::Int32), - }), - vector())); + { + + Param("mean", DataCategory::Var, Precision::Any), + Param("stddev", DataCategory::Var, Precision::Any), + Param("seed", DataCategory::Var, Precision::Int32), + }), + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); } // io void register_io(TfFactory &opfactory) @@ -126,18 +139,26 @@ namespace deepx::tf vector())); opfactory.add_tf(std::make_shared(vector( - { - Param("t", DataCategory::Tensor, Precision::Any), - Param("path", DataCategory::Var, Precision::String), - }), - vector())); + { + Param("t", DataCategory::Tensor, Precision::Any), + Param("path", DataCategory::Var, Precision::String), + }), + vector())); opfactory.add_tf(std::make_shared(vector( - { - Param("path", DataCategory::Var, Precision::String), - }), - vector())); - + { + Param("path", DataCategory::Var, Precision::String), + }), + vector())); + // loadtensordata + opfactory.add_tf(std::make_shared(vector( + { + Param("path", DataCategory::Var, Precision::String), + }), + vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + }))); } // elementwise @@ -234,16 +255,16 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); - //invert + // invert tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8), - }), - vector( - { - Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8), - }))); - + { + Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8), + }))); + tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Float64 | Precision::Float32 | Precision::Float16 | Precision::BFloat16), @@ -271,17 +292,17 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Float64 | Precision::Float32), }))); - //rpowscalar + // rpowscalar tffactory.add_tf(std::make_shared>(vector( - { - Param("scalar", DataCategory::Var, Precision::Float64 | Precision::Int32), - Param("A", DataCategory::Tensor, Precision::Float64 | Precision::Float32), - }), - vector( - { - Param("C", DataCategory::Tensor, Precision::Float64 | Precision::Float32), - }))); - //log + { + Param("scalar", DataCategory::Var, Precision::Float64 | Precision::Int32), + Param("A", DataCategory::Tensor, Precision::Float64 | Precision::Float32), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Float64 | Precision::Float32), + }))); + // log tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Float64 | Precision::Float32 | Precision::Float16 | Precision::BFloat16), @@ -358,77 +379,77 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); - //equal + // equal tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("B", DataCategory::Tensor, Precision::Any), + Param("epsilon", DataCategory::Var, Precision::Float64), + }), + vector( + { + Param("mask", DataCategory::Tensor, Precision::Bool), + }))); + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("scalar", DataCategory::Var, Precision::Any), + Param("epsilon", DataCategory::Var, Precision::Float64), + }), + vector( + { + Param("mask", DataCategory::Tensor, Precision::Bool), + }))); + // less + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("B", DataCategory::Tensor, Precision::Any), + }), + vector( + { + Param("mask", DataCategory::Tensor, Precision::Bool), + }))); + // lessscalar + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("scalar", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("mask", DataCategory::Tensor, Precision::Bool), + }))); + // greater + tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), Param("B", DataCategory::Tensor, Precision::Any), - Param("epsilon", DataCategory::Var, Precision::Float64), }), vector( { Param("mask", DataCategory::Tensor, Precision::Bool), }))); - tffactory.add_tf(std::make_shared>(vector( + // greaterscalar + tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), Param("scalar", DataCategory::Var, Precision::Any), - Param("epsilon", DataCategory::Var, Precision::Float64), }), vector( { Param("mask", DataCategory::Tensor, Precision::Bool), }))); - //less - tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("B", DataCategory::Tensor, Precision::Any), - }), - vector( - { - Param("mask", DataCategory::Tensor, Precision::Bool), - }))); - //lessscalar - tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("scalar", DataCategory::Var, Precision::Any), - }), - vector( - { - Param("mask", DataCategory::Tensor, Precision::Bool), - }))); - //greater - tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("B", DataCategory::Tensor, Precision::Any), - }), - vector( - { - Param("mask", DataCategory::Tensor, Precision::Bool), - }))); - //greaterscalar - tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("scalar", DataCategory::Var, Precision::Any), - }), - vector( - { - Param("mask", DataCategory::Tensor, Precision::Bool), - }))); - //switch - tffactory.add_tf(std::make_shared>(vector( - { - Param("tensors", DataCategory::ListTensor, Precision::Any), - Param("cases", DataCategory::Tensor,Precision::Int8), - }), - vector( - { - Param("result", DataCategory::Tensor, Precision::Any), - }))); + // switch + tffactory.add_tf(std::make_shared>(vector( + { + Param("tensors", DataCategory::ListTensor, Precision::Any), + Param("cases", DataCategory::Tensor, Precision::Int8), + }), + vector( + { + Param("result", DataCategory::Tensor, Precision::Any), + }))); } // matmul void register_matmul(TfFactory &tffactory) @@ -458,96 +479,96 @@ namespace deepx::tf }))); // transpose tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("dim_order", DataCategory::Vector, Precision::Int32), - }), - vector( - { - Param("C", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("dim_order", DataCategory::Vector, Precision::Int32), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); // concat tffactory.add_tf(std::make_shared>(vector( - { - Param("tensors", DataCategory::ListTensor, Precision::Any), - Param("dim", DataCategory::Var, Precision::Int32), - }), - vector( - { - Param("result", DataCategory::Tensor, Precision::Any), - }))); + { + Param("tensors", DataCategory::ListTensor, Precision::Any), + Param("dim", DataCategory::Var, Precision::Int32), + }), + vector( + { + Param("result", DataCategory::Tensor, Precision::Any), + }))); // broadcastTo tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("new_shape", DataCategory::Vector, Precision::Int32), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("new_shape", DataCategory::Vector, Precision::Int32), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); // indexselect tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("indices", DataCategory::Tensor, Precision::Int64|Precision::Int32), - Param("axis", DataCategory::Var, Precision::Int32), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("indices", DataCategory::Tensor, Precision::Int64 | Precision::Int32), + Param("axis", DataCategory::Var, Precision::Int32), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); } - // reduce - void register_reduce(TfFactory &tffactory) - { + // reduce + void register_reduce(TfFactory &tffactory) + { // sum tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("dims", DataCategory::Vector, Precision::Int32), - Param("keepdims", DataCategory::Var, Precision::Bool), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("dims", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); // prod tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("dims", DataCategory::Vector, Precision::Int32), - Param("keepdims", DataCategory::Var, Precision::Bool), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("dims", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); // max tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("dims", DataCategory::Vector, Precision::Int32), - Param("keepdims", DataCategory::Var, Precision::Bool), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("dims", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); // min tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("dims", DataCategory::Vector, Precision::Int32), - Param("keepdims", DataCategory::Var, Precision::Bool), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("dims", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); } - + int register_all(TfFactory &tffactory) { register_lifecycle(tffactory); diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 5c606393..4d0a096f 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -75,22 +75,15 @@ namespace deepx::tensorfunc } }; + //load template - void save(Tensor &tensor, const std::string &path) - { - // 统一分配CPU内存 - unsigned char* device_data=reinterpret_cast(tensor.data); - auto [size,host_data]= device_offload(device_data,tensor.shape.bytes()); - stdutil::save(host_data.get(),size,path); - }; - - template - pair>> load(const std::string &path) + pair>> load(const std::string &path) { // 加载shape - pair shape_name = loadShape(path); - Shape shape = shape_name.second; - std::string tensor_name = shape_name.first; + pair shape_name=loadShape(path); + Shape shape=shape_name.second; + std::string tensor_name=shape_name.first; + // 检查T 和 shape.dtype 是否匹配 if (shape.dtype != precision()) @@ -98,23 +91,11 @@ namespace deepx::tensorfunc throw std::runtime_error("调用load<" + precision_str(shape.dtype) + "> 不匹配: 需要 " + precision_str(shape.dtype) + " 类型,但文件为" + precision_str(precision()) + " 类型"); } - - // 检查file.size,是否是tensor.size*sizeof(T) - std::string datapath = path + ".data"; - auto [fileSize,hostdata]=stdutil::load(datapath); - if(fileSize!=shape.bytes()){ - throw std::runtime_error("数据文件大小不足: 需要 " + std::to_string(shape.bytes()) + - " 字节,但文件只有 " + std::to_string(fileSize) + " 字节"); - } - T *host_data=reinterpret_cast(hostdata.get()); + shared_ptr> tensor = make_shared>(New(shape.shape)); - - cudaError_t err = cudaMemcpy(tensor->data, host_data, fileSize, cudaMemcpyHostToDevice); - if (err != cudaSuccess) - { - throw std::runtime_error("Failed to copy data from host to device"); - } + tensor->loader(path,tensor->data,tensor->shape.size); return std::make_pair(tensor_name, tensor); - } + }; + } #endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp index 5e0c96c0..7334301a 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/tensorlife_miaobyte.hpp @@ -44,13 +44,14 @@ namespace deepx::tensorfunc int64_t total_bytes = size * sizeof(T); // 统一分配CPU内存 - auto [host_data, err] = device_offload(tensorData, total_bytes); + + auto [_,host_data] = device_offload(reinterpret_cast(tensorData), total_bytes); stdutil::save(host_data.get(), total_bytes, path); } // 不做任何转换,直接从内存到文件,或从文件到内存 template - static int loadFn(const std::string &path, T *data, int count) + static void loadFn(const std::string &path, T *data, int count) { auto [file_size, hostdata] = stdutil::load(path); if (file_size != count * sizeof(T)) @@ -64,7 +65,6 @@ namespace deepx::tensorfunc { throw std::runtime_error("Failed to copy data from host to device"); } - return count; } template @@ -76,6 +76,9 @@ namespace deepx::tensorfunc tensor.deleter = freeFn; tensor.copyer = copyFn; tensor.newer = newFn; + tensor.saver = saveFn; + tensor.loader = loadFn; + tensor.data = newFn(shape.size); return tensor; diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp b/excuter/op-mem-cuda/src/deepx/tf/io.hpp index 64466b15..cb0a52b9 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -78,17 +78,64 @@ namespace deepx::tf { string name = this->args[0].textvalue; string path = this->args[1].textvalue; - if (mem->existstensor(name)) - { - auto t = mem->gettensor(name); - tensorfunc::save(*t, path); - } - else + if (!mem->existstensor(name)) { + std::cerr << "save " << name << " not found" << std::endl; error = "save " + name + " not found"; return 1; } + Precision dtype = mem->gettensor(name)->shape.dtype; + switch (dtype) + { + case Precision::Float64:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Float32:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Float16:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::BFloat16:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int64:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int32:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int16:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int8:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Bool:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + default: + break; + } return 0; } }; @@ -202,59 +249,61 @@ namespace deepx::tf { string path = this->args[0].textvalue; string tensorname = this->returns[0].textvalue; - if(!mem->existstensor(tensorname)) - { - error = "loadtensor " + tensorname + " not exists"; - return 1; - } + pair shape_name=tensorfunc::loadShape(path); std::string tensor_name=shape_name.first; Shape shape=shape_name.second; + + if(mem->existstensor(tensor_name)) + { + cout<<"warning: "<delete_tensor(tensor_name); + } switch (shape.dtype) { case Precision::Float64:{ - pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + pair>> t = tensorfunc::load(path); + mem->addtensor(tensor_name, t.second); break; } case Precision::Float32:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::Float16:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::BFloat16:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::Int64:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::Int32:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::Int16:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::Int8:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } case Precision::Bool:{ pair>> t = tensorfunc::load(path); - mem->gettensor(tensorname)->copyer(t.second->data,mem->gettensor(tensorname)->data,t.second->shape.size); + mem->addtensor(tensor_name, t.second); break; } default: diff --git a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp index 620b81e1..d21cb60f 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp @@ -145,62 +145,64 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override { - if (!checktensors({this->args[0].textvalue, this->args[1].textvalue}, mem, error) != 0) + if (!checktensors({this->args[0].textvalue, this->returns[0].textvalue}, mem, error) != 0) { return 1; } Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - Precision type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; if (input_type != type) { error = "copytensor: input type and return type must be the same"; return 1; } - switch (input_type) + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + switch (type) { case Precision::Float64: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Float32: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Float16: { - tensorfunc::copy(*mem->gettensor<__half>(this->args[0].textvalue), *mem->gettensor<__half>(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::BFloat16: { - tensorfunc::copy(*mem->gettensor<__nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<__nv_bfloat16>(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int64: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int32: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int16: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int8: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Bool: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + dst->copyer(src->data, dst->data, src->shape.size); break; } default: @@ -208,7 +210,7 @@ namespace deepx::tf error = "copytensor: unsupported precision"; return 1; } - }; + } return 0; } @@ -234,14 +236,14 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; mem->delete_tensor(name); return 0; } string math_formula() const override { - return "del T1"; + return "del->T1"; } shared_ptr clone() const override { @@ -249,33 +251,33 @@ namespace deepx::tf } }; - //rename + // rename class RenameTensor : public TF { public: RenameTensor(vector args, vector returns) { - this->name = "renametensor"; + this->name = "renametensor"; this->tftype = "tensorlife"; this->args = args; this->returns = returns; } - int run(shared_ptr mem, string &error) override + int run(shared_ptr mem, string &error) override { - string old_name = this->args[0].textvalue; - if (!checktensors({this->args[0].textvalue}, mem, error) != 0) + string old_name = this->returns[0].textvalue; + if (!checktensors({old_name}, mem, error) != 0) { return 1; } - string new_name = this->args[1].textvalue; - + string new_name = this->args[0].textvalue; + mem->rename_tensor(old_name, new_name); return 0; } string math_formula() const override { - return "rename T1 to T2"; + return "rename(newname)->T1"; } shared_ptr clone() const override { diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 07df04bd..5080a673 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -44,7 +44,7 @@ namespace deepx::tf }), vector( { - Param("tensor1", DataCategory::Tensor, Precision::Any), + Param("t", DataCategory::Tensor, Precision::Any), }))); // newtensor author=miaobyte tffactory.add_tf(std::make_shared(vector( @@ -59,22 +59,28 @@ namespace deepx::tf tffactory.add_tf(std::make_shared(vector( { Param("src", DataCategory::Tensor, Precision::Any), - Param("dst", DataCategory::Tensor, Precision::Any), - }), - vector())); + }), + vector({ + Param("dst", DataCategory::Tensor, Precision::Any), + + }))); // deltensor tffactory.add_tf(std::make_shared(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); //renametensor tffactory.add_tf(std::make_shared(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("new_name", DataCategory::Var, Precision::String), }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); } // init @@ -83,36 +89,47 @@ namespace deepx::tf // constant author=miaobyte tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("value", DataCategory::Var, Precision::Any), }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); // arange author=miaobyte tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("start", DataCategory::Var, Precision::Any), Param("step", DataCategory::Var, Precision::Any), }), - vector())); + vector({ + Param("t", DataCategory::Tensor, Precision::Any), + }))); // uniform author=miaobyte tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("low", DataCategory::Var, Precision::Any), Param("high", DataCategory::Var, Precision::Any), Param("seed", DataCategory::Var, Precision::Int32), }), - vector())); + vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + } + ))); // normal author=miaobyte tffactory.add_tf(std::make_shared>(vector( { - Param("t", DataCategory::Tensor, Precision::Any), + Param("mean", DataCategory::Var, Precision::Any), Param("std", DataCategory::Var, Precision::Any), Param("seed", DataCategory::Var, Precision::Int32), }), - vector())); + vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + }))); } // io void register_io(TfFactory &opfactory) @@ -143,7 +160,16 @@ namespace deepx::tf { Param("path", DataCategory::Var, Precision::String), }), - vector())); + vector())); + //loadtensordata + opfactory.add_tf(std::make_shared(vector( + { + Param("path", DataCategory::Var, Precision::String), + }), + vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + }))); } // elementwise diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp index 38c12f32..784fe767 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp @@ -36,34 +36,8 @@ namespace deepx::tensorfunc } }; - template - void save(Tensor &tensor, const std::string &path) - { - - // 保存shape - std::string shapepath = path + ".shape"; - std::string shapedata = tensor.shape.toYaml(); - std::ofstream shape_fs(shapepath, std::ios::binary); - shape_fs.write(shapedata.c_str(), shapedata.size()); - shape_fs.close(); - - // 保存data - std::string datapath = path + ".data"; - std::ofstream data_fs(datapath, std::ios::binary | std::ios::in | std::ios::out); - - if (!data_fs.is_open()) - { - // 如果文件不存在,则创建新文件 - data_fs.open(datapath, std::ios::binary | std::ios::out); - } - int data_size = tensor.shape.size * precision_bits(tensor.shape.dtype) / 8; - data_fs.write(reinterpret_cast(tensor.data), data_size); - data_fs.close(); - } //load - - template pair>> load(const std::string &path) { @@ -79,25 +53,9 @@ namespace deepx::tensorfunc throw std::runtime_error("调用load<" + precision_str(shape.dtype) + "> 不匹配: 需要 " + precision_str(shape.dtype) + " 类型,但文件为" + precision_str(precision()) + " 类型"); } - - // 检查file.size,是否是tensor.size*sizeof(T) - std::string datapath = path + ".data"; - std::ifstream data_fs(datapath, std::ios::binary); - data_fs.seekg(0, std::ios::end); - std::streamsize fileSize = data_fs.tellg(); - std::streamsize expectedSize = shape.size * (precision_bits(shape.dtype) / 8); - - if (fileSize != expectedSize) - { - throw std::runtime_error("数据文件大小不足: 需要 " + std::to_string(expectedSize) + - " 字节,但文件只有 " + std::to_string(fileSize) + " 字节"); - } - data_fs.seekg(0); - - // 创建tensor + shared_ptr> tensor = make_shared>(New(shape.shape)); - data_fs.read(reinterpret_cast(tensor->data), fileSize); - data_fs.close(); + tensor->loader(path,tensor->data,tensor->shape.size); return std::make_pair(tensor_name, tensor); }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp index b1bc509e..ca177a4a 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/tensorlife_miaobyte.hpp @@ -1,6 +1,7 @@ #ifndef DEEPX_TENSORFUNC_TENSORLIFE_MIAOBYTE_HPP #define DEEPX_TENSORFUNC_TENSORLIFE_MIAOBYTE_HPP +#include "stdutil/fs.hpp" #include "deepx/tensorfunc/tensorlife.hpp" #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensor.hpp" @@ -18,17 +19,33 @@ namespace deepx::tensorfunc } template - static void dataFree(T *data) + static void freeFn(T *data) { MemoryPool::Free(data); } template - static void dataCopy(T *data, T *data2, int size) + static void copyFn(T *data, T *data2, int size) { std::copy(data, data + size, data2); } + template + static void saveFn(T *data, size_t size, const std::string &path) + { + unsigned char *udata = reinterpret_cast(data); + size_t udatasize = size * sizeof(T); + stdutil::save(udata,udatasize,path); + } + + + template + static void loadFn(const std::string &path, T *data, int size) + { + unsigned char *udata = reinterpret_cast(data); + size_t udatasize = size * sizeof(T); + stdutil::load(path,udata,udatasize); + } // New template Tensor New(const std::vector &shapedata) @@ -37,9 +54,13 @@ namespace deepx::tensorfunc shape.dtype = precision(); Tensor tensor(shape); - tensor.deleter = dataFree; - tensor.copyer = dataCopy; + tensor.deleter = freeFn; + tensor.copyer = copyFn; tensor.newer = newFn; + tensor.saver = saveFn; + tensor.loader = loadFn; + + tensor.data = newFn(shape.size); return tensor; }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index eb540f32..e964bc87 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -78,17 +78,56 @@ namespace deepx::tf { string name = this->args[0].textvalue; string path = this->args[1].textvalue; - if (mem->existstensor(name)) - { - auto t = mem->gettensor(name); - tensorfunc::save(*t, path); - } - else + if (!mem->existstensor(name)) { + std::cerr << "save " << name << " not found" << std::endl; error = "save " + name + " not found"; return 1; } + Precision dtype = mem->gettensor(name)->shape.dtype; + tensorfunc::saveShape(mem->gettensor(name)->shape,path); + switch (dtype) + { + case Precision::Float64:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Float32:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + + case Precision::Int64:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int32:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int16:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Int8:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + case Precision::Bool:{ + auto t = mem->gettensor(name); + t->saver(t->data,t->shape.size,path); + break; + } + default: + break; + } return 0; } }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp index 8d4d4f23..ba97ad7e 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp @@ -137,52 +137,66 @@ namespace deepx::tf int run(shared_ptr mem, string &error) override { - if (!checktensors({this->args[0].textvalue, this->args[1].textvalue}, mem, error) != 0) + if (!checktensors({this->args[0].textvalue, this->returns[0].textvalue}, mem, error) != 0) { return 1; } - Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; - Precision type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; - if (input_type != type) + Precision src_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision dst_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (src_type != dst_type) { error = "copytensor: input type and return type must be the same"; return 1; } - switch (input_type) + switch (src_type) { case Precision::Float64: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } case Precision::Float32: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } case Precision::Int64: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } case Precision::Int32: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } case Precision::Int16: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } case Precision::Int8: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } case Precision::Bool: { - tensorfunc::copy(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue)); + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); + dst->copyer(src->data,dst->data,src->shape.size); break; } default: @@ -196,7 +210,7 @@ namespace deepx::tf string math_formula() const override { - return "T2.data = T1.data"; + return "T1.data->T2.data"; } shared_ptr clone() const override { @@ -216,14 +230,14 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override { - string name = this->args[0].textvalue; + string name = this->returns[0].textvalue; mem->delete_tensor(name); return 0; } string math_formula() const override { - return "del T1"; + return "del->T1"; } shared_ptr clone() const override { @@ -244,18 +258,18 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override { - string old_name = this->args[0].textvalue; - if (!checktensors({this->args[0].textvalue}, mem, error) != 0) + string old_name = this->returns[0].textvalue; + if (!checktensors({old_name}, mem, error) != 0) { return 1; } - string new_name = this->args[1].textvalue; + string new_name = this->args[0].textvalue; mem->rename_tensor(old_name, new_name); return 0; } string math_formula() const override { - return "rename T1 to T2"; + return "rename(newname)->T1"; } shared_ptr clone() const override { diff --git a/front/py/deepx/nn/functional/__init__.py b/front/py/deepx/nn/functional/__init__.py index 93825af3..4a17d8ca 100644 --- a/front/py/deepx/nn/functional/__init__.py +++ b/front/py/deepx/nn/functional/__init__.py @@ -18,7 +18,7 @@ __all__ = [ #leaffunc - "newtensor","printtensor","load", #life + "newtensor","rnewtensor","printtensor","load", #life "printtensor","save",#io "constant","constant_","full","zeros","ones","uniform","uniform_","arange","arange_","kaiming_uniform","kaiming_uniform_","calculate_fan_in_and_fan_out", "add","sub","mul","div","sqrt","pow","exp","log", diff --git a/front/py/deepx/nn/functional/leaffunc_io.py b/front/py/deepx/nn/functional/leaffunc_io.py index 9797eea9..77dbd9f7 100644 --- a/front/py/deepx/nn/functional/leaffunc_io.py +++ b/front/py/deepx/nn/functional/leaffunc_io.py @@ -10,3 +10,7 @@ def save(t:Tensor,path:str): from .rtf_io import rtf_save rtf_save(t,path) return t + +def loadData(t:Tensor,path:str)->Tensor: + from .rtf_io import rtf_loadtensordata + return rtf_loadtensordata(t,path) \ No newline at end of file diff --git a/front/py/deepx/nn/functional/rtf_init.py b/front/py/deepx/nn/functional/rtf_init.py index bc46205c..2fbcde8e 100644 --- a/front/py/deepx/nn/functional/rtf_init.py +++ b/front/py/deepx/nn/functional/rtf_init.py @@ -6,26 +6,29 @@ def rtf_constant(t:Tensor,value:Union[float,int]=0,author='miaobyte')->Tensor: - A_scalar_op("constant",t,value,author) + args=[Param.varnum(value)] + returns=[Param.tensor(t)] + ir=DeepxIR("constant", args, returns,author) + send(ir) return t def rtf_arange(t:Tensor,start:Optional[Union[float,int]]=0,step:Optional[Union[float,int]]=1,author='miaobyte')->Tensor: - args=[Param.tensor(t),Param.varnum(start),Param.varnum(step)] - returns=[] + args=[Param.varnum(start),Param.varnum(step)] + returns=[Param.tensor(t)] ir=DeepxIR("arange", args, returns,author) send(ir) return t def rtf_uniform(t:Tensor,low=0, high=1,seed:int=0,author='miaobyte')->Tensor: - args=[Param.tensor(t),Param.varnum(low),Param.varnum(high),Param.varnum(seed)] - returns=[] + args=[Param.varnum(low),Param.varnum(high),Param.varnum(seed)] + returns=[Param.tensor(t)] ir=DeepxIR("uniform", args, returns,author) send(ir) return t def rtf_normal(t:Tensor,mean:float=0, stddev:float=1,seed:int=0,author='miaobyte')->Tensor: - args=[Param.tensor(t),Param.varnum(mean),Param.varnum(stddev),Param.varnum(seed)] - returns=[] + args=[Param.varnum(mean),Param.varnum(stddev),Param.varnum(seed)] + returns=[Param.tensor(t)] ir=DeepxIR("normal", args, returns,author) send(ir) return t \ No newline at end of file diff --git a/front/py/deepx/nn/functional/rtf_io.py b/front/py/deepx/nn/functional/rtf_io.py index 918f63de..f5066a7a 100644 --- a/front/py/deepx/nn/functional/rtf_io.py +++ b/front/py/deepx/nn/functional/rtf_io.py @@ -24,3 +24,10 @@ def rtf_load(path:str)->Tensor: shapefile=path+'.shape' tensor_name,shape,dtype=loadShape(shapefile) return Tensor(shape.shape,dtype,tensor_name) + +def rtf_loadtensordata(t:Tensor,path:str)->Tensor: + args=[Param.varstr(path)] + returns=[Param.tensor(t)] + ir=DeepxIR("loadtensordata", args, returns) + send(ir) + return t \ No newline at end of file diff --git a/front/py/deepx/nn/functional/rtf_life.py b/front/py/deepx/nn/functional/rtf_life.py index 014cd505..21547b99 100644 --- a/front/py/deepx/nn/functional/rtf_life.py +++ b/front/py/deepx/nn/functional/rtf_life.py @@ -10,19 +10,19 @@ def rtf_newtensor(t:Tensor): def rtf_copytensor(t:Tensor,out:Tensor): - args=[Param.tensor(t),Param.tensor(out)] - returns=[] + args=[Param.tensor(t)] + returns=[Param.tensor(out)] ir=DeepxIR("copytensor", args, returns,'') send(ir) def rtf_deltensor(t:Tensor): - args=[Param.tensor(t)] - returns=[] + args=[] + returns=[Param.tensor(t)] ir=DeepxIR("deltensor", args, returns,'') send(ir) def rtf_renametensor(t:Tensor,new_name:str): - args=[Param.tensor(t),Param.varstr(new_name)] - returns=[] + args=[Param.varstr(new_name)] + returns=[Param.tensor(t)] ir=DeepxIR("renametensor", args, returns,'') send(ir) diff --git a/front/py/deepx/tensor/io.py b/front/py/deepx/tensor/io.py index 35e3b0f7..45e14a1f 100644 --- a/front/py/deepx/tensor/io.py +++ b/front/py/deepx/tensor/io.py @@ -1,6 +1,6 @@ import yaml import os -from deepx.tensor import Shape +from deepx.tensor import Shape,Tensor,tensor_method def loadShape(path:str)->tuple[str,Shape,str]: filename = os.path.basename(path) @@ -12,11 +12,14 @@ def loadShape(path:str)->tuple[str,Shape,str]: tensor_name = filename[:-6] # 移除'.shape'后缀 return (tensor_name,Shape(tuple(shape['shape'])),shape['dtype']) +@tensor_method +def loadData(self,path:str): + from deepx.nn.functional import loadData as loadData_func + loadData_func(self,path) + +@tensor_method +def save(self,path:str): + from deepx.nn.functional import save as save_func + save_func(self,path) -def saveShape(t:Shape,path:str): - if path.endswith('.shape'): - with open(path, 'w') as f: - yaml.dump({'shape': list(t.shape), 'dtype': t._dtype,'size':t.numel(),'dim':t.ndim,'stride':list(t.stride)}, f) - else: - raise ValueError("文件名必须以.shape结尾") - + diff --git a/front/py/deepx/tensor/shape.py b/front/py/deepx/tensor/shape.py index 4ce87937..1098586d 100644 --- a/front/py/deepx/tensor/shape.py +++ b/front/py/deepx/tensor/shape.py @@ -179,4 +179,11 @@ def reduceshape(cls,shape:tuple[int,...],dim:tuple[int,...],keepdim:bool)->tuple @classmethod def indexselectshape(cls,input_shape:tuple[int,...],index_shape:tuple[int,...],gatheraxis:int)->tuple[int,...]: return input_shape[:gatheraxis]+index_shape+input_shape[gatheraxis+1:] - \ No newline at end of file + + def save(self,path:str): + if path.endswith('.shape'): + import yaml + with open(path, 'w') as f: + yaml.dump({'shape': list(self.shape), 'dtype': self._dtype,'size':self.numel(),'dim':self.ndim,'stride':list(self.stride)}, f) + else: + raise ValueError("文件名必须以.shape结尾") \ No newline at end of file diff --git a/front/py/examples/1_tensor/1_clone.py b/front/py/examples/1_tensor/1_clone.py index 78654fb5..e0b2fcd4 100644 --- a/front/py/examples/1_tensor/1_clone.py +++ b/front/py/examples/1_tensor/1_clone.py @@ -1,9 +1,11 @@ -from deepx.tensor import Tensor + +from deepx import Tensor,newtensor,rnewtensor def clonetest(): t1=Tensor(shape=(1,2,3),dtype='float32',name='t1') + rnewtensor(t1) t2=t1.clone(name='t2') - print(t2) + t2.print() if __name__ == "__main__": clonetest() \ No newline at end of file diff --git a/front/py/examples/1_tensor/1_copy.py b/front/py/examples/1_tensor/1_copy.py index 2afeddcb..244d4bfa 100644 --- a/front/py/examples/1_tensor/1_copy.py +++ b/front/py/examples/1_tensor/1_copy.py @@ -4,8 +4,9 @@ def copytest(): from deepx.nn.functional import newtensor t1= newtensor(1, 2, 3,name='t1') t2= newtensor(1, 2, 3,name='t2') + t1.print() t1.copy_to(t2) - print(t2) + t2.print() if __name__ == "__main__": diff --git a/front/py/examples/1_tensor/1_print.py b/front/py/examples/1_tensor/1_print.py index 1bfebe9c..7a5a205e 100644 --- a/front/py/examples/1_tensor/1_print.py +++ b/front/py/examples/1_tensor/1_print.py @@ -7,7 +7,7 @@ def newtensor(): from deepx.nn.functional import newtensor t=newtensor(1,2,3,name='t') - print(t) + t.print() if __name__ == "__main__": newtensor() diff --git a/front/py/examples/1_tensor/2_saveload.py b/front/py/examples/1_tensor/2_saveload.py index 0b88d544..ec2225d7 100644 --- a/front/py/examples/1_tensor/2_saveload.py +++ b/front/py/examples/1_tensor/2_saveload.py @@ -1,21 +1,22 @@ -from deepx.tensor import Tensor from deepx.nn.functional import arange,save,load def saveloadfloat32(): - t1=arange(start=0,end=60 ,dtype='float32',name='t1').reshape_(3,4,5) + t1=arange(start=0,end=60 ,dtype='float32').reshape_((3,4,5)) dir='/home/lipeng/model/deepxmodel/tester/' + t1.save(dir+'t1') - t2=load(dir+t1.name) + t2=load(dir+'t1') t2.print() def saveloadint8(): - t=arange(start=0,end=60 ,dtype='int8',name='t.int8').reshape_(3,4,5) + t=arange(start=0,end=60 ,dtype='int8').reshape_((3,4,5)) dir='/home/lipeng/model/deepxmodel/tester/' + t.save(dir+'tint8') - t2=load(dir+t.name) + t2=load(dir+"tint8") t2.print() if __name__ == "__main__": - saveloadfloat32() + #saveloadfloat32() saveloadint8() \ No newline at end of file From 21a1dab7679a2a0e49af465fa85163a22c17e4ce Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Fri, 25 Apr 2025 01:11:11 +0800 Subject: [PATCH 4/7] save,load,loadtensordata,loadshape --- excuter/cpp-common/src/stdutil/fs.cpp | 1 + .../src/deepx/tensorfunc/io_miaobyte.hpp | 4 ++-- excuter/op-mem-cuda/src/deepx/tf/io.hpp | 2 ++ .../op-mem-cuda/src/deepx/tf/tensorlife.hpp | 21 +++++++++++++++++-- .../src/deepx/tensorfunc/io_miaobyte.hpp | 2 +- excuter/op-mem-ompsimd/src/deepx/tf/io.hpp | 1 + front/py/deepxutil/numpy/io.py | 4 ++-- front/py/examples/2_ir/2_elementwise_add.py | 4 ++-- .../examples/2_ir/2_elementwise_operator.py | 8 +++---- front/py/examples/2_ir/3_matmul.py | 8 +++---- 10 files changed, 38 insertions(+), 17 deletions(-) diff --git a/excuter/cpp-common/src/stdutil/fs.cpp b/excuter/cpp-common/src/stdutil/fs.cpp index 7f7a57fe..6b30d551 100644 --- a/excuter/cpp-common/src/stdutil/fs.cpp +++ b/excuter/cpp-common/src/stdutil/fs.cpp @@ -29,6 +29,7 @@ namespace stdutil { throw std::runtime_error("Failed to open file: " + path); } + ifs.seekg(0, ios::end); size_t size = ifs.tellg(); ifs.seekg(0, ios::beg); diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 4d0a096f..0967736f 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -91,9 +91,9 @@ namespace deepx::tensorfunc throw std::runtime_error("调用load<" + precision_str(shape.dtype) + "> 不匹配: 需要 " + precision_str(shape.dtype) + " 类型,但文件为" + precision_str(precision()) + " 类型"); } - + shared_ptr> tensor = make_shared>(New(shape.shape)); - tensor->loader(path,tensor->data,tensor->shape.size); + tensor->loader(path+".data",tensor->data,tensor->shape.size); return std::make_pair(tensor_name, tensor); }; diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp b/excuter/op-mem-cuda/src/deepx/tf/io.hpp index cb0a52b9..81d739f2 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -86,6 +86,8 @@ namespace deepx::tf return 1; } Precision dtype = mem->gettensor(name)->shape.dtype; + tensorfunc::saveShape(mem->gettensor(name)->shape,path); + path+=".data"; switch (dtype) { case Precision::Float64:{ diff --git a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp index d21cb60f..0db28933 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp @@ -156,52 +156,69 @@ namespace deepx::tf error = "copytensor: input type and return type must be the same"; return 1; } - auto src = mem->gettensor(this->args[0].textvalue); - auto dst = mem->gettensor(this->returns[0].textvalue); + switch (type) { case Precision::Float64: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Float32: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Float16: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::BFloat16: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int64: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int32: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int16: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Int8: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } case Precision::Bool: { + auto src = mem->gettensor(this->args[0].textvalue); + auto dst = mem->gettensor(this->returns[0].textvalue); dst->copyer(src->data, dst->data, src->shape.size); break; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp index 784fe767..d7c26ccc 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp @@ -55,7 +55,7 @@ namespace deepx::tensorfunc } shared_ptr> tensor = make_shared>(New(shape.shape)); - tensor->loader(path,tensor->data,tensor->shape.size); + tensor->loader(path+".data",tensor->data,tensor->shape.size); return std::make_pair(tensor_name, tensor); }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index e964bc87..d0d9ae5d 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -87,6 +87,7 @@ namespace deepx::tf } Precision dtype = mem->gettensor(name)->shape.dtype; tensorfunc::saveShape(mem->gettensor(name)->shape,path); + path+=".data"; switch (dtype) { case Precision::Float64:{ diff --git a/front/py/deepxutil/numpy/io.py b/front/py/deepxutil/numpy/io.py index ae95a17a..4af9a79d 100644 --- a/front/py/deepxutil/numpy/io.py +++ b/front/py/deepxutil/numpy/io.py @@ -1,4 +1,4 @@ -from deepx.tensor import Shape,saveShape +from deepx.tensor import Shape def save_numpy(t,tensorpath:str): r''' @@ -10,7 +10,7 @@ def save_numpy(t,tensorpath:str): assert isinstance(t,ndarray) shape=Shape(t.shape) shape._dtype=str(t.dtype) - saveShape(shape,tensorpath+".shape") + shape.save(tensorpath+".shape") array = ascontiguousarray(t) array.tofile(tensorpath+'.data') diff --git a/front/py/examples/2_ir/2_elementwise_add.py b/front/py/examples/2_ir/2_elementwise_add.py index ceba8d73..dca1fa93 100644 --- a/front/py/examples/2_ir/2_elementwise_add.py +++ b/front/py/examples/2_ir/2_elementwise_add.py @@ -10,7 +10,7 @@ ############-------DEEPX-------################ -from deepx import Tensor,full +from deepx import full print() @@ -18,4 +18,4 @@ t2 = t1.clone() t3 = t1+t2 t3.add_(0.5) -print(t3) \ No newline at end of file +t3.print() \ No newline at end of file diff --git a/front/py/examples/2_ir/2_elementwise_operator.py b/front/py/examples/2_ir/2_elementwise_operator.py index c5870c79..3d053781 100644 --- a/front/py/examples/2_ir/2_elementwise_operator.py +++ b/front/py/examples/2_ir/2_elementwise_operator.py @@ -25,14 +25,14 @@ t3 = t1.add(t2,out='t3') t4=deepx.full([3,4,5],value=0.5,name='t4') t5=t4.add(t3,out='t5') -print(t5) +t5.print() t6=t1.div(t2,out='t6') -print(t6) +t6.print() t7=t2.rdiv(0.05,out='t7') t7.mul_(2.5) -print(t7) +t7.print() t8=t7.mul(t2,out='t8') -print(t8) +t8.print() diff --git a/front/py/examples/2_ir/3_matmul.py b/front/py/examples/2_ir/3_matmul.py index 3c22593f..205b27c0 100644 --- a/front/py/examples/2_ir/3_matmul.py +++ b/front/py/examples/2_ir/3_matmul.py @@ -1,13 +1,13 @@ benchcnt=100 -from deepx.nn.functional import save_npy +from deepxutil.numpy import save_numpy import numpy as np np_T1 = np.random.randn(1024, 1024).astype(np.float32) np_T2 = np.random.randn(1024, 1024).astype(np.float32) -npy_path = '/home/lipeng/model/deepxmodel/tester/' -save_npy(np_T1,npy_path+'t1') -save_npy(np_T2,npy_path+'t2') +npy_path = '/home/lipeng/model/deepxmodel/matmul/' +save_numpy(np_T1,npy_path+'t1') +save_numpy(np_T2,npy_path+'t2') ############-------PyTorch-------################ From f15f6cc7d0682524677da6ff72cfd9cebf988aee Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Fri, 25 Apr 2025 01:33:39 +0800 Subject: [PATCH 5/7] =?UTF-8?q?reduce:=E7=B3=BB=E5=88=97=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E7=AA=81=E7=84=B6=E5=87=BA=E9=97=AE=E9=A2=98=EF=BC=8C=E5=BE=85?= =?UTF-8?q?=E8=A7=A3=E5=86=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- front/py/deepx/nn/functional/leaffunc_init.py | 37 ++++++++----------- front/py/deepx/scheduler/client/udpconn.py | 2 +- .../examples/2_ir/4_changeshape_broadcast.py | 6 +-- .../py/examples/2_ir/4_changeshape_concat.py | 2 +- .../py/examples/2_ir/4_changeshape_gather.py | 21 ++++++----- .../py/examples/2_ir/4_changeshape_reshape.py | 12 +++--- .../examples/2_ir/4_changeshape_transpose.py | 16 ++++---- .../py/examples/2_ir/5_reduce_sum_keepdim.py | 32 ++++++++-------- front/py/examples/2_ir/5_reduce_sumprod.py | 29 ++++++--------- 9 files changed, 73 insertions(+), 84 deletions(-) diff --git a/front/py/deepx/nn/functional/leaffunc_init.py b/front/py/deepx/nn/functional/leaffunc_init.py index 48160e3d..7711e2be 100644 --- a/front/py/deepx/nn/functional/leaffunc_init.py +++ b/front/py/deepx/nn/functional/leaffunc_init.py @@ -1,7 +1,7 @@ import math import time import os -from .leaffunc_life import newtensor,parse_shape +from .leaffunc_life import newtensor from .rtf_init import * from deepx import Tensor,Number from .authormap import defaultauthor @@ -12,25 +12,20 @@ def constant_(t:Tensor,value: Union[float,int])->Tensor: rtf_constant(t,value,defaultauthor['constant']) - - -def constant(*shape, value:Union[float,int], dtype:str='float32',name:str)->Tensor: - s = parse_shape(shape) - outtensor=newtensor(s,dtype=dtype,name=name) + +def constant(shape:tuple[int,...], value:Union[float,int], dtype:str='float32',name:str=None)->Tensor: + outtensor=newtensor(shape,dtype=dtype,name=name) constant_(outtensor, value) return outtensor -def full(*shape, value:Union[float,int], dtype:str='float32',name:str=None)->Tensor: - s = parse_shape(shape) - return constant(s, value=value, dtype=dtype,name=name) +def full(shape:tuple[int,...], value:Union[float,int], dtype:str='float32',name:str=None)->Tensor: + return constant(shape, value=value, dtype=dtype,name=name) -def zeros(*shape, dtype:str='float32',name:str=None)->Tensor: - s = parse_shape(shape) - return constant(s, value=0, dtype=dtype,name=name) +def zeros(shape:tuple[int,...], dtype:str='float32',name:str=None)->Tensor: + return constant(shape, value=0, dtype=dtype,name=name) -def ones(*shape, dtype:str='float32',name:str=None)->Tensor: - s = parse_shape(shape) - return constant(s, value=1, dtype=dtype,name=name) +def ones(shape:tuple[int,...], dtype:str='float32',name:str=None)->Tensor: + return constant(shape, value=1, dtype=dtype,name=name) def arange_(t:Tensor,start=0,step=1)->Tensor: from .rtf_init import rtf_arange @@ -49,9 +44,8 @@ def uniform_(t:Tensor,low=0, high=1,seed:int=None)->Tensor: from .rtf_init import rtf_uniform rtf_uniform(t,low,high,seed,defaultauthor['uniform']) -def uniform(*shape,low=0, high=1,seed:int=None,dtype:str='float32',name:str=None)->Tensor: - s = parse_shape(shape) - outtensor=newtensor(s,dtype=dtype,name=name) +def uniform(shape:tuple[int,...],low=0, high=1,seed:int=None,dtype:str='float32',name:str=None)->Tensor: + outtensor=newtensor(shape,dtype=dtype,name=name) uniform_(outtensor,low,high,seed) return outtensor @@ -166,7 +160,7 @@ def kaiming_uniform_( bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation return uniform_(tensor,-bound, bound) -def kaiming_uniform(*shape,a:float=0,mode:str='fan_in',nonlinearity:str='leaky_relu',dtype:str='float32',name:str=None,author='miaobyte')->Tensor: +def kaiming_uniform(shape:tuple[int,...],a:float=0,mode:str='fan_in',nonlinearity:str='leaky_relu',dtype:str='float32',name:str=None,author='miaobyte')->Tensor: outtensor=newtensor(shape,dtype=dtype,name=name) kaiming_uniform_(outtensor,a,mode,nonlinearity) return outtensor @@ -178,8 +172,7 @@ def normal_(t:Tensor,mean:float=0, stddev:float=1,seed:int=None)->Tensor: from .rtf_init import rtf_normal rtf_normal(t,mean,stddev,seed,defaultauthor['normal']) -def normal(*shape,mean:float=0, stddev:float=1,seed:int=None,dtype:str='float32',name:str=None,author='miaobyte')->Tensor: - s = parse_shape(shape) - outtensor=newtensor(s,dtype=dtype,name=name) +def normal(shape:tuple[int,...],mean:float=0, stddev:float=1,seed:int=None,dtype:str='float32',name:str=None,author='miaobyte')->Tensor: + outtensor=newtensor(shape,dtype=dtype,name=name) normal_(outtensor,mean,stddev,seed) return outtensor diff --git a/front/py/deepx/scheduler/client/udpconn.py b/front/py/deepx/scheduler/client/udpconn.py index 6a12c26a..a25b0963 100644 --- a/front/py/deepx/scheduler/client/udpconn.py +++ b/front/py/deepx/scheduler/client/udpconn.py @@ -3,7 +3,7 @@ import select class UDPConn: - def __init__(self, endpoint: str = "localhost:9090"): + def __init__(self, endpoint: str = "localhost:8080"): # 解析endpoint self._host, port_str = endpoint.split(':') self._port = int(port_str) diff --git a/front/py/examples/2_ir/4_changeshape_broadcast.py b/front/py/examples/2_ir/4_changeshape_broadcast.py index 9811201e..84199680 100644 --- a/front/py/examples/2_ir/4_changeshape_broadcast.py +++ b/front/py/examples/2_ir/4_changeshape_broadcast.py @@ -10,10 +10,10 @@ ########====DEEPX====######## from deepx import Tensor,arange,broadcastTo -a=arange(4,2,3,name="a") -b=arange(2,1,name='b') +a=arange(start=0,end=4*2*3,name="a").reshape_((4,2,3)) +b=arange(start=0,end=2,name='b').reshape((2,1)) bb=b.broadcastTo( a.shape,out="b.broadcasted") -print(bb) +bb.print() diff --git a/front/py/examples/2_ir/4_changeshape_concat.py b/front/py/examples/2_ir/4_changeshape_concat.py index 24a8c9a4..cda373fb 100644 --- a/front/py/examples/2_ir/4_changeshape_concat.py +++ b/front/py/examples/2_ir/4_changeshape_concat.py @@ -20,4 +20,4 @@ t3=ones([3,4,5],dtype='float32',name='t3') t=concat([t1,t2,t3],dim=1,out='t') -print(t) +t.print() diff --git a/front/py/examples/2_ir/4_changeshape_gather.py b/front/py/examples/2_ir/4_changeshape_gather.py index e05013bd..df12e5f2 100644 --- a/front/py/examples/2_ir/4_changeshape_gather.py +++ b/front/py/examples/2_ir/4_changeshape_gather.py @@ -8,21 +8,22 @@ import torch torch_t = torch.arange(10*5, dtype=torch.float32).reshape(10,5) torch_indices = torch.tensor(indices_np) -torch_t = torch.gather(torch_t, 1,torch_indices) -print(torch_t.shape) -print(torch_t) +torch_t2 = torch.index_select(torch_t, 1,torch_indices) +print(torch_t2.shape) +print(torch_t2) ############-------DEEPX-------################ -from deepx import Tensor,arange,Shape -from deepx.nn.functional import load,save_npy +from deepx import Tensor,arange,Shape,load +from deepxutil.numpy import save_numpy - -save_npy(indices_np,'/home/lipeng/model/deepxmodel/tester/testindices') +save_numpy(indices_np,'/home/lipeng/model/deepxmodel/tester/testindices') -t = arange(start=0,end=10*5,dtype='float32',name='t').reshape(10,5) +t = arange(start=0,end=10*5,dtype='float32',name='t').reshape_((10,5)) indices = load('/home/lipeng/model/deepxmodel/tester/testindices') indices.print() -t = t.gather(indices,dim=1) -t.print() \ No newline at end of file +t2 = t.indexselect(indices,axis=1) +t2.print() + +### indexselect 行为和tensorflow.gather保持一致,支持index为多维 \ No newline at end of file diff --git a/front/py/examples/2_ir/4_changeshape_reshape.py b/front/py/examples/2_ir/4_changeshape_reshape.py index 151dc842..a8dde5f5 100644 --- a/front/py/examples/2_ir/4_changeshape_reshape.py +++ b/front/py/examples/2_ir/4_changeshape_reshape.py @@ -14,10 +14,10 @@ from deepx import Tensor,zeros, ones, full, arange print() -t1 = ones([3,4],dtype='float32',name='t1') -print(t1) -t2=t1.reshape(3,2,2) -print(t2) +t1 = ones((3,4),dtype='float32',name='t1') +t1.print() +t2=t1.reshape((3,2,2)) +t2.print() -t3=ones([4,5],dtype='float32').reshape_(20) -print(t3) +t3=ones((4,5),dtype='float32').reshape_((20,)) +t3.print() diff --git a/front/py/examples/2_ir/4_changeshape_transpose.py b/front/py/examples/2_ir/4_changeshape_transpose.py index 8ced7ade..bc1efd08 100644 --- a/front/py/examples/2_ir/4_changeshape_transpose.py +++ b/front/py/examples/2_ir/4_changeshape_transpose.py @@ -1,5 +1,5 @@ ############-------PyTorch-------################ - +print() import torch torch_t1 = torch.ones(3, 4, dtype=torch.float32) print(torch_t1) @@ -12,15 +12,15 @@ ############-------DEEPX-------################ -from deepx import Tensor,zeros, ones, full, arange +from deepx import ones + -print() -t1 = ones([3,4],dtype='float32',name='t1') -print(t1) +t1 = ones((3,4),dtype='float32',name='t1') +t1.print() t2=t1.transpose(out='t2') -print(t2) +t2.print() -t3=ones([2,3,4],dtype='float32',name='t3') +t3=ones((2,3,4),dtype='float32',name='t3') t4=t3.transpose(out='t4') -print(t4) +t4.print() diff --git a/front/py/examples/2_ir/5_reduce_sum_keepdim.py b/front/py/examples/2_ir/5_reduce_sum_keepdim.py index 3a582b47..da4cf110 100644 --- a/front/py/examples/2_ir/5_reduce_sum_keepdim.py +++ b/front/py/examples/2_ir/5_reduce_sum_keepdim.py @@ -1,4 +1,5 @@ ############-------PyTorch-------################ +print() import torch torch_t = torch.arange(0,60).reshape(3,4,5) @@ -19,19 +20,18 @@ from deepx import Tensor,ones,zeros,arange from deepx.nn.functional import sum,prod -t=arange(3,4,5,name='t') -t.set_format("%.0f") -print(t) -s=sum(t,dim=[0,2],out="s",keepdim=True) -s.set_format("%.0f") -print(s) -p=prod(t,dim=[1],out="p",keepdim=True) -p.set_format("%.0f") -print(p) - -t1=ones(4,5,6,name="t1") -t1.set_format("%.0f") -print(t1) -t2=sum(t1,dim=[0,1],out='t2',keepdim=True) -t2.set_format("%.0f") -print(t2) +t=arange(0,60,name='t').reshape_((3,4,5)) +t.print() +s=sum(t,dim=(0,2),out="s",keepdim=True) + +s.print() +p=prod(t,dim=(1,),out="p",keepdim=True) + +p.print() + +t1=ones((4,5,6),name="t1") + +t1.print() +t2=sum(t1,dim=(0,1),out='t2',keepdim=True) + +t2.print() diff --git a/front/py/examples/2_ir/5_reduce_sumprod.py b/front/py/examples/2_ir/5_reduce_sumprod.py index cc4360f5..f141ca13 100644 --- a/front/py/examples/2_ir/5_reduce_sumprod.py +++ b/front/py/examples/2_ir/5_reduce_sumprod.py @@ -19,20 +19,15 @@ from deepx import Tensor,ones,zeros,arange from deepx.nn.functional import sum,prod -t=arange(3,4,5,name='t') -t.arange_(0,1) -t.set_format("%.0f") -print(t) -s=sum(t,dim=[0,2],out="s") -s.set_format("%.0f") -print(s) -p=prod(t,dim=[1],out="p") -p.set_format("%.0f") -print(p) - -t1=ones(4,5,6,name="t1") -t1.set_format("%.0f") -print(t1) -t2=sum(t1,dim=[0,1],out='t2') -t2.set_format("%.0f") -print(t2) +t=arange(0,60,name='t').reshape_((3,4,5)) + +t.print() +s=sum(t,dim=(0,2),out="s") +s.print() +p=prod(t,dim=(1,),out="p") +p.print() + +t1=ones((4,5,6),name="t1") +t1.print() +t2=sum(t1,dim=(0,1),out='t2') +t2.print() From dd61422a75098e6fd02067111947bcbc48662e9e Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Sat, 26 Apr 2025 16:12:13 +0800 Subject: [PATCH 6/7] =?UTF-8?q?reduce:=E7=B3=BB=E5=88=97=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=E6=98=AF=E7=94=B1=E4=BA=8Edim?= =?UTF-8?q?=E4=BB=8Eshape=E6=96=87=E4=BB=B6=E8=AF=BB=E5=8F=96=E6=97=B6?= =?UTF-8?q?=EF=BC=8Cdim=E6=9C=AC=E8=BA=AB=E6=98=AF=E9=94=99=E7=9A=84?= =?UTF-8?q?=E3=80=82=E5=8E=9F=E5=9B=A0=E6=98=AFreshape=E5=90=8E=EF=BC=8Cdi?= =?UTF-8?q?m=E6=9C=AA=E4=BF=AE=E6=94=B9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 现在已经擅长了dim这个成员,改为实时计算shape.size() --- doc/design.md | 21 +++++++++ .../{5_reduce_sumprod.py => 5_reduce_prod.py} | 8 ++-- front/py/examples/2_ir/5_reduce_sum.py | 33 ++++++++++++++ front/py/examples/3_module/1_swiglu.py | 43 +++++++++++++++++++ 4 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 doc/design.md rename front/py/examples/2_ir/{5_reduce_sumprod.py => 5_reduce_prod.py} (86%) create mode 100644 front/py/examples/2_ir/5_reduce_sum.py create mode 100644 front/py/examples/3_module/1_swiglu.py diff --git a/doc/design.md b/doc/design.md new file mode 100644 index 00000000..bbc4df71 --- /dev/null +++ b/doc/design.md @@ -0,0 +1,21 @@ +# deepx默认原则 + +## 一.DeepxIR + +### 1.deepIR结构 +``` +deepIR{ + Meta{ + int id + string author + } meta + string name + []Param args + []Param returns +} +``` + +excuter执行deepxIR的规则 + ++ excuter执行deepxIR时,不得修改args中的tensor ++ 但deepIR不限制args和returns中的Param同名,这样可以实现类似inplace的操作 \ No newline at end of file diff --git a/front/py/examples/2_ir/5_reduce_sumprod.py b/front/py/examples/2_ir/5_reduce_prod.py similarity index 86% rename from front/py/examples/2_ir/5_reduce_sumprod.py rename to front/py/examples/2_ir/5_reduce_prod.py index f141ca13..a6f0eb63 100644 --- a/front/py/examples/2_ir/5_reduce_sumprod.py +++ b/front/py/examples/2_ir/5_reduce_prod.py @@ -5,8 +5,8 @@ print(torch_t) torch_s = torch.sum(torch_t, dim=[0, 2]) print(torch_s) -torch_p=torch.prod(torch_t,dim=1) -print(torch_p) +# torch_p=torch.prod(torch_t,dim=1) +# print(torch_p) torch_t1 = torch.ones(4, 5, 6,dtype=torch.float) print(torch_t1) @@ -24,8 +24,8 @@ t.print() s=sum(t,dim=(0,2),out="s") s.print() -p=prod(t,dim=(1,),out="p") -p.print() +# p=prod(t,dim=(1,),out="p") +# p.print() t1=ones((4,5,6),name="t1") t1.print() diff --git a/front/py/examples/2_ir/5_reduce_sum.py b/front/py/examples/2_ir/5_reduce_sum.py new file mode 100644 index 00000000..a6f0eb63 --- /dev/null +++ b/front/py/examples/2_ir/5_reduce_sum.py @@ -0,0 +1,33 @@ +############-------PyTorch-------################ + +import torch +torch_t = torch.arange(0,60).reshape(3,4,5) +print(torch_t) +torch_s = torch.sum(torch_t, dim=[0, 2]) +print(torch_s) +# torch_p=torch.prod(torch_t,dim=1) +# print(torch_p) + +torch_t1 = torch.ones(4, 5, 6,dtype=torch.float) +print(torch_t1) +torch_t2 = torch.sum(torch_t1, dim=[0, 1]) +print(torch_t2) + + +############-------DEEPX-------################ + +from deepx import Tensor,ones,zeros,arange +from deepx.nn.functional import sum,prod + +t=arange(0,60,name='t').reshape_((3,4,5)) + +t.print() +s=sum(t,dim=(0,2),out="s") +s.print() +# p=prod(t,dim=(1,),out="p") +# p.print() + +t1=ones((4,5,6),name="t1") +t1.print() +t2=sum(t1,dim=(0,1),out='t2') +t2.print() diff --git a/front/py/examples/3_module/1_swiglu.py b/front/py/examples/3_module/1_swiglu.py new file mode 100644 index 00000000..0e350582 --- /dev/null +++ b/front/py/examples/3_module/1_swiglu.py @@ -0,0 +1,43 @@ +hidden_size = 8 +eps = 1e-6 +dir='/home/lipeng/model/deepxmodel/llama/' + + + +############### PyTorch 实现部分 ############### +import torch +# 使用小规模数据以便打印完整结果 +pt_input = torch.arange(48, dtype=torch.float32).reshape(2, 3, hidden_size) / 10.0 - 2.0 +print("PyTorch 输入:") +print(pt_input) + +from transformers.models.llama.modeling_llama import LlamaRMSNorm as TransformersLlamaRMSNorm +from deepxutil.torch import save_torch +save_torch(pt_input,dir+'rmsnorm_input') +# 使用transformers库中的官方LlamaRMSNorm实现 +pt_norm = TransformersLlamaRMSNorm(hidden_size, eps=eps) +# 设置权重为固定值0.5 +with torch.no_grad(): + pt_norm.weight.fill_(0.5) +# 前向传播 +pt_output = pt_norm(pt_input) + + +print("\nPyTorch RMSNorm 结果:") +print(pt_output.shape) +print(pt_output) + + +############### DeepX 实现部分 ############### +from deepx import constant_,load +from deepx.transformer.models.llama.modeling_llama import LlamaRMSNorm + +input=load(dir+'rmsnorm_input') + +# DeepX计算流程 +norm = LlamaRMSNorm(hidden_size=hidden_size, eps=eps) +# 设置相同的权重 +constant_(norm.weight, 0.5) +# 前向计算 +output = norm(input) +output.print() From 66499590b42543e31e74123827d3e44f22b79485 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Sat, 26 Apr 2025 16:12:21 +0800 Subject: [PATCH 7/7] =?UTF-8?q?reduce:=E7=B3=BB=E5=88=97=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=E6=98=AF=E7=94=B1=E4=BA=8Edim?= =?UTF-8?q?=E4=BB=8Eshape=E6=96=87=E4=BB=B6=E8=AF=BB=E5=8F=96=E6=97=B6?= =?UTF-8?q?=EF=BC=8Cdim=E6=9C=AC=E8=BA=AB=E6=98=AF=E9=94=99=E7=9A=84?= =?UTF-8?q?=E3=80=82=E5=8E=9F=E5=9B=A0=E6=98=AFreshape=E5=90=8E=EF=BC=8Cdi?= =?UTF-8?q?m=E6=9C=AA=E4=BF=AE=E6=94=B9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 现在已经擅长了dim这个成员,改为实时计算shape.size() --- excuter/cpp-common/src/deepx/shape.cpp | 51 ++++++- excuter/cpp-common/src/deepx/shape.hpp | 18 ++- .../src/deepx/shape_changeshape.cpp | 4 +- .../src/deepx/shape_changeshape.hpp | 4 +- excuter/cpp-common/src/deepx/shape_matmul.cpp | 8 +- excuter/cpp-common/src/deepx/shape_range.cpp | 18 +-- excuter/cpp-common/src/deepx/shape_reduce.cpp | 8 +- .../cpp-common/src/deepx/shape_tensorinit.cpp | 6 +- excuter/cpp-common/src/deepx/shapeslice.cpp | 34 ----- excuter/cpp-common/src/deepx/shapeslice.hpp | 21 --- excuter/cpp-common/src/deepx/tensor.hpp | 11 ++ .../cpp-common/src/deepx/tensorfunc/io.hpp | 25 +--- .../src/deepx/tensorfunc/matmul.hpp | 4 +- excuter/cpp-common/src/deepx/tensorslice.hpp | 26 ---- excuter/cpp-common/src/stdutil/print.hpp | 2 +- .../deepx/tensorfunc/changeshape_miaobyte.hpp | 20 +-- .../src/deepx/tensorfunc/io_miaobyte.hpp | 2 +- .../src/deepx/tensorfunc/reduce_miaobyte.hpp | 16 +- excuter/op-mem-cuda/src/deepx/tf/io.hpp | 6 +- .../deepx/tensorfunc/changeshape_miaobyte.hpp | 30 ++-- .../deepx/tensorfunc/elementwise_miaobyte.hpp | 48 +++--- .../src/deepx/tensorfunc/io_miaobyte.hpp | 2 +- .../src/deepx/tensorfunc/matmul_cblas.hpp | 4 +- .../src/deepx/tensorfunc/matmul_miaobyte.hpp | 4 +- .../src/deepx/tensorfunc/reduce_miaobyte.hpp | 56 +++---- excuter/op-mem-ompsimd/src/deepx/tf/io.hpp | 4 +- .../op-mem-ompsimd/src/deepx/tf/reduce.hpp | 2 +- .../test/tensorfunc/1_shape.cpp | 2 +- .../test/tensorfunc/2_tensor_new.cpp | 4 +- .../test/tensorfunc/5_tensor_sum.cpp | 4 +- .../test/tensorfunc/8_tensor_concat.cpp | 2 +- .../models/llama/modeling_llama.py | 140 ++++++++++-------- front/py/examples/2_ir/5_reduce_sum.py | 8 +- 33 files changed, 283 insertions(+), 311 deletions(-) delete mode 100644 excuter/cpp-common/src/deepx/shapeslice.cpp delete mode 100644 excuter/cpp-common/src/deepx/shapeslice.hpp delete mode 100644 excuter/cpp-common/src/deepx/tensorslice.hpp diff --git a/excuter/cpp-common/src/deepx/shape.cpp b/excuter/cpp-common/src/deepx/shape.cpp index fa207e98..9f51a2e2 100644 --- a/excuter/cpp-common/src/deepx/shape.cpp +++ b/excuter/cpp-common/src/deepx/shape.cpp @@ -8,18 +8,19 @@ #include "deepx/dtype.hpp" namespace deepx { - Shape::Shape(const int *shape, int dim) { setshape(shape, dim); } + int Shape::dim() const{ + return shape.size(); + } int64_t Shape::bytes() const{ return size * (precision_bits(dtype) / 8); } void Shape::setshape(const int *shape, int dim) { this->shape.resize(dim); - this->dim = dim; std::copy(shape, shape + dim, this->shape.begin()); strides.resize(dim); strides[dim - 1] = 1; @@ -57,10 +58,10 @@ namespace deepx void Shape::print() const { std::cout << "shape:["; - for (int i = 0; i < dim; ++i) + for (int i = 0; i < dim(); ++i) { std::cout << shape[i]; - if (i < dim - 1) + if (i < dim() - 1) std::cout << ", "; } std::cout << "]" << std::endl; @@ -73,8 +74,8 @@ namespace deepx return idx; } std::vector Shape::linearto(int idx_linear) const{ - std::vector indices(dim,0); - for(int i=0;i indices(dim(),0); + for(int i=0;i()); - dim = node["dim"].as(); shape = node["shape"].as>(); strides=node["stride"].as>(); size=node["size"].as(); + + //check + Shape checkedshape(shape); + if(checkedshape.shape!=shape){ + throw std::runtime_error("Shape::fromYaml: shape mismatch"); + } + if(checkedshape.strides!=strides){ + throw std::runtime_error("Shape::fromYaml: strides mismatch"); + } + if(checkedshape.size!=size){ + throw std::runtime_error("Shape::fromYaml: size mismatch"); + } } + + void Shape::saveShape( const std::string &tensorPath) const{ + std::string shapedata = toYaml(); + std::ofstream shape_fs(tensorPath + ".shape", std::ios::binary); + shape_fs.write(shapedata.c_str(), shapedata.size()); + shape_fs.close(); + } + + pair Shape::loadShape(const std::string &path) + { + std::string shapepath = path + ".shape"; + std::ifstream shape_fs(shapepath, std::ios::binary); + if (!shape_fs.is_open()) + { + throw std::runtime_error("Failed to open shape file: " + shapepath); + } + std::string shapedata((std::istreambuf_iterator(shape_fs)), std::istreambuf_iterator()); + Shape shape; + shape.fromYaml(shapedata); + std::string filename = stdutil::filename(path); + std::string tensor_name = filename.substr(0, filename.find_last_of('.')); + return std::make_pair(tensor_name, shape); + } } \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape.hpp b/excuter/cpp-common/src/deepx/shape.hpp index 655dce38..b314e891 100644 --- a/excuter/cpp-common/src/deepx/shape.hpp +++ b/excuter/cpp-common/src/deepx/shape.hpp @@ -4,11 +4,14 @@ #include #include #include +#include +#include +#include "stdutil/fs.hpp" #include "deepx/dtype.hpp" namespace deepx { - //omp内线程局部变量 + // omp内线程局部变量 class ThreadLocalVectors { private: @@ -43,7 +46,6 @@ namespace deepx Precision dtype; std::vector shape; std::vector strides; - int dim; int64_t size; int64_t bytes() const; @@ -52,6 +54,7 @@ namespace deepx Shape(const std::initializer_list &shape); Shape(const int *shape, int dim); void setshape(const int *shape, int dim); + int dim() const; int operator[](int index) const; int &operator[](int index); bool operator==(const Shape &shape) const { return shape.shape == shape.shape; } @@ -67,16 +70,19 @@ namespace deepx void rangeParallel(int dimCount, std::function &indices)> func) const; // 支持omp,但omp内需要线程local变量 - void rangeParallel(int dimCount, std::function &indices, ThreadLocalVectors &tlv)> func,const vector tlv_sizes) const; - void rangeParallel(int dimCount, std::function func,const vector tlv_sizes) const; - void rangeParallel(int dimCount, std::function &indices, ThreadLocalVectors &tlv)> func,const vector tlv_sizes) const; + void rangeParallel(int dimCount, std::function &indices, ThreadLocalVectors &tlv)> func, const vector tlv_sizes) const; + void rangeParallel(int dimCount, std::function func, const vector tlv_sizes) const; + void rangeParallel(int dimCount, std::function &indices, ThreadLocalVectors &tlv)> func, const vector tlv_sizes) const; int linearat(const std::vector &indices) const; std::vector linearto(int idx_linear) const; std::string toYaml() const; void fromYaml(const std::string &yaml); - }; + void saveShape(const std::string &tensorPath) const; + + static pair loadShape(const std::string &path); + }; } #endif // DEEPX_SHAPE_HPP diff --git a/excuter/cpp-common/src/deepx/shape_changeshape.cpp b/excuter/cpp-common/src/deepx/shape_changeshape.cpp index f3a60bd1..d3a89d90 100644 --- a/excuter/cpp-common/src/deepx/shape_changeshape.cpp +++ b/excuter/cpp-common/src/deepx/shape_changeshape.cpp @@ -32,11 +32,11 @@ namespace deepx Shape concatShape(const std::vector &shapes, const int axis) { - std::vector outputShape(shapes[0].dim); + std::vector outputShape(shapes[0].dim()); outputShape = shapes[0].shape; for (int i = 1; i < shapes.size(); ++i) { - if (shapes[i].dim != outputShape.size()) + if (shapes[i].dim() != outputShape.size()) { throw std::invalid_argument("All tensors must have the same number of dimensions."); } diff --git a/excuter/cpp-common/src/deepx/shape_changeshape.hpp b/excuter/cpp-common/src/deepx/shape_changeshape.hpp index 3f299885..71cbcb63 100644 --- a/excuter/cpp-common/src/deepx/shape_changeshape.hpp +++ b/excuter/cpp-common/src/deepx/shape_changeshape.hpp @@ -38,11 +38,11 @@ namespace deepx int axisDim = 0; for (int i = 0; i < tensors.size(); i++) { - if (tensors[i]->shape.dim != output.shape.dim) + if (tensors[i]->shape.dim() != output.shape.dim()) { throw TensorShapeError("All input tensors must have the same dimension size for concat"); } - for (int j = 0; j < tensors[i]->shape.dim; j++) + for (int j = 0; j < tensors[i]->shape.dim(); j++) { if (j != axis) { diff --git a/excuter/cpp-common/src/deepx/shape_matmul.cpp b/excuter/cpp-common/src/deepx/shape_matmul.cpp index 3ed0d51a..46247c70 100644 --- a/excuter/cpp-common/src/deepx/shape_matmul.cpp +++ b/excuter/cpp-common/src/deepx/shape_matmul.cpp @@ -4,9 +4,9 @@ namespace deepx { - Shape matmul_shape(const Shape &A, const Shape &B) + Shape matmul_shape(const Shape &A, const Shape &B) { - if (A.dim < 2 || B.dim < 2) + if (A.dim() < 2 || B.dim() < 2) { throw std::invalid_argument("A and B must >= 2D tensors"); } @@ -14,8 +14,8 @@ namespace deepx { throw std::invalid_argument("A[-1] must be equal to B[-2]"); } - std::vector resultshape(A.dim); - std::copy(A.shape.begin(), A.shape.begin() + A.dim, resultshape.begin()); + std::vector resultshape(A.dim()); + std::copy(A.shape.begin(), A.shape.begin() + A.dim(), resultshape.begin()); Shape result(resultshape); result[-1] = B[-1]; return result; diff --git a/excuter/cpp-common/src/deepx/shape_range.cpp b/excuter/cpp-common/src/deepx/shape_range.cpp index 31a125e1..1f7fad54 100644 --- a/excuter/cpp-common/src/deepx/shape_range.cpp +++ b/excuter/cpp-common/src/deepx/shape_range.cpp @@ -42,7 +42,7 @@ namespace deepx } void Shape::range(int dimCount, std::function &indices)> func) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); std::vector indices(dimCount, 0); @@ -61,7 +61,7 @@ namespace deepx } void Shape::range(int dimCount, std::function &indices)> func) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); int stride = checkStride(dimCount, shape); @@ -82,7 +82,7 @@ namespace deepx void Shape::range(int dimCount, std::function func) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); int stride = checkStride(dimCount, shape); for (int idx = 0; idx < totalSize; idx++) @@ -93,7 +93,7 @@ namespace deepx void Shape::rangeParallel(int dimCount, std::function &indices)> func) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); #pragma omp parallel @@ -115,7 +115,7 @@ namespace deepx } void Shape::rangeParallel(int dimCount, std::function func) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int stride = checkStride(dimCount, shape); // 计算总循环次数 @@ -130,7 +130,7 @@ namespace deepx void Shape::rangeParallel(int dimCount, std::function &indices)> func) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); int stride = checkStride(dimCount, shape); @@ -154,7 +154,7 @@ namespace deepx void Shape::rangeParallel(int dimCount, std::function &indices, ThreadLocalVectors &tlv)> func,const vector tlv_sizes) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); #pragma omp parallel @@ -177,7 +177,7 @@ namespace deepx } void Shape::rangeParallel(int dimCount, std::function func,const vector tlv_sizes) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int stride = checkStride(dimCount, shape); // 计算总循环次数 @@ -196,7 +196,7 @@ namespace deepx void Shape::rangeParallel(int dimCount, std::function &indices, ThreadLocalVectors &tlv)> func,const vector tlv_sizes) const { - dimCount = checkdim(dimCount, dim); + dimCount = checkdim(dimCount, dim()); int totalSize = checkTotalSize(dimCount, shape); int stride = checkStride(dimCount, shape); diff --git a/excuter/cpp-common/src/deepx/shape_reduce.cpp b/excuter/cpp-common/src/deepx/shape_reduce.cpp index 2f017a9b..eeb427a2 100644 --- a/excuter/cpp-common/src/deepx/shape_reduce.cpp +++ b/excuter/cpp-common/src/deepx/shape_reduce.cpp @@ -73,14 +73,14 @@ namespace deepx return outputShape; } + // 创建一个(map映射)数组,标记哪些维度需要求和 std::vector reducedDim(const std::vector &shape, const std::vector &dims) { - // 创建一个映射数组,标记哪些维度需要求和 - std::vector sumMap(shape.size(), 0); + std::vector reducdMap(shape.size(), 0); for (int dim : dims) { - sumMap[dim] = 1; + reducdMap[dim] = 1; } - return sumMap; + return reducdMap; } } \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_tensorinit.cpp b/excuter/cpp-common/src/deepx/shape_tensorinit.cpp index b2ae8e68..dd93e798 100644 --- a/excuter/cpp-common/src/deepx/shape_tensorinit.cpp +++ b/excuter/cpp-common/src/deepx/shape_tensorinit.cpp @@ -5,7 +5,7 @@ namespace deepx std::pair calculateFanInAndFanOut(const Shape &shape) { int fanIn, fanOut; - if (shape.dim < 2) + if (shape.dim() < 2) { fanIn = 1; fanOut = 1; @@ -15,9 +15,9 @@ namespace deepx int numInputFmaps = shape[1]; // 输入特征图数量 int numOutputFmaps = shape[0]; // 输出特征图数量 int receptiveFieldSize = 1; - if (shape.dim > 2) + if (shape.dim() > 2) { - for (int i = 2; i < shape.dim; ++i) + for (int i = 2; i < shape.dim(); ++i) { receptiveFieldSize *= shape[i]; // 计算感受野大小 } diff --git a/excuter/cpp-common/src/deepx/shapeslice.cpp b/excuter/cpp-common/src/deepx/shapeslice.cpp deleted file mode 100644 index 99874226..00000000 --- a/excuter/cpp-common/src/deepx/shapeslice.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "deepx/shapeslice.hpp" - -namespace deepx -{ - ShapeSlice::ShapeSlice(const std::vector &start, const std::vector &shape, Shape *parent) - { - this->shape = Shape(shape); - this->start = start; - this->parent = parent; - } - - ShapeSlice::~ShapeSlice() - { - parent = nullptr; - } - const std::vector ShapeSlice::toParentIndices(const std::vector &indices) const - { - std::vector parentindices = indices; - for (int i = 0; i < parentindices.size(); i++) - { - parentindices[i] = parentindices[i] + start[i]; - } - return parentindices; - } - const std::vector ShapeSlice::fromParentIndices(const std::vector &parentIndices) const - { - std::vector indices = parentIndices; - for (int i = 0; i < indices.size(); i++) - { - indices[i]=parentIndices[i]-start[i]; - } - return indices; - }; -} \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shapeslice.hpp b/excuter/cpp-common/src/deepx/shapeslice.hpp deleted file mode 100644 index 428041da..00000000 --- a/excuter/cpp-common/src/deepx/shapeslice.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef SHAPE_SLICE_HPP -#define SHAPE_SLICE_HPP - -#include "deepx/tensor.hpp" - -namespace deepx -{ - struct ShapeSlice - { - std::vector start; - Shape shape; - Shape *parent; - ShapeSlice() = default; - ShapeSlice(const std::vector &start, const std::vector &shape, Shape *parent); - ~ShapeSlice(); - const std::vector toParentIndices(const std::vector &indices) const; - const std::vector fromParentIndices(const std::vector &parentIndices) const; - }; - -} -#endif \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/tensor.hpp b/excuter/cpp-common/src/deepx/tensor.hpp index c5a6ca55..d49f12f8 100644 --- a/excuter/cpp-common/src/deepx/tensor.hpp +++ b/excuter/cpp-common/src/deepx/tensor.hpp @@ -156,6 +156,17 @@ namespace deepx tensor.saver = nullptr; return *this; } + + //io + + void save(const string &path) + { + if (saver) + { + shape.saveShape(path); + saver(data, shape.size, path+".data"); + } + } }; } #endif \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp index 73542616..59a3606e 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/io.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/io.hpp @@ -17,30 +17,7 @@ namespace deepx::tensorfunc{ } - inline void saveShape(const Shape &shape,const std::string &tensorPath){ - std::string shapepath = tensorPath + ".shape"; - std::string shapedata = shape.toYaml(); - std::ofstream shape_fs(shapepath, std::ios::binary); - shape_fs.write(shapedata.c_str(), shapedata.size()); - shape_fs.close(); - } - - - inline pair loadShape(const std::string &path) - { - std::string shapepath = path + ".shape"; - std::ifstream shape_fs(shapepath, std::ios::binary); - if (!shape_fs.is_open()) - { - throw std::runtime_error("Failed to open shape file: " + shapepath); - } - std::string shapedata((std::istreambuf_iterator(shape_fs)), std::istreambuf_iterator()); - Shape shape; - shape.fromYaml(shapedata); - std::string filename = stdutil::filename(path); - std::string tensor_name = filename.substr(0, filename.find_last_of('.')); - return std::make_pair(tensor_name, shape); - } + } diff --git a/excuter/cpp-common/src/deepx/tensorfunc/matmul.hpp b/excuter/cpp-common/src/deepx/tensorfunc/matmul.hpp index 2e099aba..35a50114 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/matmul.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/matmul.hpp @@ -12,11 +12,11 @@ namespace deepx::tensorfunc { return false; } - if (a.dim != b.dim) + if (a.dim() != b.dim()) { return false; } - for (int i = 0; i < a.dim - 2; ++i) + for (int i = 0; i < a.dim() - 2; ++i) { if (a[i] != b[i]) { diff --git a/excuter/cpp-common/src/deepx/tensorslice.hpp b/excuter/cpp-common/src/deepx/tensorslice.hpp deleted file mode 100644 index 11a791a6..00000000 --- a/excuter/cpp-common/src/deepx/tensorslice.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef TENSORSLICE_HPP -#define TENSORSLICE_HPP - -#include "deepx/shape.hpp" -namespace deepx -{ - //主要支持CNN的slice操作 - template - struct TensorSlice - { - Tensor *parent; - SliceShape sliceShape; - - TensorSlice(Tensor *parent, SliceShape sliceShape) - { - this->parent = parent; - this->sliceShape = sliceShape; - } - ~TensorSlice() - { - parent = nullptr; - sliceShape.parent = nullptr; - } - }; -} // namespace deepx -#endif \ No newline at end of file diff --git a/excuter/cpp-common/src/stdutil/print.hpp b/excuter/cpp-common/src/stdutil/print.hpp index 046a4c83..e7ea4c9d 100644 --- a/excuter/cpp-common/src/stdutil/print.hpp +++ b/excuter/cpp-common/src/stdutil/print.hpp @@ -90,7 +90,7 @@ namespace stdutil shape.dtype = dtype; shape.print(); - if (shape.dim == 1) + if (shape.dim() == 1) { std::cout << "["; for (int i = 0; i < shape[0]; ++i) diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp index a49439b9..922c8ddd 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -50,14 +50,14 @@ namespace deepx::tensorfunc { static void transpose(const Tensor &tensor, const std::vector &dim_order, Tensor &output) { - if (dim_order.size() != tensor.shape.dim) + if (dim_order.size() != tensor.shape.dim()) { throw std::runtime_error("Dimension order size must match tensor dimension size for transpose"); } launch_transpose(tensor.data, tensor.shape.strides.data(), output.data, output.shape.strides.data(), - tensor.shape.dim, tensor.shape.size, dim_order.data()); + tensor.shape.dim(), tensor.shape.size, dim_order.data()); } }; @@ -82,7 +82,7 @@ namespace deepx::tensorfunc vector inputStrides; for (int i = 0; i < tensors.size(); i++) { - std::copy(tensors[i]->shape.strides.data(), tensors[i]->shape.strides.data() + tensors[i]->shape.dim, std::back_inserter(inputStrides)); + std::copy(tensors[i]->shape.strides.data(), tensors[i]->shape.strides.data() + tensors[i]->shape.dim(), std::back_inserter(inputStrides)); } vector shapeAtAxis(tensors.size()); @@ -93,7 +93,7 @@ namespace deepx::tensorfunc launch_concat(tensorsData.data(), inputStrides.data(), C.data, C.shape.strides.data(), - C.shape.dim, + C.shape.dim(), C.shape.size, axis, tensors.size(), shapeAtAxis.data()); }; @@ -111,9 +111,9 @@ namespace deepx::tensorfunc throw TensorShapeError("Broadcast shape mismatch"); } auto bmap = broadcastMap(A.shape.shape, new_shape); - launch_broadcastTo(A.data, A.shape.strides.data(), A.shape.dim, + launch_broadcastTo(A.data, A.shape.strides.data(), A.shape.dim(), bmap.data(), - B.data, B.shape.strides.data(), B.shape.dim, B.shape.size); + B.data, B.shape.strides.data(), B.shape.dim(), B.shape.size); } }; @@ -122,17 +122,17 @@ namespace deepx::tensorfunc struct indexselectDispatcher { static void indexselect(const Tensor &input, const Tensor &indices, const int axis, Tensor &output){ - int gatherAxis = axis < 0 ? input.shape.dim + axis : axis; + int gatherAxis = axis < 0 ? input.shape.dim() + axis : axis; vector gatherShape = indexselectShape(input.shape.shape, indices.shape.shape, gatherAxis); if (gatherShape.empty()||gatherShape!=output.shape.shape) { throw TensorShapeError("Indexselect shape mismatch"); } - launch_indexselect(input.data, input.shape.strides.data(), input.shape.dim, - indices.data, indices.shape.strides.data(), indices.shape.dim, + launch_indexselect(input.data, input.shape.strides.data(), input.shape.dim(), + indices.data, indices.shape.strides.data(), indices.shape.dim(), gatherAxis, - output.data,output.shape.strides.data(),output.shape.dim,output.shape.size); + output.data,output.shape.strides.data(),output.shape.dim(),output.shape.size); } }; } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp index 0967736f..065923e7 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp @@ -80,7 +80,7 @@ namespace deepx::tensorfunc pair>> load(const std::string &path) { // 加载shape - pair shape_name=loadShape(path); + pair shape_name=Shape::loadShape(path); Shape shape=shape_name.second; std::string tensor_name=shape_name.first; diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp index ce4c7223..2acacf06 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/reduce_miaobyte.hpp @@ -22,9 +22,9 @@ namespace deepx::tensorfunc constant(result, T(0)); std::vector checkeddims = checkedDims(tensor.shape.shape, dims); std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); - launch_sum(tensor.data, tensor.shape.strides.data(), tensor.shape.dim, tensor.shape.size, + launch_sum(tensor.data, tensor.shape.strides.data(), tensor.shape.dim(), tensor.shape.size, reduced_dims.data(), keepdims, - result.data, result.shape.strides.data(), result.shape.dim); + result.data, result.shape.strides.data(), result.shape.dim()) ; } }; @@ -36,9 +36,9 @@ namespace deepx::tensorfunc constant(result, T(1)); std::vector checkeddims = checkedDims(tensor.shape.shape, dims); std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); - launch_prod(tensor.data, tensor.shape.strides.data(), tensor.shape.dim, tensor.shape.size, + launch_prod(tensor.data, tensor.shape.strides.data(), tensor.shape.dim(), tensor.shape.size, reduced_dims.data(), keepdims, - result.data, result.shape.strides.data(), result.shape.dim); + result.data, result.shape.strides.data(), result.shape.dim()) ; } }; template @@ -49,9 +49,9 @@ namespace deepx::tensorfunc constant(result, std::numeric_limits::lowest()); std::vector checkeddims = checkedDims(tensor.shape.shape, dims); std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); - launch_reducemax(tensor.data, tensor.shape.strides.data(), tensor.shape.dim, tensor.shape.size, + launch_reducemax(tensor.data, tensor.shape.strides.data(), tensor.shape.dim(), tensor.shape.size, reduced_dims.data(), keepdims, - result.data, result.shape.strides.data(), result.shape.dim); + result.data, result.shape.strides.data(), result.shape.dim()) ; } }; @@ -63,9 +63,9 @@ namespace deepx::tensorfunc constant(result, std::numeric_limits::max()); std::vector checkeddims = checkedDims(tensor.shape.shape, dims); std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); - launch_reducemin(tensor.data, tensor.shape.strides.data(), tensor.shape.dim, tensor.shape.size, + launch_reducemin(tensor.data, tensor.shape.strides.data(), tensor.shape.dim(), tensor.shape.size, reduced_dims.data(), keepdims, - result.data, result.shape.strides.data(), result.shape.dim); + result.data, result.shape.strides.data(), result.shape.dim()) ; } }; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp b/excuter/op-mem-cuda/src/deepx/tf/io.hpp index 81d739f2..d676c743 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -86,7 +86,7 @@ namespace deepx::tf return 1; } Precision dtype = mem->gettensor(name)->shape.dtype; - tensorfunc::saveShape(mem->gettensor(name)->shape,path); + mem->gettensor(name)->shape.saveShape(path); path+=".data"; switch (dtype) { @@ -165,7 +165,7 @@ namespace deepx::tf { string path = this->args[0].textvalue; - pair shape_name=tensorfunc::loadShape(path); + pair shape_name=Shape::loadShape(path); std::string tensor_name=shape_name.first; Shape shape=shape_name.second; @@ -252,7 +252,7 @@ namespace deepx::tf string path = this->args[0].textvalue; string tensorname = this->returns[0].textvalue; - pair shape_name=tensorfunc::loadShape(path); + pair shape_name=Shape::loadShape(path); std::string tensor_name=shape_name.first; Shape shape=shape_name.second; diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp index f9466111..a92a9571 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -48,7 +48,7 @@ namespace deepx::tensorfunc static void transpose(const Tensor &tensor, const std::vector &dim_order, Tensor &output) { - if (dim_order.size() != tensor.shape.dim) + if (dim_order.size() != tensor.shape.dim()) { throw std::invalid_argument("dimOrder size does not match the number of dimensions in the TensorCPU."); } @@ -62,7 +62,7 @@ namespace deepx::tensorfunc for (size_t i = 0; i < dim_order.size(); ++i) { tlv.get(0)[dim_order[i]] = indices[i]; } - output.data[idx_linear]= tensor.data[tensor.shape.linearat(tlv.get(0))]; }, {tensor.shape.dim}); + output.data[idx_linear]= tensor.data[tensor.shape.linearat(tlv.get(0))]; }, {tensor.shape.dim()}); } }; // concat @@ -131,7 +131,7 @@ namespace deepx::tensorfunc } auto bmap = broadcastMap(A.shape.shape, new_shape); - B.shape.rangeParallel(B.shape.dim, [&](const int idx, const std::vector &bindices) + B.shape.rangeParallel(B.shape.dim(), [&](const int idx, const std::vector &bindices) { vector aindices=fromBroadcastIndices(bmap, bindices); B.data[idx] = A.data[A.shape.linearat(aindices)]; }); @@ -157,8 +157,8 @@ namespace deepx::tensorfunc { static void indexselect(const Tensor &input, const Tensor &index, const int axis, Tensor &output) { - int gatherAxis = axis < 0 ? input.shape.dim + axis : axis; - if (gatherAxis < 0 || gatherAxis >= input.shape.dim) + int gatherAxis = axis < 0 ? input.shape.dim() + axis : axis; + if (gatherAxis < 0 || gatherAxis >= input.shape.dim()) { throw std::invalid_argument("Axis is out of bounds"); } @@ -168,12 +168,12 @@ namespace deepx::tensorfunc { throw TensorShapeError("Indexselect shape mismatch"); } - output.shape.rangeParallel(output.shape.dim, [&](const int idx, const std::vector &output_indices, ThreadLocalVectors &tlv) + output.shape.rangeParallel(output.shape.dim(), [&](const int idx, const std::vector &output_indices, ThreadLocalVectors &tlv) { fromIndexselectIndices(output_indices, index,tlv.get(1), gatherAxis, tlv.get(0)); output.data[idx] = input.data[input.shape.linearat(tlv.get(0))]; }, - {input.shape.dim,index.shape.dim}); + {input.shape.dim(),index.shape.dim()}); } }; @@ -205,12 +205,12 @@ namespace deepx::tensorfunc // void expand(const Tensor &input, Tensor &output) // { // // 检查输入和目标形状的兼容性 - // if (input.shape.dim != output.shape.dim) + // if (input.shape.dim() != output.shape.dim()) // { // throw std::invalid_argument("expand维度不匹配: 输入维度 " + - // std::to_string(input.shape.dim) + + // std::to_string(input.shape.dim()) + // ", 目标维度 " + - // std::to_string(output.shape.dim) + + // std::to_string(output.shape.dim()) + // "请先前dim补1的方式reshape"); // } @@ -231,7 +231,7 @@ namespace deepx::tensorfunc // // 找到最后一个需要扩展的维度 // int last_expand_dim = -1; - // for (int i = input.shape.dim - 1; i >= 0; --i) + // for (int i = input.shape.dim() - 1; i >= 0; --i) // { // if (input.shape[i] != output.shape.shape[i]) // { @@ -241,7 +241,7 @@ namespace deepx::tensorfunc // } // // 如果最后几个维度不需要扩展,可以连续复制 - // if (last_expand_dim < output.shape.dim - 1) + // if (last_expand_dim < output.shape.dim() - 1) // { // int copy_len = output.shape.strides[last_expand_dim + 1]; // output.shape.rangeParallel(last_expand_dim + 1, [&bm, &output, &input, copy_len](int idx_linear, const std::vector &indices, std::vector &oldIndices) @@ -250,15 +250,15 @@ namespace deepx::tensorfunc // int idx_old = input.shape.linearat(oldIndices); // std::copy(input.data + idx_old, // input.data + idx_old + copy_len, - // output.data + idx_linear); }, input.shape.dim); + // output.data + idx_linear); }, input.shape.dim()) ; // } // else // { - // output.shape.rangeParallel(output.shape.dim, [&bm, &output, &input](int idx_linear, const std::vector &indices, std::vector &oldIndices) + // output.shape.rangeParallel(output.shape.dim(), [&bm, &output, &input](int idx_linear, const std::vector &indices, std::vector &oldIndices) // { // fromBroadcastIndices(bm, indices, oldIndices); // int idx_old = input.shape.linearat(oldIndices); - // output.data[idx_linear] = input.data[idx_old]; }, input.shape.dim); + // output.data[idx_linear] = input.data[idx_old]; }, input.shape.dim()) ; // } // } } diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp index 1e863ae3..5acf3bd7 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp @@ -18,7 +18,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, &B, &C, &scalar_op, &simd_op](int i) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, &B, &C, &scalar_op, &simd_op](int i) { int shape_last = C.shape[-1]; const ScalableTag tag; @@ -62,7 +62,7 @@ namespace deepx::tensorfunc { if (A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, &b, &C, &scalar_op, &simd_op](int i) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, &b, &C, &scalar_op, &simd_op](int i) { int shape_last = C.shape[-1]; const ScalableTag tag; @@ -292,7 +292,7 @@ namespace deepx::tensorfunc { if (A.shape == C.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &C](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &C](int idx) { for (int j=0;j tag; @@ -355,7 +355,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output](int i) { int shape_last = output.shape[-1]; @@ -382,7 +382,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, &B, &C](int i) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, &B, &C](int i) { for (int j = 0; j < C.shape[-1]; j++) C.data[i+j] = std::pow(A.data[i+j], B.data[i+j]); }); @@ -403,7 +403,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output, &value](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output, &value](int i) { for (int j = 0; j < output.shape[-1]; j++) output.data[i+j] = std::pow(input.data[i+j], value); }); @@ -423,7 +423,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output, &value](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output, &value](int i) { for (int j = 0; j < output.shape[-1]; j++) output.data[i+j] = std::pow(value, input.data[i+j]); }); @@ -443,7 +443,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output](int i) { for (int j = 0; j < output.shape[-1]; j++) output.data[i+j] = std::log(input.data[i+j]); }); } @@ -462,7 +462,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output](int i) { for (int j = 0; j < output.shape[-1]; j++) output.data[i+j] = std::exp(input.data[i+j]); }); } @@ -481,7 +481,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output](int i) { int shape_last=output.shape[-1]; const ScalableTag tag; @@ -524,7 +524,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output](int i) { int shape_last=output.shape[-1]; const ScalableTag tag; @@ -567,7 +567,7 @@ namespace deepx::tensorfunc { if (input.shape == output.shape) { - output.shape.rangeParallel(output.shape.dim - 1, [&input, &output](int i) + output.shape.rangeParallel(output.shape.dim() - 1, [&input, &output](int i) { int shape_last=output.shape[-1]; const ScalableTag tag; @@ -609,7 +609,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, &B, &C](int idx) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, &B, &C](int idx) { int shape_last=C.shape[-1]; const ScalableTag tag; @@ -652,7 +652,7 @@ namespace deepx::tensorfunc { if (A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, b, &C](int idx) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, b, &C](int idx) { int shape_last=C.shape[-1]; const ScalableTag tag; @@ -695,7 +695,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, &B, &C](int idx) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, &B, &C](int idx) { int shape_last=C.shape[-1]; const ScalableTag tag; @@ -738,7 +738,7 @@ namespace deepx::tensorfunc { if (A.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim - 1, [&A, b, &C](int idx) + C.shape.rangeParallel(C.shape.dim() - 1, [&A, b, &C](int idx) { int shape_last=C.shape[-1]; const ScalableTag tag; @@ -781,7 +781,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && mask.shape == A.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &B, &mask,epsilon](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &B, &mask,epsilon](int idx) { for (int i = 0; i < A.shape[-1]; i++) { @@ -810,7 +810,7 @@ namespace deepx::tensorfunc { if (A.shape == mask.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &mask, &scalar,epsilon](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &mask, &scalar,epsilon](int idx) { for (int i = 0; i < A.shape[-1]; i++) { @@ -839,7 +839,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && mask.shape == A.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &B, &mask](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &B, &mask](int idx) { for (int i = 0; i < A.shape[-1]; i++) { @@ -862,7 +862,7 @@ namespace deepx::tensorfunc { if (A.shape == mask.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &mask, &scalar](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &mask, &scalar](int idx) { for (int i = 0; i < A.shape[-1]; i++) { @@ -885,7 +885,7 @@ namespace deepx::tensorfunc { if (A.shape == B.shape && mask.shape == A.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &B, &mask](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &B, &mask](int idx) { for (int i = 0; i < A.shape[-1]; i++) { @@ -908,7 +908,7 @@ namespace deepx::tensorfunc { if (A.shape == mask.shape) { - A.shape.rangeParallel(A.shape.dim-1, [&A, &mask, &scalar](int idx) + A.shape.rangeParallel(A.shape.dim()-1, [&A, &mask, &scalar](int idx) { for (int i = 0; i < A.shape[-1]; i++) { @@ -931,7 +931,7 @@ namespace deepx::tensorfunc { if (cases.shape == C.shape) { - C.shape.rangeParallel(C.shape.dim-1, [&tensors, &cases, &C](int idx) + C.shape.rangeParallel(C.shape.dim()-1, [&tensors, &cases, &C](int idx) { for (int i = 0; i < C.shape[-1]; i++) { diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp index d7c26ccc..f5324b5e 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/io_miaobyte.hpp @@ -42,7 +42,7 @@ namespace deepx::tensorfunc pair>> load(const std::string &path) { // 加载shape - pair shape_name=loadShape(path); + pair shape_name=Shape::loadShape(path); Shape shape=shape_name.second; std::string tensor_name=shape_name.first; diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp index 8656191b..f12d9301 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp @@ -19,7 +19,7 @@ namespace deepx::tensorfunc } // 计算batch size (将除最后两维外的所有维度展平) int64_t batch_size = 1; - for (int i = 0; i < a.shape.dim - 2; ++i) + for (int i = 0; i < a.shape.dim() - 2; ++i) { batch_size *= a.shape[i]; } @@ -70,7 +70,7 @@ namespace deepx::tensorfunc } // 计算batch size (将除最后两维外的所有维度展平) int64_t batch_size = 1; - for (int i = 0; i < a.shape.dim - 2; ++i) + for (int i = 0; i < a.shape.dim() - 2; ++i) { batch_size *= a.shape[i]; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp index e5dadce8..4e3d26ad 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_miaobyte.hpp @@ -15,8 +15,8 @@ namespace deepx::tensorfunc throw std::invalid_argument("A.shape could matmul with B.shape"); } //TODO - //这里如果对二维矩阵运算,则omp并行不起来,因为C.shape.dim - 2刚好=0 - C.shape.rangeParallel(C.shape.dim - 2, [&](const std::vector &indices) + //这里如果对二维矩阵运算,则omp并行不起来,因为C.shape.dim() - 2刚好=0 + C.shape.rangeParallel(C.shape.dim() - 2, [&](const std::vector &indices) { int aIdx=A.shape.linearat(indices); int bIdx=B.shape.linearat(indices); diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp index b6ebea5c..6c5283e2 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp @@ -25,12 +25,12 @@ namespace deepx::tensorfunc std::vector checkeddims = checkedDims(tensor.shape.shape, dims); std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); const int minshape_1 = Lanes(ScalableTag()); - if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) + if (checkeddims.rbegin()[0] == tensor.shape.dim() - 1 || tensor.shape.dim() > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim(), [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + for (size_t i = 0, j = 0; i < tensor.shape.dim(); ++i) { if (reduced_dims[i] == 0) { @@ -41,15 +41,15 @@ namespace deepx::tensorfunc } int outputIdx = result.shape.linearat(tlv.get(0)); #pragma omp atomic - result.data[outputIdx] += tensor.data[idx_linear]; }, {result.shape.dim}); + result.data[outputIdx] += tensor.data[idx_linear]; }, {result.shape.dim()}); } else { // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim() - 1, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + for (size_t i = 0, j = 0; i < tensor.shape.dim(); ++i) { if (reduced_dims[i] == 0) { @@ -87,7 +87,7 @@ namespace deepx::tensorfunc } #pragma omp atomic result.data[outputIdx] += sum; }, - {result.shape.dim}); + {result.shape.dim()}); } } }; @@ -103,13 +103,13 @@ namespace deepx::tensorfunc const int minshape_1 = Lanes(ScalableTag()); // 如果dims的最后一个元素是tensor.shape.dim-1,则说明reduceprod的数据不连续(不对齐),无法simd(需要不停跳跃) constant(result, T(1)); - if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) + if (reduced_dims.rbegin()[0] == tensor.shape.dim() - 1 || tensor.shape.dim() > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim(), [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { + for (size_t i = 0,j=0; i < tensor.shape.dim() ; ++i) { if (reduced_dims[i]==0) { tlv.get(0)[j++]=indices[i]; }else if (keepdims && (reduced_dims[i] == 1)) { @@ -120,16 +120,16 @@ namespace deepx::tensorfunc int outputIdx=result.shape.linearat(tlv.get(0)); #pragma omp atomic result.data[outputIdx]*=tensor.data[idx_linear]; - }, {result.shape.dim}); + }, {result.shape.dim()}); } else { // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim() - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + for (size_t i = 0, j = 0; i < tensor.shape.dim(); ++i) { if (reduced_dims[i] == 0) { @@ -173,7 +173,7 @@ namespace deepx::tensorfunc } #pragma omp atomic result.data[outputIdx] *= product; - }, {result.shape.dim}); + }, {result.shape.dim()}); } } }; @@ -188,13 +188,13 @@ namespace deepx::tensorfunc const int minshape_1 = Lanes(ScalableTag()); // 如果dims的最后一个元素是tensor.shape.dim-1,则说明reducemax的数据不连续(不对齐),无法simd(需要不停跳跃) constant(result, std::numeric_limits::lowest()); - if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) + if (reduced_dims.rbegin()[0] == tensor.shape.dim() - 1 || tensor.shape.dim() > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim(), [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { + for (size_t i = 0,j=0; i < tensor.shape.dim() ; ++i) { if (reduced_dims[i]==0) { tlv.get(0)[j++]=indices[i]; }else if (keepdims && (reduced_dims[i] == 1)) { @@ -204,16 +204,16 @@ namespace deepx::tensorfunc // 累加求和 int outputIdx=result.shape.linearat(tlv.get(0)); result.data[outputIdx]=std::max(result.data[outputIdx],tensor.data[idx_linear]); - }, {result.shape.dim}); + }, {result.shape.dim()}); } else { // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim() - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + for (size_t i = 0, j = 0; i < tensor.shape.dim(); ++i) { if (reduced_dims[i] == 0) { @@ -256,7 +256,7 @@ namespace deepx::tensorfunc } result.data[outputIdx] = std::max(result.data[outputIdx],maxt); - }, {result.shape.dim}); + }, {result.shape.dim()}); } } }; @@ -271,13 +271,13 @@ namespace deepx::tensorfunc const int minshape_1 = Lanes(ScalableTag()); // 如果dims的最后一个元素是tensor.shape.dim-1,则说明reducemin的数据不连续(不对齐),无法simd(需要不停跳跃) constant(result, std::numeric_limits::max()); - if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) + if (reduced_dims.rbegin()[0] == tensor.shape.dim() - 1 || tensor.shape.dim() > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim(), [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { + for (size_t i = 0,j=0; i < tensor.shape.dim() ; ++i) { if (reduced_dims[i]==0) { tlv.get(0)[j++]=indices[i]; }else if (keepdims && (reduced_dims[i] == 1)) { @@ -288,16 +288,16 @@ namespace deepx::tensorfunc int outputIdx=result.shape.linearat(tlv.get(0)); result.data[outputIdx]=std::min(result.data[outputIdx],tensor.data[idx_linear]); - }, {result.shape.dim}); + }, {result.shape.dim()}); } else { // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, ThreadLocalVectors &tlv) + tensor.shape.rangeParallel(tensor.shape.dim() - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, ThreadLocalVectors &tlv) { // 计算输出索引 - for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + for (size_t i = 0, j = 0; i < tensor.shape.dim(); ++i) { if (reduced_dims[i] == 0) { @@ -339,7 +339,7 @@ namespace deepx::tensorfunc mint = std::min(mint,tensor.data[i + j]); } - result.data[outputIdx] = std::min(result.data[outputIdx],mint); }, {result.shape.dim}); + result.data[outputIdx] = std::min(result.data[outputIdx],mint); }, {result.shape.dim()}); } } }; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index d0d9ae5d..a279b708 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -86,7 +86,7 @@ namespace deepx::tf return 1; } Precision dtype = mem->gettensor(name)->shape.dtype; - tensorfunc::saveShape(mem->gettensor(name)->shape,path); + mem->gettensor(name)->shape.saveShape(path); path+=".data"; switch (dtype) { @@ -156,7 +156,7 @@ namespace deepx::tf { string path = this->args[0].textvalue; - pair shape_name=tensorfunc::loadShape(path); + pair shape_name=Shape::loadShape(path); std::string tensor_name=shape_name.first; Shape shape=shape_name.second; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp index ff483da6..f7eeeba2 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp @@ -187,7 +187,7 @@ namespace deepx::tf } }; - template + template class ReduceMin : public TF { public: diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/1_shape.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/1_shape.cpp index 9f13efba..af393e9b 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/1_shape.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/1_shape.cpp @@ -13,7 +13,7 @@ void test_tensor_shape() { Shape shape2; shape2.fromYaml(yaml); - std::cout<<"shape2: "< tensor=New({2, 3}); constant(tensor,1); print(tensor); - save(tensor,"tensor"); + tensor.save("tensor"); Tensor tensor2=New({2, 3}); constant(tensor2,2); print(tensor2); - save(tensor2,"tensor2"); + tensor2.save("tensor2"); } void test_arange() { diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp index dc2ef698..71c8e395 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp @@ -22,9 +22,9 @@ void test_sum() { omp_set_num_threads(1); - std::vector shape={2, 3, 4}; + std::vector shape={3, 4,5}; Tensor tensor= New(shape); - constant(tensor,float(1)); + arange(tensor,float(0),float(1)); print(tensor,"%.0f"); cout<<""<> result = combination(3); diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp index 74fd575b..538eba0c 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp @@ -37,7 +37,7 @@ void test_concat(){ std::cout<<"================"<shape.dim;i++){ + for (int i=0;ishape.dim();i++){ Shape shape=concatShape(tensors,i); Tensor result=New(shape.shape); concat(tensors,i,result); diff --git a/front/py/deepx/transformer/models/llama/modeling_llama.py b/front/py/deepx/transformer/models/llama/modeling_llama.py index c8d9c403..c3b07fd3 100644 --- a/front/py/deepx/transformer/models/llama/modeling_llama.py +++ b/front/py/deepx/transformer/models/llama/modeling_llama.py @@ -1,6 +1,6 @@ -from deepx.nn.modules import Module -from deepx import Tensor,ones,rsqrt - +from deepx.nn.modules import Module,Linear +from deepx import Tensor,ones,rsqrt,concat +from deepx.transformer.modeling_rope_utils import ROPE_INIT_FUNCTIONS # RMSNorm # copy from https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py # 数学公式 @@ -21,58 +21,82 @@ def forward(self, hidden_states:Tensor): def extra_repr(self): return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" -# -# class LlamaRotaryEmbedding(Module): -# def __init__(self,rope_type:str="default",max_seq_len:int=1024,device=None): -# super().__init__() -# self.max_seq_len_cached = config.max_position_embeddings -# self.original_max_seq_len = config.max_position_embeddings -# -# self.config = config -# self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] -# -# inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device) -# self.register_buffer("inv_freq", inv_freq, persistent=False) -# self.original_inv_freq = self.inv_freq -# -# def _dynamic_frequency_update(self, position_ids, device): -# """ -# dynamic RoPE layers should recompute `inv_freq` in the following situations: -# 1 - growing beyond the cached sequence length (allow scaling) -# 2 - the current sequence length is in the original scale (avoid losing precision with small sequences) -# """ -# seq_len = torch.max(position_ids) + 1 -# if seq_len > self.max_seq_len_cached: # growth -# inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, seq_len=seq_len) -# self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation -# self.max_seq_len_cached = seq_len -# -# if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len: # reset -# # This .to() is needed if the model has been moved to a device after being initialized (because -# # the buffer is automatically moved, but not the original copy) -# self.original_inv_freq = self.original_inv_freq.to(device) -# self.register_buffer("inv_freq", self.original_inv_freq, persistent=False) -# self.max_seq_len_cached = self.original_max_seq_len -# -# @torch.no_grad() -# def forward(self, x, position_ids): -# if "dynamic" in self.rope_type: -# self._dynamic_frequency_update(position_ids, device=x.device) -# -# # Core RoPE block -# inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) -# position_ids_expanded = position_ids[:, None, :].float() -# # Force float32 (see https://github.com/huggingface/transformers/pull/29285) -# device_type = x.device.type -# device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" -# with torch.autocast(device_type=device_type, enabled=False): -# freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) -# emb = torch.cat((freqs, freqs), dim=-1) -# cos = emb.cos() -# sin = emb.sin() -# -# # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention -# cos = cos * self.attention_scaling -# sin = sin * self.attention_scaling -# -# return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) + +class LlamaRotaryEmbedding(Module): + def __init__(self,rope_type:str="default",max_seq_len:int=1024,device=None): + super().__init__() + # 最大序列长度 + self.max_seq_len_cached = max_seq_len + # 原始最大序列长度 + self.original_max_seq_len = max_seq_len + # 旋转类型 + self.rope_type=rope_type + # 旋转初始化函数 + self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] + # 旋转初始化函数 + inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device) + #TODO + # 注册缓存 + self.register_buffer("inv_freq", inv_freq, persistent=False) + # 原始旋转频率 + self.original_inv_freq = self.inv_freq + + # def _dynamic_frequency_update(self, position_ids, device): + # """ + # dynamic RoPE layers should recompute `inv_freq` in the following situations: + # 1 - growing beyond the cached sequence length (allow scaling) + # 2 - the current sequence length is in the original scale (avoid losing precision with small sequences) + # """ + # seq_len = torch.max(position_ids) + 1 + # if seq_len > self.max_seq_len_cached: # growth + # inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, seq_len=seq_len) + # self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation + # self.max_seq_len_cached = seq_len + + # if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len: # reset + # # This .to() is needed if the model has been moved to a device after being initialized (because + # # the buffer is automatically moved, but not the original copy) + # self.original_inv_freq = self.original_inv_freq.to(device) + # self.register_buffer("inv_freq", self.original_inv_freq, persistent=False) + # self.max_seq_len_cached = self.original_max_seq_len + + def forward(self, x, position_ids): + # TODO + # if "dynamic" in self.rope_type: + # self._dynamic_frequency_update(position_ids, device=x.device) + + # Core RoPE block + inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) + position_ids_expanded = position_ids[:, None, :].float() + + freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) + emb = concat((freqs, freqs), dim=-1) + cos = emb.cos() + sin = emb.sin() + + # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention + cos = cos * self.attention_scaling + sin = sin * self.attention_scaling + + return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) + +class LlamaMLP(Module): + def __init__(self, config): + super().__init__() + self.config = config + # 输入层大小 + self.hidden_size = config.hidden_size + # 中间层大小 + self.intermediate_size = config.intermediate_size + #门控投影层 + self.gate_proj = Linear(self.hidden_size, self.intermediate_size, bias=config.mlp_bias) + #上投影层 + self.up_proj = Linear(self.hidden_size, self.intermediate_size, bias=config.mlp_bias) + #下投影层 + self.down_proj = Linear(self.intermediate_size, self.hidden_size, bias=config.mlp_bias) + #激活函数 + self.act_fn = ACT2FN[config.hidden_act] + + def forward(self, x): + down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + return down_proj \ No newline at end of file diff --git a/front/py/examples/2_ir/5_reduce_sum.py b/front/py/examples/2_ir/5_reduce_sum.py index a6f0eb63..6932f4b2 100644 --- a/front/py/examples/2_ir/5_reduce_sum.py +++ b/front/py/examples/2_ir/5_reduce_sum.py @@ -27,7 +27,7 @@ # p=prod(t,dim=(1,),out="p") # p.print() -t1=ones((4,5,6),name="t1") -t1.print() -t2=sum(t1,dim=(0,1),out='t2') -t2.print() +# t1=ones((4,5,6),name="t1") +# t1.print() +# t2=sum(t1,dim=(0,1),out='t2') +# t2.print()