diff --git a/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp b/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp index 11c4b2b5..c0eb4306 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp @@ -92,19 +92,19 @@ namespace deepx::tensorfunc template struct repeat_interleaveDispatcher { - static void repeat_interleave(const Tensor &A, const int repeats, Tensor &B) = delete; - static void repeat_interleave(const Tensor &A, const Tensor &repeats, Tensor &B) = delete; + static void repeat_interleave(const Tensor &A, const int repeats,const int dim, Tensor &B) = delete; + // static void repeat_interleave(const Tensor &A, const Tensor &repeats, Tensor &B) = delete; }; template - void repeat_interleave(const Tensor &A, const int repeats, Tensor &B) + void repeat_interleave(const Tensor &A, const int repeats,const int dim, Tensor &B) { - repeat_interleaveDispatcher::repeat_interleave(A, repeats, B); - } - template - void repeat_interleave(const Tensor &A, const Tensor &repeats, Tensor &B) - { - repeat_interleaveDispatcher::repeat_interleave(A, repeats, B); + repeat_interleaveDispatcher::repeat_interleave(A, repeats,dim, B); } + // template + // void repeat_interleave(const Tensor &A, const Tensor &repeats, Tensor &B) + // { + // repeat_interleaveDispatcher::repeat_interleave(A, repeats, B); + // } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh index d3845ee4..5a883809 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh @@ -81,5 +81,13 @@ namespace deepx::tensorfunc const int *repeats, T *output, const int *outputStrides, const int outputlen, const int dim); + + // repeat_interleave + template + __global__ void repeat_interleave_kernel( + const T *input, const int *inputStrides, + const int *repeats, + T *output, const int *outputStrides, const int outputlen, + const int dim); }; #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_CUH \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp index 818e4bab..14f4a1ce 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -152,5 +152,6 @@ namespace deepx::tensorfunc B.data, B.shape.strides.data(),B.shape.size, B.shape.dim()); } }; + } #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_HPP \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp index fca52981..678810b8 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp @@ -411,7 +411,7 @@ namespace deepx::tf tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); @@ -479,7 +479,7 @@ namespace deepx::tf tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); @@ -548,7 +548,7 @@ namespace deepx::tf tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); @@ -616,7 +616,7 @@ namespace deepx::tf tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); @@ -685,7 +685,7 @@ namespace deepx::tf tensorfunc::rsubscalar(this->getvar(1, mem), *mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::rsubscalar(this->getvar(1, mem), *mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::rsubscalar(this->getvar(1, mem), *mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::rsubscalar(this->getvar(1, mem), *mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->returns[0].textvalue)); @@ -754,7 +754,7 @@ namespace deepx::tf tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); @@ -822,7 +822,7 @@ namespace deepx::tf tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); @@ -891,7 +891,7 @@ namespace deepx::tf tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::div(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); @@ -959,7 +959,7 @@ namespace deepx::tf tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::divscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); @@ -1027,7 +1027,7 @@ namespace deepx::tf tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int64: - tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); break; case Precision::Int32: tensorfunc::rdivscalar(this->getvar(0, mem), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); diff --git a/front/py/deepx/tensor/changeshape.py b/front/py/deepx/tensor/changeshape.py index 39b415ad..6b0b309e 100644 --- a/front/py/deepx/tensor/changeshape.py +++ b/front/py/deepx/tensor/changeshape.py @@ -55,9 +55,9 @@ def broadcast_to(self,shape:tuple[int,...],out:Union[Tensor,str]='')->Tensor: return result @tensor_method -def indexselect(self,index:Tensor,axis:int=0,out:Union[Tensor,str]='')->Tensor: +def indexselect(self,index:Tensor,gatheraxis:int=0,out:Union[Tensor,str]='')->Tensor: assert isinstance(index,Tensor) - gatheraxis=axis%self.ndim + gatheraxis=gatheraxis%self.ndim from deepx.nn.functional import indexselect as indexselect_func result=indexselect_func(self,index,gatheraxis,out) return result diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index d7dbd783..ac63d9ac 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -124,6 +124,8 @@ def __mul__(self, other:Union[Number,'Tensor']): return self.mul(other) def __rmul__(self, other:Union[Number,'Tensor']): return self.mul(other) + def __neg__(self): + return self.mul(-1.0) def __truediv__(self, other:Union[Number,'Tensor']): return self.div(other) def __rtruediv__(self, other:Union[Number,'Tensor']): diff --git a/front/py/deepx/transformer/models/llama/__init__.py b/front/py/deepx/transformer/models/llama/__init__.py index d77def35..96a73bfc 100644 --- a/front/py/deepx/transformer/models/llama/__init__.py +++ b/front/py/deepx/transformer/models/llama/__init__.py @@ -1,4 +1,7 @@ from .embedding import * +from .attention import * + __all__ = [ - "LlamaRotaryEmbedding" + "LlamaRotaryEmbedding", + "rotate_half" ] \ No newline at end of file diff --git a/front/py/deepx/transformer/models/llama/attention.py b/front/py/deepx/transformer/models/llama/attention.py index 325d6a27..eb37f731 100644 --- a/front/py/deepx/transformer/models/llama/attention.py +++ b/front/py/deepx/transformer/models/llama/attention.py @@ -7,9 +7,9 @@ def rotate_half(x:Tensor): index_front=arange(0,x.shape[-1]//2,dtype="int32") index_back=arange(x.shape[-1]//2,x.shape[-1],dtype="int32") - x1 = x.index_select(dim=-1,index=index_front) - x2 = x.index_select(dim=-1,index=index_back) - return concat((-x2, x1), dim=-1) + x1 = x.indexselect(gatheraxis=-1,index=index_front) + x2 = x.indexselect(gatheraxis=-1,index=index_back) + return concat((-x2, x1,), dim=-1) def apply_rotary_pos_emb(q:Tensor, k:Tensor, cos:Tensor, sin:Tensor, unsqueeze_dim:int=1): cos = cos.unsqueeze(unsqueeze_dim) diff --git a/front/py/examples/4_transformer/llama/llama_ b/front/py/examples/4_transformer/llama/llama_ deleted file mode 100644 index e69de29b..00000000 diff --git a/front/py/examples/4_transformer/llama/llama_attention.py b/front/py/examples/4_transformer/llama/llama_attention.py new file mode 100644 index 00000000..c029a74f --- /dev/null +++ b/front/py/examples/4_transformer/llama/llama_attention.py @@ -0,0 +1,11 @@ +from token_text import dir + +############-------DEEPX-------################ +from deepx import load +from deepx.transformer.models.llama import rotate_half + +input=load(dir+'input') +input.print() +r=rotate_half(input) +r.print() + diff --git a/front/py/examples/4_transformer/llama/llama_attention_torch.py b/front/py/examples/4_transformer/llama/llama_attention_torch.py new file mode 100644 index 00000000..3cb8aca8 --- /dev/null +++ b/front/py/examples/4_transformer/llama/llama_attention_torch.py @@ -0,0 +1,8 @@ +from token_text import torch_input +print() +############-------TORCH-------################ +from transformers.models.llama.modeling_llama import rotate_half + +print(torch_input) +r=rotate_half(torch_input) +print(r) diff --git a/front/py/examples/4_transformer/llama/llama_rope.py b/front/py/examples/4_transformer/llama/llama_rope.py index e0f05986..4f40b390 100644 --- a/front/py/examples/4_transformer/llama/llama_rope.py +++ b/front/py/examples/4_transformer/llama/llama_rope.py @@ -1,4 +1,4 @@ -from llama_rope_torch import dir,config +from token_text import dir,config ############-------DEEPX-------################ from deepx.nn.modules import Embedding,Module diff --git a/front/py/examples/4_transformer/llama/llama_rope_torch.py b/front/py/examples/4_transformer/llama/llama_rope_torch.py index 3f894e3b..dc0369fc 100644 --- a/front/py/examples/4_transformer/llama/llama_rope_torch.py +++ b/front/py/examples/4_transformer/llama/llama_rope_torch.py @@ -1,45 +1,6 @@ -hidden_size = 8 -eps = 1e-6 -dir = '/home/lipeng/model/deepxmodel/llama/' -model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B" -print() - -from transformers import AutoTokenizer, AutoConfig - - -def init_tokenizer(model_path): - tokenizer = AutoTokenizer.from_pretrained(model_path) - tokenizer.pad_token = tokenizer.eos_token - return tokenizer - - -tokenizer = init_tokenizer(model_path) -config = AutoConfig.from_pretrained(model_path) - - -def tokenize_text(text, tokenizer): - tokens = tokenizer(text, return_tensors="pt").input_ids - import torch - # 处理超出词汇表范围的token - if torch.any(tokens >= tokenizer.vocab_size): - # 获取UNK token ID,如果没有则使用0 - unk_token_id = tokenizer.unk_token_id if hasattr(tokenizer, - 'unk_token_id') and tokenizer.unk_token_id is not None else 0 - # 替换所有超出范围的token为UNK - tokens = torch.where(tokens < tokenizer.vocab_size, tokens, torch.tensor(unk_token_id, device=tokens.device)) - return tokens - - ############-------PyTorch-------################ import torch - -# 创建输入 -text = "这是一个测试文本,用于演示嵌入层的使用。" -torch_input = tokenize_text(text, tokenizer) -from deepxutil.torch import save_torch - -save_torch(torch_input, dir + 'input') - +from token_text import torch_input,config # 创建网络 @@ -63,7 +24,6 @@ def forward(self, x): if __name__ == "__main__": torch_net = NetTorch(config) - save_torch(torch_net.embed_tokens.weight, dir + 'weight') # 前向传播 torch_output = torch_net(torch_input) torch_sin, torch_cos = torch_output diff --git a/front/py/examples/4_transformer/llama/token_text.py b/front/py/examples/4_transformer/llama/token_text.py new file mode 100644 index 00000000..84a4a59d --- /dev/null +++ b/front/py/examples/4_transformer/llama/token_text.py @@ -0,0 +1,42 @@ +hidden_size = 8 +eps = 1e-6 +dir = '/home/lipeng/model/deepxmodel/llama/' +model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B" +print() + +from transformers import AutoTokenizer, AutoConfig + + +def init_tokenizer(model_path): + tokenizer = AutoTokenizer.from_pretrained(model_path) + tokenizer.pad_token = tokenizer.eos_token + return tokenizer + + +tokenizer = init_tokenizer(model_path) +config = AutoConfig.from_pretrained(model_path) + + +def tokenize_text(text, tokenizer): + tokens = tokenizer(text, return_tensors="pt").input_ids + import torch + # 处理超出词汇表范围的token + if torch.any(tokens >= tokenizer.vocab_size): + # 获取UNK token ID,如果没有则使用0 + unk_token_id = tokenizer.unk_token_id if hasattr(tokenizer, + 'unk_token_id') and tokenizer.unk_token_id is not None else 0 + # 替换所有超出范围的token为UNK + tokens = torch.where(tokens < tokenizer.vocab_size, tokens, torch.tensor(unk_token_id, device=tokens.device)) + return tokens + + +############-------PyTorch-------################ +import torch + +# 创建输入 +text = "这是一个测试文本,用于演示嵌入层的使用。" +torch_input = tokenize_text(text, tokenizer) +from deepxutil.torch import save_torch + +save_torch(torch_input, dir + 'input') +