18 changes: 9 additions & 9 deletions excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp
@@ -92,19 +92,19 @@ namespace deepx::tensorfunc
     template <typename Author, typename T>
     struct repeat_interleaveDispatcher
     {
-        static void repeat_interleave(const Tensor<T> &A, const int repeats, Tensor<T> &B) = delete;
-        static void repeat_interleave(const Tensor<T> &A, const Tensor<T> &repeats, Tensor<T> &B) = delete;
+        static void repeat_interleave(const Tensor<T> &A, const int repeats,const int dim, Tensor<T> &B) = delete;
+        // static void repeat_interleave(const Tensor<T> &A, const Tensor<T> &repeats, Tensor<T> &B) = delete;
     };
     template <typename Author, typename T>
-    void repeat_interleave(const Tensor<T> &A, const int repeats, Tensor<T> &B)
+    void repeat_interleave(const Tensor<T> &A, const int repeats,const int dim, Tensor<T> &B)
     {
-        repeat_interleaveDispatcher<Author, T>::repeat_interleave(A, repeats, B);
+        repeat_interleaveDispatcher<Author, T>::repeat_interleave(A, repeats,dim, B);
     }
-    template <typename Author, typename T>
-    void repeat_interleave(const Tensor<T> &A, const Tensor<T> &repeats, Tensor<T> &B)
-    {
-        repeat_interleaveDispatcher<Author, T>::repeat_interleave(A, repeats, B);
-    }
+    // template <typename Author, typename T>
+    // void repeat_interleave(const Tensor<T> &A, const Tensor<T> &repeats, Tensor<T> &B)
+    // {
+    //     repeat_interleaveDispatcher<Author, T>::repeat_interleave(A, repeats, B);
+    // }
 
 
 
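Note on this hunk: the scalar-repeats overload now takes an explicit dim, both in the deleted dispatcher member and in the free function, which forwards dim through to the dispatcher; the tensor-valued repeats overload is parked in comments rather than removed. The "= delete" member is the usual trick for forcing a compile error unless an (Author, T) specialization of the dispatcher exists. As a reference for the intended semantics of the new (A, repeats, dim, B) signature, here is a plain-Python sketch mirroring torch.repeat_interleave(x, repeats, dim); nested lists stand in for tensors, and only dims 0 and 1 are covered:

def repeat_interleave_ref(rows, repeats, dim=0):
    # dim 0: repeat each row back to back
    if dim == 0:
        return [row for row in rows for _ in range(repeats)]
    # dim 1: repeat each element within a row
    if dim == 1:
        return [[v for v in row for _ in range(repeats)] for row in rows]
    raise NotImplementedError("sketch covers dims 0 and 1 only")

print(repeat_interleave_ref([[1, 2], [3, 4]], 2, dim=0))
# [[1, 2], [1, 2], [3, 4], [3, 4]]
print(repeat_interleave_ref([[1, 2], [3, 4]], 2, dim=1))
# [[1, 1, 2, 2], [3, 3, 4, 4]]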
excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh
@@ -81,5 +81,13 @@ namespace deepx::tensorfunc
             const int *repeats,
             T *output, const int *outputStrides, const int outputlen,
             const int dim);
+
+        // repeat_interleave
+        template <int DIM, typename T>
+        __global__ void repeat_interleave_kernel(
+            const T *input, const int *inputStrides,
+            const int *repeats,
+            T *output, const int *outputStrides, const int outputlen,
+            const int dim);
 };
 #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_CUH
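The new declaration mirrors the one above it (the repeats pointer allows per-index repeat counts) while also carrying a compile-time DIM. The kernel body is not part of this diff, but the index arithmetic such strided kernels typically perform is easy to state: decode the flat output index with outputStrides, divide the coordinate on dim by the repeat count, and re-encode with inputStrides. A Python sketch of that mapping for the scalar-repeats case, under exactly those assumptions:

def map_output_to_input(out_idx, out_strides, in_strides, repeats, dim):
    coords = []
    for s in out_strides:            # decode flat index into coordinates
        coords.append(out_idx // s)
        out_idx %= s
    coords[dim] //= repeats          # undo the interleaved repetition
    return sum(c * s for c, s in zip(coords, in_strides))

# A 2x2 input repeated twice along dim 0 yields a 4x2 output; each output
# element should read input flat indices 0,1,0,1,2,3,2,3:
print([map_output_to_input(i, (2, 1), (2, 1), 2, 0) for i in range(8)])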
excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp
@@ -152,5 +152,6 @@ namespace deepx::tensorfunc
                 B.data, B.shape.strides.data(),B.shape.size, B.shape.dim());
         }
     };
+
 }
 #endif // DEEPX_TENSORFUNC_CHANGESHAPE_MIAOBYTE_HPP
20 changes: 10 additions & 10 deletions excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp
@@ -411,7 +411,7 @@ namespace deepx::tf
                 tensorfunc::add<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<nv_bfloat16>(this->args[1].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::add<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::add<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::add<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -479,7 +479,7 @@ namespace deepx::tf
                 tensorfunc::addscalar<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), this->getvar<nv_bfloat16>(1, mem), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::addscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::addscalar<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), this->getvar<int64_t>(1, mem), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::addscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -548,7 +548,7 @@ namespace deepx::tf
                 tensorfunc::sub<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<nv_bfloat16>(this->args[1].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::sub<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::sub<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::sub<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -616,7 +616,7 @@ namespace deepx::tf
                 tensorfunc::subscalar<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), this->getvar<nv_bfloat16>(1, mem), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::subscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::subscalar<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), this->getvar<int64_t>(1, mem), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::subscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -685,7 +685,7 @@ namespace deepx::tf
                 tensorfunc::rsubscalar<Author, nv_bfloat16>(this->getvar<nv_bfloat16>(1, mem), *mem->gettensor<nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::rsubscalar<Author, int32_t>(this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::rsubscalar<Author, int64_t>(this->getvar<int64_t>(1, mem), *mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::rsubscalar<Author, int32_t>(this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -754,7 +754,7 @@ namespace deepx::tf
                 tensorfunc::mul<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<nv_bfloat16>(this->args[1].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::mul<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::mul<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::mul<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -822,7 +822,7 @@ namespace deepx::tf
                 tensorfunc::mulscalar<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), this->getvar<nv_bfloat16>(1, mem), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::mulscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::mulscalar<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), this->getvar<int64_t>(1, mem), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::mulscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -891,7 +891,7 @@ namespace deepx::tf
                 tensorfunc::div<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<nv_bfloat16>(this->args[1].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::div<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::div<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::div<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -959,7 +959,7 @@ namespace deepx::tf
                 tensorfunc::divscalar<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), this->getvar<nv_bfloat16>(1, mem), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::divscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::divscalar<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), this->getvar<int64_t>(1, mem), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::divscalar<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), this->getvar<int32_t>(1, mem), *mem->gettensor<int32_t>(this->returns[0].textvalue));
@@ -1027,7 +1027,7 @@ namespace deepx::tf
                 tensorfunc::rdivscalar<Author, nv_bfloat16>(this->getvar<nv_bfloat16>(0, mem), *mem->gettensor<nv_bfloat16>(this->args[1].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
                 break;
             case Precision::Int64:
-                tensorfunc::rdivscalar<Author, int32_t>(this->getvar<int32_t>(0, mem), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
+                tensorfunc::rdivscalar<Author, int64_t>(this->getvar<int64_t>(0, mem), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
                 break;
             case Precision::Int32:
                 tensorfunc::rdivscalar<Author, int32_t>(this->getvar<int32_t>(0, mem), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
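All ten hunks above fix the same copy-paste slip: every Precision::Int64 branch instantiated the int32_t kernel and fetched the tensors as int32_t, silently truncating 64-bit values. A small numpy illustration of the failure mode (numpy is used here only for demonstration):

import numpy as np

a = np.array([2**40], dtype=np.int64)
print(a + a)             # [2199023255552], correct 64-bit arithmetic
b = a.astype(np.int32)   # what the old dispatch effectively did
print(b + b)             # [0]: the low 32 bits of 2**40 are all zero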
4 changes: 2 additions & 2 deletions front/py/deepx/tensor/changeshape.py
@@ -55,9 +55,9 @@ def broadcast_to(self,shape:tuple[int,...],out:Union[Tensor,str]='')->Tensor:
     return result
 
 @tensor_method
-def indexselect(self,index:Tensor,axis:int=0,out:Union[Tensor,str]='')->Tensor:
+def indexselect(self,index:Tensor,gatheraxis:int=0,out:Union[Tensor,str]='')->Tensor:
     assert isinstance(index,Tensor)
-    gatheraxis=axis%self.ndim
+    gatheraxis=gatheraxis%self.ndim
     from deepx.nn.functional import indexselect as indexselect_func
     result=indexselect_func(self,index,gatheraxis,out)
     return result
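The rename from axis to gatheraxis matches the keyword used at the call sites added later in this PR (x.indexselect(gatheraxis=-1, ...)); before this change a gatheraxis= keyword call would have raised TypeError. The modulo line still normalizes negative axes, for example:

# How `gatheraxis % self.ndim` normalizes negative axes, here for ndim=3:
ndim = 3
print(-1 % ndim)   # 2: the last axis
print(-3 % ndim)   # 0: the first axis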
2 changes: 2 additions & 0 deletions front/py/deepx/tensor/tensor.py
@@ -124,6 +124,8 @@ def __mul__(self, other:Union[Number,'Tensor']):
         return self.mul(other)
     def __rmul__(self, other:Union[Number,'Tensor']):
         return self.mul(other)
+    def __neg__(self):
+        return self.mul(-1.0)
     def __truediv__(self, other:Union[Number,'Tensor']):
         return self.div(other)
     def __rtruediv__(self, other:Union[Number,'Tensor']):
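With __neg__ defined, unary minus on a Tensor works and simply multiplies by -1.0; the -x2 in rotate_half below depends on it. A minimal usage sketch; the import path for arange is an assumption (the diff only shows it used unqualified in attention.py):

from deepx import arange   # assumed import location

x = arange(0, 4, dtype="float32")   # dtype keyword follows attention.py usage
y = -x                              # now resolves to x.mul(-1.0)
y.print()                           # expected: 0, -1, -2, -3 (as floats)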
5 changes: 4 additions & 1 deletion front/py/deepx/transformer/models/llama/__init__.py
@@ -1,4 +1,7 @@
 from .embedding import *
+from .attention import *
+
 __all__ = [
-    "LlamaRotaryEmbedding"
+    "LlamaRotaryEmbedding",
+    "rotate_half"
 ]
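The comma added after "LlamaRotaryEmbedding" is load-bearing: adjacent string literals in Python concatenate silently, so appending "rotate_half" without it would have produced a single bogus export name:

print(["LlamaRotaryEmbedding" "rotate_half"])    # ['LlamaRotaryEmbeddingrotate_half']
print(["LlamaRotaryEmbedding", "rotate_half"])   # ['LlamaRotaryEmbedding', 'rotate_half']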
6 changes: 3 additions & 3 deletions front/py/deepx/transformer/models/llama/attention.py
@@ -7,9 +7,9 @@
 def rotate_half(x:Tensor):
     index_front=arange(0,x.shape[-1]//2,dtype="int32")
     index_back=arange(x.shape[-1]//2,x.shape[-1],dtype="int32")
-    x1 = x.index_select(dim=-1,index=index_front)
-    x2 = x.index_select(dim=-1,index=index_back)
-    return concat((-x2, x1), dim=-1)
+    x1 = x.indexselect(gatheraxis=-1,index=index_front)
+    x2 = x.indexselect(gatheraxis=-1,index=index_back)
+    return concat((-x2, x1,), dim=-1)
 
 def apply_rotary_pos_emb(q:Tensor, k:Tensor, cos:Tensor, sin:Tensor, unsqueeze_dim:int=1):
     cos = cos.unsqueeze(unsqueeze_dim)
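For reference, rotate_half splits the last dimension into halves (x1, x2) and returns (-x2, x1), the standard RoPE helper that the Hugging Face implementation imported by the torch-side example also computes. On a flat vector:

def rotate_half_ref(v):
    # [a, b, c, d] -> [-c, -d, a, b]
    half = len(v) // 2
    return [-t for t in v[half:]] + list(v[:half])

print(rotate_half_ref([1, 2, 3, 4]))   # [-3, -4, 1, 2]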
Empty file.
11 changes: 11 additions & 0 deletions front/py/examples/4_transformer/llama/llama_attention.py
@@ -0,0 +1,11 @@
+from token_text import dir
+
+############-------DEEPX-------################
+from deepx import load
+from deepx.transformer.models.llama import rotate_half
+
+input=load(dir+'input')
+input.print()
+r=rotate_half(input)
+r.print()
+
front/py/examples/4_transformer/llama/llama_attention_torch.py
@@ -0,0 +1,8 @@
+from token_text import torch_input
+print()
+############-------TORCH-------################
+from transformers.models.llama.modeling_llama import rotate_half
+
+print(torch_input)
+r=rotate_half(torch_input)
+print(r)
2 changes: 1 addition & 1 deletion front/py/examples/4_transformer/llama/llama_rope.py
@@ -1,4 +1,4 @@
-from llama_rope_torch import dir,config
+from token_text import dir,config
 
 ############-------DEEPX-------################
 from deepx.nn.modules import Embedding,Module
42 changes: 1 addition & 41 deletions front/py/examples/4_transformer/llama/llama_rope_torch.py
@@ -1,45 +1,6 @@
-hidden_size = 8
-eps = 1e-6
-dir = '/home/lipeng/model/deepxmodel/llama/'
-model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-print()
-
-from transformers import AutoTokenizer, AutoConfig
-
-
-def init_tokenizer(model_path):
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    tokenizer.pad_token = tokenizer.eos_token
-    return tokenizer
-
-
-tokenizer = init_tokenizer(model_path)
-config = AutoConfig.from_pretrained(model_path)
-
-
-def tokenize_text(text, tokenizer):
-    tokens = tokenizer(text, return_tensors="pt").input_ids
-    import torch
-    # Handle tokens that fall outside the vocabulary
-    if torch.any(tokens >= tokenizer.vocab_size):
-        # Get the UNK token id; fall back to 0 if the tokenizer has none
-        unk_token_id = tokenizer.unk_token_id if hasattr(tokenizer,
-            'unk_token_id') and tokenizer.unk_token_id is not None else 0
-        # Replace every out-of-range token with UNK
-        tokens = torch.where(tokens < tokenizer.vocab_size, tokens, torch.tensor(unk_token_id, device=tokens.device))
-    return tokens
-
-
-############-------PyTorch-------################
-import torch
-
-# Create the input
-text = "这是一个测试文本,用于演示嵌入层的使用。"
-torch_input = tokenize_text(text, tokenizer)
-from deepxutil.torch import save_torch
-
-save_torch(torch_input, dir + 'input')
-
+from token_text import torch_input,config
 
 # Create the network
 
@@ -63,7 +24,6 @@ def forward(self, x):
 
 if __name__ == "__main__":
     torch_net = NetTorch(config)
-    save_torch(torch_net.embed_tokens.weight, dir + 'weight')
     # Forward pass
     torch_output = torch_net(torch_input)
     torch_sin, torch_cos = torch_output
42 changes: 42 additions & 0 deletions front/py/examples/4_transformer/llama/token_text.py
@@ -0,0 +1,42 @@
+hidden_size = 8
+eps = 1e-6
+dir = '/home/lipeng/model/deepxmodel/llama/'
+model_path = "/home/lipeng/model/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+print()
+
+from transformers import AutoTokenizer, AutoConfig
+
+
+def init_tokenizer(model_path):
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    tokenizer.pad_token = tokenizer.eos_token
+    return tokenizer
+
+
+tokenizer = init_tokenizer(model_path)
+config = AutoConfig.from_pretrained(model_path)
+
+
+def tokenize_text(text, tokenizer):
+    tokens = tokenizer(text, return_tensors="pt").input_ids
+    import torch
+    # Handle tokens that fall outside the vocabulary
+    if torch.any(tokens >= tokenizer.vocab_size):
+        # Get the UNK token id; fall back to 0 if the tokenizer has none
+        unk_token_id = tokenizer.unk_token_id if hasattr(tokenizer,
+            'unk_token_id') and tokenizer.unk_token_id is not None else 0
+        # Replace every out-of-range token with UNK
+        tokens = torch.where(tokens < tokenizer.vocab_size, tokens, torch.tensor(unk_token_id, device=tokens.device))
+    return tokens
+
+
+############-------PyTorch-------################
+import torch
+
+# Create the input
+text = "这是一个测试文本,用于演示嵌入层的使用。"
+torch_input = tokenize_text(text, tokenizer)
+from deepxutil.torch import save_torch
+
+save_torch(torch_input, dir + 'input')
+
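The out-of-vocabulary clamp in tokenize_text keeps ids below vocab_size and rewrites everything else to the UNK id (or 0) with torch.where; on a toy tensor with vocab_size 10:

import torch

tokens = torch.tensor([[3, 42, 7]])
clamped = torch.where(tokens < 10, tokens, torch.tensor(0))
print(clamped)   # tensor([[3, 0, 7]])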