7 changes: 6 additions & 1 deletion excuter/cpp-common/src/stdutil/fs.cpp
@@ -17,9 +17,14 @@ namespace stdutil

void save(const byte *data, size_t size, const string &path)
{

ofstream ofs(path, ios::binary | ios::out | ios::trunc);
if (!ofs.is_open()) {
throw std::runtime_error("Failed to open file for writing: " + path);
}
ofs.write(reinterpret_cast<const char *>(data), size);
if (!ofs) {
throw std::runtime_error("Failed to write data to file: " + path);
}
ofs.close();
}

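A quick sketch of the hardened contract from the caller's side; the include path and the assumption that stdutil's byte aliases unsigned char are mine, not taken from the repo:

#include <iostream>
#include <stdexcept>
#include <vector>
#include "stdutil/fs.hpp" // hypothetical header path for save()

int main()
{
    std::vector<unsigned char> payload = {0xDE, 0xAD, 0xBE, 0xEF};
    try
    {
        // Both failure modes added in this diff surface as std::runtime_error:
        // a file that cannot be opened, and a write that leaves the stream bad.
        stdutil::save(payload.data(), payload.size(), "/tmp/example.bin");
    }
    catch (const std::runtime_error &e)
    {
        std::cerr << e.what() << std::endl;
        return 1;
    }
    return 0;
}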
6 changes: 6 additions & 0 deletions excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp
@@ -54,6 +54,12 @@ namespace deepx::tf
case Precision::Float32:
reshape<Author, float>(*mem->gettensor<float>(this->args[0].textvalue), shape, *mem->gettensor<float>(this->returns[0].textvalue));
break;
case Precision::Float16:
reshape<Author, half>(*mem->gettensor<half>(this->args[0].textvalue), shape, *mem->gettensor<half>(this->returns[0].textvalue));
break;
case Precision::BFloat16:
reshape<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), shape, *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
break;
case Precision::Int64:
reshape<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), shape, *mem->gettensor<int64_t>(this->returns[0].textvalue));
break;
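Reviewer note: half and nv_bfloat16 are the CUDA toolkit's 16-bit float types, so the new cases compile only where the corresponding headers are in scope (they may already arrive transitively via the tensor headers):

#include <cuda_fp16.h> // defines half
#include <cuda_bf16.h> // defines nv_bfloat16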
72 changes: 58 additions & 14 deletions excuter/op-mem-cuda/src/deepx/tf/io.hpp
@@ -24,24 +24,68 @@ namespace deepx::tf
int run(shared_ptr<MemBase> mem, string &error) override
{
string name = this->args[0].textvalue;
if (mem->existstensor(name))
{
auto t = mem->gettensor(name);
if (this->args.size() == 1)
{
tensorfunc::print<Author, void>(*t);
}
else
{
tensorfunc::print<Author, void>(*t, this->args[1].textvalue);
}
}
else
{
if (!mem->existstensor(name))
{
std::cerr << "print " << name << " not found" << std::endl;
error = "print " + name + " not found";
return 1;
}
string format="";
if (this->args.size() > 1){
format = this->args[1].textvalue;
}

Precision dtype = mem->gettensor(name)->shape.dtype;
switch (dtype)
{
case Precision::Float64:{
auto t = mem->gettensor<double>(name);
tensorfunc::print<Author,double>(*t,format);
break;
}
case Precision::Float32:{
auto t = mem->gettensor<float>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::Float16:{
auto t = mem->gettensor<half>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::BFloat16:{
auto t = mem->gettensor<nv_bfloat16>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::Int64:{
auto t = mem->gettensor<int64_t>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::Int32:{
auto t = mem->gettensor<int32_t>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::Int16:{
auto t = mem->gettensor<int16_t>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::Int8:{
auto t = mem->gettensor<int8_t>(name);
tensorfunc::print<Author>(*t,format);
break;
}
case Precision::Bool:{
auto t = mem->gettensor<bool>(name);
tensorfunc::print<Author,bool>(*t,format);
break;
}
default:
break;
}
return 0;
}

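The nine near-identical cases invite an X-macro over the precision/type pairs. A hypothetical, behavior-equivalent sketch (FOREACH_PRECISION and PRECISION_CASE are illustration names, not repo identifiers):

#define FOREACH_PRECISION(X)     \
    X(Float64, double)           \
    X(Float32, float)            \
    X(Float16, half)             \
    X(BFloat16, nv_bfloat16)     \
    X(Int64, int64_t)            \
    X(Int32, int32_t)            \
    X(Int16, int16_t)            \
    X(Int8, int8_t)              \
    X(Bool, bool)

#define PRECISION_CASE(P, T)                      \
    case Precision::P: {                          \
        auto t = mem->gettensor<T>(name);         \
        tensorfunc::print<Author, T>(*t, format); \
        break;                                    \
    }

switch (dtype)
{
    FOREACH_PRECISION(PRECISION_CASE)
default:
    break;
}
#undef PRECISION_CASE
#undef FOREACH_PRECISION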
5 changes: 3 additions & 2 deletions front/py/deepx/__init__.py
@@ -1,11 +1,12 @@
from .tensor import Tensor,Shape,Number
from deepx.nn.functional import * # import all functional ops
from deepx.nn.functional import __all__ as _func_all # functional's export list

from deepx.utils import __all__ as _utils_all # utils' export list
__all__ = [
#tensor
'Tensor','Shape','Number',
*_func_all
*_func_all,
*_utils_all,
]

# 为了支持 import deepx as dx 的用法
5 changes: 3 additions & 2 deletions front/py/deepx/nn/__init__.py
@@ -1,5 +1,6 @@
from .deepxir import *

from .modules import __all__ as _modules_all
__all__ = [
"DeepxIR","DeepxIRResp"
"DeepxIR","DeepxIRResp",
*_modules_all
]
56 changes: 24 additions & 32 deletions front/py/deepx/transformer/modeling_rope_utils.py
@@ -1,54 +1,46 @@
from typing import Tuple
from typing import Tuple,Optional
import math
from deepx.utils import Config
from deepx import arange,Tensor,where

def _compute_default_rope_parameters(config:dict={
"rope_theta":10000.0,
"head_dim":0,
"partial_rotary_factor":1.0,
}) -> Tuple[Tensor, float]:
partial_rotary_factor = config.get("partial_rotary_factor", 1.0)
dim = config["head_dim"]* partial_rotary_factor
# compute inverse frequencies
base=config["rope_theta"]
inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype='float64')/ dim))
return inv_freq, 1.0
def _compute_default_rope_parameters(config:Config=None,seq_len: Optional[int] = None, **rope_kwargs) -> Tuple[Tensor, float]:
if len(rope_kwargs) > 0:
base = rope_kwargs["base"]
dim = rope_kwargs["dim"]
elif config is not None:
base = config.rope_theta
partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
dim = int(head_dim * partial_rotary_factor)

attention_factor = 1.0 # Unused in this type of RoPE

# Compute the inverse frequencies
inv_freq = 1.0 / (base ** (arange(0, dim, 2, dtype="int64").float() / dim))
return inv_freq, attention_factor

def _compute_llama3_parameters(config:dict={
"rope_theta":10000.0,
"head_dim":0,
"partial_rotary_factor":1.0,
"factor":8,
"low_freq_factor":1,
"high_freq_factor":4,
"old_context_len":8192,
"seq_len":None
}) -> Tuple[Tensor, float]:
def _compute_llama3_parameters(config:Config,seq_len: Optional[int] = None,**rope_kwargs) -> Tuple[Tensor, float]:
# Gets the default RoPE parameters
inv_freq, attention_factor = _compute_default_rope_parameters(config)
inv_freq, attention_factor = _compute_default_rope_parameters(config, seq_len, **rope_kwargs)

factor = config["rope_scaling"]["factor"] # `8` in the original implementation
low_freq_factor = config["rope_scaling"]["low_freq_factor"] # `1` in the original implementation
high_freq_factor = config["rope_scaling"]["high_freq_factor"] # `4` in the original implementation
old_context_len = config["rope_scaling"]["original_max_position_embeddings"] # `8192` in the original implementation
factor = config.rope_scaling["factor"] # `8` in the original implementation
low_freq_factor = config.rope_scaling["low_freq_factor"] # `1` in the original implementation
high_freq_factor = config.rope_scaling["high_freq_factor"] # `4` in the original implementation
old_context_len = config.rope_scaling["original_max_position_embeddings"] # `8192` in the original implementation

low_freq_wavelen = old_context_len / low_freq_factor
high_freq_wavelen = old_context_len / high_freq_factor

wavelen = 2 * math.pi / inv_freq
wavelen.print()
# wavelen < high_freq_wavelen: do nothing
# wavelen > low_freq_wavelen: divide by factor
inv_freq_llama = where(wavelen > low_freq_wavelen, inv_freq / factor, inv_freq)
# otherwise: interpolate between the two, using a smooth factor
smooth_factor = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
smoothed_inv_freq = (1 - smooth_factor) * inv_freq_llama / factor + smooth_factor * inv_freq_llama
is_medium_freq = ~(wavelen < high_freq_wavelen) * ~(wavelen > low_freq_wavelen)
is_medium_freq.print()
# TODO: after this step executes, it causes "an illegal memory access was encountered"
inv_freq_llama = where(is_medium_freq, smoothed_inv_freq, inv_freq_llama)
is_medium_freq.print()
inv_freq_llama.print()

return inv_freq_llama, attention_factor

ROPE_INIT_FUNCTIONS = {
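As a reading aid (not part of the diff): with s = factor, l = low_freq_factor, h = high_freq_factor, and L = original_max_position_embeddings, the llama3 branch implements the standard wavelength-gated scaling

$$
\lambda_i = \frac{2\pi}{f_i}, \qquad
f_i' =
\begin{cases}
f_i, & \lambda_i < L/h \quad \text{(high frequency: keep)} \\
f_i/s, & \lambda_i > L/l \quad \text{(low frequency: divide by factor)} \\
(1-\gamma_i)\,f_i/s + \gamma_i\,f_i, & \text{otherwise}
\end{cases}
\qquad
\gamma_i = \frac{L/\lambda_i - l}{h - l}
$$

which is exactly the wavelen / smooth_factor arithmetic above; the leftover print() calls and the TODO mark where the final where() currently triggers the illegal-memory-access bug.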
26 changes: 15 additions & 11 deletions front/py/deepx/transformer/models/llama/attention.py
@@ -1,6 +1,8 @@
from typing import Optional,Tuple
from deepx.nn.modules import Module,Linear
from deepx import nn
from deepx import Tensor,matmul,softmax,cat,dropout as dropout_func
from deepx.nn.modules import Module
from deepx.utils import Config



@@ -52,7 +54,9 @@ def eager_attention_forward(
return attn_output, attn_weights

class LlamaAttention(Module):
def __init__(self, config:dict, layer_idx: int):
"""Multi-headed attention from 'Attention Is All You Need' paper"""

def __init__(self, config: Config, layer_idx: int):
super().__init__()
self.config = config
self.layer_idx = layer_idx
@@ -62,19 +66,20 @@ def __init__(self, config:dict, layer_idx: int):
self.attention_dropout = config.attention_dropout
self.is_causal = True

self.q_proj = Linear(
self.q_proj = nn.Linear(
config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.attention_bias
)
self.k_proj = Linear(
self.k_proj = nn.Linear(
config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
)
self.v_proj = Linear(
self.v_proj = nn.Linear(
config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
)
self.o_proj = Linear(
self.o_proj = nn.Linear(
config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
)


def forward(
self,
hidden_states: Tensor,
@@ -90,17 +95,16 @@ def forward(

cos, sin = position_embeddings
query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
attn_output, attn_weights = attention_interface(


attn_output, attn_weights =eager_attention_forward(
self,
query_states,
key_states,
value_states,
attention_mask,
dropout=0.0 if not self.training else self.attention_dropout,
scaling=self.scaling,
**kwargs,
dropout=0.0 if not self.training else self.attention_dropout
)

attn_output = attn_output.reshape(*input_shape, -1)
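For context, since the body of eager_attention_forward sits outside this hunk: assuming it follows the usual reference eager implementation (matmul, scale, mask, softmax, dropout, matmul), the call above computes

$$
\mathrm{Attn}(Q,K,V) = \mathrm{dropout}_p\big(\mathrm{softmax}(s\,QK^{\top} + M)\big)\,V
$$

with s = self.scaling, M the attention_mask, and p = 0 at inference time or config.attention_dropout during training, which is what the dropout= argument selects.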
16 changes: 8 additions & 8 deletions front/py/deepx/transformer/models/llama/embedding.py
@@ -1,17 +1,17 @@
from deepx.nn.modules import Module
from deepx import Tensor,concat
from deepx import cat
from deepx.transformer.modeling_rope_utils import ROPE_INIT_FUNCTIONS

from deepx.utils import Config
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
class LlamaRotaryEmbedding(Module):
def __init__(self,config:dict):
def __init__(self,config:Config):
super().__init__()
# maximum sequence length
self.max_seq_len_cached = config["max_position_embeddings"]
self.max_seq_len_cached = config.max_position_embeddings
# original maximum sequence length
self.original_max_seq_len = config["max_position_embeddings"]
self.original_max_seq_len = config.max_position_embeddings
# RoPE scaling type
self.rope_type=config["rope_scaling"]["rope_type"]
self.rope_type=config.rope_scaling.rope_type
# RoPE init function
self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
# RoPE init function
@@ -39,7 +39,7 @@ def __init__(self,config:dict):

def forward(self, x, position_ids):
# expand rotary frequencies
inv_freq_expanded = self.inv_freq[None, :, None].todtype('float32').expand((position_ids.shape[0], -1, 1))
inv_freq_expanded = self.inv_freq[None, :, None].float().expand((position_ids.shape[0], -1, 1))

# use torch.unsqueeze-style expansion and type conversion instead of indexing ops
position_ids_expanded = position_ids[:, None, :].float()
@@ -48,7 +48,7 @@ def forward(self, x, position_ids):
# compute frequencies
freqs = (inv_freq_expanded @ position_ids_expanded).T
# concatenate frequencies
emb = concat((freqs, freqs), dim=-1)
emb = cat((freqs, freqs), dim=-1)
# compute cosine and sine
cos = emb.cos()
sin = emb.sin()
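In symbols, the forward pass builds, for each position p and inverse frequency f_j:

$$
\theta_{p,j} = p \cdot f_j, \qquad
\mathrm{emb}_p = [\,\theta_{p,\cdot} \,\|\, \theta_{p,\cdot}\,], \qquad
\cos(\mathrm{emb}_p),\ \sin(\mathrm{emb}_p)
$$

i.e. the frequency table is duplicated along the last dimension before taking cosine and sine; the concat → cat rename above does not change this.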
12 changes: 0 additions & 12 deletions front/py/deepx/transformer/models/llama/groupedquery_attention.py

This file was deleted.

20 changes: 3 additions & 17 deletions front/py/deepx/utils/__init__.py
@@ -1,19 +1,5 @@
from .tensor import Tensor
from .creation import zeros, ones, arange
from .elementwise import add, sub, mul, div
from .matmul import matmul, dot
from .reduction import sum, mean, max, min
from .shape import reshape, transpose
from .comparison import lt, gt, eq
from .trigonometric import sin, cos, tan
from .config import Config

__all__ = [
'Tensor',
'zeros', 'ones', 'arange',
'add', 'sub', 'mul', 'div',
'matmul', 'dot',
'sum', 'mean', 'max', 'min',
'reshape', 'transpose',
'lt', 'gt', 'eq',
'sin', 'cos', 'tan'
]
'Config',
]