From 7a343f0cc9de16f81cc2848b3dd304606fbc423a Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Sat, 19 Apr 2025 19:35:30 +0800 Subject: [PATCH 1/3] =?UTF-8?q?normal:=E5=88=9D=E5=A7=8B=E5=8C=96=E3=80=82?= =?UTF-8?q?=E8=A7=A3=E4=BA=86=E4=B8=80=E5=A4=A9bug=EF=BC=8C=E6=9C=80?= =?UTF-8?q?=E5=90=8E=E5=8F=91=E7=8E=B0init.hpp=20switch=E9=87=8C=E6=BC=8F?= =?UTF-8?q?=E5=86=99=E4=BA=86break=EF=BC=8C=E5=AF=BC=E8=87=B4normal?= =?UTF-8?q?=E8=B0=83=E7=94=A8=E5=90=8E=E6=95=B0=E6=8D=AE=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- front/py/deepx/nn/modules/sparse.py | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 front/py/deepx/nn/modules/sparse.py diff --git a/front/py/deepx/nn/modules/sparse.py b/front/py/deepx/nn/modules/sparse.py new file mode 100644 index 00000000..cab749e0 --- /dev/null +++ b/front/py/deepx/nn/modules/sparse.py @@ -0,0 +1,41 @@ +from .module import Module +from deepx.tensor import Tensor + +class Embedding(Module): + def __init__(self, + num_embeddings:int, + embedding_dim:int, + padding_idx:int=None, + max_norm:float=None, + norm_type:float=2.0, + scale_grad_by_freq:bool=False, + sparse:bool=False): + super(Embedding, self).__init__() + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + self.padding_idx = padding_idx + self.max_norm = max_norm + self.norm_type = norm_type + self.scale_grad_by_freq = scale_grad_by_freq + self.sparse = sparse + self.weight = Tensor(num_embeddings, embedding_dim) + self.weight.uniform_(-0.01, 0.01) + if padding_idx is not None: + self.weight[padding_idx] = 0 + + def forward(self, input:Tensor)->Tensor: + return self.weight[input] + + def backward(self, grad:Tensor)->Tensor: + self.weight.grad = grad + return None + + def __str__(self)->str: + return f"Embedding(num_embeddings={self.num_embeddings}, embedding_dim={self.embedding_dim})" + + def __repr__(self)->str: 
+ return self.__str__() + + def __len__(self)->int: + return self.num_embeddings + From c7d9ad1aa6efa7168b2723a04692a1a738861ed2 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Sat, 19 Apr 2025 19:35:38 +0800 Subject: [PATCH 2/3] =?UTF-8?q?normal:=E5=88=9D=E5=A7=8B=E5=8C=96=E3=80=82?= =?UTF-8?q?=E8=A7=A3=E4=BA=86=E4=B8=80=E5=A4=A9bug=EF=BC=8C=E6=9C=80?= =?UTF-8?q?=E5=90=8E=E5=8F=91=E7=8E=B0init.hpp=20switch=E9=87=8C=E6=BC=8F?= =?UTF-8?q?=E5=86=99=E4=BA=86break=EF=BC=8C=E5=AF=BC=E8=87=B4normal?= =?UTF-8?q?=E8=B0=83=E7=94=A8=E5=90=8E=E6=95=B0=E6=8D=AE=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/excuter/op-mem-cuda/list.md | 12 +- doc/excuter/op-mem-ompsimd/list.md | 22 ++- excuter/cpp-common/src/deepx/dtype.hpp | 4 +- .../cpp-common/src/deepx/tensorfunc/init.hpp | 20 +- excuter/cpp-common/src/stdutil/print.hpp | 18 +- excuter/op-mem-cuda/src/client/tfs.cpp | 8 + .../op-mem-cuda/src/deepx/tensorfunc/cuda.hpp | 33 ++-- .../src/deepx/tensorfunc/init_miaobyte.cu | 171 ++++++++++------- .../src/deepx/tensorfunc/init_miaobyte.cuh | 68 ++----- .../src/deepx/tensorfunc/init_miaobyte.hpp | 28 ++- excuter/op-mem-cuda/src/deepx/tf/init.hpp | 172 ++++++++++++------ excuter/op-mem-ompsimd/src/client/tfs.cpp | 9 + .../src/deepx/tensorfunc/init_miaobyte.hpp | 34 +++- excuter/op-mem-ompsimd/src/deepx/tf/init.hpp | 62 ++++++- front/py/deepx/nn/functional/authormap.py | 1 + front/py/deepx/nn/functional/leaffunc_init.py | 29 ++- front/py/deepx/nn/functional/rtf_init.py | 7 + front/py/deepx/tensor/init.py | 26 +-- front/py/deepx/tensor/tensor.py | 30 +-- front/py/examples/2_ir/1_init_zeroones.py | 52 ++++-- 20 files changed, 517 insertions(+), 289 deletions(-) diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index 2d7a12fb..8704844c 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -10,10 +10,13 @@ | transpose | 
miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor A, vector dim_order)->(tensor C) | | reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | | matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +| switch | miaobyte | switch(listtensor tensors, tensor cases)->(tensor result) | C=switch(tensors,cases) | switch(listtensor tensors, tensor cases)->(tensor result) | +| greaterscalar | miaobyte | greaterscalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1, scalar) | greaterscalar(tensor A, var scalar)->(tensor mask) | | equalscalar | miaobyte | equalscalar(tensor A, var scalar, var epsilon)->(tensor mask) | mask=compare(T1, scalar) | equalscalar(tensor A, var scalar, var epsilon)->(tensor mask) | | prod | miaobyte | prod(tensor A, vector dims, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector dims, var keepdims)->(tensor B) | | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1, T2) | min(tensor A, tensor B)->(tensor C) | | maxscalar | miaobyte | maxscalar(tensor A, var scalar)->(tensor C) | T3=max(T1, scalar) | maxscalar(tensor A, var scalar)->(tensor C) | +| normal | miaobyte | normal(tensor t, var mean, var stddev, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var stddev, var seed)->() | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | | addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) | | log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | @@ -24,22 +27,24 @@ | add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | 
add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | +| greater | miaobyte | greater(tensor A, tensor B)->(tensor mask) | mask=compare(T1, T2) | greater(tensor A, tensor B)->(tensor mask) | | print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | | print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | | newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | | newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(var shape)->(tensor tensor1) | +| less | miaobyte | less(tensor A, tensor B)->(tensor mask) | mask=compare(T1, T2) | less(tensor A, tensor B)->(tensor mask) | | constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | | powscalar | miaobyte | powscalar(tensor A, var scalar)->(tensor C) | T3=pow(T1, scalar) | powscalar(tensor A, var scalar)->(tensor C) | | vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | -| reducemin | miaobyte | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | -| subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | -| sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1, scalar) | minscalar(tensor A, var scalar)->(tensor C) | | rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | | rpowscalar | miaobyte | rpowscalar(var 
scalar, tensor A)->(tensor C) | T3=pow(scalar, T1) | rpowscalar(var scalar, tensor A)->(tensor C) | | sub | miaobyte | sub(tensor A, tensor B)->(tensor C) | T3=T1-T2 | sub(tensor A, tensor B)->(tensor C) | | sum | miaobyte | sum(tensor A, vector dims, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector dims, var keepdims)->(tensor B) | | argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | +| reducemin | miaobyte | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | +| sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | +| subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | | equal | miaobyte | equal(tensor A, tensor B, var epsilon)->(tensor mask) | mask=compare(T1, T2) | equal(tensor A, tensor B, var epsilon)->(tensor mask) | | mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor C) | | div | miaobyte | div(tensor A, tensor B)->(tensor C) | T3=T1/T2 | div(tensor A, tensor B)->(tensor C) | @@ -48,5 +53,6 @@ | pow | miaobyte | pow(tensor A, tensor B)->(tensor C) | T3=pow(T1, T2) | pow(tensor A, tensor B)->(tensor C) | | mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | | exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | +| lessscalar | miaobyte | lessscalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1, scalar) | lessscalar(tensor A, var scalar)->(tensor mask) | | deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | | cos | miaobyte | cos(tensor A)->(tensor C) | T3=cos(T1) | cos(tensor A)->(tensor C) | diff --git a/doc/excuter/op-mem-ompsimd/list.md 
b/doc/excuter/op-mem-ompsimd/list.md index 68ea3b70..65ffb758 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -11,8 +11,10 @@ | reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | | matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | | matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | -| comparescalar | miaobyte | comparescalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1,scalar) | comparescalar(tensor A, var scalar)->(tensor mask) | -| compare | miaobyte | compare(tensor A, tensor B)->(tensor mask) | mask=compare(T1,T2) | compare(tensor A, tensor B)->(tensor mask) | +| switch | miaobyte | switch(listtensor tensors, tensor cases)->(tensor C) | C=switch([tensors],case) | switch(listtensor tensors, tensor cases)->(tensor C) | +| greaterscalar | miaobyte | greaterscalar(tensor A, var scalar)->(tensor mask) | mask=greater(T1,scalar) | greaterscalar(tensor A, var scalar)->(tensor mask) | +| equalscalar | miaobyte | equalscalar(tensor A, var scalar)->(tensor mask) | mask=equal(T1,scalar) | equalscalar(tensor A, var scalar)->(tensor mask) | +| normal | miaobyte | normal(tensor t, var mean, var std, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var std, var seed)->() | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | | addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) | | log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | @@ -23,20 +25,27 @@ | copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | | prod 
| miaobyte | prod(tensor A, vector axis, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector axis, var keepdims)->(tensor B) | | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1,T2) | min(tensor A, tensor B)->(tensor C) | +| greater | miaobyte | greater(tensor A, tensor B)->(tensor mask) | mask=greater(T1,T2) | greater(tensor A, tensor B)->(tensor mask) | | print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | | print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | | newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | | newtensor | none | newtensor(var shape)->(tensor t) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor t) | +| lessscalar | miaobyte | lessscalar(tensor A, var scalar)->(tensor mask) | mask=less(T1,scalar) | lessscalar(tensor A, var scalar)->(tensor mask) | | deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | +| less | miaobyte | less(tensor A, tensor B)->(tensor mask) | mask=less(T1,T2) | less(tensor A, tensor B)->(tensor mask) | | constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | | powscalar | miaobyte | powscalar(tensor A, var scalar)->(tensor C) | T3=T1^scalar | powscalar(tensor A, var scalar)->(tensor C) | | vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | -| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | -| subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | -| sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | +| minscalar | miaobyte | 
minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1,scalar) | minscalar(tensor A, var scalar)->(tensor C) | +| rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | +| rpowscalar | miaobyte | rpowscalar(var scalar, tensor A)->(tensor C) | T3=scalar^T1 | rpowscalar(var scalar, tensor A)->(tensor C) | | sub | miaobyte | sub(tensor a, tensor b)->(tensor c) | T3=T1-T2 | sub(tensor a, tensor b)->(tensor c) | | sum | miaobyte | sum(tensor A, vector axis, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector axis, var keepdims)->(tensor B) | | argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | +| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | +| sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | +| subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | +| equal | miaobyte | equal(tensor A, tensor B)->(tensor mask) | mask=equal(T1,T2) | equal(tensor A, tensor B)->(tensor mask) | | mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor C) | | div | miaobyte | div(tensor A, tensor B)->(tensor C) | T3=T1/T2 | div(tensor A, tensor B)->(tensor C) | | invert | miaobyte | invert(tensor A)->(tensor C) | T3=~T1 | invert(tensor A)->(tensor C) | @@ -45,6 +54,3 @@ | maxscalar | miaobyte | maxscalar(tensor A, var scalar)->(tensor C) | T3=max(T1,scalar) | maxscalar(tensor A, var scalar)->(tensor C) | | mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | | exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | -| rdivscalar 
| miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | -| rpowscalar | miaobyte | rpowscalar(var scalar, tensor A)->(tensor C) | T3=scalar^T1 | rpowscalar(var scalar, tensor A)->(tensor C) | -| minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1,scalar) | minscalar(tensor A, var scalar)->(tensor C) | diff --git a/excuter/cpp-common/src/deepx/dtype.hpp b/excuter/cpp-common/src/deepx/dtype.hpp index 810566c8..5b9551a9 100644 --- a/excuter/cpp-common/src/deepx/dtype.hpp +++ b/excuter/cpp-common/src/deepx/dtype.hpp @@ -174,6 +174,7 @@ namespace deepx return 8; case Precision::Float8E4M3: return 8; + //TODO 需要根据平台支持 case Precision::Float4E2M1: return 4; case Precision::Int64: @@ -184,10 +185,11 @@ namespace deepx return 16; case Precision::Int8: return 8; + //TODO,int4 需要根据平台支持 case Precision::Int4: return 4; case Precision::Bool: - return 1; + return 8; case Precision::String: case Precision::Any: default: diff --git a/excuter/cpp-common/src/deepx/tensorfunc/init.hpp b/excuter/cpp-common/src/deepx/tensorfunc/init.hpp index 56e106e5..fdca437b 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/init.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/init.hpp @@ -6,6 +6,7 @@ namespace deepx::tensorfunc { + //constant template struct constantDispatcher { @@ -17,7 +18,8 @@ namespace deepx::tensorfunc { constantDispatcher::constant(tensor, value); } - + + //arange template struct arangeDispatcher { @@ -30,6 +32,7 @@ namespace deepx::tensorfunc arangeDispatcher::arange(tensor, start, step); } + //uniform template struct uniformDispatcher { @@ -41,6 +44,19 @@ namespace deepx::tensorfunc { uniformDispatcher::uniform(tensor, low, high, seed); } -} + //normal + template + struct normalDispatcher + { + static void normal(Tensor &tensor, const T mean , const T stddev , const unsigned int seed) = delete; + }; + + template + void normal(Tensor &tensor, const T mean = T(0), const T stddev = 
T(1), const unsigned int seed = 0) + { + normalDispatcher::normal(tensor, mean, stddev, seed); + } + +} #endif diff --git a/excuter/cpp-common/src/stdutil/print.hpp b/excuter/cpp-common/src/stdutil/print.hpp index 139575b4..046a4c83 100644 --- a/excuter/cpp-common/src/stdutil/print.hpp +++ b/excuter/cpp-common/src/stdutil/print.hpp @@ -35,16 +35,22 @@ namespace stdutil case Precision::Float64: printf(format.c_str(), ((double *)data)[offset]); break; - case Precision::Float32: - printf(format.c_str(), ((float *)data)[offset]); + case Precision::Float32:{ + float result = ((float *)data)[offset]; + printf(format.c_str(), result); break; - case Precision::Float16: - printf(format.c_str(), ((float *)data)[offset]); + } + case Precision::Float16:{ + float result = ((float *)data)[offset]; + printf(format.c_str(), result); break; - case Precision::BFloat16: - printf(format.c_str(), ((float *)data)[offset]); + } + case Precision::BFloat16:{ + float result = ((float *)data)[offset]; + printf(format.c_str(), result); break; } + } } inline std::string default_format(const deepx::Precision &dtype) diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index c64973ca..f92bcfc4 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -93,6 +93,14 @@ namespace deepx::tf Param("seed", DataCategory::Var, Precision::Int32), }), vector())); + tffactory.add_tf(std::make_shared>(vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + Param("mean", DataCategory::Var, Precision::Any), + Param("stddev", DataCategory::Var, Precision::Any), + Param("seed", DataCategory::Var, Precision::Int32), + }), + vector())); } // io void register_util(TfFactory &opfactory) diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp index a9b6886f..7dc0320e 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp +++ 
b/excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp @@ -17,7 +17,6 @@ namespace deepx::tensorfunc throw std::runtime_error("Failed to create cuBLAS handle"); } } - ~CublasHandle() { if (handle_) @@ -29,26 +28,36 @@ namespace deepx::tensorfunc private: cublasHandle_t handle_; }; - - inline std::pair BestDims(int total_elements) + //TODO + inline int deviceblocksize(){ + int device_id; + cudaGetDevice(&device_id); + cudaDeviceProp props; + cudaGetDeviceProperties(&props, device_id); + return props.maxThreadsPerBlock; + } + inline int deviceblock() { - // 默认块大小 - int optimal_block_size = 256; // 一般256或512是较好的选择 - // 计算设备属性以确定最佳配置 int device_id; cudaGetDevice(&device_id); cudaDeviceProp props; cudaGetDeviceProperties(&props, device_id); - // 根据SM数量和每个SM的最大线程数决定块数 + // 根据SM数量计算建议的块数上限 int sm_count = props.multiProcessorCount; int optimal_blocks = sm_count * 8; // 每个SM分配多个块以增加并行度 + return optimal_blocks; + } - // 确保至少启动足够的线程来处理所有数据 - int min_blocks = (total_elements + optimal_block_size - 1) / optimal_block_size; - int actual_blocks = std::min(optimal_blocks, min_blocks); - - return {actual_blocks, optimal_block_size}; + // 计算最佳的块大小和块数 + inline std::pair BestDims(int total_elements) + { + // 默认块大小 + int blocksize = total_elements > 256 ? 
256 : total_elements; + int blocks = (total_elements + blocksize - 1) / blocksize; // 向上取整除法 + int optimal_blocks = deviceblock(); + blocks = std::min(blocks, optimal_blocks); + return {blocks, blocksize}; }; } diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cu b/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cu index aaeed355..0e98773c 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cu +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cu @@ -4,123 +4,154 @@ #include "deepx/tensor.hpp" #include "deepx/tensorfunc/authors.hpp" - +#include "deepx/tensorfunc/cuda.hpp" namespace deepx::tensorfunc { template - __global__ void kernel_constant(T *data, const int size, const T value) + __global__ void kernel_constant(T *data, const T value, const int size) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) + int stride = blockDim.x * gridDim.x; + for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += stride) { data[idx] = value; } } - template __global__ void kernel_constant(double *data, const int size, const double value); - template __global__ void kernel_constant(float *data, const int size, const float value); - template __global__ void kernel_constant(half *data, const int size, const half value); - template __global__ void kernel_constant(nv_bfloat16 *data, const int size, const nv_bfloat16 value); - template __global__ void kernel_constant(int64_t *data, const int size, const int64_t value); - template __global__ void kernel_constant(int32_t *data, const int size, const int32_t value); - template __global__ void kernel_constant(int16_t *data, const int size, const int16_t value); - template __global__ void kernel_constant(int8_t *data, const int size, const int8_t value); template - void launch_constant(const int numBlocks, const int blockSize, T *a, const T value, const int size) + void launch_constant(T *a, const T value, const int size) { - kernel_constant<<>>(a, 
size, value); + auto [numBlocks, blockSize] = BestDims(size); + kernel_constant<<>>(a, value, size); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) throw std::runtime_error("Failed to launch constant kernel"); + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) + throw std::runtime_error("Failed to synchronize device"); } - template void launch_constant(const int numBlocks, const int blockSize, double *a, const double value, const int size); - template void launch_constant(const int numBlocks, const int blockSize, float *a, const float value, const int size); - template void launch_constant(const int numBlocks, const int blockSize, half *a, const half value, const int size); - template void launch_constant(const int numBlocks, const int blockSize, nv_bfloat16 *a, const nv_bfloat16 value, const int size); - template void launch_constant(const int numBlocks, const int blockSize, int64_t *a, const int64_t value, const int size); - template void launch_constant(int numBlocks, int blockSize, int32_t *a, int32_t value, int size); - template void launch_constant(const int numBlocks, const int blockSize, int16_t *a, const int16_t value, const int size); - template void launch_constant(const int numBlocks, const int blockSize, int8_t *a, const int8_t value, const int size); + template void launch_constant(double *a, const double value, const int size); + template void launch_constant(float *a, const float value, const int size); + template void launch_constant(half *a, const half value, const int size); + template void launch_constant(nv_bfloat16 *a, const nv_bfloat16 value, const int size); + template void launch_constant(int64_t *a, const int64_t value, const int size); + template void launch_constant(int32_t *a, const int32_t value, const int size); + template void launch_constant(int16_t *a, const int16_t value, const int size); + template void launch_constant(int8_t *a, const int8_t value, const int size); + template void launch_constant(bool *a, 
const bool value, const int size); // 添加kernel函数 template - __global__ void kernel_arange(T *data, const int size, const T start, const T step) + __global__ void kernel_arange(T *data, const float start, const float step, const int size) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) + int stride = blockDim.x * gridDim.x; + for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += stride) { - data[idx] = start + step * static_cast(static_cast(idx)); + data[idx] = static_cast(start + step * static_cast(idx)); } } - template __global__ void kernel_arange(double *data, const int size, const double start, const double step); - template __global__ void kernel_arange(float *data, const int size, const float start, const float step); - template __global__ void kernel_arange(half *data, const int size, const half start, const half step); - template __global__ void kernel_arange(nv_bfloat16 *data, const int size, const nv_bfloat16 start, const nv_bfloat16 step); - template __global__ void kernel_arange(int64_t *data, const int size, const int64_t start, const int64_t step); - template __global__ void kernel_arange(int32_t *data, const int size, const int32_t start, const int32_t step); - template __global__ void kernel_arange(int16_t *data, const int size, const int16_t start, const int16_t step); - template __global__ void kernel_arange(int8_t *data, const int size, const int8_t start, const int8_t step); template - void launch_arange(const int numBlocks, const int blockSize, T *a, const T start, const T step, const int size) + void launch_arange(T *a, const T start, const T step, const int size) { - kernel_arange<<>>(a, size, start, step); + auto [numBlocks, blockSize] = BestDims(size); + kernel_arange<<>>(a, static_cast(start), static_cast(step), size); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) throw std::runtime_error("Failed to launch arange kernel"); + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) + 
throw std::runtime_error("Failed to synchronize device"); } - template void launch_arange(const int numBlocks, const int blockSize, double *a, const double start, const double step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, float *a, const float start, const float step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, half *a, const half start, const half step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, nv_bfloat16 *a, const nv_bfloat16 start, const nv_bfloat16 step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, int64_t *a, const int64_t start, const int64_t step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, int32_t *a, const int32_t start, const int32_t step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, int16_t *a, const int16_t start, const int16_t step, const int size); - template void launch_arange(const int numBlocks, const int blockSize, int8_t *a, const int8_t start, const int8_t step, const int size); + template void launch_arange(double *a, const double start, const double step, const int size); + template void launch_arange(float *a, const float start, const float step, const int size); + template void launch_arange(half *a, const half start, const half step, const int size); + template void launch_arange(nv_bfloat16 *a, const nv_bfloat16 start, const nv_bfloat16 step, const int size); + template void launch_arange(int64_t *a, const int64_t start, const int64_t step, const int size); + template void launch_arange(int32_t *a, const int32_t start, const int32_t step, const int size); + template void launch_arange(int16_t *a, const int16_t start, const int16_t step, const int size); + template void launch_arange(int8_t *a, const int8_t start, const int8_t step, const int size); // 添加kernel函数 
template - __global__ void kernel_uniform(T *data, const int size, const T low, const T high, const unsigned int seed) + __global__ void kernel_uniform(T *data, const float low, const float high, const unsigned int seed, const int size) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) - { - // 为每个线程创建独立的随机数生成器状态 - curandState state; - curand_init(seed, idx, 0, &state); + int stride = blockDim.x * gridDim.x; + curandState state; + curand_init(seed, threadIdx.x, 0, &state); // 仅初始化一次 + for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += stride) + { // 生成[0,1)范围的随机数 float rand = curand_uniform(&state); // 先用float类型进行计算,然后转换为目标类型 - float result = static_cast(low) + (static_cast(high) - static_cast(low)) * rand; + float result = low + (high - low) * rand; + printf("threadIdx: %d, idx: %d, result: %f\n", threadIdx.x, idx, result); data[idx] = static_cast(result); } } - template __global__ void kernel_uniform(double *data, const int size, const double low, const double high, const unsigned int seed); - template __global__ void kernel_uniform(float *data, const int size, const float low, const float high, const unsigned int seed); - template __global__ void kernel_uniform(half *data, const int size, const half low, const half high, const unsigned int seed); - template __global__ void kernel_uniform(nv_bfloat16 *data, const int size, const nv_bfloat16 low, const nv_bfloat16 high, const unsigned int seed); - template __global__ void kernel_uniform(int64_t *data, const int size, const int64_t low, const int64_t high, const unsigned int seed); - template __global__ void kernel_uniform(int32_t *data, int size, int32_t low, int32_t high, unsigned int seed); - template __global__ void kernel_uniform(int16_t *data, const int size, const int16_t low, const int16_t high, const unsigned int seed); - template __global__ void kernel_uniform(int8_t *data, const int size, const int8_t low, const int8_t high, const unsigned int seed); template - 
void launch_uniform(const int numBlocks, const int blockSize, T *a, const T low, const T high, const unsigned int seed, const int size) + void launch_uniform(T *a, const T low, const T high, const unsigned int seed, const int size) { - kernel_uniform<<>>(a, size, low, high, seed); + auto [numBlocks, blockSize] = BestDims(size); + kernel_uniform<<>>(a, float(low), float(high), seed, size); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) throw std::runtime_error("Failed to launch uniform kernel"); + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) + throw std::runtime_error("Failed to synchronize device"); + } + template void launch_uniform(double *a, const double low, const double high, const unsigned int seed, const int size); + template void launch_uniform(float *a, const float low, const float high, const unsigned int seed, const int size); + template void launch_uniform(half *a, const half low, const half high, const unsigned int seed, const int size); + template void launch_uniform(nv_bfloat16 *a, const nv_bfloat16 low, const nv_bfloat16 high, const unsigned int seed, const int size); + template void launch_uniform(int64_t *a, const int64_t low, const int64_t high, const unsigned int seed, const int size); + template void launch_uniform(int32_t *a, const int32_t low, const int32_t high, const unsigned int seed, const int size); + template void launch_uniform(int16_t *a, const int16_t low, const int16_t high, const unsigned int seed, const int size); + template void launch_uniform(int8_t *a, const int8_t low, const int8_t high, const unsigned int seed, const int size); + + // normal + template + __global__ void kernel_normal(T *data, const float mean, const float stddev, const unsigned int seed, const int size) + { + int stride = blockDim.x * gridDim.x; + curandState state; + curand_init(seed, threadIdx.x, 0, &state); // 仅初始化一次 + + for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += stride) + { + // 生成[0,1)范围的随机数 + 
float rand = curand_uniform(&state); + + // 先用float类型进行计算,然后转换为目标类型 + float result = rand; + // float result = mean + stddev * rand; + printf("threadIdx: %d, idx: %d, result: %f\n", threadIdx.x, idx, result); + data[idx] = static_cast(rand); + } + } + template + void launch_normal(T *a, const T mean, const T stddev, const unsigned int seed, const int size) + { + auto [numBlocks, blockSize] = BestDims(size); + kernel_normal<<>>(a,float(mean), float(stddev), seed, size); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + throw std::runtime_error("Failed to launch normal kernel"); + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) + throw std::runtime_error("Failed to synchronize device"); } - template void launch_uniform(const int numBlocks, const int blockSize, double *a, const double low, const double high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, float *a, const float low, const float high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, half *a, const half low, const half high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, nv_bfloat16 *a, const nv_bfloat16 low, const nv_bfloat16 high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, int64_t *a, const int64_t low, const int64_t high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, int32_t *a, const int32_t low, const int32_t high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, int16_t *a, const int16_t low, const int16_t high, const unsigned int seed, const int size); - template void launch_uniform(const int numBlocks, const int blockSize, int8_t *a, const int8_t low, const int8_t high, 
const unsigned int seed, const int size); + template void launch_normal(double *a, const double mean, const double stddev, const unsigned int seed, const int size); + template void launch_normal(float *a, const float mean, const float stddev, const unsigned int seed, const int size); + template void launch_normal(half *a, const half mean, const half stddev, const unsigned int seed, const int size); + template void launch_normal(nv_bfloat16 *a, const nv_bfloat16 mean, const nv_bfloat16 stddev, const unsigned int seed, const int size); + template void launch_normal(int64_t *a, const int64_t mean, const int64_t stddev, const unsigned int seed, const int size); + template void launch_normal(int32_t *a, const int32_t mean, const int32_t stddev, const unsigned int seed, const int size); + template void launch_normal(int16_t *a, const int16_t mean, const int16_t stddev, const unsigned int seed, const int size); + template void launch_normal(int8_t *a, const int8_t mean, const int8_t stddev, const unsigned int seed, const int size); + } \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cuh index b2c2c42c..1acbb322 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.cuh @@ -11,73 +11,29 @@ namespace deepx::tensorfunc { template - __global__ void kernel_constant(T *data, const int size, const T value); + __global__ void kernel_constant(T *data, const T value, const int size); template - void launch_constant(const int numBlocks, const int blockSize, T *a, const T value, const int size); - - template <> - void launch_constant(const int numBlocks, const int blockSize, double *a, const double value, const int size); - template <> - void launch_constant(const int numBlocks, const int blockSize, float *a, const float value, const int size); - template <> - void launch_constant(const int 
numBlocks, const int blockSize, half *a, const half value, const int size); - template <> - void launch_constant(const int numBlocks, const int blockSize, nv_bfloat16 *a, const nv_bfloat16 value, const int size); - template <> - void launch_constant(const int numBlocks, const int blockSize, int64_t *a, const int64_t value, const int size); - template <> - void launch_constant(const int numBlocks, const int blockSize, int32_t *a, const int32_t value, const int size); - template <> - void launch_constant(const int numBlocks, const int blockSize, int16_t *a, const int16_t value, const int size); - template <> - void launch_constant(const int numBlocks, const int blockSize, int8_t *a, const int8_t value, const int size); - + void launch_constant(T *a, const T value, const int size); + template - __global__ void kernel_arange(T *data, const int size, const T start, const T step); + __global__ void kernel_arange(T *data, const float start, const float step, const int size); template - void launch_arange(const int numBlocks, const int blockSize, T *a, const T start, const T step, const int size); + void launch_arange(T *a, const T start, const T step, const int size); + + template + __global__ void kernel_uniform(T *data, const float low, const float high, const unsigned int seed, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, double *a, const double start, const double step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, float *a, const float start, const float step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, half *a, const half start, const half step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, nv_bfloat16 *a, const nv_bfloat16 start, const nv_bfloat16 step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, int64_t *a, const int64_t 
start, const int64_t step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, int32_t *a, const int32_t start, const int32_t step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, int16_t *a, const int16_t start, const int16_t step, const int size); - template <> - void launch_arange(const int numBlocks, const int blockSize, int8_t *a, const int8_t start, const int8_t step, const int size); + template + void launch_uniform(T *a, const T low, const T high, const unsigned int seed, const int size); template - __global__ void kernel_uniform(T *data, const int size, const T low, const T high, const unsigned int seed); + __global__ void kernel_normal(T *data, const float mean, const float stddev, const unsigned int seed, const int size); template - void launch_uniform(const int numBlocks, const int blockSize, T *a, const T low, const T high, const unsigned int seed, const int size); + void launch_normal(T *a, const T mean, const T stddev, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, double *a, const double low, const double high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, float *a, const float low, const float high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, half *a, const half low, const half high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, nv_bfloat16 *a, const nv_bfloat16 low, const nv_bfloat16 high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, int64_t *a, const int64_t low, const int64_t high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int 
blockSize, int32_t *a, const int32_t low, const int32_t high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, int16_t *a, const int16_t low, const int16_t high, const unsigned int seed, const int size); - template <> - void launch_uniform(const int numBlocks, const int blockSize, int8_t *a, const int8_t low, const int8_t high, const unsigned int seed, const int size); } #endif \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.hpp index f909eefd..8340d2b6 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/init_miaobyte.hpp @@ -1,43 +1,51 @@ #ifndef DEEPX_TENSORFUNC_INIT_MIAO_BYTE_HPP #define DEEPX_TENSORFUNC_INIT_MIAO_BYTE_HPP +#include + #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/init.hpp" #include "deepx/tensor.hpp" #include "deepx/tensorfunc/init_miaobyte.cuh" namespace deepx::tensorfunc { - // 分发器实现 + // constant template struct constantDispatcher { static void constant(Tensor &tensor, const T value) { - const int BLOCKSIZE = tensor.shape.size > 256 ? 256 : tensor.shape.size; - int numBlocks = (tensor.shape.size + BLOCKSIZE - 1) / BLOCKSIZE; - launch_constant(numBlocks, BLOCKSIZE, tensor.data, value, tensor.shape.size); + launch_constant(tensor.data, value, tensor.shape.size); } }; + // arange template struct arangeDispatcher { static void arange(Tensor &tensor, const T start, const T step) { - const int BLOCKSIZE = tensor.shape.size > 256 ? 
256 : tensor.shape.size; - int numBlocks = (tensor.shape.size + BLOCKSIZE - 1) / BLOCKSIZE; - launch_arange(numBlocks, BLOCKSIZE, tensor.data, start, step, tensor.shape.size); + launch_arange(tensor.data, start, step, tensor.shape.size); } }; + // uniform template struct uniformDispatcher { static void uniform(Tensor &tensor, const T low, const T high, const unsigned int seed) { - const int BLOCKSIZE = tensor.shape.size > 256 ? 256 : tensor.shape.size; - int numBlocks = (tensor.shape.size + BLOCKSIZE - 1) / BLOCKSIZE; - launch_uniform(numBlocks, BLOCKSIZE, tensor.data, low, high, seed, tensor.shape.size); + launch_uniform(tensor.data, low, high, seed, tensor.shape.size); + } + }; + + // normal + template + struct normalDispatcher + { + static void normal(Tensor &tensor, const T mean, const T stddev, const unsigned int seed) + { + launch_normal(tensor.data, mean, stddev, seed, tensor.shape.size); } }; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/init.hpp b/excuter/op-mem-cuda/src/deepx/tf/init.hpp index 2e60ba62..bbdb883e 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/init.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/init.hpp @@ -33,66 +33,59 @@ namespace deepx::tf } int run(shared_ptr mem, string &error) override - { + { string name = this->args[0].textvalue; auto tensor = mem->gettensor(name).get(); - auto type=tensor->shape.dtype; + auto type = tensor->shape.dtype; switch (type) { case Precision::Float64: - { - auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); break; - } + case Precision::Float32: - { - auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); break; - } + case Precision::Float16: - { - auto output = mem->gettensor<__half>(name).get(); - tensorfunc::constant(*output, this->getvar<__half>(1, mem)); + + 
tensorfunc::constant(*mem->gettensor<__half>(name).get(), this->getvar<__half>(1, mem)); break; - } + case Precision::BFloat16: - { - auto output = mem->gettensor<__nv_bfloat16>(name).get(); - tensorfunc::constant(*output, this->getvar<__nv_bfloat16>(1, mem)); + + tensorfunc::constant(*mem->gettensor<__nv_bfloat16>(name).get(), this->getvar<__nv_bfloat16>(1, mem)); break; - } + case Precision::Int64: - { - auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); break; - } + case Precision::Int32: - { - auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); break; - } + case Precision::Int16: - { - auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); break; - } + case Precision::Int8: - { - auto output = mem->gettensor(name).get(); - tensorfunc::constant(*output, this->getvar(1, mem)); + + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); + break; + case Precision::Bool: + tensorfunc::constant(*mem->gettensor(name).get(), this->getvar(1, mem)); break; - } default: { error = "unsupported dtype: " + precision_str(type); return 1; } - } return 0; }; @@ -103,7 +96,7 @@ namespace deepx::tf shared_ptr clone() const override { return make_shared>(*this); - } + } }; template @@ -121,7 +114,7 @@ namespace deepx::tf { string name = this->args[0].textvalue; auto tensor = mem->gettensor(name).get(); - auto type=tensor->shape.dtype; + auto type = tensor->shape.dtype; switch (type) { case Precision::Float64: @@ -141,13 +134,13 @@ namespace deepx::tf auto output = mem->gettensor<__half>(name).get(); tensorfunc::arange(*output, this->getvar<__half>(1, mem), this->getvar<__half>(2, mem)); break; - } + } case 
Precision::BFloat16: { auto output = mem->gettensor<__nv_bfloat16>(name).get(); tensorfunc::arange(*output, this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem)); break; - } + } case Precision::Int64: { auto output = mem->gettensor(name).get(); @@ -177,7 +170,6 @@ namespace deepx::tf error = "unsupported dtype: " + precision_str(type); return 1; } - } return 0; } @@ -189,8 +181,8 @@ namespace deepx::tf { return make_shared>(*this); } - }; - + }; + template class Uniform : public TF { @@ -200,61 +192,62 @@ namespace deepx::tf this->name = "uniform"; this->author = Author::name(); this->args = args; - this->returns = returns; + this->returns = returns; } int run(shared_ptr mem, string &error) override { string name = this->args[0].textvalue; auto tensor = mem->gettensor(name).get(); - auto type=tensor->shape.dtype; + auto type = tensor->shape.dtype; + unsigned int seed = static_cast( this->getvar(3, mem)); switch (type) { case Precision::Float64: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); break; } case Precision::Float32: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); break; } case Precision::Float16: { auto output = mem->gettensor<__half>(name).get(); - tensorfunc::uniform(*output, this->getvar<__half>(1, mem), this->getvar<__half>(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar<__half>(1, mem), this->getvar<__half>(2, mem), seed); break; } case Precision::BFloat16: { auto output = mem->gettensor<__nv_bfloat16>(name).get(); - tensorfunc::uniform(*output, this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem), this->getvar(3, mem)); + 
tensorfunc::uniform(*output, this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem), seed); break; - } + } case Precision::Int64: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); break; } case Precision::Int32: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); break; } case Precision::Int16: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); break; } case Precision::Int8: { auto output = mem->gettensor(name).get(); - tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + tensorfunc::uniform(*output, this->getvar(1, mem), this->getvar(2, mem), seed); break; } default: @@ -262,7 +255,6 @@ namespace deepx::tf error = "unsupported dtype: " + precision_str(type); return 1; } - } return 0; } @@ -274,8 +266,76 @@ namespace deepx::tf { return make_shared>(*this); } - }; - + }; + + template + class Normal : public TF + { + public: + Normal(const vector &args, const vector &returns) + { + this->name = "normal"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + string math_formula() const override + { + return "normal(T1,mean,stddev,seed)"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + string name = this->args[0].textvalue; + auto tensor = mem->gettensor(name).get(); + auto type = tensor->shape.dtype; + unsigned int seed = static_cast( this->getvar(3, mem)); + switch (type) + { + 
case Precision::Float64: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + break; + + case Precision::Float32: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + break; + case Precision::Float16: + tensorfunc::normal(*mem->gettensor<__half>(name).get(), this->getvar<__half>(1, mem), this->getvar<__half>(2, mem), seed); + break; + + case Precision::BFloat16: + tensorfunc::normal(*mem->gettensor<__nv_bfloat16>(name).get(), this->getvar<__nv_bfloat16>(1, mem), this->getvar<__nv_bfloat16>(2, mem), seed); + break; + + case Precision::Int64: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + break; + + case Precision::Int32: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + break; + + case Precision::Int16: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + break; + + case Precision::Int8: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), seed); + break; + + default: + { + error = "unsupported dtype: " + precision_str(type); + return 1; + } + } + return 0; + } + }; } #endif diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 2208863c..defad8c6 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -97,6 +97,15 @@ namespace deepx::tf Param("seed", DataCategory::Var, Precision::Int32), }), vector())); + // normal author=miaobyte + tffactory.add_tf(std::make_shared>(vector( + { + Param("t", DataCategory::Tensor, Precision::Any), + Param("mean", DataCategory::Var, Precision::Any), + Param("std", DataCategory::Var, Precision::Any), + Param("seed", DataCategory::Var, Precision::Int32), + }), + vector())); } // io void register_util(TfFactory &opfactory) diff --git 
a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp index 27c63e99..4afda582 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/init_miaobyte.hpp @@ -10,7 +10,8 @@ #include "deepx/tensorfunc/init.hpp" namespace deepx::tensorfunc -{ +{ + //constant template struct constantDispatcher { @@ -20,6 +21,7 @@ namespace deepx::tensorfunc } }; + //uniform template struct uniformDispatcher { @@ -47,6 +49,7 @@ namespace deepx::tensorfunc } }; + //arange template struct arangeDispatcher { @@ -59,7 +62,34 @@ namespace deepx::tensorfunc } } }; - + + //normal + template + struct normalDispatcher + { + static void normal(Tensor &tensor, const T mean, const T stddev, const unsigned int seed = 0) + { + std::normal_distribution dist(mean, stddev); + std::default_random_engine generator; + + // 设置随机数生成器种子 + if (seed == 0) + { + std::random_device rd; + generator.seed(rd()); + } + else + { + generator.seed(seed); + } + + // 单线程循环填充数据 + for (int i = 0; i < tensor.shape.size; ++i) + { + tensor.data[i] = static_cast(dist(generator)); + } + } + }; } #endif // DEEPX_OP_CPU_INIT_HPP \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp index e208b540..dd5cdc45 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp @@ -7,7 +7,7 @@ #include "stdutil/num.hpp" namespace deepx::tf { - + // constant template class Constant : public TF { @@ -33,7 +33,8 @@ namespace deepx::tf { string name = this->args[0].textvalue; auto tensor = mem->gettensor(name).get(); - if (tensor==nullptr) { + if (tensor == nullptr) + { error = "tensor not found: " + name; return 1; } @@ -95,6 +96,7 @@ namespace deepx::tf } }; + // arange template class Arange : public TF { @@ -168,6 +170,7 @@ namespace deepx::tf } }; + // uniform template class 
Uniform : public TF { @@ -241,6 +244,61 @@ namespace deepx::tf } }; + // normal + template + class Normal : public TF + { + public: + Normal(const vector &args, const vector &returns) + { + this->name = "normal"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + string math_formula() const override + { + return "normal(T1,mean,stddev,seed)"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + string name = this->args[0].textvalue; + auto tensor = mem->gettensor(name).get(); + auto type = tensor->shape.dtype; + switch (type) + { + case Precision::Float64: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + break; + case Precision::Float32: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + break; + case Precision::Int64: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + break; + case Precision::Int32: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + break; + case Precision::Int16: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + break; + case Precision::Int8: + tensorfunc::normal(*mem->gettensor(name).get(), this->getvar(1, mem), this->getvar(2, mem), this->getvar(3, mem)); + break; + default: + { + error = "unsupported dtype: " + precision_str(type); + return 1; + } + } + return 0; + } + }; } #endif diff --git a/front/py/deepx/nn/functional/authormap.py b/front/py/deepx/nn/functional/authormap.py index 91e0573b..6c32a4d6 100644 --- a/front/py/deepx/nn/functional/authormap.py +++ b/front/py/deepx/nn/functional/authormap.py @@ -5,6 +5,7 @@ 'uniform':'miaobyte', 'constant':'miaobyte', 'arange':'miaobyte', + 
def normal_(t:Tensor, mean:float=0, stddev:float=1, seed:int=None)->Tensor:
    """In-place init: fill ``t`` with samples from N(mean, stddev**2).

    When ``seed`` is None, a 32-bit seed is derived from the wall clock and
    the pid so that separate runs (and concurrent processes started within
    the same millisecond) get different streams.  Returns ``t`` for chaining.
    """
    if seed is None:
        seed = int(time.time() * 1000) & 0xffffffff
        seed = (seed + os.getpid()) & 0xffffffff
    from .rtf_init import rtf_normal
    # BUG FIX: the result was previously dropped, so normal_() returned None
    # despite its ``-> Tensor`` annotation; rtf_normal returns ``t``.
    return rtf_normal(t, mean, stddev, seed, defaultauthor['normal'])

def normal(*shape, mean:float=0, stddev:float=1, seed:int=None, dtype:str='float32', name:str=None, author='miaobyte')->Tensor:
    """Allocate a tensor of ``shape`` and fill it with N(mean, stddev**2) samples.

    NOTE(review): ``author`` is accepted for signature parity with the other
    initializers but is currently ignored — dispatch uses
    defaultauthor['normal'] inside normal_(); confirm intended behavior.
    """
    s = parse_shape(shape)
    outtensor = newtensor(s, dtype=dtype, name=name)
    normal_(outtensor, mean, stddev, seed)
    return outtensor
stddev:float=1,seed:int=0,author='miaobyte')->Tensor: + args=[Param.tensor(t),Param.varnum(mean),Param.varnum(stddev),Param.varnum(seed)] + returns=[] + ir=DeepxIR("normal", args, returns,author) + send(ir) return t \ No newline at end of file diff --git a/front/py/deepx/tensor/init.py b/front/py/deepx/tensor/init.py index f50073ca..803ba866 100644 --- a/front/py/deepx/tensor/init.py +++ b/front/py/deepx/tensor/init.py @@ -3,24 +3,34 @@ @tensor_method def full_(self,value:Union[float,int]): - from deepx.nn.functional import constant as constant_func + from deepx.nn.functional import constant_ as constant_func constant_func(self,value=value) @tensor_method def zeros_(self): - from deepx.nn.functional import constant as constant_func + from deepx.nn.functional import constant_ as constant_func constant_func(self,value=0) @tensor_method def ones_(self): - from deepx.nn.functional import constant as constant_func + from deepx.nn.functional import constant_ as constant_func constant_func(self,value=1) @tensor_method -def uniform_(self,low=0, high=1,seed:int=0): - from deepx.nn.functional import uniform as uniform_func +def uniform_(self,low=0, high=1,seed:int=None): + from deepx.nn.functional import uniform_ as uniform_func uniform_func(self,low=low, high=high,seed=seed) +@tensor_method +def arange_(self,start=0,step=1): + from deepx.nn.functional import arange_ as arange_func + arange_func(self,start,step) + +@tensor_method +def normal_(self,mean=0, stddev=1,seed:int=None): + from deepx.nn.functional import normal_ as normal_func + normal_func(self,mean,stddev,seed) + @tensor_method def rand_(self): #todo @@ -30,12 +40,6 @@ def rand_(self): def randn_(self): #todo pass - -@tensor_method -def arange_(self,start=0,step=1,author='miaobyte'): - from deepx.nn.functional import arange_ as arange_func - arange_func(self,start,step,author) - @tensor_method def eye_(self,n,m=None): #todo diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index 
8843bc66..5b909d88 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -19,9 +19,7 @@ def __init__(self,shape:Union[tuple[int],list[int],Shape],dtype:str='float32',na tensorid+=1 # dtype self._dtype = dtype - - # format - self.autoformat() + # shape if isinstance(shape, (tuple, list)) and all(isinstance(i, int) for i in shape): @@ -127,20 +125,22 @@ def T(self) -> str: return self.transpose() # 打印 - def autoformat(self): - if self._dtype == 'float32' or self._dtype == 'float64' or self._dtype == 'float16' or self._dtype == 'bfloat16': - self._format = '%.4f' - elif self._dtype == 'int32' or self._dtype == 'int64' or self._dtype == 'int8' or self._dtype == 'int16': - self._format = '%d' - elif self._dtype == 'bool': - self._format = '%d' + @staticmethod + def autoformat(dtype): + if dtype == 'float32' or dtype == 'float64' or dtype == 'float16' or dtype == 'bfloat16': + return '%.4f' + elif dtype == 'int32' or dtype == 'int64' or dtype == 'int8' or dtype == 'int16': + return '%d' + elif dtype == 'bool': + return '%d' else: - self._format = '%s' - def set_format(self,format:str): - self._format = format - def print(self): + return '%s' + + def print(self,format:str=None): + if format is None: + format=self.autoformat(self.dtype) from deepx.nn.functional import printtensor - printtensor(self,format=self._format) + printtensor(self,format) def __repr__(self) -> str: return 'Tensor(shape={},dtype={},name={})'.format(self.shape,self.dtype,self.name) diff --git a/front/py/examples/2_ir/1_init_zeroones.py b/front/py/examples/2_ir/1_init_zeroones.py index d6f63c34..d6de4318 100644 --- a/front/py/examples/2_ir/1_init_zeroones.py +++ b/front/py/examples/2_ir/1_init_zeroones.py @@ -2,26 +2,42 @@ ############-------PyTorch-------################ import torch -torch_t1 = torch.zeros(3, 4, 5, dtype=torch.float32) -torch_t2 = torch.ones(3, 4, 5, dtype=torch.float32) -torch_t4 = torch.full((3, 4, 5), 0.5) -print(torch_t4) - -torch_t6 = 
torch.zeros(3, 4, 5, dtype=torch.float32) -torch.nn.init.kaiming_uniform_(torch_t6) -print(torch_t6) - - +# torch_t1 = torch.zeros(3, 4, 5, dtype=torch.float32) +# torch_t2 = torch.ones(3, 4, 5, dtype=torch.float32) +# torch_t4 = torch.full((3, 4, 5), 0.5) +# print(torch_t4) +# torch_t5=torch.nn.init.uniform_(torch.zeros(3,4,5),0,1) +# print(torch_t5) + + +# torch_t6 = torch.zeros(3, 4, 5, dtype=torch.float32) +# torch.nn.init.kaiming_uniform_(torch_t6) +# print(torch_t6) +# +torch_t7 = torch.zeros(3, 4, 5, dtype=torch.float32) +torch_t7.normal_(mean=0,std=0.02) +print(torch_t7) ############-------DEEPX-------################ -from deepx import zeros,ones,full,kaiming_uniform +import deepx print() -t1 = zeros([3,4,5],dtype='float32') -t2 = ones([3,4,5],dtype='float32') -t4=full([3,4,5],value=0.5) -print(t4) - -t6=kaiming_uniform(3,4,5,dtype='float32') -print(t6) +# t1 = deepx.zeros([3,4,5],dtype='float32') +# t2 = deepx.ones([3,4,5],dtype='float32') +# t4=deepx.full([3,4,5],value=0.5) +# t4.print() +# t5=deepx.uniform(3,4,5,low=0,high=1) +# t5.print() +# t6=deepx.kaiming_uniform(3,4,5,dtype='float32') +# t6.print() + +t7=deepx.zeros(3,4,5,dtype='float32') +t7.normal_(mean=0,stddev=0.02) +t7.print("%.6f") + +# t7.uniform_(low=0,high=1) +# t7.print("%.6f") +# +# t7.arange_(start=0,step=1) +# t7.print("%.0f") From e9c0411a63589c0c45e7b3c1829560b4738bc602 Mon Sep 17 00:00:00 2001 From: lipeng <734991033@qq.com> Date: Sat, 19 Apr 2025 20:23:34 +0800 Subject: [PATCH 3/3] doc:tensor list --- doc/excuter/op-mem-cuda/list.md | 80 +++++++++---- doc/excuter/op-mem-ompsimd/list.md | 82 +++++++++---- excuter/cpp-common/src/deepx/tf/tf.hpp | 1 + excuter/cpp-common/src/deepx/tf/tffactory.cpp | 42 ++++--- excuter/op-mem-cuda/src/deepx/tf/arg.hpp | 21 +--- .../op-mem-cuda/src/deepx/tf/changeshape.hpp | 4 + .../src/deepx/tf/elementwise_basic.hpp | 109 +++--------------- .../src/deepx/tf/elementwise_compare.hpp | 11 ++ .../src/deepx/tf/elementwise_sin.hpp | 36 +----- 
.../src/deepx/tf/elementwise_sqrt.hpp | 60 ++-------- excuter/op-mem-cuda/src/deepx/tf/init.hpp | 15 +-- excuter/op-mem-cuda/src/deepx/tf/io.hpp | 11 +- excuter/op-mem-cuda/src/deepx/tf/matmul.hpp | 1 + excuter/op-mem-cuda/src/deepx/tf/reduce.hpp | 6 +- .../op-mem-cuda/src/deepx/tf/tensorlife.hpp | 3 + excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp | 24 +--- .../src/deepx/tf/changeshape.hpp | 4 + .../src/deepx/tf/elementwise.hpp | 30 +++++ excuter/op-mem-ompsimd/src/deepx/tf/init.hpp | 15 +-- excuter/op-mem-ompsimd/src/deepx/tf/io.hpp | 11 +- .../op-mem-ompsimd/src/deepx/tf/matmul.hpp | 12 +- .../op-mem-ompsimd/src/deepx/tf/reduce.hpp | 4 + .../src/deepx/tf/tensorlife.hpp | 13 +-- front/py/examples/2_ir/1_init_zeroones.py | 46 ++++---- 24 files changed, 292 insertions(+), 349 deletions(-) diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index 8704844c..34de625c 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -2,47 +2,61 @@ 本页面由 `excuter/op-mem-cuda 生成,请勿手动修改 +### arg + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | +| argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | + +### io + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | +| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | + +### tensorlife + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | 
+| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | +| newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(var shape)->(tensor tensor1) | +| deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | + +### init + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| normal | miaobyte | normal(tensor t, var mean, var stddev, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var stddev, var seed)->() | +| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | +| constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | + +### elementwise + | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| reducemax | miaobyte | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | -| broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | -| concat | miaobyte | concat(listtensor tensors, var dim)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var dim)->(tensor result) | -| transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor A, vector dim_order)->(tensor C) | -| reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | 
T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | -| matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | | switch | miaobyte | switch(listtensor tensors, tensor cases)->(tensor result) | C=switch(tensors,cases) | switch(listtensor tensors, tensor cases)->(tensor result) | | greaterscalar | miaobyte | greaterscalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1, scalar) | greaterscalar(tensor A, var scalar)->(tensor mask) | | equalscalar | miaobyte | equalscalar(tensor A, var scalar, var epsilon)->(tensor mask) | mask=compare(T1, scalar) | equalscalar(tensor A, var scalar, var epsilon)->(tensor mask) | -| prod | miaobyte | prod(tensor A, vector dims, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector dims, var keepdims)->(tensor B) | | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1, T2) | min(tensor A, tensor B)->(tensor C) | | maxscalar | miaobyte | maxscalar(tensor A, var scalar)->(tensor C) | T3=max(T1, scalar) | maxscalar(tensor A, var scalar)->(tensor C) | -| normal | miaobyte | normal(tensor t, var mean, var stddev, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var stddev, var seed)->() | -| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | | addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) | | log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=scalar/T1 | divscalar(tensor A, var scalar)->(tensor C) | | sin | miaobyte | sin(tensor A)->(tensor C) | T3=sin(T1) | sin(tensor 
A)->(tensor C) | | tan | miaobyte | tan(tensor A)->(tensor C) | T3=tan(T1) | tan(tensor A)->(tensor C) | | add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | | greater | miaobyte | greater(tensor A, tensor B)->(tensor mask) | mask=compare(T1, T2) | greater(tensor A, tensor B)->(tensor mask) | -| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | -| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | -| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(vector shape)->(tensor tensor1) | -| newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 = zeros(shape) | newtensor(var shape)->(tensor tensor1) | | less | miaobyte | less(tensor A, tensor B)->(tensor mask) | mask=compare(T1, T2) | less(tensor A, tensor B)->(tensor mask) | -| constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | | powscalar | miaobyte | powscalar(tensor A, var scalar)->(tensor C) | T3=pow(T1, scalar) | powscalar(tensor A, var scalar)->(tensor C) | -| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | | minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1, scalar) | minscalar(tensor A, var scalar)->(tensor C) | | rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | | rpowscalar | miaobyte | rpowscalar(var scalar, tensor A)->(tensor C) | T3=pow(scalar, T1) | rpowscalar(var scalar, tensor A)->(tensor C) | | sub | miaobyte | sub(tensor A, tensor B)->(tensor C) | T3=T1-T2 | sub(tensor A, tensor B)->(tensor 
C) | -| sum | miaobyte | sum(tensor A, vector dims, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector dims, var keepdims)->(tensor B) | -| argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | -| reducemin | miaobyte | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | | equal | miaobyte | equal(tensor A, tensor B, var epsilon)->(tensor mask) | mask=compare(T1, T2) | equal(tensor A, tensor B, var epsilon)->(tensor mask) | @@ -54,5 +68,29 @@ | mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | | exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | | lessscalar | miaobyte | lessscalar(tensor A, var scalar)->(tensor mask) | mask=compare(T1, scalar) | lessscalar(tensor A, var scalar)->(tensor mask) | -| deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | | cos | miaobyte | cos(tensor A)->(tensor C) | T3=cos(T1) | cos(tensor A)->(tensor C) | + +### matmul + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| matmul | cublas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | + +### changeshape + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | +| concat | 
miaobyte | concat(listtensor tensors, var dim)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var dim)->(tensor result) | +| transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor A, vector dim_order)->(tensor C) | +| reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | + +### reduce + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| reducemax | miaobyte | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector dims, var keepdims)->(tensor B) | +| prod | miaobyte | prod(tensor A, vector dims, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector dims, var keepdims)->(tensor B) | +| sum | miaobyte | sum(tensor A, vector dims, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector dims, var keepdims)->(tensor B) | +| reducemin | miaobyte | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector dims, var keepdims)->(tensor B) | + diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md index 65ffb758..b396a357 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -2,47 +2,59 @@ 本页面由 `excuter/op-mem-ompsimd 生成,请勿手动修改 +### arg + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | +| argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | + +### io + +| 
Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | +| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | + +### tensorlife + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | +| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | +| newtensor | none | newtensor(var shape)->(tensor t) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor t) | +| deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | + +### init + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| normal | miaobyte | normal(tensor t, var mean, var std, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var std, var seed)->() | +| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | +| constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | + +### elementwise + | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| -| reducemax | miaobyte | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | -| broadcastTo | 
miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | -| concat | miaobyte | concat(listtensor tensors, var dim)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var dim)->(tensor result) | -| transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T1.transpose(dimorder=[1,0])->T2 | transpose(tensor A, vector dim_order)->(tensor C) | -| reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | -| matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | -| matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | | switch | miaobyte | switch(listtensor tensors, tensor cases)->(tensor C) | C=switch([tensors],case) | switch(listtensor tensors, tensor cases)->(tensor C) | | greaterscalar | miaobyte | greaterscalar(tensor A, var scalar)->(tensor mask) | mask=greater(T1,scalar) | greaterscalar(tensor A, var scalar)->(tensor mask) | | equalscalar | miaobyte | equalscalar(tensor A, var scalar)->(tensor mask) | mask=equal(T1,scalar) | equalscalar(tensor A, var scalar)->(tensor mask) | -| normal | miaobyte | normal(tensor t, var mean, var std, var seed)->() | normal(T1,mean,stddev,seed) | normal(tensor t, var mean, var std, var seed)->() | -| uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | | addscalar | miaobyte | addscalar(tensor a, var scalar)->(tensor c) | T3=T1+scalar | addscalar(tensor a, var scalar)->(tensor c) | | log | miaobyte | log(tensor A)->(tensor C) | T3=log(T1) | log(tensor A)->(tensor C) | -| arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, 
var step)->() | | divscalar | miaobyte | divscalar(tensor A, var scalar)->(tensor C) | T3=T1/scalar | divscalar(tensor A, var scalar)->(tensor C) | | add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | -| copytensor | none | copytensor(tensor src, tensor dst)->() | T2.data = T1.data | copytensor(tensor src, tensor dst)->() | -| prod | miaobyte | prod(tensor A, vector axis, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector axis, var keepdims)->(tensor B) | | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1,T2) | min(tensor A, tensor B)->(tensor C) | | greater | miaobyte | greater(tensor A, tensor B)->(tensor mask) | mask=greater(T1,T2) | greater(tensor A, tensor B)->(tensor mask) | -| print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | -| print | miaobyte | print(tensor , var )->() | print(T1) | print(tensor , var )->() | -| newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | -| newtensor | none | newtensor(var shape)->(tensor t) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor t) | | lessscalar | miaobyte | lessscalar(tensor A, var scalar)->(tensor mask) | mask=less(T1,scalar) | lessscalar(tensor A, var scalar)->(tensor mask) | -| deltensor | none | deltensor(tensor t)->() | del T1 | deltensor(tensor t)->() | | less | miaobyte | less(tensor A, tensor B)->(tensor mask) | mask=less(T1,T2) | less(tensor A, tensor B)->(tensor mask) | -| constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | | powscalar | miaobyte | powscalar(tensor A, var scalar)->(tensor C) | T3=T1^scalar | powscalar(tensor A, var scalar)->(tensor C) | -| vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] 
| vecset(vector value)->(vector name) | | minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1,scalar) | minscalar(tensor A, var scalar)->(tensor C) | | rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | | rpowscalar | miaobyte | rpowscalar(var scalar, tensor A)->(tensor C) | T3=scalar^T1 | rpowscalar(var scalar, tensor A)->(tensor C) | | sub | miaobyte | sub(tensor a, tensor b)->(tensor c) | T3=T1-T2 | sub(tensor a, tensor b)->(tensor c) | -| sum | miaobyte | sum(tensor A, vector axis, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector axis, var keepdims)->(tensor B) | -| argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | -| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | | subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | | equal | miaobyte | equal(tensor A, tensor B)->(tensor mask) | mask=equal(T1,T2) | equal(tensor A, tensor B)->(tensor mask) | @@ -54,3 +66,29 @@ | maxscalar | miaobyte | maxscalar(tensor A, var scalar)->(tensor C) | T3=max(T1,scalar) | maxscalar(tensor A, var scalar)->(tensor C) | | mul | miaobyte | mul(tensor A, tensor B)->(tensor C) | T3=T1*T2 | mul(tensor A, tensor B)->(tensor C) | | exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | + +### matmul + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | +| 
matmul | miaobyte | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | + +### changeshape + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | +| concat | miaobyte | concat(listtensor tensors, var dim)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var dim)->(tensor result) | +| transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T1.transpose(dimorder=[1,0])->T2 | transpose(tensor A, vector dim_order)->(tensor C) | +| reshape | miaobyte | reshape(tensor A, vector shape)->(tensor B) | T1.reshape(shape)->T2 | reshape(tensor A, vector shape)->(tensor B) | + +### reduce + +| Operation | Author | Func Def | Math Formula | IR Instruction | +|-----------|--------|------------|--------------|----------------| +| reducemax | miaobyte | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | +| prod | miaobyte | prod(tensor A, vector axis, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector axis, var keepdims)->(tensor B) | +| sum | miaobyte | sum(tensor A, vector axis, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector axis, var keepdims)->(tensor B) | +| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | + diff --git a/excuter/cpp-common/src/deepx/tf/tf.hpp b/excuter/cpp-common/src/deepx/tf/tf.hpp index 430dc4c5..e123f10c 100644 --- a/excuter/cpp-common/src/deepx/tf/tf.hpp +++ 
b/excuter/cpp-common/src/deepx/tf/tf.hpp @@ -39,6 +39,7 @@ namespace deepx::tf public: string name; string author; + string tftype; vector args; vector returns; // diff --git a/excuter/cpp-common/src/deepx/tf/tffactory.cpp b/excuter/cpp-common/src/deepx/tf/tffactory.cpp index 5d80ef14..1936acfc 100644 --- a/excuter/cpp-common/src/deepx/tf/tffactory.cpp +++ b/excuter/cpp-common/src/deepx/tf/tffactory.cpp @@ -78,24 +78,36 @@ namespace deepx::tf std::stringstream ss; ss << "## " << excuter_name << " 支持算子列表 \n\n"; ss << "本页面由 `excuter/" << excuter_name << " 生成,请勿手动修改 \n\n"; - ss << "| Operation | Author | Func Def | Math Formula | IR Instruction |\n"; - ss << "|-----------|--------|------------|--------------|----------------|\n"; - - // 输出每个操作及其信息 - for (const auto &[name, tf_family] : tf_families) - { - for (const auto &[author, tf_author] : tf_family->tf_authors) - { - for (const auto &tf : tf_author->tfs) - { - ss << "| " << name << " | "; - ss << (author.empty() ? " none " : author) << " | "; - ss << tf->to_string(false, true) << " | "; - ss << tf->math_formula() << " | "; - ss << tf->to_string(false, true) << " |\n"; + + // 首先按tftype分组 + unordered_map>> tf_by_type; + + // 收集所有TF并按tftype分组 + for (const auto &[name, tf_family] : tf_families) { + for (const auto &[author, tf_author] : tf_family->tf_authors) { + for (const auto &tf : tf_author->tfs) { + tf_by_type[tf->tftype].push_back(tf); } } } + + // 为每个tftype生成一个表格 + for (const auto &[tftype, tfs] : tf_by_type) { + ss << "### " << tftype << "\n\n"; + ss << "| Operation | Author | Func Def | Math Formula | IR Instruction |\n"; + ss << "|-----------|--------|------------|--------------|----------------|\n"; + + for (const auto &tf : tfs) { + ss << "| " << tf->name << " | "; + ss << (tf->author.empty() ? 
" none " : tf->author) << " | "; + ss << tf->to_string(false, true) << " | "; + ss << tf->math_formula() << " | "; + ss << tf->to_string(false, true) << " |\n"; + } + + ss << "\n"; + } + return ss.str(); } } \ No newline at end of file diff --git a/excuter/op-mem-cuda/src/deepx/tf/arg.hpp b/excuter/op-mem-cuda/src/deepx/tf/arg.hpp index b4221840..5c8fa93b 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/arg.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/arg.hpp @@ -15,19 +15,11 @@ namespace deepx::tf { this->name = "argset"; this->author = ""; + this->tftype = "arg"; this->args = args; this->returns = returns; } - ArgSet(string text) - { - this->parse(text); - if (this->name != "argset") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } - string math_formula() const override { return "var argname = argvalue"; @@ -85,18 +77,11 @@ namespace deepx::tf { this->name = "vecset"; this->author = ""; + this->tftype = "arg"; this->args = args; this->returns = returns; } - VecSet(string text) - { - this->parse(text); - if (this->name != "vecset") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } - + string math_formula() const override { return "shape = [3 4 5]"; diff --git a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp index a45f5ba8..99186de5 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/changeshape.hpp @@ -19,6 +19,7 @@ namespace deepx::tf { this->name = "reshape"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } @@ -79,6 +80,7 @@ namespace deepx::tf { this->name = "transpose"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } @@ -146,6 +148,7 @@ namespace deepx::tf { this->name = "concat"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } @@ -279,6 +282,7 
@@ namespace deepx::tf { this->name = "broadcastTo"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp index 8611a227..d18eeb88 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp @@ -16,20 +16,12 @@ namespace deepx::tf Add(const vector &args, const vector &returns) { this->name = "add"; - this->author = Author::name(); + this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - Add(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "add") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } string math_formula() const override { return "T3=T1+T2"; @@ -94,19 +86,11 @@ namespace deepx::tf { this->name = "addscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - AddScalar(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "addscalar") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1+scalar"; @@ -170,19 +154,11 @@ namespace deepx::tf { this->name = "sub"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Sub(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "sub") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1-T2"; @@ -247,19 +223,11 @@ namespace deepx::tf { this->name = "subscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - SubScalar(string text) - { - this->parse(text); - 
this->author = Author::name(); - if (this->name != "subscalar") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1-scalar"; @@ -323,19 +291,11 @@ namespace deepx::tf { this->name = "mul"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Mul(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "mul") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1*T2"; @@ -400,19 +360,11 @@ namespace deepx::tf { this->name = "mulscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - MulScalar(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "mulscalar") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1*scalar"; @@ -476,19 +428,11 @@ namespace deepx::tf { this->name = "div"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Div(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "div") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1/T2"; @@ -553,19 +497,11 @@ namespace deepx::tf { this->name = "divscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - DivScalar(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "divscalar") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=scalar/T1"; @@ -629,19 +565,11 @@ namespace deepx::tf { this->name = "rdivscalar"; this->author = Author::name(); + this->tftype 
= "elementwise"; this->args = args; this->returns = returns; } - - RDivScalar(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "rdivscalar") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=scalar/T1"; @@ -706,6 +634,7 @@ namespace deepx::tf { this->name = "invert"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp index 694ad3db..0ee58de8 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp @@ -16,6 +16,7 @@ namespace deepx::tf { this->name = "max"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -80,6 +81,7 @@ namespace deepx::tf { this->name = "maxscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -144,6 +146,7 @@ namespace deepx::tf { this->name = "min"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -209,6 +212,7 @@ namespace deepx::tf { this->name = "minscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -273,6 +277,7 @@ namespace deepx::tf { this->name = "equal"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -339,6 +344,7 @@ namespace deepx::tf { this->name = "equalscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -405,6 +411,7 @@ namespace deepx::tf { this->name = "less"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ 
-471,6 +478,7 @@ namespace deepx::tf { this->name = "lessscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -536,6 +544,7 @@ namespace deepx::tf { this->name = "greater"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -602,6 +611,7 @@ namespace deepx::tf { this->name = "greaterscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -667,6 +677,7 @@ namespace deepx::tf { this->name = "switch"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp index bea0a9f4..d5eae4e2 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sin.hpp @@ -16,19 +16,11 @@ namespace deepx::tf { this->name = "sin"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Sin(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "sin") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=sin(T1)"; @@ -77,19 +69,11 @@ namespace deepx::tf { this->name = "cos"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Cos(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "cos") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=cos(T1)"; @@ -140,19 +124,11 @@ namespace deepx::tf { this->name = "tan"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Tan(string text) - { - 
this->parse(text); - this->author = Author::name(); - if (this->name != "tan") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=tan(T1)"; diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp index ae417bfe..d13b22f6 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_sqrt.hpp @@ -16,19 +16,11 @@ namespace deepx::tf { this->name = "pow"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Pow(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "pow") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=pow(T1, T2)"; @@ -79,19 +71,11 @@ namespace deepx::tf { this->name = "powscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - PowScalar(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "powscalar") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=pow(T1, scalar)"; @@ -140,6 +124,7 @@ namespace deepx::tf { this->name = "rpowscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -190,19 +175,12 @@ namespace deepx::tf { this->name = "sqrt"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - Sqrt(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "sqrt") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=sqrt(T1)"; @@ -255,19 +233,11 @@ namespace deepx::tf { this->name = "log"; 
this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Log(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "log") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=log(T1)"; @@ -320,19 +290,11 @@ namespace deepx::tf { this->name = "exp"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } - - Exp(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "exp") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=exp(T1)"; diff --git a/excuter/op-mem-cuda/src/deepx/tf/init.hpp b/excuter/op-mem-cuda/src/deepx/tf/init.hpp index bbdb883e..480bf18c 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/init.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/init.hpp @@ -19,19 +19,11 @@ namespace deepx::tf { this->name = "constant"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } - Constant(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "constant") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } - + int run(shared_ptr mem, string &error) override { string name = this->args[0].textvalue; @@ -107,6 +99,7 @@ namespace deepx::tf { this->name = "arange"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } @@ -191,6 +184,7 @@ namespace deepx::tf { this->name = "uniform"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } @@ -276,6 +270,7 @@ namespace deepx::tf { this->name = "normal"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/io.hpp 
b/excuter/op-mem-cuda/src/deepx/tf/io.hpp index 6118471a..14315a85 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/io.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/io.hpp @@ -16,18 +16,11 @@ namespace deepx::tf { this->name = "print"; this->author = Author::name(); + this->tftype = "io"; this->args = args; this->returns = returns; } - Print(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "print") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + int run(shared_ptr mem, string &error) override { string name = this->args[0].textvalue; diff --git a/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp b/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp index 9a4c5bc6..a61b5c04 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/matmul.hpp @@ -19,6 +19,7 @@ namespace deepx::tf { this->name = "matmul"; this->author = Author::name(); + this->tftype = "matmul"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp b/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp index 4a1643ea..a23319d7 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/reduce.hpp @@ -17,6 +17,7 @@ namespace deepx::tf { this->name = "sum"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } @@ -82,6 +83,7 @@ namespace deepx::tf { this->name = "prod"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } @@ -140,6 +142,7 @@ namespace deepx::tf { this->name = "reducemax"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } @@ -190,7 +193,7 @@ namespace deepx::tf } }; - template + template class ReduceMin : public TF { public: @@ -198,6 +201,7 @@ namespace deepx::tf { this->name = "reducemin"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; 
this->returns = returns; } diff --git a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp index 8e303037..43041188 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/tensorlife.hpp @@ -17,6 +17,7 @@ namespace deepx::tf NewTensor(vector args, vector returns) { this->name = "newtensor"; + this->tftype = "tensorlife"; this->args = args; this->returns = returns; } @@ -138,6 +139,7 @@ namespace deepx::tf CopyTensor(vector args, vector returns) { this->name = "copytensor"; + this->tftype = "tensorlife"; this->args = args; this->returns = returns; } @@ -226,6 +228,7 @@ namespace deepx::tf DelTensor(vector args, vector returns) { this->name = "deltensor"; + this->tftype = "tensorlife"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp index 2c3f4f5e..13733fa1 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/arg.hpp @@ -16,19 +16,11 @@ namespace deepx::tf ArgSet(vector args, vector returns) { this->name = "argset"; - this->author = ""; + this->tftype = "arg"; this->args = args; this->returns = returns; } - ArgSet(string text, bool call = false) - { - this->parse(text); - if (this->name != "argset") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } - + string math_formula() const override { return "var argname = argvalue"; @@ -85,19 +77,11 @@ namespace deepx::tf VecSet(vector args, vector returns) { this->name = "vecset"; - this->author = ""; + this->tftype = "arg"; this->args = args; this->returns = returns; } - VecSet(string text) - { - this->parse(text); - if (this->name != "vecset") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } - + string math_formula() const override { return "shape = [3 4 5]"; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp 
b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp index 784abfc0..e9c534b7 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/changeshape.hpp @@ -21,6 +21,7 @@ namespace deepx::tf this->author = Author::name(); this->args = args; this->returns = returns; + this->tftype = "changeshape"; } string math_formula() const override @@ -84,6 +85,7 @@ namespace deepx::tf { this->name = "transpose"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } @@ -149,6 +151,7 @@ namespace deepx::tf { this->name = "concat"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } @@ -260,6 +263,7 @@ namespace deepx::tf { this->name = "broadcastTo"; this->author = Author::name(); + this->tftype = "changeshape"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp index 53f0b504..e1914688 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp @@ -19,6 +19,7 @@ namespace deepx::tf { this->name = "add"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -80,6 +81,7 @@ namespace deepx::tf { this->name = "addscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -139,6 +141,7 @@ namespace deepx::tf { this->name = "sub"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -200,6 +203,7 @@ namespace deepx::tf { this->name = "subscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -260,6 +264,7 @@ namespace deepx::tf { this->name = "mul"; this->author = Author::name(); + this->tftype = "elementwise"; 
this->args = args; this->returns = returns; } @@ -321,6 +326,7 @@ namespace deepx::tf { this->name = "mulscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -381,6 +387,7 @@ namespace deepx::tf { this->name = "div"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -442,6 +449,7 @@ namespace deepx::tf { this->name = "divscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -498,6 +506,7 @@ namespace deepx::tf { this->name = "rdivscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -555,6 +564,7 @@ namespace deepx::tf { this->name = "invert"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -609,6 +619,7 @@ namespace deepx::tf { this->name = "sqrt"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -653,6 +664,7 @@ namespace deepx::tf { this->name = "pow"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -698,6 +710,7 @@ namespace deepx::tf { this->name = "powscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -743,6 +756,7 @@ namespace deepx::tf { this->name = "rpowscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -787,6 +801,7 @@ namespace deepx::tf { this->name = "log"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -831,6 +846,7 @@ namespace deepx::tf { this->name = "exp"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -875,6 +891,7 @@ namespace 
deepx::tf { this->name = "sin"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -919,6 +936,7 @@ namespace deepx::tf { this->name = "cos"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -963,6 +981,7 @@ namespace deepx::tf { this->name = "tan"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1007,6 +1026,7 @@ namespace deepx::tf { this->name = "max"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1064,6 +1084,7 @@ namespace deepx::tf { this->name = "maxscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1120,6 +1141,7 @@ namespace deepx::tf { this->name = "min"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1177,6 +1199,7 @@ namespace deepx::tf { this->name = "minscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1235,6 +1258,7 @@ namespace deepx::tf { this->name = "equal"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1294,6 +1318,7 @@ namespace deepx::tf { this->name = "equalscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1352,6 +1377,7 @@ namespace deepx::tf { this->name = "less"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1410,6 +1436,7 @@ namespace deepx::tf { this->name = "lessscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1467,6 +1494,7 @@ namespace deepx::tf { this->name = "greater"; this->author = Author::name(); + 
this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1525,6 +1553,7 @@ namespace deepx::tf { this->name = "greaterscalar"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } @@ -1582,6 +1611,7 @@ namespace deepx::tf { this->name = "switch"; this->author = Author::name(); + this->tftype = "elementwise"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp index dd5cdc45..c28b569e 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/init.hpp @@ -16,19 +16,11 @@ namespace deepx::tf { this->name = "constant"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } - Constant(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "constant") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } - + int run(shared_ptr mem, string &error) override { string name = this->args[0].textvalue; @@ -105,6 +97,7 @@ namespace deepx::tf { this->name = "arange"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } @@ -179,6 +172,7 @@ namespace deepx::tf { this->name = "uniform"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } @@ -253,6 +247,7 @@ namespace deepx::tf { this->name = "normal"; this->author = Author::name(); + this->tftype = "init"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp index ba180f3d..0fd86a18 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/io.hpp @@ -15,19 +15,12 @@ namespace deepx::tf Print(vector args, vector returns) { this->name = "print"; + this->tftype = "io"; this->author = 
Author::name(); this->args = args; this->returns = returns; } - Print(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "print") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + int run(shared_ptr mem, string &error) override { string name = this->args[0].textvalue; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp index 77d61208..89804a18 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp @@ -17,19 +17,11 @@ namespace deepx::tf { this->name = "matmul"; this->author = Author::name(); + this->tftype = "matmul"; this->args = args; this->returns = returns; } - - MatMul(string text) - { - this->parse(text); - this->author = Author::name(); - if (this->name != "matmul") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + string math_formula() const override { return "T3=T1 @ T2"; diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp index 4e2bd1b9..f8b43e53 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp @@ -17,6 +17,7 @@ namespace deepx::tf { this->name = "sum"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } @@ -76,6 +77,7 @@ namespace deepx::tf { this->name = "prod"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } @@ -134,6 +136,7 @@ namespace deepx::tf { this->name = "reducemax"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } @@ -192,6 +195,7 @@ namespace deepx::tf { this->name = "reducemin"; this->author = Author::name(); + this->tftype = "reduce"; this->args = args; this->returns = returns; } diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp 
b/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp index 92a45098..d703355c 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/tensorlife.hpp @@ -15,18 +15,11 @@ namespace deepx::tf NewTensor(vector args, vector returns) { this->name = "newtensor"; + this->tftype = "tensorlife"; this->args = args; this->returns = returns; } - - NewTensor(string text, bool call = false) - { - this->parse(text); - if (this->name != "newtensor") - { - throw std::runtime_error("Invalid name: " + this->name); - } - } + int run(shared_ptr mem, string &error) override { string name = this->returns[0].textvalue; @@ -139,6 +132,7 @@ namespace deepx::tf this->name = "copytensor"; this->args = args; this->returns = returns; + this->tftype = "tensorlife"; } int run(shared_ptr mem, string &error) override @@ -218,6 +212,7 @@ namespace deepx::tf this->name = "deltensor"; this->args = args; this->returns = returns; + this->tftype = "tensorlife"; } int run(shared_ptr mem, string &error) override { diff --git a/front/py/examples/2_ir/1_init_zeroones.py b/front/py/examples/2_ir/1_init_zeroones.py index d6de4318..ca788569 100644 --- a/front/py/examples/2_ir/1_init_zeroones.py +++ b/front/py/examples/2_ir/1_init_zeroones.py @@ -2,18 +2,18 @@ ############-------PyTorch-------################ import torch -# torch_t1 = torch.zeros(3, 4, 5, dtype=torch.float32) -# torch_t2 = torch.ones(3, 4, 5, dtype=torch.float32) -# torch_t4 = torch.full((3, 4, 5), 0.5) -# print(torch_t4) -# torch_t5=torch.nn.init.uniform_(torch.zeros(3,4,5),0,1) -# print(torch_t5) - - -# torch_t6 = torch.zeros(3, 4, 5, dtype=torch.float32) -# torch.nn.init.kaiming_uniform_(torch_t6) -# print(torch_t6) -# +torch_t1 = torch.zeros(3, 4, 5, dtype=torch.float32) +torch_t2 = torch.ones(3, 4, 5, dtype=torch.float32) +torch_t4 = torch.full((3, 4, 5), 0.5) +print(torch_t4) +torch_t5=torch.nn.init.uniform_(torch.zeros(3,4,5),0,1) +print(torch_t5) + + +torch_t6 = torch.zeros(3, 4, 5, 
dtype=torch.float32) +torch.nn.init.kaiming_uniform_(torch_t6) +print(torch_t6) + torch_t7 = torch.zeros(3, 4, 5, dtype=torch.float32) torch_t7.normal_(mean=0,std=0.02) print(torch_t7) @@ -23,21 +23,15 @@ import deepx print() -# t1 = deepx.zeros([3,4,5],dtype='float32') -# t2 = deepx.ones([3,4,5],dtype='float32') -# t4=deepx.full([3,4,5],value=0.5) -# t4.print() -# t5=deepx.uniform(3,4,5,low=0,high=1) -# t5.print() -# t6=deepx.kaiming_uniform(3,4,5,dtype='float32') -# t6.print() +t1 = deepx.zeros([3,4,5],dtype='float32') +t2 = deepx.ones([3,4,5],dtype='float32') +t4=deepx.full([3,4,5],value=0.5) +t4.print() +t5=deepx.uniform(3,4,5,low=0,high=1) +t5.print() +t6=deepx.kaiming_uniform(3,4,5,dtype='float32') +t6.print() t7=deepx.zeros(3,4,5,dtype='float32') t7.normal_(mean=0,stddev=0.02) t7.print("%.6f") - -# t7.uniform_(low=0,high=1) -# t7.print("%.6f") -# -# t7.arange_(start=0,step=1) -# t7.print("%.0f")