From f71f809bfd2b3ce3280d5ff440c854ad891d23b0 Mon Sep 17 00:00:00 2001 From: miaobyte <734991033@qq.com> Date: Thu, 20 Mar 2025 16:04:06 +0800 Subject: [PATCH 1/4] excuter(cpu/cuda):subscalar --- doc/excuter/op-mem-cuda/list.md | 2 + doc/excuter/op-mem-ompsimd/list.md | 1 + .../src/deepx/tensorfunc/elementwise.hpp | 12 +- excuter/op-mem-cuda/src/client/tfs.cpp | 13 +- .../tensorfunc/elementwise_miaobyte_basic.cu | 29 ++- .../tensorfunc/elementwise_miaobyte_basic.cuh | 33 ++- .../tensorfunc/elementwise_miaobyte_basic.hpp | 14 ++ .../src/deepx/tf/elementwise_basic.hpp | 82 ++++++- excuter/op-mem-ompsimd/src/client/tfs.cpp | 10 + .../src/deepx/tf/elementwise.hpp | 222 +++++++++++------- 10 files changed, 320 insertions(+), 98 deletions(-) diff --git a/doc/excuter/op-mem-cuda/list.md b/doc/excuter/op-mem-cuda/list.md index 171779a4..9913a248 100644 --- a/doc/excuter/op-mem-cuda/list.md +++ b/doc/excuter/op-mem-cuda/list.md @@ -5,8 +5,10 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| | addscalar | miaobyte | addscalar(tensor A, var b)->(tensor C) | T3=T1+scalar | addscalar(tensor A, var b)->(tensor C) | +| add | cublas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| subscalar | miaobyte | subscalar(tensor A, var b)->(tensor C) | T3=T1-scalar | subscalar(tensor A, var b)->(tensor C) | | arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | constant | miaobyte | constant(tensor t, var value)->() | constant(T1) | constant(tensor t, var value)->() | | print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md index 581ab8f9..f10183f4 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -9,6 +9,7 @@ | add | cblas | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | add | miaobyte | add(tensor a, tensor b)->(tensor c) | T3=T1+T2 | add(tensor a, tensor b)->(tensor c) | | uniform | miaobyte | uniform(tensor t, var low, var high, var seed)->() | uniform(T1,low,high,seed) | uniform(tensor t, var low, var high, var seed)->() | +| subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | | arange | miaobyte | arange(tensor t, var start, var step)->() | arange(T1,start,step) | arange(tensor t, var start, var step)->() | | constant | miaobyte | constant(tensor t, var value)->() | constant(T1,value) | constant(tensor t, var value)->() | | print | miaobyte | print(tensor )->() | print(T1) | print(tensor )->() | diff --git a/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp b/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp index bf6fd053..e05506f7 100644 --- a/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp +++ b/excuter/cpp-common/src/deepx/tensorfunc/elementwise.hpp @@ -24,7 +24,9 @@ namespace deepx::tensorfunc template struct addscalarDispatcher { - static void addscalar(const Tensor &input, const T value, Tensor &output) = delete; + static void addscalar(const Tensor &input, const T value, Tensor &output){ + throw NotImplementError("addscalar"); + } }; template @@ -36,7 +38,9 @@ namespace deepx::tensorfunc template struct subDispatcher { - static void sub(const Tensor &A, const Tensor &B, Tensor &C) = delete; + static void sub(const Tensor &A, const Tensor &B, Tensor &C){ + throw NotImplementError("sub"); + } }; template @@ -48,7 +52,9 @@ namespace deepx::tensorfunc template struct subscalarDispatcher { - static void subscalar(const Tensor &input, const T value, Tensor &output) = delete; + static void subscalar(const Tensor &input, const T value, Tensor &output){ + throw NotImplementError("subscalar"); + } }; template diff --git a/excuter/op-mem-cuda/src/client/tfs.cpp b/excuter/op-mem-cuda/src/client/tfs.cpp index 41108a7f..cfcbec3b 100644 --- a/excuter/op-mem-cuda/src/client/tfs.cpp +++ b/excuter/op-mem-cuda/src/client/tfs.cpp @@ -114,7 +114,7 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); - tffactory.add_tf(std::make_shared>(vector( + tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), Param("b", DataCategory::Var, Precision::Any), @@ -133,7 +133,16 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); - + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("b", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + // opfactory.add_op(Sub_cblas()); // opfactory.add_op(Sub_cblas()); diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu index f66950ac..f4836cd6 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cu @@ -105,7 +105,34 @@ namespace deepx::tensorfunc template void launch_sub(const int numBlocks, const int blockSize, const int16_t* a, const int16_t* b, int16_t* c, const int size); template void launch_sub(const int numBlocks, const int blockSize, const int8_t* a, const int8_t* b, int8_t* c, const int size); - + template + __global__ void subscalar_kernel(const T* A, const T scalar, T* C,const int size){ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + C[idx] = A[idx] - scalar; + } + } + template __global__ void subscalar_kernel(const double* A, const double scalar, double* C,const int size); + template __global__ void subscalar_kernel(const float* A, const float scalar, float* C,const int size); + template __global__ void subscalar_kernel(const half* A, const half scalar, half* C,const int size); + template __global__ void subscalar_kernel(const nv_bfloat16* A, const nv_bfloat16 scalar, nv_bfloat16* C,const int size); + template __global__ void subscalar_kernel(const int64_t* A, const int64_t scalar, int64_t* C,const int size); + template __global__ void subscalar_kernel(const int32_t* A, const int32_t scalar, int32_t* C,const int size); + template __global__ void subscalar_kernel(const int16_t* A, const int16_t scalar, int16_t* C,const int size); + template __global__ void subscalar_kernel(const int8_t* A, const int8_t scalar, int8_t* C,const int size); + + template + void launch_subscalar(const int numBlocks, const int blockSize, const T* a, const T scalar, T* c, const int size) { + subscalar_kernel<<>>(a, scalar, c, size); + } + template void launch_subscalar(const int numBlocks, const int blockSize, const double* a, const double scalar, double* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const float* a, const float scalar, float* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const half* a, const half scalar, half* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16 scalar, nv_bfloat16* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const int64_t* a, const int64_t scalar, int64_t* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c, const int size); + template void launch_subscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c, const int size); } #endif // DEEPX_TENSORFUNC_ELEMENTWISE_MIAO_BYTE_BASIC_CUH diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh index 77102fc9..966cfa1c 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.cuh @@ -103,7 +103,38 @@ namespace deepx::tensorfunc template <> void launch_sub(int numBlocks, int blockSize, const int8_t* a, const int8_t* b, int8_t* c,const int size); - + + // subscalar + template + __global__ void subscalar_kernel(const T* A, const T scalar, T* C,const int size); + + template + void launch_subscalar(const int numBlocks, const int blockSize, const T* a, const T scalar, T* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const double* a, const double scalar, double* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const float* a, const float scalar, float* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const nv_bfloat16* a, const nv_bfloat16 scalar, nv_bfloat16* c,const int size); + + template <> + void launch_subscalar<__half>(const int numBlocks, const int blockSize, const __half* a, const __half scalar, __half* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const int64_t* a, const int64_t scalar, int64_t* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const int32_t* a, const int32_t scalar, int32_t* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const int16_t* a, const int16_t scalar, int16_t* c,const int size); + + template <> + void launch_subscalar(const int numBlocks, const int blockSize, const int8_t* a, const int8_t scalar, int8_t* c,const int size); + } #endif // DEEPX_TENSORFUNC_ELEMENTWISE_MIAO_BYTE_BASIC_CUH diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp index 2da8ec9c..0500dd60 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/elementwise_miaobyte_basic.hpp @@ -55,6 +55,20 @@ namespace deepx::tensorfunc launch_sub(numBlocks, blockSize, A.data, B.data, C.data, A.shape.size); } }; + + template + struct subscalarDispatcher + { + static void subscalar(const Tensor &A, const T scalar, Tensor &C) + { + if (A.shape.size != C.shape.size) { + throw TensorShapeError("subscalar"); + } + const int blockSize = A.shape.size > 256 ? 256 : A.shape.size; + int numBlocks = (A.shape.size + blockSize - 1) / blockSize; + launch_subscalar(numBlocks, blockSize, A.data, scalar, C.data, A.shape.size); + } + }; } #endif // DEEPX_TENSORFUNC_ELEMENTWISE_MIAO_BYTE_BASIC_HPP diff --git a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp index 218432a8..c0910a99 100644 --- a/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp +++ b/excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp @@ -83,10 +83,10 @@ namespace deepx::tf }; template - class Addscalar : public TF + class AddScalar : public TF { public: - Addscalar(const vector &args, const vector &returns) + AddScalar(const vector &args, const vector &returns) { this->name = "addscalar"; this->author = Author::name(); @@ -94,7 +94,7 @@ namespace deepx::tf this->returns = returns; } - Addscalar(string text) + AddScalar(string text) { this->parse(text); this->author = Author::name(); @@ -109,7 +109,7 @@ namespace deepx::tf } shared_ptr clone() const override { - return make_shared>(*this); + return make_shared>(*this); } int run(shared_ptr mem, string &error) override { @@ -226,6 +226,80 @@ namespace deepx::tf return 0; } }; + + template + class SubScalar : public TF + { + public: + SubScalar(const vector &args, const vector &returns) + { + this->name = "subscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + SubScalar(string text) + { + this->parse(text); + this->author = Author::name(); + if (this->name != "subscalar") + { + throw std::runtime_error("Invalid name: " + this->name); + } + } + string math_formula() const override + { + return "T3=T1-scalar"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float16: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::BFloat16: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + }; #endif // DEEPX_TF_ELEMENTWISE_BASIC_HPP diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index 6bae8e79..b2de5145 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -140,6 +140,16 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); + + tffactory.add_tf(std::make_shared>(vector( + { + Param("a", DataCategory::Tensor, Precision::Any), + Param("scalar", DataCategory::Var, Precision::Any), + }), + vector( + { + Param("c", DataCategory::Tensor, Precision::Any), + }))); // opfactory.add_op(Addscalar_miaobyte()); // opfactory.add_op(Addscalar_miaobyte()); diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp index 1702c644..5487a2a7 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp @@ -8,11 +8,13 @@ #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/elementwise_miaobyte.hpp" #include "deepx/tensorfunc/elementwise_cblas.hpp" -namespace deepx::tf { +namespace deepx::tf +{ template - class Add : public TF { - public: + class Add : public TF + { + public: Add(vector args, vector returns) { this->name = "add"; @@ -23,7 +25,7 @@ namespace deepx::tf { string math_formula() const override { return "T3=T1+T2"; - } + } shared_ptr clone() const override { return make_shared>(*this); @@ -38,38 +40,38 @@ namespace deepx::tf { error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); return 1; } - switch (a_type) + switch (a_type) { - case Precision::Float64: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Float32: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int64: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int32: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int16: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int8: - tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - default: - error = "Unsupported dtype: " + precision_str(a_type); - return 1; + case Precision::Float64: + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::add(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; } return 0; } - }; template - class AddScalar : public TF { - public: + class AddScalar : public TF + { + public: AddScalar(vector args, vector returns) { this->name = "addscalar"; @@ -80,7 +82,7 @@ namespace deepx::tf { string math_formula() const override { return "T3=T1+scalar"; - } + } shared_ptr clone() const override { return make_shared>(*this); @@ -94,37 +96,37 @@ namespace deepx::tf { error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); return 1; } - switch (a_type) + switch (a_type) { - case Precision::Float64: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Float32: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int64: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int32: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int16: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int8: - tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); - break; - default: - error = "Unsupported dtype: " + precision_str(a_type); - return 1; + case Precision::Float64: + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::addscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; } return 0; } - }; template - class Sub : public TF { - public: + class Sub : public TF + { + public: Sub(vector args, vector returns) { this->name = "sub"; @@ -135,7 +137,7 @@ namespace deepx::tf { string math_formula() const override { return "T3=T1-T2"; - } + } shared_ptr clone() const override { return make_shared>(*this); @@ -150,43 +152,89 @@ namespace deepx::tf { error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); return 1; } - switch (a_type) + switch (a_type) { - case Precision::Float64: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Float32: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int64: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int32: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int16: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - case Precision::Int8: - tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); - break; - default: - error = "Unsupported dtype: " + precision_str(a_type); - return 1; + case Precision::Float64: + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::sub(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; } return 0; } - }; - + template + class SubScalar : public TF + { + public: + SubScalar(vector args, vector returns) + { + this->name = "subscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "T3=T1-scalar"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::subscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; } - - - - - - - #endif From 1a92984eae9942d88c45247b68fe797925252507 Mon Sep 17 00:00:00 2001 From: miaobyte <734991033@qq.com> Date: Wed, 26 Mar 2025 18:19:30 +0800 Subject: [PATCH 2/4] =?UTF-8?q?front:newtensor,print=20=E8=81=94=E5=90=88?= =?UTF-8?q?=E8=B0=83=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/deepx/tf/elementwise.hpp | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp index 5487a2a7..d3342110 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tf/elementwise.hpp @@ -235,6 +235,119 @@ namespace deepx::tf return 0; } }; + + template + class Mul : public TF + { + public: + Mul(vector args, vector returns) + { + this->name = "mul"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "T3=T1*T2"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != b_type || a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::mul(*mem->gettensor(this->args[0].textvalue), *mem->gettensor(this->args[1].textvalue), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; + + template + class MulScalar : public TF + { + public: + MulScalar(vector args, vector returns) + { + this->name = "mulscalar"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "T3=T1*scalar"; + } + shared_ptr clone() const override + { + return make_shared>(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (a_type != c_type) + { + error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(c_type); + return 1; + } + switch (a_type) + { + case Precision::Float64: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Float32: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int64: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int32: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int16: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + case Precision::Int8: + tensorfunc::mulscalar(*mem->gettensor(this->args[0].textvalue), this->getvar(1, mem), *mem->gettensor(this->returns[0].textvalue)); + break; + default: + error = "Unsupported dtype: " + precision_str(a_type); + return 1; + } + return 0; + } + }; } #endif From 640d3cf5950b811a1720e8520eac4308b2dc6cb1 Mon Sep 17 00:00:00 2001 From: miaobyte <734991033@qq.com> Date: Wed, 26 Mar 2025 18:20:00 +0800 Subject: [PATCH 3/4] =?UTF-8?q?front:newtensor,print=20=E8=81=94=E5=90=88?= =?UTF-8?q?=E8=B0=83=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- front/py/deepx/nn/deepxir.py | 40 +++++++++++++++++++-------- front/py/deepx/nn/functional/new.py | 4 +-- front/py/deepx/nn/functional/print.py | 4 +-- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/front/py/deepx/nn/deepxir.py b/front/py/deepx/nn/deepxir.py index 6dce38ea..988afdc2 100644 --- a/front/py/deepx/nn/deepxir.py +++ b/front/py/deepx/nn/deepxir.py @@ -1,14 +1,36 @@ -from typing import Tuple, List, Optional +from typing import Tuple, List, Optional,Union import time from datetime import datetime # 添加datetime模块 +class Param: + def __init__(self, value:Optional[Union[str,int,float,list,tuple]], category:str=None,precision:str=None): + if isinstance(value,str): + self._textvalue=value + elif isinstance(value,int) or isinstance(value,float): + self._textvalue=str(value) + elif isinstance(value,list) or isinstance(value,tuple): + self._textvalue='['+' '.join(str(v) for v in value)+']' + else: + raise ValueError(f"Invalid value type: {type(value)}") + + self._category=category + self._precision=precision + + def __str__(self): + if self._category is not None: + if self._precision is not None: + return f"{self._category}<{self._precision}> {self._textvalue}" + else: + return f"{self._category} {self._textvalue}" + else: + return self._textvalue + class DeepxIR: def __init__(self, name:str, - dtype:str, - args: List[str], - returns: List[str], - author:str): + args: List[Param], + returns: List[Param], + author:str=''): """ 初始化操作节点 Args: @@ -17,8 +39,7 @@ def __init__(self, author: tensorfunc的作者名称,如"miaobyte" """ - self._name = name - self._dtype = dtype + self._name = name self._args = args self._returns = returns self._author = author @@ -28,10 +49,7 @@ def __init__(self, def __str__(self): # 函数名部分 - if self._dtype == None or self._dtype == '': - parts = [self._name] - else: - parts = [f"{self._name}@{self._dtype}"] + parts = [self._name] # 处理输入参数部分 - 使用括号和逗号分隔 args_parts = [] diff --git a/front/py/deepx/nn/functional/new.py b/front/py/deepx/nn/functional/new.py index 879eda7d..1cc14dff 100644 --- a/front/py/deepx/nn/functional/new.py +++ b/front/py/deepx/nn/functional/new.py @@ -1,6 +1,6 @@ from deepx.tensor import Tensor from deepx.autograd.graph import Graph -from deepx.nn.deepxir import DeepxIR +from deepx.nn.deepxir import DeepxIR,Param from deepx.scheduler import send def newtensor(t:Tensor,name:str=None): @@ -8,7 +8,7 @@ def newtensor(t:Tensor,name:str=None): t._graph = graph t._node=graph.add_tensor(name,t=t) if t.graph.eager: - ir2=DeepxIR("newtensor", t.dtype, t.shape, [t._node.name]) + ir2=DeepxIR("newtensor",[Param(t.shape)], [Param(t._node.name,category='tensor',precision=t.dtype)]) send(ir2) def copytensor(t:Tensor,out:Tensor): graph = Graph.get_default() diff --git a/front/py/deepx/nn/functional/print.py b/front/py/deepx/nn/functional/print.py index b4c11fb6..2eb2bb25 100644 --- a/front/py/deepx/nn/functional/print.py +++ b/front/py/deepx/nn/functional/print.py @@ -4,8 +4,8 @@ from deepx.scheduler import send OpNode.register("print") -def printtensor(t:Tensor,format=''): - ir=DeepxIR("print",'', [t.node.name,format], []) +def printtensor(t:Tensor,format='',author='miaobyte'): + ir=DeepxIR("print",[t.node.name,format], [],author) send(ir) return '' From 73bc7ba905da6451fdfe3127102d739ca85e050c Mon Sep 17 00:00:00 2001 From: harryharrygo Date: Thu, 20 Mar 2025 22:47:55 +0800 Subject: [PATCH 4/4] Fix build error in gcc compiler. (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In gcc/++13 compiler, it shows error: ``` dtype.hpp:8:29: error: found ‘:’ in nested-name-specifier, expected ‘::’ 8 | enum class DataCategory : uint8_t ``` --- excuter/cpp-common/src/deepx/dtype.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/excuter/cpp-common/src/deepx/dtype.hpp b/excuter/cpp-common/src/deepx/dtype.hpp index 9b9e24e8..b93a2a7a 100644 --- a/excuter/cpp-common/src/deepx/dtype.hpp +++ b/excuter/cpp-common/src/deepx/dtype.hpp @@ -2,6 +2,8 @@ #define DEEPX_DTYPE_HPP #include +#include + namespace deepx {