diff --git a/doc/excuter/op-mem-ompsimd/list.md b/doc/excuter/op-mem-ompsimd/list.md index f018d192..c9d51f4f 100644 --- a/doc/excuter/op-mem-ompsimd/list.md +++ b/doc/excuter/op-mem-ompsimd/list.md @@ -4,6 +4,7 @@ | Operation | Author | Func Def | Math Formula | IR Instruction | |-----------|--------|------------|--------------|----------------| +| reducemax | miaobyte | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemax(A, axis=[1 2], keepdims=false) | reducemax(tensor A, vector axis, var keepdims)->(tensor B) | | broadcastTo | miaobyte | broadcastTo(tensor A, vector new_shape)->(tensor B) | T2 = T1.broadcastTo(new_shape=[4,3,2]) | broadcastTo(tensor A, vector new_shape)->(tensor B) | | concat | miaobyte | concat(listtensor tensors, var axis)->(tensor result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor tensors, var axis)->(tensor result) | | transpose | miaobyte | transpose(tensor A, vector dim_order)->(tensor C) | T1.transpose(dimorder=[1,0])->T2 | transpose(tensor A, vector dim_order)->(tensor C) | @@ -21,8 +22,10 @@ | newtensor | none | newtensor(vector shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector shape)->(tensor tensor1) | | newtensor | none | newtensor(var shape)->(tensor tensor1) | T1 =Tensor(shape=[...]) | newtensor(var shape)->(tensor tensor1) | | vecset | none | vecset(vector value)->(vector name) | shape = [3 4 5] | vecset(vector value)->(vector name) | +| reducemin | miaobyte | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | B = reducemin(A, axis=[1 2], keepdims=false) | reducemin(tensor A, vector axis, var keepdims)->(tensor B) | | subscalar | miaobyte | subscalar(tensor a, var scalar)->(tensor c) | T3=T1-scalar | subscalar(tensor a, var scalar)->(tensor c) | | sqrt | miaobyte | sqrt(tensor A)->(tensor C) | T3=sqrt(T1) | sqrt(tensor A)->(tensor C) | +| sum | miaobyte | sum(tensor A, vector axis, var keepdims)->(tensor B) | B = sum(A, axis=[1 2], keepdims=false) | sum(tensor A, vector axis, var keepdims)->(tensor B) | | argset | none | argset(var value)->(var name) | var argname = argvalue | argset(var value)->(var name) | | sub | miaobyte | sub(tensor a, tensor b)->(tensor c) | T3=T1-T2 | sub(tensor a, tensor b)->(tensor c) | | mulscalar | miaobyte | mulscalar(tensor A, var b)->(tensor C) | T3=T1*scalar | mulscalar(tensor A, var b)->(tensor C) | @@ -36,6 +39,7 @@ | exp | miaobyte | exp(tensor A)->(tensor C) | T3=exp(T1) | exp(tensor A)->(tensor C) | | rdivscalar | miaobyte | rdivscalar(var scalar, tensor A)->(tensor C) | T3=scalar/T1 | rdivscalar(var scalar, tensor A)->(tensor C) | | minscalar | miaobyte | minscalar(tensor A, var scalar)->(tensor C) | T3=min(T1,scalar) | minscalar(tensor A, var scalar)->(tensor C) | +| prod | miaobyte | prod(tensor A, vector axis, var keepdims)->(tensor B) | B = prod(A, axis=[1 2], keepdims=false) | prod(tensor A, vector axis, var keepdims)->(tensor B) | | min | miaobyte | min(tensor A, tensor B)->(tensor C) | T3=min(T1,T2) | min(tensor A, tensor B)->(tensor C) | | compare | miaobyte | compare(tensor A, tensor B)->(tensor mask) | mask=compare(T1,T2) | compare(tensor A, tensor B)->(tensor mask) | | matmul | cblas | matmul(tensor A, tensor B)->(tensor C) | T3=T1 @ T2 | matmul(tensor A, tensor B)->(tensor C) | diff --git a/excuter/cpp-common/src/deepx/shape_broadcast.hpp b/excuter/cpp-common/src/deepx/shape_broadcast.hpp deleted file mode 100644 index 290eb632..00000000 --- a/excuter/cpp-common/src/deepx/shape_broadcast.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef DEEPX_OP_CPU_SHAPE_HPP -#define DEEPX_OP_CPU_SHAPE_HPP - - -#include "deepx/tensor.hpp" - - -namespace deepx { - - std::vector broadcastShape(const std::vector &a, const std::vector &b); - enum BroadcastMap - { - xTox = 0, - nullTo1 = 1, - xTo1 = 2, - }; - std::vector broadcastMap(const std::vector &a, const std::vector &b); - -} - -#endif // DEEPX_OP_CPU_SHAPE_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_broadcast.cpp b/excuter/cpp-common/src/deepx/shape_changeshape.cpp similarity index 54% rename from excuter/cpp-common/src/deepx/shape_broadcast.cpp rename to excuter/cpp-common/src/deepx/shape_changeshape.cpp index 75ee5575..17a89461 100644 --- a/excuter/cpp-common/src/deepx/shape_broadcast.cpp +++ b/excuter/cpp-common/src/deepx/shape_changeshape.cpp @@ -1,11 +1,60 @@ #include #include -#include "deepx/shape_broadcast.hpp" +#include "deepx/shape_changeshape.hpp" namespace deepx { - + //transpose + + std::vector swaplastTwoDimOrder(const std::vector &shape) + { + vector dimOrder = shape; + std::iota(dimOrder.begin(), dimOrder.end(), 0); + swap(dimOrder[dimOrder.size() - 1], dimOrder[dimOrder.size() - 2]); + return dimOrder; + } + std::vector transposeShape(const std::vector &shape, const std::vector &dimOrder) + { + if (dimOrder.size() != shape.size()) + { + throw std::invalid_argument("dimOrder size does not match the number of dimensions in the TensorCPU."); + } + std::vector newShape = shape; + for (size_t i = 0; i < dimOrder.size(); ++i) + { + newShape[i] =shape[dimOrder[i]]; + } + return newShape; + } + + //concat + + Shape concatShape(const std::vector &shapes,const int axis){ + std::vector outputShape(shapes[0].dim); + outputShape=shapes[0].shape; + for (int i = 1; i < shapes.size(); ++i) + { + if (shapes[i].dim != outputShape.size()) + { + throw std::invalid_argument("All tensors must have the same number of dimensions."); + } + for (size_t j = 0; j < outputShape.size(); ++j) + { + if (j == axis) + { + outputShape[j] += shapes[i].shape[j]; + } + else if (shapes[i].shape[j] != outputShape[j]) + { + throw std::invalid_argument("Shapes of tensors must match except in the concatenation axis."); + } + } + } + return Shape(outputShape); + } + + //broadcast std::vector broadcastShape(const std::vector &a, const std::vector &b) { int len1 = a.size(); diff --git a/excuter/cpp-common/src/deepx/shape_changeshape.hpp b/excuter/cpp-common/src/deepx/shape_changeshape.hpp new file mode 100644 index 00000000..291c7291 --- /dev/null +++ b/excuter/cpp-common/src/deepx/shape_changeshape.hpp @@ -0,0 +1,76 @@ +#ifndef DEEPX_SHAPE_CHANGESHAPE_HPP +#define DEEPX_SHAPE_CHANGESHAPE_HPP + +#include +#include +#include +#include +#include "deepx/tensor.hpp" +#include "deepx/shape.hpp" +#include "stdutil/error.hpp" + +namespace deepx +{ + // transpose + + using namespace std; + std::vector swaplastTwoDimOrder(const std::vector &shape); + + std::vector transposeShape(const std::vector &shape, const std::vector &dimOrder); + + // concat + + Shape concatShape(const std::vector &shapes, const int axis); + + template + Shape concatShape(const std::vector *> &tensors, const int axis) + { + std::vector shapes; + for (int i = 0; i < tensors.size(); ++i) + { + shapes.push_back(tensors[i]->shape); + } + return concatShape(shapes, axis); + } + + template + bool checkShapeConcat(const std::vector *> &tensors, const int axis, const Tensor &output) + { + int axisDim = 0; + for (int i = 0; i < tensors.size(); i++) + { + if (tensors[i]->shape.dim != output.shape.dim) + { + throw TensorShapeError("All input tensors must have the same dimension size for concat"); + } + for (int j = 0; j < tensors[i]->shape.dim; j++) + { + if (j != axis) + { + if (tensors[i]->shape[j] != output.shape[j]) + { + throw TensorShapeError("All input tensors must have the same dimension size for concat"); + } + } + else + { + axisDim += tensors[i]->shape[j]; + } + } + } + return axisDim == output.shape[axis]; + } + + // broadcast + std::vector broadcastShape(const std::vector &a, const std::vector &b); + enum BroadcastMap + { + xTox = 0, + nullTo1 = 1, + xTo1 = 2, + }; + std::vector broadcastMap(const std::vector &a, const std::vector &b); + +} + +#endif // DEEPX_SHAPE_CHANGESHAPE_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_concat.cpp b/excuter/cpp-common/src/deepx/shape_concat.cpp deleted file mode 100644 index 597ba5b2..00000000 --- a/excuter/cpp-common/src/deepx/shape_concat.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include -#include "deepx/shape_concat.hpp" - -namespace deepx -{ - Shape concatShape(const std::vector &shapes,const int axis){ - std::vector outputShape(shapes[0].dim); - outputShape=shapes[0].shape; - for (int i = 1; i < shapes.size(); ++i) - { - if (shapes[i].dim != outputShape.size()) - { - throw std::invalid_argument("All tensors must have the same number of dimensions."); - } - for (size_t j = 0; j < outputShape.size(); ++j) - { - if (j == axis) - { - outputShape[j] += shapes[i].shape[j]; - } - else if (shapes[i].shape[j] != outputShape[j]) - { - throw std::invalid_argument("Shapes of tensors must match except in the concatenation axis."); - } - } - } - return Shape(outputShape); - } -} \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_concat.hpp b/excuter/cpp-common/src/deepx/shape_concat.hpp deleted file mode 100644 index 91884e5e..00000000 --- a/excuter/cpp-common/src/deepx/shape_concat.hpp +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef DEEPX_SHAPE_CONCAT_HPP -#define DEEPX_SHAPE_CONCAT_HPP - -#include "deepx/shape.hpp" -#include "deepx/tensor.hpp" -#include "stdutil/error.hpp" - -namespace deepx -{ - - Shape concatShape(const std::vector &shapes,const int axis); - - template - Shape concatShape(const std::vector*> &tensors,const int axis){ - std::vector shapes; - for (int i = 0; i < tensors.size(); ++i) - { - shapes.push_back(tensors[i]->shape); - } - return concatShape(shapes,axis); - } - - template - bool checkShapeConcat(const std::vector*> &tensors,const int axis,const Tensor &output){ - int axisDim=0; - for (int i = 0; i < tensors.size(); i++) - { - if (tensors[i]->shape.dim != output.shape.dim) - { - throw TensorShapeError("All input tensors must have the same dimension size for concat"); - } - for (int j = 0; j < tensors[i]->shape.dim; j++) - { - if (j != axis) - { - if (tensors[i]->shape[j] != output.shape[j]) - { - throw TensorShapeError("All input tensors must have the same dimension size for concat"); - } - } - else - { - axisDim += tensors[i]->shape[j]; - } - } - } - return axisDim == output.shape[axis]; - } -}; -#endif // DEEPX_SHAPE_CONCAT_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_reduce.cpp b/excuter/cpp-common/src/deepx/shape_reduce.cpp index 0a64eb65..2f017a9b 100644 --- a/excuter/cpp-common/src/deepx/shape_reduce.cpp +++ b/excuter/cpp-common/src/deepx/shape_reduce.cpp @@ -3,60 +3,65 @@ #include #include +#include "stdutil/error.hpp" #include "deepx/shape_reduce.hpp" namespace deepx { - std::vector reduceDimMap(const Shape &shape, const std::vector &dims) + std::vector checkedDims(const std::vector &inputshape, const std::vector &dims) { - // Step 1: 确定输出形状 - std::vector sumDims; + std::vector checkeddims; + // 如果dims为空,则求和所有维度 if (dims.empty()) { - for (int i = 0; i < shape.dim; ++i) + for (int i = 0; i < inputshape.size(); ++i) { - sumDims.push_back(i); + checkeddims.push_back(i); } } else - { - sumDims = std::vector(dims.data(), dims.data() + dims.size()); - } - std::sort(sumDims.begin(), sumDims.end()); - // 去重 - sumDims.erase(std::unique(sumDims.begin(), sumDims.end()), sumDims.end()); - - // 验证维度 - for (int d : sumDims) - { - if (d < 0 || d >= shape.dim) + { + // 验证维度 + for (int d : dims) { - throw std::invalid_argument("Dimension out of range in sum"); + if (d < 0) + { + d = inputshape.size() + d; + } + if (d >= inputshape.size()) + { + throw TensorShapeError("Dimension out of range in sum"); + } + checkeddims.push_back(d); } } - // 创建一个映射数组,标记哪些维度需要求和 - std::vector sumMap(shape.dim, 0); - for (int dim : sumDims) - { - sumMap[dim] = 1; - } - return sumMap; + // 排序 + std::sort(checkeddims.begin(), checkeddims.end()); + // 去重 + checkeddims.erase(std::unique(checkeddims.begin(), checkeddims.end()), checkeddims.end()); + + return checkeddims; } - std::vector reduceShape(const Shape &a, const std::vector &dims) + + std::vector reducedShape(const std::vector &inputshape, const std::vector &dims, const bool keepdim) { - + // 创建一个映射数组,标记哪些维度需要求和 - std::vector reduceMap = reduceDimMap(a, dims); + std::vector reducedims = reducedDim(inputshape, dims); // 计算输出形状 std::vector outputShape; - for (size_t i = 0; i < a.dim; ++i) + for (size_t i = 0; i < inputshape.size(); ++i) { - if (reduceMap[i] == 0) + if (reducedims[i] == 0) + { + outputShape.push_back(inputshape[i]); + } + else if (keepdim) { - outputShape.push_back(a[i]); + outputShape.push_back(1); } } @@ -66,5 +71,16 @@ namespace deepx outputShape.push_back(1); } return outputShape; - } + } + + std::vector reducedDim(const std::vector &shape, const std::vector &dims) + { + // 创建一个映射数组,标记哪些维度需要求和 + std::vector sumMap(shape.size(), 0); + for (int dim : dims) + { + sumMap[dim] = 1; + } + return sumMap; + } } \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_reduce.hpp b/excuter/cpp-common/src/deepx/shape_reduce.hpp index 54093ebb..ac15d4e7 100644 --- a/excuter/cpp-common/src/deepx/shape_reduce.hpp +++ b/excuter/cpp-common/src/deepx/shape_reduce.hpp @@ -5,8 +5,15 @@ namespace deepx { - std::vector reduceDimMap(const Shape &shape, const std::vector &dims); - std::vector reduceShape(const Shape &a, const std::vector &dims); + + // 检查dims参数是否合法,返回整理后的dims + std::vector checkedDims(const std::vector &inputshape, const std::vector &dims); + + // 返回求和后的形状 + std::vector reducedShape(const std::vector &inputshape, const std::vector &dims, const bool keepdim = false); + + // 返回需要求和的维度 + std::vector reducedDim(const std::vector &inputshape, const std::vector &dims ); } #endif // DEEPX_SHAPE_SUM_HPP \ No newline at end of file diff --git a/excuter/cpp-common/src/deepx/shape_transpose.hpp b/excuter/cpp-common/src/deepx/shape_transpose.hpp deleted file mode 100644 index c41c7875..00000000 --- a/excuter/cpp-common/src/deepx/shape_transpose.hpp +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef DEEPX_SHAPE_TRANSPOSE_HPP -#define DEEPX_SHAPE_TRANSPOSE_HPP - -#include -#include -#include -#include -#include "deepx/shape.hpp" - -namespace deepx -{ - using namespace std; - std::vector swaplastTwoDimOrder(const std::vector &shape) - { - vector dimOrder = shape; - std::iota(dimOrder.begin(), dimOrder.end(), 0); - swap(dimOrder[dimOrder.size() - 1], dimOrder[dimOrder.size() - 2]); - return dimOrder; - } - std::vector transposeShape(const std::vector &shape, const std::vector &dimOrder) - { - if (dimOrder.size() != shape.size()) - { - throw std::invalid_argument("dimOrder size does not match the number of dimensions in the TensorCPU."); - } - std::vector newShape = shape; - for (size_t i = 0; i < dimOrder.size(); ++i) - { - newShape[i] =shape[dimOrder[i]]; - } - return newShape; - } -} -#endif // DEEPX_SHAPE_TRANSPOSE_HPP diff --git a/excuter/cpp-common/src/deepx/tf/tf.hpp b/excuter/cpp-common/src/deepx/tf/tf.hpp index f5b17204..4329fb47 100644 --- a/excuter/cpp-common/src/deepx/tf/tf.hpp +++ b/excuter/cpp-common/src/deepx/tf/tf.hpp @@ -25,9 +25,11 @@ namespace deepx::tf { TypeDef dtype; string textvalue; - any value; + Param(const string &textvalue = "", const DataCategory &dt = DataCategory::Unknown, const Precision &prec = Precision::Any) : textvalue(textvalue), dtype(make_dtype(dt, prec)) {} + + }; // TF:Tensor Function的缩写 @@ -75,6 +77,29 @@ namespace deepx::tf { throw std::invalid_argument("Invalid argument index"); } + // 处理布尔类型 + if constexpr (std::is_same::value) + { + const string &value = vars[idx].textvalue; + // 转换为小写再判断 + string lower_value = value; + std::transform(lower_value.begin(), lower_value.end(), lower_value.begin(), + [](unsigned char c) + { return std::tolower(c); }); + + if (lower_value == "true") + { + return true; + } + else if (lower_value == "false") + { + return false; + } + else + { + throw std::invalid_argument("Invalid bool value:" + value); + } + } if (is_float(vars[idx].textvalue)) { T value = T(std::stof(vars[idx].textvalue)); @@ -83,10 +108,8 @@ namespace deepx::tf return mem->getarg(vars[idx].textvalue); } - - template - vector getvector(int idx,bool arg = true) + vector getvector(int idx, bool arg = true) { vector &vars = arg ? args : returns; if (idx < 0) diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu index 92940d90..ac395c3f 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cu @@ -7,7 +7,7 @@ #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/tensor_cuda.cuh" #include "deepx/tensorfunc/vector_cuda.cuh" -#include "deepx/shape_broadcast.hpp" +#include "deepx/shape_changeshape.hpp" namespace deepx::tensorfunc { diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh index e44874ea..f9f47f7b 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.cuh @@ -4,7 +4,7 @@ #include #include -#include "deepx/shape_broadcast.hpp" +#include "deepx/shape_changeshape.hpp" #include "deepx/tensorfunc/cuda.hpp" #include "deepx/tensorfunc/authors.hpp" diff --git a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp index 1faf58e7..6f76fa23 100644 --- a/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-cuda/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -8,8 +8,7 @@ #include "deepx/tensorfunc/authors.hpp" #include "deepx/tensorfunc/changeshape_miaobyte.cuh" #include "deepx/tensorfunc/cuda.hpp" -#include "deepx/shape_concat.hpp" -#include "deepx/shape_broadcast.hpp" +#include "deepx/shape_changeshape.hpp" #include "stdutil/error.hpp" namespace deepx::tensorfunc { diff --git a/excuter/op-mem-ompsimd/src/client/tfs.cpp b/excuter/op-mem-ompsimd/src/client/tfs.cpp index d81306ee..2c675170 100644 --- a/excuter/op-mem-ompsimd/src/client/tfs.cpp +++ b/excuter/op-mem-ompsimd/src/client/tfs.cpp @@ -7,6 +7,7 @@ #include "deepx/tf/io.hpp" #include "deepx/tf/changeshape.hpp" #include "deepx/tf/elementwise.hpp" +#include "deepx/tf/reduce.hpp" #include "deepx/tf/tffactory.hpp" #include "deepx/tf/matmul.hpp" #include "deepx/tensorfunc/authors.hpp" @@ -17,6 +18,7 @@ namespace deepx::tf // tensor void register_lifecycle(TfFactory &tffactory) { + // argset author=miaobyte tffactory.add_tf(std::make_shared(vector( { Param("value", DataCategory::Var, Precision::Any), @@ -25,6 +27,7 @@ namespace deepx::tf { Param("name", DataCategory::Var, Precision::Any), }))); + // vecset author=miaobyte tffactory.add_tf(std::make_shared( vector( { @@ -34,6 +37,7 @@ namespace deepx::tf { Param("name", DataCategory::Vector, Precision::Any), }))); + // newtensor author=miaobyte tffactory.add_tf(std::make_shared(vector( { Param("shape", DataCategory::Vector, Precision::Int32), @@ -42,6 +46,7 @@ namespace deepx::tf { Param("tensor1", DataCategory::Tensor, Precision::Any), }))); + // newtensor author=miaobyte tffactory.add_tf(std::make_shared(vector( { Param("shape", DataCategory::Var, Precision::String), @@ -50,22 +55,19 @@ namespace deepx::tf { Param("tensor1", DataCategory::Tensor, Precision::Any), }))); - // opfactory.add_op(DelTensor()); } // init void register_init(TfFactory &tffactory) { - // opfactory.add_op(Uniform()); - // opfactory.add_op(Uniform()); - + // constant author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("t", DataCategory::Tensor, Precision::Any), Param("value", DataCategory::Var, Precision::Any), }), vector())); - + // arange author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("t", DataCategory::Tensor, Precision::Any), @@ -73,6 +75,7 @@ namespace deepx::tf Param("step", DataCategory::Var, Precision::Any), }), vector())); + // uniform author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("t", DataCategory::Tensor, Precision::Any), @@ -85,12 +88,13 @@ namespace deepx::tf // io void register_util(TfFactory &opfactory) { + // print author=miaobyte opfactory.add_tf(std::make_shared>(vector( { Param("", DataCategory::Tensor, Precision::Any), }), vector())); - + // print author=miaobyte opfactory.add_tf(std::make_shared>(vector( { Param("", DataCategory::Tensor, Precision::Any), @@ -111,7 +115,7 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); - + // add author=cblas tffactory.add_tf(std::make_shared>(vector( { Param("a", DataCategory::Tensor, Precision::Float64 | Precision::Float32), @@ -122,6 +126,7 @@ namespace deepx::tf Param("c", DataCategory::Tensor, Precision::Float64 | Precision::Float32), }))); + // add scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("a", DataCategory::Tensor, Precision::Any), @@ -131,6 +136,7 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); + // sub author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("a", DataCategory::Tensor, Precision::Any), @@ -140,7 +146,7 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); - + // sub scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("a", DataCategory::Tensor, Precision::Any), @@ -150,7 +156,7 @@ namespace deepx::tf { Param("c", DataCategory::Tensor, Precision::Any), }))); - + // mul author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -160,6 +166,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // mul scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -179,6 +186,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // div scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -198,6 +206,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // sqrt author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -207,6 +216,7 @@ namespace deepx::tf Param("C", DataCategory::Tensor, Precision::Any), }))); + // pow author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -216,6 +226,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // pow scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -225,6 +236,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // log author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -233,6 +245,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // exp author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -241,6 +254,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // max author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -250,6 +264,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // max scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -259,6 +274,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // min author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -268,6 +284,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // min scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -277,6 +294,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // compare author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -287,6 +305,7 @@ namespace deepx::tf { Param("mask", DataCategory::Tensor, Precision::Float32), }))); + // compare scalar author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -300,6 +319,7 @@ namespace deepx::tf // matmul void register_matmul(TfFactory &tffactory) { + // matmul author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -309,6 +329,7 @@ namespace deepx::tf { Param("C", DataCategory::Tensor, Precision::Any), }))); + // matmul author=cblas tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Float64 | Precision::Float32), @@ -322,7 +343,7 @@ namespace deepx::tf // // changeshape void register_changeshape(TfFactory &tffactory) { - + // reshape author=miaobyte tffactory.add_tf(std::make_shared>(vector( { Param("A", DataCategory::Tensor, Precision::Any), @@ -334,48 +355,84 @@ namespace deepx::tf }))); tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("dim_order", DataCategory::Vector, Precision::Int32), - }), - vector( - { - Param("C", DataCategory::Tensor, Precision::Any), - }))); - + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("dim_order", DataCategory::Vector, Precision::Int32), + }), + vector( + { + Param("C", DataCategory::Tensor, Precision::Any), + }))); + // concat author=miaobyte tffactory.add_tf(std::make_shared>(vector( - { - Param("tensors", DataCategory::ListTensor, Precision::Any), - Param("axis", DataCategory::Var, Precision::Int32), - }), - vector( - { - Param("result", DataCategory::Tensor, Precision::Any), - }))); + { + Param("tensors", DataCategory::ListTensor, Precision::Any), + Param("axis", DataCategory::Var, Precision::Int32), + }), + vector( + { + Param("result", DataCategory::Tensor, Precision::Any), + }))); + // broadcastto author=miaobyte tffactory.add_tf(std::make_shared>(vector( - { - Param("A", DataCategory::Tensor, Precision::Any), - Param("new_shape", DataCategory::Vector, Precision::Int32), - }), - vector( - { - Param("B", DataCategory::Tensor, Precision::Any), - }))); + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("new_shape", DataCategory::Vector, Precision::Int32), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); } // // reduce - // void register_reduce(OpFactory &opfactory) - // { - // opfactory.add_op(Max()); - // opfactory.add_op(Max()); - // opfactory.add_op(Maxscalar()); - // opfactory.add_op(Maxscalar()); - // opfactory.add_op(Min()); - // opfactory.add_op(Min()); - // opfactory.add_op(Minscalar()); - // opfactory.add_op(Minscalar()); - // opfactory.add_op(Sum()); - // opfactory.add_op(Sum()); - // } + void register_reduce(TfFactory &tffactory) + { + // sum author=miaobyte + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("axis", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); + // prod author=miaobyte + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("axis", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); + // reducemax author=miaobyte + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("axis", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); + // reducemin author=miaobyte + tffactory.add_tf(std::make_shared>(vector( + { + Param("A", DataCategory::Tensor, Precision::Any), + Param("axis", DataCategory::Vector, Precision::Int32), + Param("keepdims", DataCategory::Var, Precision::Bool), + }), + vector( + { + Param("B", DataCategory::Tensor, Precision::Any), + }))); + } + int register_all(TfFactory &tffactory) { register_lifecycle(tffactory); @@ -384,7 +441,7 @@ namespace deepx::tf register_elementwise(tffactory); register_matmul(tffactory); register_changeshape(tffactory); - // register_reduce(opfactory); + register_reduce(tffactory); return 0; } } \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp index f62146ac..46f5a0b8 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/changeshape_miaobyte.hpp @@ -5,8 +5,7 @@ #include #include "deepx/tensor.hpp" -#include "deepx/shape_concat.hpp" -#include "deepx/shape_broadcast.hpp" +#include "deepx/shape_changeshape.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/changeshape.hpp" #include "deepx/tensorfunc/authors.hpp" diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/highway.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/highway.hpp new file mode 100644 index 00000000..ec18b89a --- /dev/null +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/highway.hpp @@ -0,0 +1,23 @@ +#ifndef DEEPX_TENSORFUNC_HIGHWAY_HPP +#define DEEPX_TENSORFUNC_HIGHWAY_HPP + +#include + +namespace deepx::tensorfunc +{ + using namespace hwy::HWY_NAMESPACE; + + template + T ReduceMul(D d, Vec v) + { + T result = GetLane(v); + for (size_t i = 1; i < Lanes(d); ++i) + { + result *= ExtractLane(v, i); + } + return result; + } + +} + +#endif diff --git a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp index 3eb77628..9d7e37c8 100644 --- a/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp +++ b/excuter/op-mem-ompsimd/src/deepx/tensorfunc/reduce_miaobyte.hpp @@ -4,10 +4,12 @@ #include #include #include -#include #include +#include #include "deepx/tensor.hpp" + +#include "deepx/tensorfunc/highway.hpp" #include "deepx/shape_reduce.hpp" #include "deepx/tensorfunc/reduce.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" @@ -16,175 +18,332 @@ namespace deepx::tensorfunc { using namespace hwy::HWY_NAMESPACE; + // sum author=miaobyte template struct sumDispatcher { - static void sum(const Tensor &tensor, const std::vector &dims, Tensor &result,const bool keepdims) + static void sum(const Tensor &tensor, const std::vector &dims, Tensor &result, const bool keepdims) { constant(result, T(0)); - - std::vector sorted_dims = dims; - if (dims.size() == 0) + std::vector checkeddims = checkedDims(tensor.shape.shape, dims); + std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); + const int minshape_1 = Lanes(ScalableTag()); + if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - sorted_dims = arrange(tensor.shape.dim); + tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + { + if (reduced_dims[i] == 0) + { + newIndices[j++] = indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { + newIndices[j++] = indices[i]; + } + } + int outputIdx = result.shape.linearat(newIndices); +#pragma omp atomic + result.data[outputIdx] += tensor.data[idx_linear]; }, result.shape.dim); } - // 从大到小排序 - std::sort(sorted_dims.begin(), sorted_dims.end(), std::greater()); - std::vector sumMap = reduceDimMap(tensor.shape, sorted_dims); - // 如果dims的最后一个元素是tensor.shape.dim-1,则说明求和的数据不连续(不对齐),无法simd(需要不停跳跃) - - const ScalableTag _tag; - size_t minshape_1 = Lanes(_tag); - // if (true) - if (sorted_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > sorted_dims.size() || tensor.shape[-1] >= minshape_1) + else + { + // 如果数据连续(对齐),则可以simd + tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + { + if (reduced_dims[i] == 0) + { + newIndices[j++] = indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { + newIndices[j++] = indices[i]; + } + } + int outputIdx = result.shape.linearat(newIndices); + int shape_last = tensor.shape[-1]; + const ScalableTag tag; + const size_t lanes = Lanes(tag); + size_t j = 0; + T sum = 0; + // 前部分:处理到对齐 + while (j < shape_last && !IsAligned(tag, tensor.data + idx_linear + j)) + { + sum += tensor.data[idx_linear + j]; + ++j; + } + // 中间部分:SIMD + size_t aligned_end = shape_last - (shape_last % lanes); + auto sum_vec = Zero(tag); + for (; j + lanes <= aligned_end; j += lanes) + { + auto vec = Load(tag, tensor.data + idx_linear + j); + sum_vec = Add(sum_vec, vec); + } + // 将向量累加结果写回 + sum += ReduceSum(tag, sum_vec); + // 尾部分:处理剩余 + for (; j < shape_last; ++j) + { + sum += tensor.data[idx_linear + j]; + } +#pragma omp atomic + result.data[outputIdx] += sum; }, result.shape.dim); + } + } + }; + + // prod author=miaobyte + template + struct prodDispatcher + { + static void prod(const Tensor &tensor, const std::vector &dims, Tensor &result, const bool keepdims) + { + std::vector checkeddims = checkedDims(tensor.shape.shape, dims); + std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); + const int minshape_1 = Lanes(ScalableTag()); + // 如果dims的最后一个元素是tensor.shape.dim-1,则说明reduceprod的数据不连续(不对齐),无法simd(需要不停跳跃) + constant(result, T(1)); + if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) + tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, std::vector &newIndices) { // 计算输出索引 for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { - if (sumMap[i]==0) { + if (reduced_dims[i]==0) { + newIndices[j++]=indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { newIndices[j++]=indices[i]; } } // 累加求和 int outputIdx=result.shape.linearat(newIndices); #pragma omp atomic - result.data[outputIdx]+=tensor.data[idx_linear]; }, result.shape.dim); + result.data[outputIdx]*=tensor.data[idx_linear]; }, result.shape.dim); } else { - // 这里有bug,todo - // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) + // 如果数据连续(对齐),则可以simd + tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, std::vector &newIndices) { // 计算输出索引 + for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) { - if (sumMap[i] == 0) + if (reduced_dims[i] == 0) { newIndices[j++] = indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { + newIndices[j++] = indices[i]; } } + // 累加求和 int outputIdx = result.shape.linearat(newIndices); int shape_last = tensor.shape[-1]; const ScalableTag tag; const size_t lanes = Lanes(tag); size_t j = 0; - T sum = 0; + T product = 1; // 前部分:处理到对齐 - while (j < shape_last && !IsAligned(tag, tensor.data + idx_linear + j)) + while (j < shape_last && !IsAligned(tag, tensor.data + i + j)) { - sum += tensor.data[idx_linear + j]; + product *= tensor.data[i + j]; ++j; } // 中间部分:SIMD size_t aligned_end = shape_last - (shape_last % lanes); - auto sum_vec = Zero(tag); // 初始化累加向量为0 + auto product_vec = Load(tag, tensor.data + i + j); // 初始化累乘向量 + j+=lanes; for (; j + lanes <= aligned_end; j += lanes) { - auto vec = Load(tag, tensor.data + idx_linear + j); - sum_vec = Add(sum_vec, vec); // 向量累加 + auto vec = Load(tag, tensor.data + i + j); + product_vec = Mul(product_vec, vec); // 向量累乘 } - // 将向量累加结果写回 - sum += ReduceSum(tag, sum_vec); // 使用ReduceSum替代GetLane(SumOfLane()) + // 将向量累乘结果写回 + product *= ReduceMul(tag,product_vec); // 尾部分:处理剩余 for (; j < shape_last; ++j) { - sum += tensor.data[idx_linear + j]; + product *= tensor.data[i + j]; } #pragma omp atomic - result.data[outputIdx] += sum; }, result.shape.dim); + result.data[outputIdx] *= product; }, result.shape.dim); } } }; template - struct prodDispatcher + struct reducemaxDispatcher { - static void prod(const Tensor &tensor, const std::vector &dims, Tensor &result,const bool keepdims) + static void reducemax(const Tensor &tensor, const std::vector &dims, Tensor &result, const bool keepdims) { - - std::vector sorted_dims = dims; - if (dims.size() == 0) + std::vector checkeddims = checkedDims(tensor.shape.shape, dims); + std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); + const int minshape_1 = Lanes(ScalableTag()); + // 如果dims的最后一个元素是tensor.shape.dim-1,则说明reducemax的数据不连续(不对齐),无法simd(需要不停跳跃) + constant(result, std::numeric_limits::lowest()); + if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - sorted_dims = arrange(tensor.shape.dim); + tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + + for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { + if (reduced_dims[i]==0) { + newIndices[j++]=indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { + newIndices[j++]=indices[i]; + } + } + // 累加求和 + int outputIdx=result.shape.linearat(newIndices); + result.data[outputIdx]=std::max(result.data[outputIdx],tensor.data[idx_linear]); + }, result.shape.dim); + } + else + { + // 如果数据连续(对齐),则可以simd + tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, std::vector &newIndices) + { + // 计算输出索引 + + for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) + { + if (reduced_dims[i] == 0) + { + newIndices[j++] = indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { + newIndices[j++] = indices[i]; + } + } + + int outputIdx = result.shape.linearat(newIndices); + + int shape_last = tensor.shape[-1]; + const ScalableTag tag; + const size_t lanes = Lanes(tag); + size_t j = 0; + T maxt = tensor.data[i]; + // 前部分:处理到对齐 + while (j < shape_last && !IsAligned(tag, tensor.data + i + j)) + { + maxt = std::max(maxt,tensor.data[i + j]); + ++j; + } + + // 中间部分:SIMD + size_t aligned_end = shape_last - (shape_last % lanes); + auto max_vec = Load(tag, tensor.data + i + j); // 初始化累乘向量为1 + for (; j + lanes <= aligned_end; j += lanes) + { + auto vec = Load(tag, tensor.data + i + j); + max_vec = Max(max_vec, vec); + } + + // 将向量累乘结果写回 + maxt = ReduceMax(tag, max_vec); + + // 尾部分:处理剩余 + for (; j < shape_last; ++j) + { + maxt = std::max(maxt,tensor.data[i + j]); + } + + result.data[outputIdx] = std::max(result.data[outputIdx],maxt); + }, result.shape.dim); } - // 从大到小排序 - std::sort(sorted_dims.begin(), sorted_dims.end(), std::greater()); - std::vector sumMap = reduceDimMap(tensor.shape, sorted_dims); - // 如果dims的最后一个元素是tensor.shape.dim-1,则说明求和的数据不连续(不对齐),无法simd(需要不停跳跃) - constant(result, T(1)); - if (sorted_dims.at(sorted_dims.size() - 1) == tensor.shape.dim - 1 && tensor.shape.dim > sorted_dims.size()) + } + }; + + template + struct reduceminDispatcher + { + static void reducemin(const Tensor &tensor, const std::vector &dims, Tensor &result, const bool keepdims) + { + std::vector checkeddims = checkedDims(tensor.shape.shape, dims); + std::vector reduced_dims = reducedDim(tensor.shape.shape, checkeddims); + const int minshape_1 = Lanes(ScalableTag()); + // 如果dims的最后一个元素是tensor.shape.dim-1,则说明reducemin的数据不连续(不对齐),无法simd(需要不停跳跃) + constant(result, std::numeric_limits::max()); + if (reduced_dims.rbegin()[0] == tensor.shape.dim - 1 || tensor.shape.dim > reduced_dims.size() || tensor.shape[-1] >= minshape_1) { - tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &sumMap](const int idx_linear, const std::vector &indices, std::vector &newIndices) + tensor.shape.rangeParallel(tensor.shape.dim, [&tensor, &result, &reduced_dims, keepdims](const int idx_linear, const std::vector &indices, std::vector &newIndices) { // 计算输出索引 for (size_t i = 0,j=0; i < tensor.shape.dim ; ++i) { - if (sumMap[i]==0) { + if (reduced_dims[i]==0) { + newIndices[j++]=indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { newIndices[j++]=indices[i]; } } // 累加求和 int outputIdx=result.shape.linearat(newIndices); -#pragma omp atomic - result.data[outputIdx]*=tensor.data[idx_linear]; }, result.shape.dim); + + result.data[outputIdx]=std::min(result.data[outputIdx],tensor.data[idx_linear]); + }, result.shape.dim); } else { // 如果数据连续(对齐),则可以simd - tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &sumMap](const int i, const std::vector &indices, std::vector &newIndices) + tensor.shape.rangeParallel(tensor.shape.dim - 1, [&tensor, &result, &reduced_dims, keepdims](const int i, const std::vector &indices, std::vector &newIndices) { // 计算输出索引 for (size_t i = 0, j = 0; i < tensor.shape.dim; ++i) { - if (sumMap[i] == 0) + if (reduced_dims[i] == 0) { newIndices[j++] = indices[i]; + }else if (keepdims && (reduced_dims[i] == 1)) { + newIndices[j++] = indices[i]; } } - // 累加求和 + int outputIdx = result.shape.linearat(newIndices); int shape_last = tensor.shape[-1]; const ScalableTag tag; const size_t lanes = Lanes(tag); size_t j = 0; - T product = 1; + T mint = tensor.data[i]; // 前部分:处理到对齐 while (j < shape_last && !IsAligned(tag, tensor.data + i + j)) { - product *= tensor.data[i + j]; + mint = std::min(mint,tensor.data[i + j]); ++j; } // 中间部分:SIMD size_t aligned_end = shape_last - (shape_last % lanes); - auto product_vec = One(tag); // 初始化累乘向量为1 + auto mint_vec = Load(tag, tensor.data + i + j); // 初始化累乘向量为1 for (; j + lanes <= aligned_end; j += lanes) { auto vec = Load(tag, tensor.data + i + j); - product_vec = Mul(product_vec, vec); // 向量累乘 + mint_vec = Min(mint_vec, vec); } // 将向量累乘结果写回 - product *= ReduceMul(tag, product_vec); + mint = ReduceMin(tag, mint_vec); // 尾部分:处理剩余 for (; j < shape_last; ++j) { - product *= tensor.data[i + j]; + mint = std::min(mint,tensor.data[i + j]); } -#pragma omp atomic - result.data[outputIdx] *= product; }, result.shape.dim); + + result.data[outputIdx] = std::min(result.data[outputIdx],mint); + }, result.shape.dim); } } }; + } #endif \ No newline at end of file diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp new file mode 100644 index 00000000..5e5a873b --- /dev/null +++ b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp @@ -0,0 +1,246 @@ +#ifndef DEEPX_TF_REDUCE_HPP +#define DEEPX_TF_REDUCE_HPP + +#include "deepx/tensor.hpp" +#include "deepx/tensorfunc/reduce_miaobyte.hpp" +#include +#include "deepx/tf/tf.hpp" +#include "deepx/dtype.hpp" + +namespace deepx::tf +{ + template + class Sum : public TF + { + public: + Sum(const vector &args, const vector &returns) + { + this->name = "sum"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + + string math_formula() const override + { + return "B = sum(A, axis=[1 2], keepdims=false)"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + vector dims = this->getvector(1, true); + bool keepdims = this->getvar(2,mem,true); + Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (input_type != output_type) + { + error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); + return 1; + } + switch (input_type) + { + case Precision::Float64: + sum(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Float32: + sum(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int64: + sum(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int32: + sum(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int16: + sum(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int8: + sum(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + default: + error = "Unsupported type: " + precision_str(input_type); + return 1; + } + return 0; + } + }; + + template + class Prod : public TF + { + public: + Prod(const vector &args, const vector &returns) + { + this->name = "prod"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "B = prod(A, axis=[1 2], keepdims=false)"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + vector dims = this->getvector(1, true); + bool keepdims = this->getvar(2,mem,true); + Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (input_type != output_type) + { + error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); + return 1; + } + switch (input_type) + { + case Precision::Float64: + prod(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Float32: + prod(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int64: + prod(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int32: + prod(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int16: + prod(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int8: + prod(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + default: + error = "Unsupported type: " + precision_str(input_type); + return 1; + } + return 0; + } + }; + + template + class ReduceMax : public TF + { + public: + ReduceMax(const vector &args, const vector &returns) + { + this->name = "reducemax"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "B = reducemax(A, axis=[1 2], keepdims=false)"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + vector dims = this->getvector(1, true); + bool keepdims = this->getvar(2,mem,true); + Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (input_type != output_type) + { + error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); + return 1; + } + switch (input_type) + { + case Precision::Float64: + reducemax(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Float32: + reducemax(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int64: + reducemax(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int32: + reducemax(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int16: + reducemax(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int8: + reducemax(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + default: + error = "Unsupported type: " + precision_str(input_type); + return 1; + } + return 0; + } + }; + + template + class ReduceMin : public TF + { + public: + ReduceMin(const vector &args, const vector &returns) + { + this->name = "reducemin"; + this->author = Author::name(); + this->args = args; + this->returns = returns; + } + string math_formula() const override + { + return "B = reducemin(A, axis=[1 2], keepdims=false)"; + } + shared_ptr clone() const override + { + return make_shared(*this); + } + int run(shared_ptr mem, string &error) override + { + Precision input_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype; + vector dims = this->getvector(1, true); + bool keepdims = this->getvar(2,mem,true); + Precision output_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype; + if (input_type != output_type) + { + error = "Type mismatch: " + precision_str(input_type) + " != " + precision_str(output_type); + return 1; + } + switch (input_type) + { + case Precision::Float64: + reducemin(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Float32: + reducemin(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int64: + reducemin(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int32: + reducemin(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int16: + reducemin(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + case Precision::Int8: + reducemin(*mem->gettensor(this->args[0].textvalue), dims, *mem->gettensor(this->returns[0].textvalue), keepdims); + break; + default: + error = "Unsupported type: " + precision_str(input_type); + return 1; + } + return 0; + } + }; +} + +#endif // DEEPX_TF_REDUCE_HPP diff --git a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp.a b/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp.a deleted file mode 100644 index f3e6b039..00000000 --- a/excuter/op-mem-ompsimd/src/deepx/tf/reduce.hpp.a +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef DEEPX_OP_REDUCE_HPP -#define DEEPX_OP_REDUCE_HPP - -#include "deepx/tensor.hpp" -#include "deepx/tensorfunc/reduce.hpp" -#include "deepx/tensorfunc/changeshape.hpp" -#include "stdutil/num.hpp" - -namespace deepx::op -{ - template - class Sum : public Op - { - public: - Sum() - { - this->init("sum", deepx::dtype::name(), {}, {}, false, {}, {}); - } - Sum(vector args, vector returns, bool require_grad = false, vector args_grad = {}, vector returns_grad = {}) - { - this->init("sum", deepx::dtype::name(), args, returns, require_grad, args_grad, returns_grad); - } - Sum(initializer_list args, initializer_list returns, bool require_grad = false, initializer_list args_grad = {}, initializer_list returns_grad = {}) - { - this->init("sum", deepx::dtype::name(), args, returns, require_grad, args_grad, returns_grad); - } - void forward(mem::Mem &mem) override - { - auto A = mem.gettensor(this->args[0]); - std::vector dims = mem.getvector(this->args[1]); - auto output = mem.gettensor(this->returns[0]); - tensorfunc::sum(*A, dims, *output); - } - void backward(mem::Mem &mem) override - { - auto output_grad = mem.gettensor(this->returns_grad[0]); - auto A_grad = mem.gettensor(this->args_grad[0]); - - tensorfunc::expand(*output_grad, *A_grad); - } - void setexample() override - { - this->init("sum", "float32", {"T1", "1", "2"}, {"T2"}, false, {}, {}); - } - string math_formula() const override - { - return "T2 = sum(T1, dims=[1,2])"; - } - }; - - - // todo - template - class Max_reduce : public Op - { - public: - Max_reduce() - { - this->init("max_reduce", deepx::dtype::name(), {}, {}, false, {}, {}); - }; - void forward(mem::Mem &mem) override - { - } - - void backward(mem::Mem &mem) override { - - }; - }; - - -} - -#endif diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp index 02fc55ef..c68e2b8e 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/5_tensor_sum.cpp @@ -11,6 +11,7 @@ #include "deepx/shape_reduce.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" +#include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/io_miaobyte.hpp" #include "deepx/tensorfunc/authors.hpp" @@ -22,19 +23,20 @@ void test_sum() { omp_set_num_threads(1); - Shape shape({2, 3, 4}); - deepx::Tensor tensor= New(shape.shape); + std::vector shape={2, 3, 4}; + Tensor tensor= New(shape); constant(tensor,float(1)); - print(tensor); + print(tensor,"%.0f"); cout<<""<> result = combination(3); for (const auto &comb : result) { std::cout <<"sum(t,"<< comb <<")"<< std::endl; - Shape sumshape=reduceShape(shape,comb); - Tensor r = New(sumshape.shape); - sum(tensor, comb,r); - print(r); + std::vector checkeddims=checkedDims(shape,comb); + std::vector sumshape=reducedShape(shape,checkeddims); + Tensor r = New(sumshape); + sum(tensor, checkeddims,r); + print(r,"%.0f"); } /* []=>[2, 3, 4] @@ -49,18 +51,26 @@ void test_sum() } void benchmark_sum(int i){ - Shape shape({i,i,i}); - deepx::Tensor tensor= New(shape.shape); + std::vector shape={i,i,i}; + deepx::Tensor tensor= New(shape); std::iota(tensor.data ,tensor.data+tensor.shape.size,0); std::vector> result = combination(3); - std::cout<<"sum "<"; - auto start = std::chrono::high_resolution_clock::now(); + std::cout<<"sum "<"; + auto start = std::chrono::high_resolution_clock::now(); for (const auto &comb : result) { - Shape sShape = reduceShape(shape, comb); - Tensor r=New(sShape.shape); - sum(tensor, comb,r); - save(r,"5_tensor_sum"+std::to_string(i)+"result"); + std::cout <<"sum(t,"<< comb <<")"<< std::endl; + std::vector checkeddims=checkedDims(shape,comb); + std::vector sumshape=reducedShape(shape,checkeddims); + Tensor r=New(sumshape); + sum(tensor, checkeddims,r); + string combstr=""; + for (const auto &c : comb) + { + combstr+=std::to_string(c)+"_"; + } + save(r,"5_tensor_sum."+ combstr); + print(r,"%.0f"); } auto end=std::chrono::high_resolution_clock::now(); std::chrono::duration duration = end - start; diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp index d843b6e1..a97fbcd7 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/7_tensor_transpose.cpp @@ -9,7 +9,7 @@ #include "deepx/tensorfunc/io_miaobyte.hpp" #include "stdutil/vector.hpp" #include "tensorutil.hpp" -#include "deepx/shape_transpose.hpp" +#include "deepx/shape_changeshape.hpp" using namespace deepx::tensorfunc; using namespace deepx; diff --git a/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp b/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp index 9456e408..0922d9bc 100644 --- a/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp +++ b/excuter/op-mem-ompsimd/test/tensorfunc/8_tensor_concat.cpp @@ -6,7 +6,7 @@ #include "deepx/tensorfunc/changeshape_miaobyte.hpp" #include "deepx/tensor.hpp" #include "deepx/shape.hpp" -#include "deepx/shape_concat.hpp" +#include "deepx/shape_changeshape.hpp" #include "deepx/tensorfunc/new.hpp" #include "deepx/tensorfunc/init_miaobyte.hpp" #include "deepx/tensorfunc/io_miaobyte.hpp" diff --git a/front/py/deepx/nn/functional/init.py b/front/py/deepx/nn/functional/init.py index 74581339..77383dda 100644 --- a/front/py/deepx/nn/functional/init.py +++ b/front/py/deepx/nn/functional/init.py @@ -50,23 +50,17 @@ def ones(*size, dtype=None, device=None, class Arange(Function): @staticmethod def forward(ctx:Context, + t:Tensor, start:Optional[Union[float,int]]=0, - end:Optional[Union[float,int]]=None, - step:Optional[Union[float,int]]=1,dtype=None, device=None,name:Union[Tensor,str]='',author='miaobyte')->Tensor: - outtensor=None - if isinstance(name,str): - shape=[end-start] - outtensor=Tensor(shape=shape, dtype=dtype, device=device) - outtensor.addtograph(name) - else: - outtensor=name - g=outtensor.graph + step:Optional[Union[float,int]]=1, + author='miaobyte')->Tensor: + g=t.graph if g.eager: - ir=DeepxIR("arange", [outtensor.node.name,start,step], [],author) + ir=DeepxIR("arange",[t.node.name,start,step], [],author) send(ir) - return outtensor -def arange(start=0, end=None, step=1,dtype=None, device=None,name:Union[Tensor,str]='',author='miaobyte')->Tensor: - return Arange.apply(start,end,step,dtype,device,name,author) + return t +def arange(t:Tensor,start=0,step=1,author='miaobyte')->Tensor: + return Arange.apply(t,start,step,author) OpNode.register("uniform") class Uniform(Function): diff --git a/front/py/deepx/nn/functional/reduce.py b/front/py/deepx/nn/functional/reduce.py index 12dcaefe..10051348 100644 --- a/front/py/deepx/nn/functional/reduce.py +++ b/front/py/deepx/nn/functional/reduce.py @@ -1,10 +1,9 @@ from typing import Optional, Union from deepx.tensor import Tensor -from deepx.autograd.graph import OpNode from deepx.nn.deepxir import DeepxIR from deepx.scheduler import send -from .elementwise import _A_b_elementwiseop_C +from deepx.autograd import OpNode,Function,Context def reduceshape(inshape: Union[list[int], tuple[int]], dim: Union[list[int], tuple[int]], @@ -40,10 +39,11 @@ def reduceshape(inshape: Union[list[int], tuple[int]], def _A_v_reduceop_C( a:Tensor, - dim: Union[list[int],tuple[int]] = [], + dim: Union[list[int],tuple[int]]=None, keepdim:bool=False, op:str=None, - out:Union[Tensor,str]='')->Tensor: + out:Union[Tensor,str]='', + author:str='miaobyte')->Tensor: if dim is None: dim=list(range(a.ndim)) @@ -63,36 +63,26 @@ def _A_v_reduceop_C( result.node.add_input(opnode) if a.graph.eager: - args = [*dim, "keepdim"] if keepdim else [*dim] - varir=DeepxIR("argset",'int32', args, [vector_node.name]) - - send(varir) - ir=DeepxIR(op, a.dtype, [a.node.name,vector_node.name], [result.node.name]) + ir=DeepxIR(op, [a.node.name,dim,"true" if keepdim else "false"], [result.node.name],author) send(ir) return result - -#max - -OpNode.register("reducemax") -def reduce_max( - a:Tensor, - dim:list[int] = None, - keepdim=False, - out:Union[Tensor,str]='')->Tensor: - return _A_v_reduceop_C(a,dim,keepdim,"max",out) - -#min -OpNode.register("reducemin") -def reduce_min( - a:Tensor, - dim:list[int] = None, - keepdim=False, - out:Union[Tensor,str]='')->Tensor: - return _A_v_reduceop_C(a,dim,keepdim,"min",out) - - #sum OpNode.register("sum") +class Sum(Function): + @staticmethod + def forward(ctx:Context,a:Tensor,dim:Optional[Union[list[int],tuple[int]]]=None,keepdim:bool=False,out:Union[Tensor,str]='',author:str='miaobyte')->Tensor: + if ctx.requires_grad: + ctx.save_tensors('a',a) + ctx.save_data('dim',dim) + ctx.save_data('keepdim',keepdim) + return _A_v_reduceop_C(a,dim,keepdim,"sum",out,author) + + @staticmethod + def backward(ctx:Context,out_grad): + pass + + + def sum( a:Tensor, dim:Optional[Union[ @@ -100,11 +90,22 @@ def sum( tuple[int], ]]=None, keepdim:bool=False, - out:Union[Tensor,str]='')->Tensor: - return _A_v_reduceop_C(a,dim,keepdim,"sum",out) + out:Union[Tensor,str]='', + author:str='miaobyte', + requires_grad:bool=False)->Tensor: + return Sum.apply(a,dim,keepdim,out,author,requires_grad=requires_grad) #prod OpNode.register("prod") +class Prod(Function): + @staticmethod + def forward(ctx:Context,a:Tensor,dim:Optional[Union[list[int],tuple[int]]]=None,keepdim:bool=False,out:Union[Tensor,str]='',author:str='miaobyte')->Tensor: + return _A_v_reduceop_C(a,dim,keepdim,"prod",out,author) + + @staticmethod + def backward(ctx:Context,out_grad): + pass + def prod( a:Tensor, dim:Optional[Union[ @@ -112,8 +113,50 @@ def prod( tuple[int], ]]=None, keepdim:bool=False, - out:Union[Tensor,str]='')->Tensor: - return _A_v_reduceop_C(a,dim,keepdim,"prod",out) + out:Union[Tensor,str]='', + author:str='miaobyte', + requires_grad:bool=False)->Tensor: + return Prod.apply(a,dim,keepdim,out,author,requires_grad=requires_grad) + +#max +OpNode.register("reducemax") +class ReduceMax(Function): + @staticmethod + def forward(ctx:Context,a:Tensor,dim:Optional[Union[list[int],tuple[int]]]=None,keepdim:bool=False,out:Union[Tensor,str]='',author:str='miaobyte')->Tensor: + return _A_v_reduceop_C(a,dim,keepdim,"reducemax",out,author) + + @staticmethod + def backward(ctx:Context,out_grad): + pass +def reduce_max( + a:Tensor, + dim:list[int] = None, + keepdim=False, + out:Union[Tensor,str]='', + author:str='miaobyte', + requires_grad:bool=False)->Tensor: + return ReduceMax.apply(a,dim,keepdim,out,author,requires_grad=requires_grad) + +#min +OpNode.register("reducemin") +class ReduceMin(Function): + @staticmethod + def forward(ctx:Context,a:Tensor,dim:Optional[Union[list[int],tuple[int]]]=None,keepdim:bool=False,out:Union[Tensor,str]='',author:str='miaobyte')->Tensor: + return _A_v_reduceop_C(a,dim,keepdim,"reducemin",out,author) + + @staticmethod + def backward(ctx:Context,out_grad): + pass +def reduce_min( + a:Tensor, + dim:list[int] = None, + keepdim=False, + out:Union[Tensor,str]='', + author:str='miaobyte', + requires_grad:bool=False)->Tensor: + return ReduceMin.apply(a,dim,keepdim,out,author,requires_grad=requires_grad) + + #mean OpNode.register("mean") @@ -136,5 +179,3 @@ def mean( total *= a.shape[i] result = sum(a, dim, keepdim, out)/total return result -# #var -# OpNode.register("var") \ No newline at end of file diff --git a/front/py/deepx/tensor/init.py b/front/py/deepx/tensor/init.py index a6c44f72..f5738829 100644 --- a/front/py/deepx/tensor/init.py +++ b/front/py/deepx/tensor/init.py @@ -31,9 +31,9 @@ def randn_(self): pass @tensor_method -def arange_(self,start,end=None,step=1): - #todo - pass +def arange_(self,start=0,step=1,author='miaobyte'): + from deepx.nn.functional import arange as arange_func + arange_func(self,start,step,author) @tensor_method def eye_(self,n,m=None): diff --git a/front/py/deepx/tensor/tensor.py b/front/py/deepx/tensor/tensor.py index 7c91ab88..91fa8f7d 100644 --- a/front/py/deepx/tensor/tensor.py +++ b/front/py/deepx/tensor/tensor.py @@ -29,6 +29,14 @@ def __init__( self._dtype = default_dtype else: self._dtype = str(dtype) + + # format + if self._dtype == 'float32' or self._dtype == 'float64' or self._dtype == 'float16' or self._dtype == 'bfloat16': + self._format = '%.4f' + elif self._dtype == 'int32' or self._dtype == 'int64' or self._dtype == 'int8' or self._dtype == 'int16': + self._format = '%d' + else: + self._format = '%s' # shape if shape is not None: if isinstance(shape, (tuple, list)) and all(isinstance(i, int) for i in shape): @@ -145,9 +153,12 @@ def __matmul__(self, other): def T(self) -> str: return self.transpose(1,0,out=self.node.name+".T") + # 打印 + def set_format(self,format:str): + self._format = format def __repr__(self) -> str: from deepx.nn.functional import printtensor - s=printtensor(self) + s=printtensor(self,format=self._format) return s diff --git a/front/py/examples/2_ir/4_changeshape_broadcast.dot b/front/py/examples/2_ir/4_changeshape_broadcast.dot index d58501d9..63f24c90 100644 --- a/front/py/examples/2_ir/4_changeshape_broadcast.dot +++ b/front/py/examples/2_ir/4_changeshape_broadcast.dot @@ -2,34 +2,34 @@ digraph { rankdir=TB node [shape=record] - 139260210680960 [label="a + 139182860375312 [label="a (4, 2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258236846528 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 139260210690752 [label="vector_1 + 139181167609744 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139182860384960 [label="vector_1 (4, 2, 3)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258235360000 [label="b + 139181166122448 [label="b (2, 1)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258235359616 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 139258235359856 [label="vector_2 + 139181166122592 [label=reshape color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139181166122688 [label="vector_2 (2, 1)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258235360048 [label="b.broadcasted + 139181166122736 [label="b.broadcasted (4, 2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258235360096 [label=broadcastTo color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 139258235360576 [label="vector_3 + 139181166123168 [label=broadcastTo color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139181166122928 [label="vector_3 (4, 2, 3)" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258235360240 [label=add color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 139258235360480 [label="tensor_4 + 139181166123216 [label=add color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 139181166123264 [label="tensor_4 (4, 2, 3)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 139258236846528 -> 139260210680960 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139260210680960 -> 139258236846528 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139260210690752 -> 139258236846528 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235359616 -> 139258235360000 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235360000 -> 139258235359616 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235359856 -> 139258235359616 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235360096 -> 139258235360048 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235360000 -> 139258235360096 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235360576 -> 139258235360096 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139260210680960 -> 139258235360240 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235360048 -> 139258235360240 [arrowsize=0.8 color=gray40 penwidth=1.2] - 139258235360240 -> 139258235360480 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181167609744 -> 139182860375312 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139182860375312 -> 139181167609744 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139182860384960 -> 139181167609744 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166122592 -> 139181166122448 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166122448 -> 139181166122592 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166122688 -> 139181166122592 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166123168 -> 139181166122736 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166122448 -> 139181166123168 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166122928 -> 139181166123168 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139182860375312 -> 139181166123216 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166122736 -> 139181166123216 [arrowsize=0.8 color=gray40 penwidth=1.2] + 139181166123216 -> 139181166123264 [arrowsize=0.8 color=gray40 penwidth=1.2] } diff --git a/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg b/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg index 293e8a9a..5ee163ac 100644 --- a/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg +++ b/front/py/examples/2_ir/4_changeshape_broadcast.dot.svg @@ -9,148 +9,148 @@ %3 - + -139260210680960 +139182860375312 a (4, 2, 3) - + -139258236846528 +139181167609744 reshape - + -139260210680960->139258236846528 +139182860375312->139181167609744 - + -139258235360240 +139181166123216 add - + -139260210680960->139258235360240 +139182860375312->139181166123216 - + -139258236846528->139260210680960 +139181167609744->139182860375312 - + -139260210690752 +139182860384960 vector_1 (4, 2, 3) - + -139260210690752->139258236846528 +139182860384960->139181167609744 - + -139258235360000 +139181166122448 b (2, 1) - + -139258235359616 +139181166122592 reshape - + -139258235360000->139258235359616 +139181166122448->139181166122592 - + -139258235360096 +139181166123168 broadcastTo - + -139258235360000->139258235360096 +139181166122448->139181166123168 - + -139258235359616->139258235360000 +139181166122592->139181166122448 - + -139258235359856 +139181166122688 vector_2 (2, 1) - + -139258235359856->139258235359616 +139181166122688->139181166122592 - + -139258235360048 +139181166122736 b.broadcasted (4, 2, 3) - + -139258235360048->139258235360240 +139181166122736->139181166123216 - + -139258235360096->139258235360048 +139181166123168->139181166122736 - + -139258235360576 +139181166122928 vector_3 (4, 2, 3) - + -139258235360576->139258235360096 +139181166122928->139181166123168 - + -139258235360480 +139181166123264 tensor_4 (4, 2, 3) - + -139258235360240->139258235360480 +139181166123216->139181166123264 diff --git a/front/py/examples/2_ir/5_reduce_sum.dot b/front/py/examples/2_ir/5_reduce_sum.dot deleted file mode 100644 index 9670e4f4..00000000 --- a/front/py/examples/2_ir/5_reduce_sum.dot +++ /dev/null @@ -1,41 +0,0 @@ -// Computational Graph -digraph { - rankdir=TB - node [shape=record] - 137431348365008 [label="t -(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429052759232 [label=uniform color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 137431348378688 [label="var_1 --1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429052765232 [label="var_2 -1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050978992 [label="var_3 -0" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050978752 [label="s -(4,)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050978800 [label="vector_1 -[0, 2]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050979232 [label=sum color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 137429050979184 [label="t1 -(4, 5, 6)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050979568 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 137429050979520 [label="var_4 -1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050979664 [label="t2 -(6,)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050979952 [label="vector_2 -[0, 1]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] - 137429050980048 [label=sum color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] - 137429052759232 -> 137431348365008 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137431348378688 -> 137429052759232 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429052765232 -> 137429052759232 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050978992 -> 137429052759232 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050979232 -> 137429050978752 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137431348365008 -> 137429050979232 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050978800 -> 137429050979232 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050979568 -> 137429050979184 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050979520 -> 137429050979568 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050980048 -> 137429050979664 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050979184 -> 137429050980048 [arrowsize=0.8 color=gray40 penwidth=1.2] - 137429050979952 -> 137429050980048 [arrowsize=0.8 color=gray40 penwidth=1.2] -} diff --git a/front/py/examples/2_ir/5_reduce_sum.dot.svg b/front/py/examples/2_ir/5_reduce_sum.dot.svg deleted file mode 100644 index cc43c7f4..00000000 --- a/front/py/examples/2_ir/5_reduce_sum.dot.svg +++ /dev/null @@ -1,179 +0,0 @@ - - - - - - -%3 - - - -137431348365008 - -t -(3, 4, 5) - - - -137429050979232 - -sum - - - -137431348365008->137429050979232 - - - - - -137429052759232 - -uniform - - - -137429052759232->137431348365008 - - - - - -137431348378688 - -var_1 --1 - - - -137431348378688->137429052759232 - - - - - -137429052765232 - -var_2 -1 - - - -137429052765232->137429052759232 - - - - - -137429050978992 - -var_3 -0 - - - -137429050978992->137429052759232 - - - - - -137429050978752 - -s -(4,) - - - -137429050978800 - -vector_1 -[0, 2] - - - -137429050978800->137429050979232 - - - - - -137429050979232->137429050978752 - - - - - -137429050979184 - -t1 -(4, 5, 6) - - - -137429050980048 - -sum - - - -137429050979184->137429050980048 - - - - - -137429050979568 - -constant - - - -137429050979568->137429050979184 - - - - - -137429050979520 - -var_4 -1 - - - -137429050979520->137429050979568 - - - - - -137429050979664 - -t2 -(6,) - - - -137429050979952 - -vector_2 -[0, 1] - - - -137429050979952->137429050980048 - - - - - -137429050980048->137429050979664 - - - - - diff --git a/front/py/examples/2_ir/5_reduce_sum_keepdim.py b/front/py/examples/2_ir/5_reduce_sum_keepdim.py index 41bc77c9..e6f1c5e4 100644 --- a/front/py/examples/2_ir/5_reduce_sum_keepdim.py +++ b/front/py/examples/2_ir/5_reduce_sum_keepdim.py @@ -1,28 +1,45 @@ ############-------PyTorch-------################ import torch -torch_t = torch.arange(60).reshape(3,4,5) +torch_t = torch.arange(0,60).reshape(3,4,5) print(torch_t) +torch_s = torch.sum(torch_t, dim=[0, 2],keepdim=True) +print(torch_s) +torch_p=torch.prod(torch_t,dim=1) +print(torch_p) -torch_s1 = torch.sum(torch_t, dim=[0, 2],keepdim=True) -print(torch_s1) - +torch_t1 = torch.ones(4, 5, 6,dtype=torch.float) +print(torch_t1) +torch_t2 = torch.sum(torch_t1, dim=[0, 1],keepdim=True) +print(torch_t2) ############-------DEEPX-------################ from deepx import Tensor,ones,zeros,arange -from deepx.nn.functional import sum,mean - -t=arange(0,60,1).reshape_(3,4,5) -print((t)) - -s1=sum(t,dim=[0,2],keepdim=True,out="s1") -print(s1) - +from deepx.nn.functional import sum,prod + +t=Tensor(shape=(3,4,5)) +t.addtograph("t") +t.arange_(0,1) +t.set_format("%.0f") +print(t) +s=sum(t,dim=[0,2],out="s",keepdim=True) +s.set_format("%.0f") +print(s) +p=prod(t,dim=[1],out="p",keepdim=True) +p.set_format("%.0f") +# print(p) + +t1=ones(4,5,6,name="t1") +t1.set_format("%.0f") +print(t1) +t2=sum(t1,dim=[0,1],out='t2',keepdim=True) +t2.set_format("%.0f") +print(t2) import os script_name = os.path.splitext(os.path.basename( os.path.abspath(__file__)))[0] -str=t.graph.to_dot() +str=t2.graph.to_dot() str.render(script_name+".dot", format='svg') \ No newline at end of file diff --git a/front/py/examples/2_ir/5_reduce_sumprod.dot b/front/py/examples/2_ir/5_reduce_sumprod.dot new file mode 100644 index 00000000..da812dbb --- /dev/null +++ b/front/py/examples/2_ir/5_reduce_sumprod.dot @@ -0,0 +1,38 @@ +// Computational Graph +digraph { + rankdir=TB + node [shape=record] + 133977343199552 [label="t +(3, 4, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975085212112 [label="s +(4,)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083685904 [label="vector_1 +[0, 2]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083685520 [label=sum color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 133975083685760 [label="p +(3, 5)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083686048 [label="vector_2 +[1]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083686096 [label=prod color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 133977343199120 [label="t1 +(4, 5, 6)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083686528 [label=constant color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 133975083686288 [label="var_1 +1" color=orange fillcolor=moccasin fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083686240 [label="t2 +(6,)" color=skyblue fillcolor=aliceblue fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083686768 [label="vector_3 +[0, 1]" color=darkseagreen fillcolor=honeydew fontname="Sans-Serif" labeljust=l shape=box style=filled] + 133975083686576 [label=sum color=darkslategray fillcolor=lightgray fontname="Courier Bold" labeljust=l shape=box style=filled] + 133975083685520 -> 133975085212112 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133977343199552 -> 133975083685520 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083685904 -> 133975083685520 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083686096 -> 133975083685760 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133977343199552 -> 133975083686096 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083686048 -> 133975083686096 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083686528 -> 133977343199120 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083686288 -> 133975083686528 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083686576 -> 133975083686240 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133977343199120 -> 133975083686576 [arrowsize=0.8 color=gray40 penwidth=1.2] + 133975083686768 -> 133975083686576 [arrowsize=0.8 color=gray40 penwidth=1.2] +} diff --git a/front/py/examples/2_ir/5_reduce_sumprod.dot.svg b/front/py/examples/2_ir/5_reduce_sumprod.dot.svg new file mode 100644 index 00000000..2b180653 --- /dev/null +++ b/front/py/examples/2_ir/5_reduce_sumprod.dot.svg @@ -0,0 +1,166 @@ + + + + + + +%3 + + + +133977343199552 + +t +(3, 4, 5) + + + +133975083685520 + +sum + + + +133977343199552->133975083685520 + + + + + +133975083686096 + +prod + + + +133977343199552->133975083686096 + + + + + +133975085212112 + +s +(4,) + + + +133975083685904 + +vector_1 +[0, 2] + + + +133975083685904->133975083685520 + + + + + +133975083685520->133975085212112 + + + + + +133975083685760 + +p +(3, 5) + + + +133975083686048 + +vector_2 +[1] + + + +133975083686048->133975083686096 + + + + + +133975083686096->133975083685760 + + + + + +133977343199120 + +t1 +(4, 5, 6) + + + +133975083686576 + +sum + + + +133977343199120->133975083686576 + + + + + +133975083686528 + +constant + + + +133975083686528->133977343199120 + + + + + +133975083686288 + +var_1 +1 + + + +133975083686288->133975083686528 + + + + + +133975083686240 + +t2 +(6,) + + + +133975083686768 + +vector_3 +[0, 1] + + + +133975083686768->133975083686576 + + + + + +133975083686576->133975083686240 + + + + + diff --git a/front/py/examples/2_ir/5_reduce_sum.py b/front/py/examples/2_ir/5_reduce_sumprod.py similarity index 63% rename from front/py/examples/2_ir/5_reduce_sum.py rename to front/py/examples/2_ir/5_reduce_sumprod.py index 30698b97..eea1e71b 100644 --- a/front/py/examples/2_ir/5_reduce_sum.py +++ b/front/py/examples/2_ir/5_reduce_sumprod.py @@ -1,13 +1,14 @@ ############-------PyTorch-------################ import torch -torch_t = torch.empty(3, 4, 5).uniform_(-1, 1) +torch_t = torch.arange(0,60).reshape(3,4,5) print(torch_t) torch_s = torch.sum(torch_t, dim=[0, 2]) print(torch_s) +torch_p=torch.prod(torch_t,dim=1) +print(torch_p) - -torch_t1 = torch.ones(4, 5, 6) +torch_t1 = torch.ones(4, 5, 6,dtype=torch.float) print(torch_t1) torch_t2 = torch.sum(torch_t1, dim=[0, 1]) print(torch_t2) @@ -16,18 +17,25 @@ ############-------DEEPX-------################ from deepx import Tensor,ones,zeros,arange -from deepx.nn.functional import sum,mean +from deepx.nn.functional import sum,prod t=Tensor(shape=(3,4,5)) t.addtograph("t") -t.uniform_(low=-1,high=1) -print((t)) +t.arange_(0,1) +t.set_format("%.0f") +print(t) s=sum(t,dim=[0,2],out="s") +s.set_format("%.0f") print(s) +p=prod(t,dim=[1],out="p") +p.set_format("%.0f") +print(p) t1=ones(4,5,6,name="t1") +t1.set_format("%.0f") print(t1) t2=sum(t1,dim=[0,1],out='t2') +t2.set_format("%.0f") print(t2)