4 changes: 4 additions & 0 deletions doc/excuter/op-mem-cuda/list.md
@@ -4,7 +4,11 @@

| Operation | Author | Func Def | Math Formula | IR Instruction |
|-----------|--------|------------|--------------|----------------|
| concat | miaobyte | concat(listtensor<any> tensors, var<int32> axis)->(tensor<any> result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor<any> tensors, var<int32> axis)->(tensor<any> result) |
| transpose | miaobyte | transpose(tensor<any> A, vector<int32> dim_order)->(tensor<any> C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor<any> A, vector<int32> dim_order)->(tensor<any> C) |
| reshape | miaobyte | reshape(tensor<any> A, vector<int32> shape)->() | T2=T1.reshape(shape) | reshape(tensor<any> A, vector<int32> shape)->() |
| matmul | cublas | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| comparescalar | miaobyte | comparescalar(tensor<any> A, var<any> scalar)->(tensor<int8> mask) | mask=compare(T1, scalar) | comparescalar(tensor<any> A, var<any> scalar)->(tensor<int8> mask) |
| add | cublas | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
59 changes: 31 additions & 28 deletions doc/excuter/op-mem-ompsimd/list.md
@@ -4,35 +4,38 @@

| Operation | Author | Func Def | Math Formula | IR Instruction |
|-----------|--------|------------|--------------|----------------|
| concat | none | concat()->() | Tresult = concat([T1, T2...], axis=3) | concat()->() |
| matmul | cblas | matmul(tensor<float64\|float32> A, tensor<float64\|float32> B)->(tensor<float64\|float32> C) | T3=T1 @ T2 | matmul(tensor<float64\|float32> A, tensor<float64\|float32> B)->(tensor<float64\|float32> C) |
| matmul | miaobyte | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| compare | miaobyte | compare(tensor<any> A, tensor<any> B)->(tensor<int8> mask) | mask=compare(T1,T2) | compare(tensor<any> A, tensor<any> B)->(tensor<int8> mask) |
| min | miaobyte | min(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=min(T1,T2) | min(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| minscalar | miaobyte | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=min(T1,scalar) | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| exp | miaobyte | exp(tensor<any> A)->(tensor<any> C) | T3=exp(T1) | exp(tensor<any> A)->(tensor<any> C) |
| maxscalar | miaobyte | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=max(T1,scalar) | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| pow | miaobyte | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1^T2 | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| powscalar | miaobyte | powscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1^scalar | powscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| rdivscalar | miaobyte | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) | T3=scalar/T1 | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) |
| div | miaobyte | div(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| sub | miaobyte | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1-T2 | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) |
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
| mulscalar | miaobyte | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
| sqrt | miaobyte | sqrt(tensor<any> A)->(tensor<any> C) | T3=sqrt(T1) | sqrt(tensor<any> A)->(tensor<any> C) |
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(var<string> shape)->(tensor<any> tensor1) |
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
| max | miaobyte | max(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=max(T1,T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| divscalar | miaobyte | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1,value) | constant(tensor<any> t, var<any> value)->() |
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
| subscalar | miaobyte | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1-scalar | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
| log | miaobyte | log(tensor<any> A)->(tensor<any> C) | T3=log(T1) | log(tensor<any> A)->(tensor<any> C) |
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
| concat | miaobyte | concat(listtensor<any> tensors, var<int32> axis)->(tensor<any> result) | Tresult = concat([T1, T2...], axis=3) | concat(listtensor<any> tensors, var<int32> axis)->(tensor<any> result) |
| transpose | miaobyte | transpose(tensor<any> A, vector<int32> dim_order)->(tensor<any> C) | T2 = T1.transpose(dimorder=[1,0]) | transpose(tensor<any> A, vector<int32> dim_order)->(tensor<any> C) |
| add | cblas | add(tensor<float64\|float32> a, tensor<float64\|float32> b)->(tensor<float64\|float32> c) | T3=T1+T2 | add(tensor<float64\|float32> a, tensor<float64\|float32> b)->(tensor<float64\|float32> c) |
| add | miaobyte | add(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1+T2 | add(tensor<any> a, tensor<any> b)->(tensor<any> c) |
| comparescalar | miaobyte | comparescalar(tensor<any> A, var<any> scalar)->(tensor<float32> mask) | mask=compare(T1,scalar) | comparescalar(tensor<any> A, var<any> scalar)->(tensor<float32> mask) |
| uniform | miaobyte | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() | uniform(T1,low,high,seed) | uniform(tensor<any> t, var<any> low, var<any> high, var<int32> seed)->() |
| addscalar | miaobyte | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1+scalar | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
| log | miaobyte | log(tensor<any> A)->(tensor<any> C) | T3=log(T1) | log(tensor<any> A)->(tensor<any> C) |
| reshape | miaobyte | reshape(tensor<any> A, vector<int32> shape)->() | T2=T1.reshape(shape) | reshape(tensor<any> A, vector<int32> shape)->() |
| arange | miaobyte | arange(tensor<any> t, var<any> start, var<any> step)->() | arange(T1,start,step) | arange(tensor<any> t, var<any> start, var<any> step)->() |
| divscalar | miaobyte | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| print | miaobyte | print(tensor<any> )->() | print(T1) | print(tensor<any> )->() |
| print | miaobyte | print(tensor<any> , var<string> )->() | print(T1) | print(tensor<any> , var<string> )->() |
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(var<string> shape)->(tensor<any> tensor1) |
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
| subscalar | miaobyte | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) | T3=T1-scalar | subscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
| sqrt | miaobyte | sqrt(tensor<any> A)->(tensor<any> C) | T3=sqrt(T1) | sqrt(tensor<any> A)->(tensor<any> C) |
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
| sub | miaobyte | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1-T2 | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) |
| mulscalar | miaobyte | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
| div | miaobyte | div(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| constant | miaobyte | constant(tensor<any> t, var<any> value)->() | constant(T1,value) | constant(tensor<any> t, var<any> value)->() |
| powscalar | miaobyte | powscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=T1^scalar | powscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| max | miaobyte | max(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=max(T1,T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| pow | miaobyte | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1^T2 | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| maxscalar | miaobyte | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=max(T1,scalar) | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| mul | miaobyte | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1*T2 | mul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| exp | miaobyte | exp(tensor<any> A)->(tensor<any> C) | T3=exp(T1) | exp(tensor<any> A)->(tensor<any> C) |
| rdivscalar | miaobyte | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) | T3=scalar/T1 | rdivscalar(var<any> scalar, tensor<any> A)->(tensor<any> C) |
| minscalar | miaobyte | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) | T3=min(T1,scalar) | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| min | miaobyte | min(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=min(T1,T2) | min(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| compare | miaobyte | compare(tensor<any> A, tensor<any> B)->(tensor<float32> mask) | mask=compare(T1,T2) | compare(tensor<any> A, tensor<any> B)->(tensor<float32> mask) |
| matmul | cblas | matmul(tensor<float64\|float32> A, tensor<float64\|float32> B)->(tensor<float64\|float32> C) | T3=T1 @ T2 | matmul(tensor<float64\|float32> A, tensor<float64\|float32> B)->(tensor<float64\|float32> C) |
| matmul | miaobyte | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
25 changes: 22 additions & 3 deletions excuter/cpp-common/src/deepx/dtype.hpp
@@ -3,9 +3,30 @@

#include <string>
#include <cstdint>
#include <sstream>
#include <type_traits>

namespace deepx
{
template <typename T>
T to(const std::string &textvalue)
{
if constexpr (std::is_same_v<T, std::string>)
{
return textvalue;
}
else if constexpr (std::is_integral_v<T>)
{
return static_cast<T>(std::stoll(textvalue));
}
else if constexpr (std::is_floating_point_v<T>)
{
return static_cast<T>(std::stod(textvalue));
}
else
{
// For other types, fall back to stream extraction from the string
T value;
std::istringstream iss(textvalue);
iss >> value;
return value;
}
}

enum class DataCategory : uint8_t
{
Expand Down Expand Up @@ -112,7 +133,7 @@ namespace deepx
// Boolean type (bit 13)
Bool = 1 << 13, // 0010 0000 0000 0000
String = 1 << 15, // 0100 0000 0000 0000
// Common combinations
Any = 0xFFFF, // 1111 1111 1111 1111
Float = Float64 | Float32 | Float16 | BFloat16 | Float8E5M2 | Float8E4M3 | Float4E2M1,
Float8 = Float8E5M2 | Float8E4M3, // all FP8 formats
Expand Down Expand Up @@ -230,8 +251,6 @@ namespace deepx
return TypeDef(category, precision);
}



// precision_str modified to use the standard naming format
inline std::string precision_str(Precision p)
{
1 change: 1 addition & 0 deletions excuter/cpp-common/src/deepx/mem/mem.hpp
@@ -150,6 +150,7 @@ namespace deepx::mem

return tensors;
}


void delete_tensor(const string &name)
{
31 changes: 29 additions & 2 deletions excuter/cpp-common/src/deepx/shape_concat.hpp
@@ -3,6 +3,7 @@

#include "deepx/shape.hpp"
#include "deepx/tensor.hpp"
#include "stdutil/error.hpp"

namespace deepx
{
@@ -18,6 +19,32 @@ namespace deepx
}
return concatShape(shapes,axis);
}
template<typename T>
bool checkShapeConcat(const std::vector<Tensor<T>*> &tensors,const int axis,const Tensor<T> &output){
int axisDim=0;
for (size_t i = 0; i < tensors.size(); i++)
{
if (tensors[i]->shape.dim != output.shape.dim)
{
throw TensorShapeError("All input tensors must have the same number of dimensions as the output for concat");
}
for (int j = 0; j < tensors[i]->shape.dim; j++)
{
if (j != axis)
{
if (tensors[i]->shape[j] != output.shape[j])
{
throw TensorShapeError("Input tensor shapes must match the output shape on all non-concat axes");
}
}
else
{
axisDim += tensors[i]->shape[j];
}
}
}
return axisDim == output.shape[axis];
}
};
#endif // DEEPX_SHAPE_CONCAT_HPP
150 changes: 117 additions & 33 deletions excuter/cpp-common/src/deepx/tensorfunc/changeshape.hpp
@@ -1,48 +1,132 @@
#ifndef DEEPX_TENSORFUNC_CHANGESHAPE_HPP
#define DEEPX_TENSORFUNC_CHANGESHAPE_HPP

#include <vector>
#include "deepx/tensor.hpp"
#include "stdutil/error.hpp"

namespace deepx::tensorfunc
{
using namespace std;
template <typename Author, typename T>
struct reshapeDispatcher
{
static void reshape(Tensor<T> &tensor, const std::vector<int> &new_shape) = delete;
};

// A.reshape(new_shape)
template <typename Author, typename T>
void reshape(Tensor<T> &tensor, const std::vector<int> &new_shape)
{
reshapeDispatcher<Author, T>::reshape(tensor, new_shape);
}

template <typename Author, typename T>
struct transposeDispatcher
{
static void transpose(const Tensor<T> &tensor, const std::vector<int> &dim_order, Tensor<T> &output) = delete;
};

// transpose(A,dim_order)=>B
template <typename Author, typename T>
void transpose(const Tensor<T> &tensor, const std::vector<int> &dim_order, Tensor<T> &output)
{
transposeDispatcher<Author, T>::transpose(tensor, dim_order, output);
}

template <typename Author, typename T>
struct concatDispatcher
{
static void concat(const vector<Tensor<T>*> tensors, const int axis, Tensor<T> &C) = delete;
};
// concat(tensors,axis)=>C
template <typename Author, typename T>
void concat(const vector<Tensor<T>*> tensors, const int axis, Tensor<T> &C)
{
concatDispatcher<Author, T>::concat(tensors, axis, C);
}

// https://onnx.ai/onnx/operators/onnx__Split.html
template <typename Author, typename T>
struct splitDispatcher
{
static void split(const Tensor<T> &A, const int axis,const std::vector<int> &splits, Tensor<T> *&B) = delete;
static void split(const Tensor<T> &A, const int axis,const int num_outputs, Tensor<T> *&B) = delete;
};
// split(tensor,axis,splits)=>tensors
template <typename Author, typename T>
void split(const Tensor<T> &A, const int axis,const std::vector<int> &splits, Tensor<T> *&B)
{
splitDispatcher<Author, T>::split(A, axis, splits, B);

}

// split(tensor,axis,num_outputs)=>tensors
template <typename Author, typename T>
void split(const Tensor<T> &A, const int axis,const int num_outputs, Tensor<T> *&B)
{
splitDispatcher<Author, T>::split(A, axis, num_outputs, B);
}

// Generic template declaration
template <typename Author, typename T>
struct expandDispatcher
{
static void expand(const Tensor<T> &A, const Shape &new_shape, Tensor<T> &B) = delete;
};

// expand(A,new_shape)=>B
template <typename Author, typename T>
void expand(const Tensor<T> &A, const Shape &new_shape, Tensor<T> &B)
{
expandDispatcher<Author, T>::expand(A, new_shape, B);
}

template <typename Author, typename T>
struct squeezeDispatcher
{
static void squeeze(Tensor<T> &tensor) = delete;
};

template <typename Author, typename T>
void squeeze(Tensor<T> &tensor)
{
squeezeDispatcher<Author, T>::squeeze(tensor);
}

template <typename Author, typename T>
struct unsqueezeDispatcher
{
static void unsqueeze(Tensor<T> &tensor, const int axis) = delete;
};

template <typename Author, typename T>
void unsqueeze(Tensor<T> &tensor, const int axis)
{
unsqueezeDispatcher<Author, T>::unsqueeze(tensor, axis);
}

template <typename Author, typename T>
struct flattenDispatcher
{
static void flatten(Tensor<T> &tensor) = delete;
};

template <typename Author, typename T>
void flatten(Tensor<T> &tensor)
{
flattenDispatcher<Author, T>::flatten(tensor);
}

template <typename Author, typename T>
struct paddingDispatcher
{
static void padding(Tensor<T> &tensor, const Shape &new_shape) = delete;
};

template <typename Author, typename T>
void padding(Tensor<T> &tensor, const Shape &new_shape)
{
paddingDispatcher<Author, T>::padding(tensor, new_shape);
}
}

#endif