Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 92 additions & 49 deletions doc/excuter/deepx.op.drawio

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion doc/excuter/deepx.op.drawio.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/excuter/deepx.op.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 8 additions & 1 deletion doc/excuter/excuter.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,16 @@ todo

#### 4.front对接测试

1.先启动excuter
1.先启动excuter可执行文件, 位于excuter/op-mem-{cuda/ompsimd}/build,可执行文件名同excuter名
2.然后测试front中py的对应算子脚本(front/py/examples 目录)

可以按照顺序,依次测试

1_tensor

2_ir

3_functional



39 changes: 39 additions & 0 deletions excuter/cpp-common/src/deepx/tensorfunc/io.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef DEEPX_TENSORFUNC_IO_HPP
#define DEEPX_TENSORFUNC_IO_HPP

#include <string> // was missing: std::string is used throughout this header

#include "deepx/tensor.hpp"

namespace deepx::tensorfunc{

// Author-tag dispatch for tensor I/O. Each primary-template dispatcher
// deletes its static method, so calling an <Author, T> combination that has
// no specialization fails at compile time; backends (e.g. miaobyte) supply
// partial specializations with the real implementation.

// Prints tensor `t`. `f` is an optional printf-style element format
// (e.g. "%.4f"); empty selects the backend's default.
template <typename Author,typename T>
struct printDispatcher{
static void print(const Tensor<T> &t, const std::string &f="")=delete;
};

template <typename Author, typename T>
void print(const Tensor<T> &t, const std::string &f=""){
printDispatcher<Author,T>::print(t, f);
}

// Saves `tensor` under `path`; `filebegin` is the byte offset at which the
// element data is written inside the data file.
template <typename Author, typename T>
struct saveDispatcher{
static void save(Tensor<T> &tensor,const std::string &path,int filebegin=0)=delete;
};

template <typename Author, typename T>
void save(Tensor<T> &tensor,const std::string &path,int filebegin=0){
saveDispatcher<Author,T>::save(tensor, path, filebegin);
}

// Loads a tensor previously written by save<Author, T> from `path`,
// starting at byte offset `filebegin` in the data file.
template <typename Author, typename T>
struct loadDispatcher{
static Tensor<T> load(const std::string &path,int filebegin=0)=delete;
};

template <typename Author, typename T>
Tensor<T> load(const std::string &path,int filebegin=0){
return loadDispatcher<Author,T>::load(path, filebegin);
}
}

#endif // DEEPX_TENSORFUNC_IO_HPP
19 changes: 0 additions & 19 deletions excuter/cpp-common/src/deepx/tensorfunc/print.hpp

This file was deleted.

39 changes: 20 additions & 19 deletions excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,51 @@

namespace deepx::tensorfunc
{


template <typename Author, typename T>
struct reducesumDispatcher
struct reducemaxDispatcher
{
static void reducesum(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void reducemax(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};
template <typename Author, typename T>
void reducesum(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void reducemax(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reducesumDispatcher<Author, T>::reducesum(A, axis, keepdims, B);
reducemaxDispatcher<Author, T>::reducemax(A, dims, B, keepdims);
}

template <typename Author, typename T>
struct reduceprodDispatcher
struct reduceminDispatcher
{
static void reduceprod(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void reducemin(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};

template <typename Author, typename T>
void reduceprod(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void reducemin(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reduceprodDispatcher<Author, T>::reduceprod(A, axis, keepdims, B);
reduceminDispatcher<Author, T>::reducemin(A, dims, B, keepdims);
}

template <typename Author, typename T>
struct reducemaxDispatcher
struct sumDispatcher
{
static void reducemax(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void reducesum(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};
template <typename Author, typename T>
void reducemax(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void sum(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reducemaxDispatcher<Author, T>::reducemax(A, axis, keepdims, B);
sumDispatcher<Author, T>::sum(A, dims, B, keepdims);
}

template <typename Author, typename T>
struct reduceminDispatcher
struct prodDispatcher
{
static void reducemin(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void prod(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};

template <typename Author, typename T>
void reducemin(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void prod(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reduceminDispatcher<Author, T>::reducemin(A, axis, keepdims, B);
prodDispatcher<Author, T>::prod(A, dims, B, keepdims);
}

}
#endif // DEEPX_TENSORFUNC_REDUCE_HPP
2 changes: 1 addition & 1 deletion excuter/op-mem-cuda/src/client/tfs.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "deepx/tf/arg.hpp"
#include "deepx/tf/tf.hpp"
#include "deepx/tf/new.hpp"
#include "deepx/tf/print.hpp"
#include "deepx/tf/io.hpp"
#include "deepx/tf/init.hpp"
#include "deepx/tf/elementwise_basic.hpp"
#include "deepx/tf/elementwise_sqrt.hpp"
Expand Down
18 changes: 0 additions & 18 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/file.hpp

This file was deleted.

137 changes: 137 additions & 0 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#ifndef DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP
#define DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP

#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <deepx/tensor.hpp>
#include <deepx/dtype.hpp>
#include <stdutil/vector.hpp>
#include <stdutil/print.hpp>
#include "deepx/tensorfunc/authors.hpp"
#include "deepx/tensorfunc/io.hpp"

namespace deepx::tensorfunc
{
template <typename T>
struct printDispatcher<miaobyte, T>
{
    // Prints a device tensor: copies its bytes to the host, widens
    // half/bf16 elements to float, then formats via stdutil::print.
    // `f` is a printf-style element format; empty means the default.
    static void print(const Tensor<T> &t, const std::string &f = "")
    {
        const int bytes = precision_bits(t.shape.dtype) / 8;
        const size_t total_bytes = t.shape.size * bytes;

        // Host staging buffer. std::vector releases it on every path
        // (RAII); the raw new[]/delete[] this replaces leaked if
        // stdutil::print threw. The old `if (host_data == nullptr)` check
        // was dead code: operator new throws std::bad_alloc, it never
        // returns nullptr.
        std::vector<unsigned char> host_data(total_bytes);

        // Copy data from device to host in one shot.
        cudaError_t err = cudaMemcpy(host_data.data(), t.data, total_bytes, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess)
        {
            throw std::runtime_error("Failed to copy data from device to host");
        }

        // half and bf16 have no host printf support: convert to float first.
        if (t.shape.dtype == Precision::Float16 || t.shape.dtype == Precision::BFloat16)
        {
            std::vector<float> host_float(t.shape.size);

            if (t.shape.dtype == Precision::Float16)
            {
                for (size_t i = 0; i < t.shape.size; i++)
                {
                    host_float[i] = __half2float(((half *)host_data.data())[i]);
                }
            }
            else
            { // BFloat16
                for (size_t i = 0; i < t.shape.size; i++)
                {
                    host_float[i] = __bfloat162float(((nv_bfloat16 *)host_data.data())[i]);
                }
            }

            // Print the widened float data with a sensible default format.
            stdutil::print(t.shape.shape, host_float.data(), Precision::Float32, f.empty() ? "%.4f" : f);
        }
        else
        {
            // All other dtypes can be printed directly from the raw bytes.
            stdutil::print(t.shape.shape, host_data.data(), t.shape.dtype, f);
        }
    }
};

template <typename T>
struct saveDispatcher<miaobyte, T>
{
static void save(Tensor<T> &tensor, const std::string &path, int filebegin = 0)
{
// 保存shape
std::string shapepath = path + ".shape";
std::string shapedata = tensor.shape.toYaml();
std::ofstream shape_fs(shapepath, std::ios::binary);
shape_fs.write(shapedata.c_str(), shapedata.size());
shape_fs.close();

// 保存data
std::string datapath = path + ".data";
std::ofstream data_fs(datapath, std::ios::binary | std::ios::in | std::ios::out);

if (!data_fs.is_open())
{
// 如果文件不存在,则创建新文件
data_fs.open(datapath, std::ios::binary | std::ios::out);
}
data_fs.seekp(filebegin);
data_fs.write(reinterpret_cast<const char *>(tensor.data), tensor.shape.size * sizeof(T));
data_fs.close();
}
};
template <typename T>
struct loadDispatcher<miaobyte, T>
{
    // Loads a tensor previously written by save<miaobyte, T>:
    // <path>.shape holds the YAML shape, <path>.data the raw element bytes
    // starting at byte offset `filebegin`.
    static Tensor<T> load(const std::string &path, int filebegin = 0)
    {
        // 加载shape
        std::string shapepath = path + ".shape";
        std::ifstream shape_fs(shapepath, std::ios::binary);
        if (!shape_fs.is_open())
        {
            throw std::runtime_error("Failed to open shape file: " + shapepath);
        }
        std::string shapedata((std::istreambuf_iterator<char>(shape_fs)), std::istreambuf_iterator<char>());

        Shape shape;
        shape.fromYaml(shapedata);
        shape_fs.close();

        // 加载data
        Tensor<T> tensor = New<T>(shape);
        std::string datapath = path + ".data";
        std::ifstream data_fs(datapath, std::ios::binary);

        if (!data_fs.is_open())
        {
            throw std::runtime_error("无法打开数据文件: " + datapath);
        }

        // 设置读取位置
        data_fs.seekg(filebegin);

        // tensor.data is device memory in this executor (printDispatcher
        // copies it with cudaMemcpyDeviceToHost), so ifstream::read cannot
        // fill it directly: read into a host buffer, then upload.
        const size_t total_bytes = shape.size * sizeof(T);
        std::vector<char> host_data(total_bytes);
        data_fs.read(host_data.data(), total_bytes);
        data_fs.close();

        cudaError_t err = cudaMemcpy(tensor.data, host_data.data(), total_bytes, cudaMemcpyHostToDevice);
        if (err != cudaSuccess)
        {
            throw std::runtime_error("Failed to copy data from host to device");
        }

        return tensor;
    }
};
}
#endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP
69 changes: 0 additions & 69 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/print_miaobyte.hpp

This file was deleted.

Loading