Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 92 additions & 49 deletions doc/excuter/deepx.op.drawio

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion doc/excuter/deepx.op.drawio.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/excuter/deepx.op.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 8 additions & 1 deletion doc/excuter/excuter.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,16 @@ todo

#### 4.front对接测试

1.先启动excuter
1.先启动excuter可执行文件, 位于excuter/op-mem-{cuda/ompsimd}/build,可执行文件名同excuter名
2.然后测试front中py的对应算子脚本(front/py/examples 目录)

可以按照顺序,依次测试

1_tensor

2_ir

3_functional



39 changes: 39 additions & 0 deletions excuter/cpp-common/src/deepx/tensorfunc/io.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef DEEPX_TENSORFUNC_IO_HPP
#define DEEPX_TENSORFUNC_IO_HPP

#include <string> // was missing: std::string is used throughout this header

#include "deepx/tensor.hpp"

namespace deepx::tensorfunc{

// Author-tag dispatch for tensor I/O. Each primary-template dispatcher
// deletes its static method, so calling an <Author, T> combination that has
// no specialization fails at compile time; backends (e.g. miaobyte) supply
// partial specializations with the real implementation.

// Prints tensor `t`. `f` is an optional printf-style element format
// (e.g. "%.4f"); empty selects the backend's default.
template <typename Author,typename T>
struct printDispatcher{
static void print(const Tensor<T> &t, const std::string &f="")=delete;
};

template <typename Author, typename T>
void print(const Tensor<T> &t, const std::string &f=""){
printDispatcher<Author,T>::print(t, f);
}

// Saves `tensor` under `path`; `filebegin` is the byte offset at which the
// element data is written inside the data file.
template <typename Author, typename T>
struct saveDispatcher{
static void save(Tensor<T> &tensor,const std::string &path,int filebegin=0)=delete;
};

template <typename Author, typename T>
void save(Tensor<T> &tensor,const std::string &path,int filebegin=0){
saveDispatcher<Author,T>::save(tensor, path, filebegin);
}

// Loads a tensor previously written by save<Author, T> from `path`,
// starting at byte offset `filebegin` in the data file.
template <typename Author, typename T>
struct loadDispatcher{
static Tensor<T> load(const std::string &path,int filebegin=0)=delete;
};

template <typename Author, typename T>
Tensor<T> load(const std::string &path,int filebegin=0){
return loadDispatcher<Author,T>::load(path, filebegin);
}
}

#endif // DEEPX_TENSORFUNC_IO_HPP
19 changes: 0 additions & 19 deletions excuter/cpp-common/src/deepx/tensorfunc/print.hpp

This file was deleted.

39 changes: 20 additions & 19 deletions excuter/cpp-common/src/deepx/tensorfunc/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,51 @@

namespace deepx::tensorfunc
{


template <typename Author, typename T>
struct reducesumDispatcher
struct reducemaxDispatcher
{
static void reducesum(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void reducemax(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};
template <typename Author, typename T>
void reducesum(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void reducemax(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reducesumDispatcher<Author, T>::reducesum(A, axis, keepdims, B);
reducemaxDispatcher<Author, T>::reducemax(A, dims, B, keepdims);
}

template <typename Author, typename T>
struct reduceprodDispatcher
struct reduceminDispatcher
{
static void reduceprod(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void reducemin(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};

template <typename Author, typename T>
void reduceprod(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void reducemin(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reduceprodDispatcher<Author, T>::reduceprod(A, axis, keepdims, B);
reduceminDispatcher<Author, T>::reducemin(A, dims, B, keepdims);
}

template <typename Author, typename T>
struct reducemaxDispatcher
struct sumDispatcher
{
static void reducemax(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void reducesum(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};
template <typename Author, typename T>
void reducemax(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void sum(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reducemaxDispatcher<Author, T>::reducemax(A, axis, keepdims, B);
sumDispatcher<Author, T>::sum(A, dims, B, keepdims);
}

template <typename Author, typename T>
struct reduceminDispatcher
struct prodDispatcher
{
static void reducemin(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B) = delete;
static void prod(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false) = delete;
};

template <typename Author, typename T>
void reducemin(const Tensor<T> &A, const int axis,const bool keepdims, Tensor<T> &B)
void prod(const Tensor<T> &A, const std::vector<int> &dims,Tensor<T> &B,const bool keepdims=false)
{
reduceminDispatcher<Author, T>::reducemin(A, axis, keepdims, B);
prodDispatcher<Author, T>::prod(A, dims, B, keepdims);
}

}
#endif // DEEPX_TENSORFUNC_REDUCE_HPP
2 changes: 1 addition & 1 deletion excuter/op-mem-cuda/src/client/tfs.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "deepx/tf/arg.hpp"
#include "deepx/tf/tf.hpp"
#include "deepx/tf/new.hpp"
#include "deepx/tf/print.hpp"
#include "deepx/tf/io.hpp"
#include "deepx/tf/init.hpp"
#include "deepx/tf/elementwise_basic.hpp"
#include "deepx/tf/elementwise_sqrt.hpp"
Expand Down
18 changes: 0 additions & 18 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/file.hpp

This file was deleted.

137 changes: 137 additions & 0 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/io_miaobyte.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#ifndef DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP
#define DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP

#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <deepx/tensor.hpp>
#include <deepx/dtype.hpp>
#include <stdutil/vector.hpp>
#include <stdutil/print.hpp>
#include "deepx/tensorfunc/authors.hpp"
#include "deepx/tensorfunc/io.hpp"

namespace deepx::tensorfunc
{
template <typename T>
struct printDispatcher<miaobyte, T>
{
    // Prints a device tensor: copies its bytes to the host, widens
    // half/bf16 elements to float, then formats via stdutil::print.
    // `f` is a printf-style element format; empty means the default.
    static void print(const Tensor<T> &t, const std::string &f = "")
    {
        const int bytes = precision_bits(t.shape.dtype) / 8;
        const size_t total_bytes = t.shape.size * bytes;

        // Host staging buffer. std::vector releases it on every path
        // (RAII); the raw new[]/delete[] this replaces leaked if
        // stdutil::print threw. The old `if (host_data == nullptr)` check
        // was dead code: operator new throws std::bad_alloc, it never
        // returns nullptr.
        std::vector<unsigned char> host_data(total_bytes);

        // Copy data from device to host in one shot.
        cudaError_t err = cudaMemcpy(host_data.data(), t.data, total_bytes, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess)
        {
            throw std::runtime_error("Failed to copy data from device to host");
        }

        // half and bf16 have no host printf support: convert to float first.
        if (t.shape.dtype == Precision::Float16 || t.shape.dtype == Precision::BFloat16)
        {
            std::vector<float> host_float(t.shape.size);

            if (t.shape.dtype == Precision::Float16)
            {
                for (size_t i = 0; i < t.shape.size; i++)
                {
                    host_float[i] = __half2float(((half *)host_data.data())[i]);
                }
            }
            else
            { // BFloat16
                for (size_t i = 0; i < t.shape.size; i++)
                {
                    host_float[i] = __bfloat162float(((nv_bfloat16 *)host_data.data())[i]);
                }
            }

            // Print the widened float data with a sensible default format.
            stdutil::print(t.shape.shape, host_float.data(), Precision::Float32, f.empty() ? "%.4f" : f);
        }
        else
        {
            // All other dtypes can be printed directly from the raw bytes.
            stdutil::print(t.shape.shape, host_data.data(), t.shape.dtype, f);
        }
    }
};

template <typename T>
struct saveDispatcher<miaobyte, T>
{
static void save(Tensor<T> &tensor, const std::string &path, int filebegin = 0)
{
// 保存shape
std::string shapepath = path + ".shape";
std::string shapedata = tensor.shape.toYaml();
std::ofstream shape_fs(shapepath, std::ios::binary);
shape_fs.write(shapedata.c_str(), shapedata.size());
shape_fs.close();

// 保存data
std::string datapath = path + ".data";
std::ofstream data_fs(datapath, std::ios::binary | std::ios::in | std::ios::out);

if (!data_fs.is_open())
{
// 如果文件不存在,则创建新文件
data_fs.open(datapath, std::ios::binary | std::ios::out);
}
data_fs.seekp(filebegin);
data_fs.write(reinterpret_cast<const char *>(tensor.data), tensor.shape.size * sizeof(T));
data_fs.close();
}
};
template <typename T>
struct loadDispatcher<miaobyte, T>
{
    // Loads a tensor previously written by save<miaobyte, T>:
    // <path>.shape holds the YAML shape, <path>.data the raw element bytes
    // starting at byte offset `filebegin`.
    static Tensor<T> load(const std::string &path, int filebegin = 0)
    {
        // 加载shape
        std::string shapepath = path + ".shape";
        std::ifstream shape_fs(shapepath, std::ios::binary);
        if (!shape_fs.is_open())
        {
            throw std::runtime_error("Failed to open shape file: " + shapepath);
        }
        std::string shapedata((std::istreambuf_iterator<char>(shape_fs)), std::istreambuf_iterator<char>());

        Shape shape;
        shape.fromYaml(shapedata);
        shape_fs.close();

        // 加载data
        Tensor<T> tensor = New<T>(shape);
        std::string datapath = path + ".data";
        std::ifstream data_fs(datapath, std::ios::binary);

        if (!data_fs.is_open())
        {
            throw std::runtime_error("无法打开数据文件: " + datapath);
        }

        // 设置读取位置
        data_fs.seekg(filebegin);

        // tensor.data is device memory in this executor (printDispatcher
        // copies it with cudaMemcpyDeviceToHost), so ifstream::read cannot
        // fill it directly: read into a host buffer, then upload.
        const size_t total_bytes = shape.size * sizeof(T);
        std::vector<char> host_data(total_bytes);
        data_fs.read(host_data.data(), total_bytes);
        data_fs.close();

        cudaError_t err = cudaMemcpy(tensor.data, host_data.data(), total_bytes, cudaMemcpyHostToDevice);
        if (err != cudaSuccess)
        {
            throw std::runtime_error("Failed to copy data from host to device");
        }

        return tensor;
    }
};
}
#endif // DEEPX_TENSORFUNC_IO_MIAOBYTE_HPP
69 changes: 0 additions & 69 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/print_miaobyte.hpp

This file was deleted.

Loading