Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/excuter/op-mem-cuda/list.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 = zeros(shape) | newtensor(var<string> shape)->(tensor<any> tensor1) |
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
| matmul | cublas | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| sub | miaobyte | sub(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1-T2 | sub(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
2 changes: 2 additions & 0 deletions doc/excuter/op-mem-ompsimd/list.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@
| newtensor | none | newtensor(vector<int32> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(vector<int32> shape)->(tensor<any> tensor1) |
| newtensor | none | newtensor(var<string> shape)->(tensor<any> tensor1) | T1 =Tensor(shape=[...]) | newtensor(var<string> shape)->(tensor<any> tensor1) |
| vecset | none | vecset(vector<any> value)->(vector<any> name) | shape = [3 4 5] | vecset(vector<any> value)->(vector<any> name) |
| matmul | cblas | matmul(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) | T3=T1 @ T2 | matmul(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) |
| matmul | miaobyte | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) | T3=T1 @ T2 | matmul(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| sub | miaobyte | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) | T3=T1-T2 | sub(tensor<any> a, tensor<any> b)->(tensor<any> c) |
| argset | none | argset(var<any> value)->(var<any> name) | var argname = argvalue | argset(var<any> value)->(var<any> name) |
7 changes: 5 additions & 2 deletions excuter/cpp-common/src/deepx/tensorfunc/matmul.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "deepx/tensor.hpp"
#include "deepx/tensorfunc/authors.hpp"

#include "stdutil/error.hpp"
namespace deepx::tensorfunc
{
bool check_matmul_shape(const Shape &a, const Shape &b)
Expand All @@ -29,7 +29,10 @@ namespace deepx::tensorfunc
template <typename Author, typename T>
struct matmulDispatcher
{
static void matmul(const Tensor<T> &A, const Tensor<T> &B, Tensor<T> &C) = delete;
static void matmul(const Tensor<T> &A, const Tensor<T> &B, Tensor<T> &C)
{
throw NotImplementError("matmul");
}
};

template <typename Author, typename T>
Expand Down
22 changes: 15 additions & 7 deletions excuter/op-mem-cuda/src/client/tfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "deepx/tf/print.hpp"
#include "deepx/tf/init.hpp"
#include "deepx/tf/elementwise_basic.hpp"
#include "deepx/tf/matmul.hpp"
#include "deepx/dtype.hpp"
#include "deepx/tf/tffactory.hpp"
#include "deepx/tensorfunc/authors.hpp"
Expand Down Expand Up @@ -173,12 +174,19 @@ namespace deepx::tf
// opfactory.add_op(Powscalar_miaobyte<float>());
// opfactory.add_op(Powscalar_miaobyte<double>());
}
// // matmul
// void register_matmul(OpFactory &opfactory)
// {
// opfactory.add_op(MatMul<float>());
// opfactory.add_op(MatMul<double>());
// }
// matmul
void register_matmul(TfFactory &tffactory)
{
tffactory.add_tf(std::make_shared<MatMul<cublas>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Any),
Param("B", DataCategory::Tensor, Precision::Any),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Any),
})));
}
// // changeshape
void register_changeshape(TfFactory &tffactory)
{
Expand Down Expand Up @@ -207,7 +215,7 @@ namespace deepx::tf
register_init(tffactory);
register_util(tffactory);
register_elementwise(tffactory);
// register_matmul(opfactory);
register_matmul(tffactory);
register_changeshape(tffactory);
// register_reduce(opfactory);
return 0;
Expand Down
88 changes: 88 additions & 0 deletions excuter/op-mem-cuda/src/deepx/tf/matmul.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#ifndef DEEPX_TF_MATMUL_HPP
#define DEEPX_TF_MATMUL_HPP

#include <cuda_fp16.h>
#include <cuda_bf16.h>

#include <exception>
#include <stdexcept>

#include "deepx/tf/tf.hpp"
#include "deepx/dtype.hpp"
#include "deepx/dtype_cuda.hpp"
#include "deepx/tensorfunc/matmul_cublas.hpp"

namespace deepx::tf
{
template <typename Author>
class MatMul : public TF
{
public:
MatMul(const vector<Param> &args, const vector<Param> &returns)
{
this->name = "matmul";
this->author = Author::name();
this->args = args;
this->returns = returns;
}

MatMul(string text)
{
this->parse(text);
this->author = Author::name();
if (this->name != "matmul")
{
throw std::runtime_error("Invalid name: " + this->name);
}
}
string math_formula() const override
{
return "T3=T1 @ T2";
}
shared_ptr<TF> clone() const override
{
return make_shared<MatMul<Author>>(*this);
}
int run(shared_ptr<MemBase> mem, string &error) override
{
Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype;
Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
if (a_type != b_type || a_type != c_type)
{
error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type);
return 1;
}
switch (a_type)
{
case Precision::Float64:
tensorfunc::matmul<Author, double>(*mem->gettensor<double>(this->args[0].textvalue), *mem->gettensor<double>(this->args[1].textvalue), *mem->gettensor<double>(this->returns[0].textvalue));
break;
case Precision::Float32:
tensorfunc::matmul<Author, float>(*mem->gettensor<float>(this->args[0].textvalue), *mem->gettensor<float>(this->args[1].textvalue), *mem->gettensor<float>(this->returns[0].textvalue));
break;
case Precision::Float16:
tensorfunc::matmul<Author, half>(*mem->gettensor<half>(this->args[0].textvalue), *mem->gettensor<half>(this->args[1].textvalue), *mem->gettensor<half>(this->returns[0].textvalue));
break;
case Precision::BFloat16:
tensorfunc::matmul<Author, nv_bfloat16>(*mem->gettensor<nv_bfloat16>(this->args[0].textvalue), *mem->gettensor<nv_bfloat16>(this->args[1].textvalue), *mem->gettensor<nv_bfloat16>(this->returns[0].textvalue));
break;
case Precision::Int64:
tensorfunc::matmul<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
break;
case Precision::Int32:
tensorfunc::matmul<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
break;
case Precision::Int16:
tensorfunc::matmul<Author, int16_t>(*mem->gettensor<int16_t>(this->args[0].textvalue), *mem->gettensor<int16_t>(this->args[1].textvalue), *mem->gettensor<int16_t>(this->returns[0].textvalue));
break;
case Precision::Int8:
tensorfunc::matmul<Author, int8_t>(*mem->gettensor<int8_t>(this->args[0].textvalue), *mem->gettensor<int8_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
break;
default:
error = "Unsupported dtype: " + precision_str(a_type);
return 1;
}
return 0;
}
};
}

#endif
32 changes: 24 additions & 8 deletions excuter/op-mem-ompsimd/src/client/tfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "deepx/tf/changeshape.hpp"
#include "deepx/tf/elementwise.hpp"
#include "deepx/tf/tffactory.hpp"

#include "deepx/tf/matmul.hpp"
#include "deepx/tensorfunc/authors.hpp"
namespace deepx::tf
{
Expand Down Expand Up @@ -186,12 +186,28 @@ namespace deepx::tf
// opfactory.add_op(Powscalar_miaobyte<float>());
// opfactory.add_op(Powscalar_miaobyte<double>());
}
// // matmul
// void register_matmul(OpFactory &opfactory)
// {
// opfactory.add_op(MatMul<float>());
// opfactory.add_op(MatMul<double>());
// }
// matmul
// Registers two matmul backends with the factory: miaobyte (accepts any
// tensor precision) and cblas (restricted to float64/float32), each with
// signature matmul(A, B) -> (C).
void register_matmul(TfFactory &tffactory)
{
// miaobyte backend: generic implementation, any dtype allowed for A, B, C.
tffactory.add_tf(std::make_shared<MatMul<miaobyte>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Any),
Param("B", DataCategory::Tensor, Precision::Any),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Any),
})));
// cblas backend: BLAS-accelerated; precision mask limits operands to
// float64 or float32 (the only types cblas gemm supports here).
tffactory.add_tf(std::make_shared<MatMul<cblas>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Float64|Precision::Float32),
Param("B", DataCategory::Tensor, Precision::Float64|Precision::Float32),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Float64|Precision::Float32),
})));
}
// // changeshape
void register_changeshape(TfFactory &tffactory)
{
Expand Down Expand Up @@ -220,7 +236,7 @@ namespace deepx::tf
register_init(tffactory);
register_util(tffactory);
register_elementwise(tffactory);
// register_matmul(opfactory);
register_matmul(tffactory);
register_changeshape(tffactory);
// register_reduce(opfactory);
return 0;
Expand Down
12 changes: 6 additions & 6 deletions excuter/op-mem-ompsimd/src/deepx/tensorfunc/matmul_cblas.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DEEPX_TENSORFUNC_MATMUL_HPP
#define DEEPX_TENSORFUNC_MATMUL_HPP
#ifndef DEEPX_TENSORFUNC_MATMUL_CBLAS_HPP
#define DEEPX_TENSORFUNC_MATMUL_CBLAS_HPP

#include <cblas.h> // 如果使用 OpenBLAS
#include "deepx/tensor.hpp"
Expand Down Expand Up @@ -64,7 +64,7 @@ namespace deepx::tensorfunc
{
static void matmul(const Tensor<double> &a, const Tensor<double> &b, Tensor<double> &c)
{
if (!check_shape(a.shape, b.shape))
if (!check_matmul_shape(a.shape, b.shape))
{
throw std::invalid_argument("a.shape could matmul with b.shape");
}
Expand Down Expand Up @@ -150,7 +150,7 @@ namespace deepx::tensorfunc
{
static void matmuladd(const Tensor<float> &a, const Tensor<float> &b, const float &alpha, const float &beta, Tensor<float> &c)
{
if (!check_shape(a.shape, b.shape))
if (!check_matmul_shape(a.shape, b.shape))
{
throw std::invalid_argument("a.shape could matmul with b.shape");
}
Expand Down Expand Up @@ -208,7 +208,7 @@ namespace deepx::tensorfunc
{
static void matmuladd(const Tensor<double> &a, const Tensor<double> &b, const double &alpha, const double &beta, Tensor<double> &c)
{
if (!check_shape(a.shape, b.shape))
if (!check_matmul_shape(a.shape, b.shape))
{
throw std::invalid_argument("a.shape could matmul with b.shape");
}
Expand Down Expand Up @@ -261,4 +261,4 @@ namespace deepx::tensorfunc
}
};
}
#endif // DEEPX_TENSORFUNC_MATMUL_HPP
#endif // DEEPX_TENSORFUNC_MATMUL_CBLAS_HPP
80 changes: 80 additions & 0 deletions excuter/op-mem-ompsimd/src/deepx/tf/matmul.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#ifndef DEEPX_TF_MATMUL_HPP
#define DEEPX_TF_MATMUL_HPP

#include "deepx/tf/tf.hpp"
#include "deepx/dtype.hpp"
#include "deepx/dtype_ompsimd.hpp"
#include "deepx/tensorfunc/matmul.hpp"
#include "deepx/tensorfunc/matmul_cblas.hpp"
#include "deepx/tensorfunc/matmul_miaobyte.hpp"
namespace deepx::tf
{
template <typename Author>
class MatMul : public TF
{
public:
MatMul(const vector<Param> &args, const vector<Param> &returns)
{
this->name = "matmul";
this->author = Author::name();
this->args = args;
this->returns = returns;
}

MatMul(string text)
{
this->parse(text);
this->author = Author::name();
if (this->name != "matmul")
{
throw std::runtime_error("Invalid name: " + this->name);
}
}
string math_formula() const override
{
return "T3=T1 @ T2";
}
shared_ptr<TF> clone() const override
{
return make_shared<MatMul<Author>>(*this);
}
int run(shared_ptr<MemBase> mem, string &error) override
{
Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
Precision b_type = mem->gettensor(this->args[1].textvalue).get()->shape.dtype;
Precision c_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
if (a_type != b_type || a_type != c_type)
{
error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(b_type) + " != " + precision_str(c_type);
return 1;
}
switch (a_type)
{
case Precision::Float64:
tensorfunc::matmul<Author, double>(*mem->gettensor<double>(this->args[0].textvalue), *mem->gettensor<double>(this->args[1].textvalue), *mem->gettensor<double>(this->returns[0].textvalue));
break;
case Precision::Float32:
tensorfunc::matmul<Author, float>(*mem->gettensor<float>(this->args[0].textvalue), *mem->gettensor<float>(this->args[1].textvalue), *mem->gettensor<float>(this->returns[0].textvalue));
break;
case Precision::Int64:
tensorfunc::matmul<Author, int64_t>(*mem->gettensor<int64_t>(this->args[0].textvalue), *mem->gettensor<int64_t>(this->args[1].textvalue), *mem->gettensor<int64_t>(this->returns[0].textvalue));
break;
case Precision::Int32:
tensorfunc::matmul<Author, int32_t>(*mem->gettensor<int32_t>(this->args[0].textvalue), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int32_t>(this->returns[0].textvalue));
break;
case Precision::Int16:
tensorfunc::matmul<Author, int16_t>(*mem->gettensor<int16_t>(this->args[0].textvalue), *mem->gettensor<int16_t>(this->args[1].textvalue), *mem->gettensor<int16_t>(this->returns[0].textvalue));
break;
case Precision::Int8:
tensorfunc::matmul<Author, int8_t>(*mem->gettensor<int8_t>(this->args[0].textvalue), *mem->gettensor<int8_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
break;
default:
error = "Unsupported dtype: " + precision_str(a_type);
return 1;
}
return 0;
}
};
}

#endif
Loading