29 changes: 29 additions & 0 deletions .github/ISSUE_TEMPLATE/operator.md
@@ -0,0 +1,29 @@
---
name: New operator
about: Request the implementation of a new operator
title: '[Operator] '
labels: enhancement, operator
assignees: ''
---

## New operator
The mathematical expression of this operator is:

## Affected components

### front
1.
2.

### Engine
1.
2.

## Additional notes

<!-- Add any other relevant information here, such as:
- Reference implementation (e.g., the PyTorch implementation)
- Performance requirements
- Test cases
- Other considerations
-->
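For illustration only (not part of the template), a filled-in math field for an elementwise operator, such as the sin op registered later in this PR, could read:

```latex
% Hypothetical example of the "mathematical expression" field
% for an elementwise sin operator; C shares A's shape.
C_i = \sin(A_i), \qquad i = 0, \dots, N-1
```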
2 changes: 1 addition & 1 deletion doc/excuter/op-mem-cuda/list.md
@@ -80,7 +80,7 @@
| equal | miaobyte | T1==T2->mask | equal(tensor<any> A, tensor<any> B, var<float32> epsilon)->(tensor<bool> mask) |
| mulscalar | miaobyte | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
| div | miaobyte | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8> A)->(tensor<int64|int32|int16|int8> C) |
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8|bool> A)->(tensor<int64|int32|int16|int8|bool> C) |
| max | miaobyte | T3=max(T1, T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| pow | miaobyte | T3=pow(T1, T2) | pow(tensor<float64|float32> A, tensor<float64|float32> B)->(tensor<float64|float32> C) |

5 changes: 4 additions & 1 deletion doc/excuter/op-mem-ompsimd/list.md
@@ -56,11 +56,14 @@
| equalscalar | miaobyte | mask=equal(T1,scalar) | equalscalar(tensor<any> A, var<any> scalar, var<float32> eposilon)->(tensor<bool> mask) |
| min | miaobyte | T3=min(T1,T2) | min(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| maxscalar | miaobyte | T3=max(T1,scalar) | maxscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| tan | miaobyte | T3=tan(T1) | tan(tensor<any> A)->(tensor<any> C) |
| sin | miaobyte | T3=sin(T1) | sin(tensor<any> A)->(tensor<any> C) |
| divscalar | miaobyte | T3=T1/scalar | divscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| log | miaobyte | T3=log(T1) | log(tensor<any> A)->(tensor<any> C) |
| addscalar | miaobyte | T3=T1+scalar | addscalar(tensor<any> a, var<any> scalar)->(tensor<any> c) |
| greater | miaobyte | mask=greater(T1,T2) | greater(tensor<any> A, tensor<any> B)->(tensor<bool> mask) |
| lessscalar | miaobyte | mask=less(T1,scalar) | lessscalar(tensor<any> A, var<any> scalar)->(tensor<bool> mask) |
| cos | miaobyte | T3=cos(T1) | cos(tensor<any> A)->(tensor<any> C) |
| notequalscalar | miaobyte | mask=notequal(T1,scalar) | notequalscalar(tensor<any> A, var<any> scalar, var<float32> epsilon)->(tensor<bool> mask) |
| minscalar | miaobyte | T3=min(T1,scalar) | minscalar(tensor<any> A, var<any> scalar)->(tensor<any> C) |
| rpowscalar | miaobyte | T3=scalar^T1 | rpowscalar(var<float32> scalar, tensor<any> A)->(tensor<any> C) |
@@ -78,7 +81,7 @@
| equal | miaobyte | equal(T1,T2)->mask | equal(tensor<any> A, tensor<any> B, var<float32> eposilon)->(tensor<bool> mask) |
| mulscalar | miaobyte | T3=T1*scalar | mulscalar(tensor<any> A, var<any> b)->(tensor<any> C) |
| div | miaobyte | T3=T1/T2 | div(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8> A)->(tensor<int64|int32|int16|int8> C) |
| invert | miaobyte | T3=~T1 | invert(tensor<int64|int32|int16|int8|bool> A)->(tensor<int64|int32|int16|int8|bool> C) |
| max | miaobyte | T3=max(T1,T2) | max(tensor<any> A, tensor<any> B)->(tensor<any> C) |
| pow | miaobyte | T3=T1^T2 | pow(tensor<any> A, tensor<any> B)->(tensor<any> C) |

4 changes: 2 additions & 2 deletions excuter/op-mem-cuda/src/client/tfs.cpp
@@ -280,11 +280,11 @@ namespace deepx::tf
// invert
tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
})));

tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(
6 changes: 6 additions & 0 deletions excuter/op-mem-cuda/src/deepx/tensorfunc/cuda.hpp
@@ -80,6 +80,12 @@ namespace deepx::tensorfunc
return {size, host_data};
}

inline void throwcudaerror(const std::string& msg,cudaError_t err){
if (err != cudaSuccess)
{
throw std::runtime_error(msg + "\n" + std::string(cudaGetErrorString(err)));
}
}
}

#endif
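The new `throwcudaerror` helper folds the repeated `cudaSuccess` check and `cudaGetErrorString` lookup into a single call, which the allocation change further below relies on. A minimal usage sketch follows; the `copy_to_host` wrapper is a hypothetical call site, not code from this PR:

```cpp
#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

// Mirrors the helper added above: throws std::runtime_error with the
// CUDA error string appended whenever err != cudaSuccess.
inline void throwcudaerror(const std::string &msg, cudaError_t err)
{
    if (err != cudaSuccess)
        throw std::runtime_error(msg + "\n" + std::string(cudaGetErrorString(err)));
}

// Hypothetical call site: any cudaError_t-returning runtime call can be
// wrapped directly, so the failure reason is preserved in the exception.
void copy_to_host(float *host_dst, const float *device_src, size_t n)
{
    throwcudaerror("Failed to cudaMemcpy " + std::to_string(n) + " floats",
                   cudaMemcpy(host_dst, device_src, n * sizeof(float),
                              cudaMemcpyDeviceToHost));
}
```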
@@ -406,6 +406,7 @@ namespace deepx::tensorfunc
template void launch_invert<int32_t>(const int32_t *a, int32_t *c, const int size);
template void launch_invert<int16_t>(const int16_t *a, int16_t *c, const int size);
template void launch_invert<int8_t>(const int8_t *a, int8_t *c, const int size);
template void launch_invert<bool>(const bool *a, bool *c, const int size);

}
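The new `launch_invert<bool>` instantiation implies the invert kernel must compile and behave sensibly for bool. Below is a minimal sketch of such a kernel; it is an assumption about its shape, not the repository's actual implementation. The bool branch matters because `~` on a promoted bool is always nonzero, so a plain bitwise NOT would map both true and false to true:

```cpp
#include <type_traits>

// Sketch of an element-wise invert kernel and launcher (hypothetical;
// the kernel shipped in op-mem-cuda may be organized differently).
template <typename T>
__global__ void invert_kernel(const T *a, T *c, const int size)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < size)
    {
        if constexpr (std::is_same_v<T, bool>)
            c[idx] = !a[idx];  // logical NOT: ~bool would always yield true
        else
            c[idx] = ~a[idx];  // integer types: bitwise NOT
    }
}

template <typename T>
void launch_invert(const T *a, T *c, const int size)
{
    const int threads = 256;
    const int blocks = (size + threads - 1) / threads;
    invert_kernel<T><<<blocks, threads>>>(a, c, size);
}
```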

@@ -19,8 +19,8 @@ namespace deepx::tensorfunc
T *data;
cudaError_t err = cudaMalloc(&data, size * sizeof(T));
if (err != cudaSuccess)
{
throw std::runtime_error("Failed to allocate Unified Memory");
{
throwcudaerror("Failed to cudaMalloc "+std::to_string(size) +" "+ precision_str(precision<T>()),err);
}
return data;
}
3 changes: 3 additions & 0 deletions excuter/op-mem-cuda/src/deepx/tf/elementwise_basic.hpp
@@ -1026,6 +1026,9 @@ namespace deepx::tf
case Precision::Int8:
tensorfunc::invert<Author>(*mem->gettensor<int8_t>(this->args[0].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
break;
case Precision::Bool:
tensorfunc::invert<Author>(*mem->gettensor<bool>(this->args[0].textvalue), *mem->gettensor<bool>(this->returns[0].textvalue));
break;
default:
error = "Unsupported dtype: " + precision_str(a_type);
return 1;
6 changes: 3 additions & 3 deletions excuter/op-mem-cuda/src/deepx/tf/elementwise_compare.hpp
@@ -636,7 +636,7 @@ namespace deepx::tf
{
Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
if (a_type != mask_type || mask_type != Precision::Bool)
if (mask_type != Precision::Bool)
{
error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);
return 1;
@@ -769,7 +769,7 @@
{
Precision a_type = mem->gettensor(this->args[0].textvalue).get()->shape.dtype;
Precision mask_type = mem->gettensor(this->returns[0].textvalue).get()->shape.dtype;
if (a_type != mask_type || mask_type != Precision::Bool)
if (mask_type != Precision::Bool)
{
error = "Type mismatch: " + precision_str(a_type) + " != " + precision_str(mask_type);
return 1;
@@ -916,7 +916,7 @@
}
else
{
tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
tensorfunc::Switch<Author, int8_t,int32_t>(mem->gettensors<int8_t>(this->getvector<string>(0)), *mem->gettensor<int32_t>(this->args[1].textvalue), *mem->gettensor<int8_t>(this->returns[0].textvalue));
}
break;
case Precision::Bool:
31 changes: 29 additions & 2 deletions excuter/op-mem-ompsimd/src/client/tfs.cpp
@@ -299,11 +299,11 @@ namespace deepx::tf
// invert author=miaobyte
tffactory.add_tf(std::make_shared<Invert<miaobyte>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
Param("A", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8),
Param("C", DataCategory::Tensor, Precision::Int64 | Precision::Int32 | Precision::Int16 | Precision::Int8|Precision::Bool),
})));
// sqrt author=miaobyte
tffactory.add_tf(std::make_shared<Sqrt<miaobyte>>(vector<Param>(
@@ -364,6 +364,33 @@ namespace deepx::tf
{
Param("C", DataCategory::Tensor, Precision::Any),
})));
// sin author=miaobyte
tffactory.add_tf(std::make_shared<Sin<miaobyte>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Any),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Any),
})));
// cos author=miaobyte
tffactory.add_tf(std::make_shared<Cos<miaobyte>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Any),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Any),
})));
// tan author=miaobyte
tffactory.add_tf(std::make_shared<Tan<miaobyte>>(vector<Param>(
{
Param("A", DataCategory::Tensor, Precision::Any),
}),
vector<Param>(
{
Param("C", DataCategory::Tensor, Precision::Any),
})));
// max author=miaobyte
tffactory.add_tf(std::make_shared<Max<miaobyte>>(vector<Param>(
{
111 changes: 28 additions & 83 deletions excuter/op-mem-ompsimd/src/deepx/tensorfunc/elementwise_miaobyte.hpp
@@ -491,30 +491,11 @@ namespace deepx::tensorfunc
{
output.shape.rangeElementwiseParallel([&input, &output](int i, int i_end)
{
const ScalableTag<T> tag;
const size_t lanes = Lanes(tag);
size_t j=0;

// 1. Handle the leading unaligned elements
while (j < i_end && !IsAligned(tag,input.data + i + j)) {
output.data[i+j] = std::sin(input.data[i+j]);
++j;
}

// 2. Process the aligned middle portion
size_t aligned_end=i_end-(i_end%lanes);
for (; j+lanes<=aligned_end; j += lanes )
{
auto vec = Load(tag, input.data + i + j);
auto vec_result = Sin(vec);
Store(vec_result, tag, output.data + i + j);
}

// 3. Process the remaining tail elements
for (;j<i_end;j++)
{
output.data[i+j] = std::sin(input.data[i+j]);
} });
for (int j = 0; j < i_end; j++)
{
output.data[i+j] = std::sin(input.data[i+j]);
}
});
}
else
{
@@ -533,30 +514,11 @@
{
output.shape.rangeElementwiseParallel([&input, &output](int i, int i_end)
{
const ScalableTag<T> tag;
const size_t lanes = Lanes(tag);
size_t j=0;

// 1. Handle the leading unaligned elements
while (j < i_end && !IsAligned(tag,input.data + i + j)) {
output.data[i+j] = std::cos(input.data[i+j]);
++j;
}

// 2. Process the aligned middle portion
size_t aligned_end=i_end-(i_end%lanes);
for (; j+lanes<=aligned_end; j += lanes )
{
auto vec = Load(tag, input.data + i + j);
auto vec_result = Cos(vec);
Store(vec_result, tag, output.data + i + j);
}

// 3. Process the remaining tail elements
for (;j<i_end;j++)
{
output.data[i+j] = std::cos(input.data[i+j]);
} });
for (int j = 0; j < i_end; j++)
{
output.data[i+j] = std::cos(input.data[i+j]);
}
});
}
else
{
@@ -575,37 +537,19 @@
{
output.shape.rangeElementwiseParallel([&input, &output](int i, int i_end)
{
const ScalableTag<T> tag;
const size_t lanes = Lanes(tag);
size_t j=0;

// 1. Handle the leading unaligned elements
while (j < i_end && !IsAligned(tag,input.data + i + j)) {
output.data[i+j] = std::tan(input.data[i+j]);
++j;
}

// 2. Process the aligned middle portion
size_t aligned_end=i_end-(i_end%lanes);
for (; j+lanes<=aligned_end; j += lanes )
{
auto vec = Load(tag, input.data + i + j);
auto vec_result = Tan(vec);
Store(vec_result, tag, output.data + i + j);
}

// 3. Process the remaining tail elements
for (;j<i_end;j++)
{
output.data[i+j] = std::tan(input.data[i+j]);
} });
for (int j = 0; j < i_end; j++)
{
output.data[i+j] = std::tan(input.data[i+j]);
}
});
}
else
{
throw std::invalid_argument("shape mismatch");
}
}
};


template <typename T>
struct maxDispatcher<miaobyte, T>
@@ -784,16 +728,17 @@
{
A.shape.rangeElementwiseParallel([&A, &B, &mask, epsilon](int i, int i_end)
{
for (int j = 0; j < i_end; j++)
{
if (epsilon == 0)
{
mask.data[i+j]=A.data[i+j]==B.data[i+j];
}
else{
mask.data[i+j]=std::abs(A.data[i+j]-B.data[i+j])<=epsilon;
}
} });
for (int j = 0; j < i_end; j++)
{
if (epsilon == 0)
{
mask.data[i + j] = A.data[i + j] == B.data[i + j];
}
else
{
mask.data[i + j] = std::abs(A.data[i + j] - B.data[i + j]) <= epsilon;
}
} });
}
else
{
@@ -995,7 +940,7 @@
{
for (int j = 0; j < i_end; j++)
{
int which_tensor=cases.data[i+j];
casesT which_tensor=cases.data[i+j];
C.data[i+j]=tensors[which_tensor]->data[i+j];
} });
}
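For reference, the Switch dispatcher above selects, per element, which input tensor feeds the output: `C[i] = tensors[cases[i]]->data[i]`; the change here indexes with the cases tensor's own element type (`casesT`) instead of a hard-coded `int`. A simplified CPU sketch of that semantics (hypothetical, not the dispatcher's actual code):

```cpp
#include <vector>

// Element-wise "switch": cases[i] picks which source buffer supplies
// element i of the output.
template <typename T, typename CasesT>
void switch_select(const std::vector<const T *> &tensors,
                   const CasesT *cases, T *out, int size)
{
    for (int i = 0; i < size; ++i)
    {
        CasesT which = cases[i];   // previously narrowed to int
        out[i] = tensors[which][i];
    }
}
```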