Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 13 additions & 19 deletions .github/workflows/github_actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
steps:
- name: checkout code
uses: actions/checkout@v3.2.0
with:
submodules: recursive
- name: setup riscv toolchain
run: |
mkdir /opt/riscv
Expand All @@ -29,30 +31,22 @@ jobs:

# for validate test cases only
check_test_cases:
runs-on: ubuntu-22.04
runs-on: ubuntu-22.04-arm
strategy:
matrix:
arch: [aarch64]
cxx_compiler: [g++-10, clang++-11]
cxx_compiler: [g++-12, clang++-14]
steps:
- name: checkout code
uses: actions/checkout@v3.2.0
- name: install dependencies
run: |
sudo apt-get update -q -y
sudo apt-get install -q -y "${{ matrix.cxx_compiler }}" make gcc
- name: build artifact
# The Github Action for non-x86 CPU
# https://github.com/uraimo/run-on-arch-action
uses: uraimo/run-on-arch-action@v2.5.0
with:
arch: ${{ matrix.arch }}
distro: ubuntu20.04
env: |
CXX: ${{ matrix.cxx_compiler }}
install: |
apt-get update -q -y
apt-get install -q -y "${{ matrix.cxx_compiler }}" make
apt-get install -q -y gcc
run: |
export ENABLE_TEST_ALL=true
make test
env:
CXX: ${{ matrix.cxx_compiler }}
ENABLE_TEST_ALL: "true"
run: make test

coding_style:
runs-on: ubuntu-22.04
Expand All @@ -63,5 +57,5 @@ jobs:
# clang-format version should be set
run: |
sudo apt-get install -q -y clang-format
sh scripts/check-format.sh
bash scripts/check-format.sh
shell: bash
195 changes: 177 additions & 18 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ FORCE_INLINE uint64x2_t vdupq_n_u64(uint64_t a);
FORCE_INLINE int8x8_t vcnt_s8(int8x8_t a);
FORCE_INLINE uint8x8_t vcnt_u8(uint8x8_t a);

FORCE_INLINE int64_t vget_lane_s64(int64x1_t a, const int b);
FORCE_INLINE uint64_t vget_lane_u64(uint64x1_t a, const int b);
FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b);
FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b);

/* vadd */
FORCE_INLINE int8x8_t vadd_s8(int8x8_t a, int8x8_t b) { return __riscv_vadd_vv_i8m1(a, b, 8); }

Expand Down Expand Up @@ -5422,24 +5427,56 @@ FORCE_INLINE int16_t vqshlh_s16(int16_t a, int16_t b) {

FORCE_INLINE int32_t vqshls_s32(int32_t a, int32_t b) {
if (b < 0) {
return a >> -b;
int32_t rshift = -b;
if (rshift >= 32) {
return (a < 0) ? -1 : 0;
}
return a >> rshift;
}
if ((INT32_MAX >> b) < a) {
if (b >= 32) {
if (a > 0) {
return INT32_MAX;
}
if (a < 0) {
return INT32_MIN;
}
return 0;
}
int64_t tmp = ((int64_t)a) << b;
if (tmp > INT32_MAX) {
return INT32_MAX;
} else {
return a << b;
}
if (tmp < INT32_MIN) {
return INT32_MIN;
}
return (int32_t)tmp;
}

FORCE_INLINE int64_t vqshld_s64(int64_t a, int64_t b) {
if (b < 0) {
return a >> -b;
int64_t rshift = -b;
if (rshift >= 64) {
return (a < 0) ? -1 : 0;
}
return a >> rshift;
}
if (b >= 64) {
if (a > 0) {
return INT64_MAX;
}
if (a < 0) {
return INT64_MIN;
}
return 0;
}
if ((INT64_MAX >> b) < a) {
__int128 tmp = ((__int128)a) << b;
if (tmp > INT64_MAX) {
return INT64_MAX;
} else {
return (int64_t)a << b;
}
if (tmp < INT64_MIN) {
return INT64_MIN;
}
return (int64_t)tmp;
}

FORCE_INLINE uint8_t vqshlb_u8(uint8_t a, int8_t b) {
Expand Down Expand Up @@ -5525,7 +5562,11 @@ FORCE_INLINE int32x2_t vqrshl_s32(int32x2_t a, int32x2_t b) {
return __riscv_vmerge_vvm_i32m1(shr, shl, positive_mask, 2);
}

// FORCE_INLINE int64x1_t vqrshl_s64(int64x1_t a, int64x1_t b);
FORCE_INLINE int64x1_t vqrshl_s64(int64x1_t a, int64x1_t b) {
int64_t a0 = vget_lane_s64(a, 0);
int64_t b0 = vget_lane_s64(b, 0);
return vdup_n_s64(vqrshld_s64(a0, b0));
}

FORCE_INLINE uint8x8_t vqrshl_u8(uint8x8_t a, int8x8_t b) {
vbool8_t positive_mask = __riscv_vmsgt_vx_i8m1_b8(b, 0, 8);
Expand Down Expand Up @@ -5566,7 +5607,11 @@ FORCE_INLINE uint32x2_t vqrshl_u32(uint32x2_t a, int32x2_t b) {
return __riscv_vmerge_vvm_u32m1(shr, shl, positive_mask, 2);
}

// FORCE_INLINE uint64x1_t vqrshl_u64(uint64x1_t a, int64x1_t b);
FORCE_INLINE uint64x1_t vqrshl_u64(uint64x1_t a, int64x1_t b) {
uint64_t a0 = vget_lane_u64(a, 0);
int64_t b0 = vget_lane_s64(b, 0);
return vdup_n_u64(vqrshld_u64(a0, b0));
}

FORCE_INLINE int8x16_t vqrshlq_s8(int8x16_t a, int8x16_t b) {
vbool8_t positive_mask = __riscv_vmsgt_vx_i8m1_b8(b, 0, 16);
Expand Down Expand Up @@ -5650,21 +5695,135 @@ FORCE_INLINE uint32x4_t vqrshlq_u32(uint32x4_t a, int32x4_t b) {

// FORCE_INLINE uint64x2_t vqrshlq_u64(uint64x2_t a, int64x2_t b);

// FORCE_INLINE int8_t vqrshlb_s8(int8_t a, int8_t b);
FORCE_INLINE int8_t vqrshlb_s8(int8_t a, int8_t b) {
if (b < 0) {
return ((int16_t)a + (1 << (-b - 1))) >> (-b);
} else {
return vqshlb_s8(a, b);
}
}

// FORCE_INLINE int16_t vqrshlh_s16(int16_t a, int16_t b);
FORCE_INLINE int16_t vqrshlh_s16(int16_t a, int16_t b) {
if (b < 0) {
return ((int32_t)a + (1 << (-b - 1))) >> (-b);
} else {
return vqshlh_s16(a, b);
}
}

// FORCE_INLINE int32_t vqrshls_s32(int32_t a, int32_t b);
FORCE_INLINE int32_t vqrshls_s32(int32_t a, int32_t b) {
if (b < 0) {
return ((int64_t)a + (1 << (-b - 1))) >> (-b);
} else {
return vqshls_s32(a, b);
}
}

// FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b);
FORCE_INLINE int64_t vqrshld_s64(int64_t a, int64_t b) {
if (b < 0) {
if (b <= -64) {
return 0;
}
uint64_t b_neg = (uint64_t)(-b);
// Rounded arithmetic right shift without __int128:
// a = q * 2^n + r, where q = a >> n and r are low n bits of a.
// Add round-to-nearest increment (2^(n-1)) to r and carry into q.
int64_t q = a >> b_neg;
uint64_t mask = (UINT64_C(1) << b_neg) - 1;
uint64_t r = ((uint64_t)a) & mask;
uint64_t carry = (r + (UINT64_C(1) << (b_neg - 1))) >> b_neg;
return q + (int64_t)carry;
} else {
if (b >= 64) {
if (a > 0) {
return INT64_MAX;
}
if (a < 0) {
return INT64_MIN;
}
return 0;
}
return vqshld_s64(a, b);
}
}

// FORCE_INLINE uint8_t vqrshlb_u8(uint8_t a, int8_t b);
FORCE_INLINE uint8_t vqrshlb_u8(uint8_t a, int8_t b) {
if (b < 0) {
if (b <= -8) {
return 0;
}
uint32_t b_neg = (uint32_t)(-b);
uint32_t q = (uint32_t)a >> b_neg;
uint32_t mask = (UINT32_C(1) << b_neg) - 1;
uint32_t r = (uint32_t)a & mask;
uint32_t carry = (r + (UINT32_C(1) << (b_neg - 1))) >> b_neg;
return (uint8_t)(q + carry);
} else {
if (b >= 8) {
return a == 0 ? 0 : UINT8_MAX;
}
return vqshlb_u8(a, b);
}
}

// FORCE_INLINE uint16_t vqrshlh_u16(uint16_t a, int16_t b);
FORCE_INLINE uint16_t vqrshlh_u16(uint16_t a, int16_t b) {
if (b < 0) {
if (b <= -16) {
return 0;
}
uint32_t b_neg = (uint32_t)(-b);
uint32_t q = (uint32_t)a >> b_neg;
uint32_t mask = (UINT32_C(1) << b_neg) - 1;
uint32_t r = (uint32_t)a & mask;
uint32_t carry = (r + (UINT32_C(1) << (b_neg - 1))) >> b_neg;
return (uint16_t)(q + carry);
} else {
if (b >= 16) {
return a == 0 ? 0 : UINT16_MAX;
}
return vqshlh_u16(a, b);
}
}

// FORCE_INLINE uint32_t vqrshls_u32(uint32_t a, int32_t b);
FORCE_INLINE uint32_t vqrshls_u32(uint32_t a, int32_t b) {
if (b < 0) {
if (b <= -32) {
return 0;
}
uint64_t b_neg = (uint64_t)(-b);
uint64_t q = (uint64_t)a >> b_neg;
uint64_t mask = (UINT64_C(1) << b_neg) - 1;
uint64_t r = (uint64_t)a & mask;
uint64_t carry = (r + (UINT64_C(1) << (b_neg - 1))) >> b_neg;
return (uint32_t)(q + carry);
} else {
if (b >= 32) {
return a == 0 ? 0 : UINT32_MAX;
}
return vqshls_u32(a, b);
}
}

// FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b);
FORCE_INLINE uint64_t vqrshld_u64(uint64_t a, int64_t b) {
if (b < 0) {
if (b <= -64) {
return 0;
}
uint64_t b_neg = (uint64_t)(-b);
// Rounded logical right shift without __uint128_t:
// split into quotient/remainder at 2^n and propagate rounding carry.
uint64_t q = a >> b_neg;
uint64_t mask = (UINT64_C(1) << b_neg) - 1;
uint64_t r = a & mask;
uint64_t carry = (r + (UINT64_C(1) << (b_neg - 1))) >> b_neg;
return q + carry;
} else {
if (b >= 64) {
return a == 0 ? 0 : UINT64_MAX;
}
return vqshld_u64(a, b);
}
}

FORCE_INLINE int8x8_t vshr_n_s8(int8x8_t a, const int b) {
const int imm = b - (b >> 3);
Expand Down
29 changes: 15 additions & 14 deletions tests/common.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "common.h"
#include <cmath>
#include <math.h>
#include <stdlib.h>

namespace NEON2RVV {
int32_t NaN = ~0;
Expand Down Expand Up @@ -596,7 +597,7 @@ result_t validate_double_pair(double a, double b) {
// We do an integer (binary) compare rather than a
// floating point compare to take NaNs and infinities
// into account as well.
if (std::isnan(a) && std::isnan(b)) {
if (isnan(a) && isnan(b)) {
return TEST_SUCCESS;
}
return (*ua) == (*ub) ? TEST_SUCCESS : TEST_FAIL;
Expand Down Expand Up @@ -663,16 +664,16 @@ result_t validate_float_error(float32x4_t a, float f0, float f1, float f2, float
float df1 = fabsf((t[1] - f1) / f1);
float df2 = fabsf((t[2] - f2) / f2);
float df3 = fabsf((t[3] - f3) / f3);
if ((std::isnan(t[0]) && std::isnan(f0)) || (t[0] == 0 && f0 == 0) || (std::isinf(t[0]) && std::isinf(f0))) {
if ((isnan(t[0]) && isnan(f0)) || (t[0] == 0 && f0 == 0) || (isinf(t[0]) && isinf(f0))) {
df0 = 0;
}
if ((std::isnan(t[1]) && std::isnan(f1)) || (t[1] == 0 && f1 == 0) || (std::isinf(t[1]) && std::isinf(f1))) {
if ((isnan(t[1]) && isnan(f1)) || (t[1] == 0 && f1 == 0) || (isinf(t[1]) && isinf(f1))) {
df1 = 0;
}
if ((std::isnan(t[2]) && std::isnan(f2)) || (t[2] == 0 && f2 == 0) || (std::isinf(t[2]) && std::isinf(f2))) {
if ((isnan(t[2]) && isnan(f2)) || (t[2] == 0 && f2 == 0) || (isinf(t[2]) && isinf(f2))) {
df2 = 0;
}
if ((std::isnan(t[3]) && std::isnan(f3)) || (t[3] == 0 && f3 == 0) || (std::isinf(t[3]) && std::isinf(f3))) {
if ((isnan(t[3]) && isnan(f3)) || (t[3] == 0 && f3 == 0) || (isinf(t[3]) && isinf(f3))) {
df3 = 0;
}
ASSERT_RETURN(df0 < err);
Expand All @@ -686,10 +687,10 @@ result_t validate_float_error(float32x2_t a, float f0, float f1, float err) {
const float *t = (const float *)&a;
float df0 = fabsf((t[0] - f0) / f0);
float df1 = fabsf((t[1] - f1) / f1);
if ((std::isnan(t[0]) && std::isnan(f0)) || (t[0] == 0 && f0 == 0) || (std::isinf(t[0]) && std::isinf(f0))) {
if ((isnan(t[0]) && isnan(f0)) || (t[0] == 0 && f0 == 0) || (isinf(t[0]) && isinf(f0))) {
df0 = 0;
}
if ((std::isnan(t[1]) && std::isnan(f1)) || (t[1] == 0 && f1 == 0) || (std::isinf(t[1]) && std::isinf(f1))) {
if ((isnan(t[1]) && isnan(f1)) || (t[1] == 0 && f1 == 0) || (isinf(t[1]) && isinf(f1))) {
df1 = 0;
}
ASSERT_RETURN(df0 < err);
Expand All @@ -699,7 +700,7 @@ result_t validate_float_error(float32x2_t a, float f0, float f1, float err) {

result_t validate_float_error(float32_t a, float f0, float err) {
float df0 = fabsf((a - f0) / f0);
if ((std::isnan(a) && std::isnan(f0)) || (a == 0 && f0 == 0) || (std::isinf(a) && std::isinf(f0))) {
if ((isnan(a) && isnan(f0)) || (a == 0 && f0 == 0) || (isinf(a) && isinf(f0))) {
df0 = 0;
}
ASSERT_RETURN(df0 < err);
Expand Down Expand Up @@ -755,10 +756,10 @@ result_t validate_double_error(float64x2_t a, double d0, double d1, double err)
const double *t = (const double *)&a;
double td0 = fabs((t[0] - d0) / d0);
double td1 = fabs((t[1] - d1) / d1);
if (std::isnan(t[0]) && std::isnan(d0)) {
if (isnan(t[0]) && isnan(d0)) {
td0 = 0;
}
if (std::isnan(t[1]) && std::isnan(d1)) {
if (isnan(t[1]) && isnan(d1)) {
td1 = 0;
}
ASSERT_RETURN(td0 < err);
Expand All @@ -769,16 +770,16 @@ result_t validate_double_error(float64x2_t a, double d0, double d1, double err)
result_t validate_double_error(float64x1_t a, double d0, double err) {
const double *t = (const double *)&a;
double td0 = fabs((t[0] - d0) / d0);
if (std::isnan(t[0]) && std::isnan(d0)) {
if (isnan(t[0]) && isnan(d0)) {
td0 = 0;
}
ASSERT_RETURN(td0 < err);
return TEST_SUCCESS;
}

result_t validate_double_error(double a, double d0, double err) {
double df0 = abs((a - d0) / d0);
if ((std::isnan(a) && std::isnan(d0)) || (a == 0 && d0 == 0) || (std::isinf(a) && std::isinf(d0))) {
double df0 = fabs((a - d0) / d0);
if ((isnan(a) && isnan(d0)) || (a == 0 && d0 == 0) || (isinf(a) && isinf(d0))) {
df0 = 0;
}
ASSERT_RETURN(df0 < err);
Expand Down
Loading
Loading