Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion r/src/vendor/highway/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ To prep the source tree starting from the highway root dir, run:
```bash
mkdir -p lib-copy
cp -r hwy lib-copy
rm -r lib-copy/hwy/{examples,tests,*_test.cc} lib-copy/hwy/contrib/{bit_pack,dot,image,sort,unroller}
rm -r lib-copy/hwy/{examples,tests,*_test.cc} lib-copy/hwy/contrib/{bit_pack,dot,image,sort,unroller,random,thread_pool,matvec}
```

The files in `manual-build` are custom scripts for manually building the library without a cmake dependency
Expand Down
117 changes: 117 additions & 0 deletions r/src/vendor/highway/hwy/abort.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright 2019 Google LLC
// Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: BSD-3-Clause

#include "hwy/abort.h"

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

#include <atomic>
#include <string>

#include "hwy/base.h"

#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
#include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace
#endif

namespace hwy {

namespace {

std::atomic<WarnFunc>& AtomicWarnFunc() {
static std::atomic<WarnFunc> func;
return func;
}

std::atomic<AbortFunc>& AtomicAbortFunc() {
static std::atomic<AbortFunc> func;
return func;
}

std::string GetBaseName(std::string const& file_name) {
auto last_slash = file_name.find_last_of("/\\");
return file_name.substr(last_slash + 1);
}

} // namespace

// Returning a reference is unfortunately incompatible with `std::atomic`, which
// is required to safely implement `SetWarnFunc`. As a workaround, we store a
// copy here, update it when called, and return a reference to the copy. This
// has the added benefit of protecting the actual pointer from modification.
HWY_DLLEXPORT WarnFunc& GetWarnFunc() {
static WarnFunc func;
func = AtomicWarnFunc().load();
return func;
}

HWY_DLLEXPORT AbortFunc& GetAbortFunc() {
static AbortFunc func;
func = AtomicAbortFunc().load();
return func;
}

HWY_DLLEXPORT WarnFunc SetWarnFunc(WarnFunc func) {
return AtomicWarnFunc().exchange(func);
}

HWY_DLLEXPORT AbortFunc SetAbortFunc(AbortFunc func) {
return AtomicAbortFunc().exchange(func);
}

HWY_DLLEXPORT void HWY_FORMAT(3, 4)
Warn(const char* file, int line, const char* format, ...) {
char buf[800];
va_list args;
va_start(args, format);
vsnprintf(buf, sizeof(buf), format, args);
va_end(args);

WarnFunc handler = AtomicWarnFunc().load();
if (handler != nullptr) {
handler(file, line, buf);
} else {
fprintf(stderr, "Warn at %s:%d: %s\n", GetBaseName(file).data(), line, buf);
}
}

HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
Abort(const char* file, int line, const char* format, ...) {
char buf[800];
va_list args;
va_start(args, format);
vsnprintf(buf, sizeof(buf), format, args);
va_end(args);

AbortFunc handler = AtomicAbortFunc().load();
if (handler != nullptr) {
handler(file, line, buf);
} else {
fprintf(stderr, "Abort at %s:%d: %s\n", GetBaseName(file).data(), line,
buf);
}

// If compiled with any sanitizer, they can also print a stack trace.
#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
__sanitizer_print_stack_trace();
#endif // HWY_IS_*
fflush(stderr);

// Now terminate the program:
#if HWY_ARCH_RISCV
exit(1); // trap/abort just freeze Spike.
#elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC && !HWY_ARCH_ARM
// Facilitates breaking into a debugger, but don't use this in non-debug
// builds because it looks like "illegal instruction", which is misleading.
// Also does not work on Arm.
__builtin_trap();
#else
abort(); // Compile error without this due to HWY_NORETURN.
#endif
}

} // namespace hwy
44 changes: 44 additions & 0 deletions r/src/vendor/highway/hwy/abort.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: BSD-3-Clause

#ifndef HIGHWAY_HWY_ABORT_H_
#define HIGHWAY_HWY_ABORT_H_

#include "hwy/highway_export.h"

namespace hwy {

// Interfaces for custom Warn/Abort handlers.
typedef void (*WarnFunc)(const char* file, int line, const char* message);

typedef void (*AbortFunc)(const char* file, int line, const char* message);

// Returns current Warn() handler, or nullptr if no handler was yet registered,
// indicating Highway should print to stderr.
// DEPRECATED because this is thread-hostile and prone to misuse (modifying the
// underlying pointer through the reference).
HWY_DLLEXPORT WarnFunc& GetWarnFunc();

// Returns current Abort() handler, or nullptr if no handler was yet registered,
// indicating Highway should print to stderr and abort.
// DEPRECATED because this is thread-hostile and prone to misuse (modifying the
// underlying pointer through the reference).
HWY_DLLEXPORT AbortFunc& GetAbortFunc();

// Sets a new Warn() handler and returns the previous handler, which is nullptr
// if no previous handler was registered, and should otherwise be called from
// the new handler. Thread-safe.
HWY_DLLEXPORT WarnFunc SetWarnFunc(WarnFunc func);

// Sets a new Abort() handler and returns the previous handler, which is nullptr
// if no previous handler was registered, and should otherwise be called from
// the new handler. If all handlers return, then Highway will terminate the app.
// Thread-safe.
HWY_DLLEXPORT AbortFunc SetAbortFunc(AbortFunc func);

// Abort()/Warn() and HWY_ABORT/HWY_WARN are declared in base.h.

} // namespace hwy

#endif // HIGHWAY_HWY_ABORT_H_
23 changes: 14 additions & 9 deletions r/src/vendor/highway/hwy/aligned_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
namespace hwy {
namespace {

#if HWY_ARCH_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
__riscv_v_intrinsic >= 11000
// Not actually an upper bound on the size, but this value prevents crossing a
// 4K boundary (relevant on Andes).
constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
Expand All @@ -36,9 +37,11 @@ constexpr size_t kAlignment = HWY_ALIGNMENT;
#endif

#if HWY_ARCH_X86
// On x86, aliasing can only occur at multiples of 2K, but that's too wasteful
// if this is used for single-vector allocations. 256 is more reasonable.
constexpr size_t kAlias = kAlignment * 4;
// On x86, aliasing can only occur at multiples of 2K. To reduce the chance of
// allocations being equal mod 2K, we round up to kAlias and add a cyclic
// offset which is a multiple of kAlignment. Rounding up to only 1K decreases
// the number of alias-free allocations, but also wastes less memory.
constexpr size_t kAlias = HWY_MAX(kAlignment, 1024);
#else
constexpr size_t kAlias = kAlignment;
#endif
Expand All @@ -52,9 +55,10 @@ struct AllocationHeader {

// Returns a 'random' (cyclical) offset for AllocateAlignedBytes.
size_t NextAlignedOffset() {
static std::atomic<uint32_t> next{0};
constexpr uint32_t kGroups = kAlias / kAlignment;
const uint32_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups;
static std::atomic<size_t> next{0};
static_assert(kAlias % kAlignment == 0, "kAlias must be a multiple");
constexpr size_t kGroups = kAlias / kAlignment;
const size_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups;
const size_t offset = kAlignment * group;
HWY_DASSERT((offset % kAlignment == 0) && offset <= kAlias);
return offset;
Expand All @@ -79,8 +83,7 @@ HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size,
// To avoid wasting space, the header resides at the end of `unused`,
// which therefore cannot be empty (offset == 0).
if (offset == 0) {
offset = kAlignment; // = RoundUpTo(sizeof(AllocationHeader), kAlignment)
static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
offset = RoundUpTo(sizeof(AllocationHeader), kAlignment);
}

const size_t allocated_size = kAlias + offset + payload_size;
Expand All @@ -99,10 +102,12 @@ HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size,
aligned &= ~(kAlias - 1);

const uintptr_t payload = aligned + offset; // still aligned
HWY_DASSERT(payload % kAlignment == 0);

// Stash `allocated` and payload_size inside header for FreeAlignedBytes().
// The allocated_size can be reconstructed from the payload_size.
AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
HWY_DASSERT(reinterpret_cast<uintptr_t>(header) >= aligned);
header->allocated = allocated;
header->payload_size = payload_size;

Expand Down
Loading