Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 18 additions & 21 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,6 @@ env:
CC: clang

jobs:
format-check:
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v4

- name: Format source code
run: |
find lib test \
-type f \
-a \( -name "*.c" -o -name "*.cpp" -o -name "*.h" \) \
-print0 \
| xargs -0 clang-format-14 -i

- name: Format check
run: |
git status --porcelain --untracked-files=no
git status --porcelain --untracked-files=no | xargs -o -I {} test -z \"{}\"

codespell:
runs-on: ubuntu-22.04

Expand All @@ -44,19 +25,35 @@ jobs:
- llvm-version: 14
os: ubuntu-22.04
preset: release
cuda: 11.8.0
- llvm-version: 19
os: ubuntu-24.04
preset: release
cuda: 12.6.0
- llvm-version: 19
os: ubuntu-24.04
preset: release
cuda: 12.6.0
cusan-option: -DCUSAN_DEVICE_SYNC_CALLBACKS=ON

runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v4

- name: LLVM apt
if: ${{ matrix.llvm-version == 19 }}
run: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main" | sudo tee /etc/apt/sources.list.d/llvm-19.list

- name: Update apt
run: sudo apt-get update

- uses: Jimver/cuda-toolkit@v0.2.19
id: cuda-toolkit
with:
cuda: '11.8.0'
cuda: '${{ matrix.cuda }}'
method: network
sub-packages: '["nvcc", "cudart", "cudart-dev"]'
non-cuda-sub-packages: '["libcurand", "libcurand-dev"]'
Expand All @@ -78,7 +75,7 @@ jobs:
echo "EXTERNAL_LIT=/usr/lib/llvm-${{ matrix.llvm-version }}/build/utils/lit/lit.py" >> $GITHUB_ENV

- name: Configure CuSan
run: cmake -B build --preset ${{ matrix.preset }} -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT}
run: cmake -B build --preset ${{ matrix.preset }} -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} ${{ matrix.cusan-option }}

- name: Build CuSan
run: cmake --build build --parallel 2
Expand Down
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ project(
)

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

list(
APPEND
Expand All @@ -28,11 +30,6 @@ add_subdirectory(externals)
add_subdirectory(lib)
add_subdirectory(scripts)

if(PROJECT_IS_TOP_LEVEL)
enable_testing()
add_subdirectory(test)
endif()

write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/cusanConfigVersion.cmake
VERSION ${PROJECT_VERSION}
Expand Down Expand Up @@ -66,3 +63,8 @@ if(PROJECT_IS_TOP_LEVEL)
QUIET_ON_EMPTY
)
endif()

if(PROJECT_IS_TOP_LEVEL)
enable_testing()
add_subdirectory(test)
endif()
4 changes: 3 additions & 1 deletion cmake/cusanToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}"
)

find_package(CUDAToolkit REQUIRED)
find_package(MPI REQUIRED)
find_package(MPI QUIET)

option(CUSAN_TEST_CONFIGURE_IDE "Add targets for tests to help the IDE with completion etc." ON)
mark_as_advanced(CUSAN_TEST_CONFIGURE_IDE)
Expand All @@ -41,6 +41,8 @@ option(CUSAN_FIBERPOOL "Use external fiber pool to manage ThreadSanitizer fibers
option(CUSAN_SOFTCOUNTER "Print runtime counters" OFF)
option(CUSAN_SYNC_DETAIL_LEVEL "Enable implicit sync analysis of memcpy/memset" ON)

option(CUSAN_DEVICE_SYNC_CALLBACKS "Enable runtime callbacks after sync calls" OFF)

option(CUSAN_TEST_WORKAROUNDS "Enable workarounds for MPI + TSan regarding runtime tests" ON)
mark_as_advanced(CUSAN_TEST_WORKAROUNDS)

Expand Down
7 changes: 4 additions & 3 deletions cmake/modules/cusan-format.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ function(cusan_target_format target comment)
filter_dir(${exclude})
endforeach()

find_program(FORMAT_COMMAND
find_program(CLANG_FORMAT_COMMAND
NAMES clang-format-${LLVM_VERSION_MAJOR} clang-format)
if(FORMAT_COMMAND)
mark_as_advanced(CLANG_FORMAT_COMMAND)
if(CLANG_FORMAT_COMMAND)
add_custom_target(${target}
COMMAND ${FORMAT_COMMAND} -i -style=file ${ARG_OTHER} ${ARG_UNPARSED_ARGUMENTS}
COMMAND ${CLANG_FORMAT_COMMAND} -i -style=file ${ARG_OTHER} ${ARG_UNPARSED_ARGUMENTS}
${ALL_CXX_FILES}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "${comment}"
Expand Down
6 changes: 6 additions & 0 deletions externals/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ if(CUSAN_TYPEART)
)
FetchContent_MakeAvailable(typeart)
endif()

mark_as_advanced(
FETCHCONTENT_BASE_DIR
FETCHCONTENT_FULLY_DISCONNECTED
FETCHCONTENT_QUIET
FETCHCONTENT_UPDATES_DISCONNECTED)
83 changes: 83 additions & 0 deletions lib/pass/AnalysisTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,48 @@ llvm::SmallVector<Value*> CudaFree::map_arguments(IRBuilder<>& irb, llvm::ArrayR
return {ptr};
}

// CudaMallocPitch

CudaMallocPitch::CudaMallocPitch(callback::FunctionDecl* decls) {
setup("cudaMallocPitch", &decls->cusan_device_alloc.f);
}
llvm::SmallVector<Value*> CudaMallocPitch::map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args) {
//(void** devPtr, size_t* pitch, size_t width, size_t height )
assert(args.size() == 4);
auto* ptr = irb.CreateBitOrPointerCast(args[0], get_void_ptr_type(irb));

//"The function may pad the allocation"
//"*pitch by cudaMallocPitch() is the width in bytes of the allocation"
auto* pitch = irb.CreateLoad(irb.getIntPtrTy(irb.GetInsertBlock()->getModule()->getDataLayout()), args[1]);
// auto* width = args[2];
auto* height = args[3];

auto* real_size = irb.CreateMul(pitch, height);
return {ptr, real_size};
}

// CudaSetDevice

CudaSetDevice::CudaSetDevice(callback::FunctionDecl* decls) {
setup("cudaSetDevice", &decls->cusan_set_device.f);
}
llvm::SmallVector<Value*> CudaSetDevice::map_arguments(IRBuilder<>&, llvm::ArrayRef<Value*> args) {
// cudaSetDevice ( int device )
assert(args.size() == 1);
return {args[0]};
}

// CudaCHooseDevice

CudaChooseDevice::CudaChooseDevice(callback::FunctionDecl* decls) {
setup("cudaChooseDevice", &decls->cusan_choose_device.f);
}
llvm::SmallVector<Value*> CudaChooseDevice::map_arguments(IRBuilder<>&, llvm::ArrayRef<Value*> args) {
// cudaChooseDevice ( int* device, const cudaDeviceProp* prop )
assert(args.size() == 2);
return {args[0]};
}

// CudaStreamQuery

CudaStreamQuery::CudaStreamQuery(callback::FunctionDecl* decls) {
Expand Down Expand Up @@ -614,4 +656,45 @@ llvm::SmallVector<Value*, 1> CudaEventQuery::map_return_value(IRBuilder<>& irb,
return {result};
}

CudaStreamSyncCallback::CudaStreamSyncCallback(callback::FunctionDecl* decls) {
setup("cudaStreamSynchronize", &decls->cusan_sync_callback.f);
}
llvm::SmallVector<Value*> CudaStreamSyncCallback::map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args) {
//( void* stream)
assert(args.size() == 1);
return {irb.getInt8(1), args[0]};
}
llvm::SmallVector<Value*, 1> CudaStreamSyncCallback::map_return_value(IRBuilder<>&, Value* result) {
return {result};
}

CudaEventSyncCallback::CudaEventSyncCallback(callback::FunctionDecl* decls) {
setup("cudaEventSynchronize", &decls->cusan_sync_callback.f);
}
llvm::SmallVector<Value*> CudaEventSyncCallback::map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args) {
//( void* event)
assert(args.size() == 1);
return {irb.getInt8(2), args[0]};
}
llvm::SmallVector<Value*, 1> CudaEventSyncCallback::map_return_value(IRBuilder<>&, Value* result) {
return {result};
}

CudaDeviceSyncCallback::CudaDeviceSyncCallback(callback::FunctionDecl* decls) {
setup("cudaDeviceSynchronize", &decls->cusan_sync_callback.f);
}
llvm::SmallVector<Value*> CudaDeviceSyncCallback::map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args) {
//( )
assert(args.size() == 0);
#if LLVM_VERSION_MAJOR < 15
auto* ptr_type = PointerType::getUnqual(irb.getContext());
#else
auto* ptr_type = irb.getPtrTy();
#endif
return {irb.getInt8(0), ConstantPointerNull::get(ptr_type)};
}
llvm::SmallVector<Value*, 1> CudaDeviceSyncCallback::map_return_value(IRBuilder<>&, Value* result) {
return {result};
}

} // namespace cusan::transform
43 changes: 22 additions & 21 deletions lib/pass/AnalysisTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,27 +211,9 @@ BasicInstrumenterDecl(CudaHostFree);
BasicInstrumenterDecl(CudaMallocManaged);
BasicInstrumenterDecl(CudaMalloc);
BasicInstrumenterDecl(CudaFree);

class CudaMallocPitch : public SimpleInstrumenter<CudaMallocPitch> {
public:
CudaMallocPitch(callback::FunctionDecl* decls) {
setup("cudaMallocPitch", &decls->cusan_device_alloc.f);
}
static llvm::SmallVector<Value*, 2> map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args) {
//(void** devPtr, size_t* pitch, size_t width, size_t height )
assert(args.size() == 4);
auto* ptr = irb.CreateBitOrPointerCast(args[0], irb.getInt8Ty()->getPointerTo());

//"The function may pad the allocation"
//"*pitch by cudaMallocPitch() is the width in bytes of the allocation"
auto* pitch = irb.CreateLoad(irb.getIntPtrTy(irb.GetInsertBlock()->getModule()->getDataLayout()), args[1]);
// auto* width = args[2];
auto* height = args[3];

auto* real_size = irb.CreateMul(pitch, height);
return {ptr, real_size};
}
};
BasicInstrumenterDecl(CudaMallocPitch);
BasicInstrumenterDecl(CudaSetDevice);
BasicInstrumenterDecl(CudaChooseDevice);

class CudaStreamQuery : public SimpleInstrumenter<CudaStreamQuery> {
public:
Expand All @@ -247,6 +229,25 @@ class CudaEventQuery : public SimpleInstrumenter<CudaEventQuery> {
static llvm::SmallVector<Value*, 1> map_return_value(IRBuilder<>& irb, Value* result);
};

class CudaStreamSyncCallback : public SimpleInstrumenter<CudaStreamSyncCallback> {
public:
CudaStreamSyncCallback(callback::FunctionDecl* decls);
static llvm::SmallVector<Value*> map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args);
static llvm::SmallVector<Value*, 1> map_return_value(IRBuilder<>& irb, Value* result);
};
class CudaEventSyncCallback : public SimpleInstrumenter<CudaEventSyncCallback> {
public:
CudaEventSyncCallback(callback::FunctionDecl* decls);
static llvm::SmallVector<Value*> map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args);
static llvm::SmallVector<Value*, 1> map_return_value(IRBuilder<>& irb, Value* result);
};
class CudaDeviceSyncCallback : public SimpleInstrumenter<CudaDeviceSyncCallback> {
public:
CudaDeviceSyncCallback(callback::FunctionDecl* decls);
static llvm::SmallVector<Value*> map_arguments(IRBuilder<>& irb, llvm::ArrayRef<Value*> args);
static llvm::SmallVector<Value*, 1> map_return_value(IRBuilder<>& irb, Value* result);
};

} // namespace transform
} // namespace cusan

Expand Down
1 change: 1 addition & 0 deletions lib/pass/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mark_as_advanced(LLVM_CUSAN_TRANSFORMPASS_LINK_INTO_TOOLS)
target_compile_definitions(cusan_TransformPass
PRIVATE
$<$<BOOL:${CUSAN_TYPEART}>:CUSAN_TYPEART=1>
$<$<BOOL:${CUSAN_DEVICE_SYNC_CALLBACKS}>:CUSAN_DEVICE_SYNC_CALLBACKS=1>
)

set_target_properties(
Expand Down
9 changes: 9 additions & 0 deletions lib/pass/CusanPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,15 @@ bool CusanPass::runOnFunc(llvm::Function& function) {
modified |= transform::StreamCreateWithFlagsInstrumenter(&cusan_decls_).instrument(function);
modified |= transform::StreamCreateWithPriorityInstrumenter(&cusan_decls_).instrument(function);
modified |= transform::CudaMallocPitch(&cusan_decls_).instrument(function);
modified |= transform::CudaChooseDevice(&cusan_decls_).instrument(function);
modified |= transform::CudaSetDevice(&cusan_decls_).instrument(function);

// callbacks
#ifdef CUSAN_DEVICE_SYNC_CALLBACKS
modified |= transform::CudaDeviceSyncCallback(&cusan_decls_).instrument(function);
modified |= transform::CudaEventSyncCallback(&cusan_decls_).instrument(function);
modified |= transform::CudaStreamSyncCallback(&cusan_decls_).instrument(function);
#endif

auto data_for_host = host::kernel_model_for_stub(&function, this->kernel_models_);
if (data_for_host) {
Expand Down
14 changes: 14 additions & 0 deletions lib/pass/FunctionDecl.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "FunctionDecl.h"

#include <cstdint>

namespace cusan::callback {

void FunctionDecl::initialize(llvm::Module& module) {
Expand Down Expand Up @@ -124,6 +126,18 @@ void FunctionDecl::initialize(llvm::Module& module) {
// void* devPtr, size_t pitch, size_t width, size_t height
ArgTypes arg_types_2d_memset = {void_ptr, size_t_ty, size_t_ty, size_t_ty};
make_function(cusan_memset_2d, arg_types_2d_memset);

// int device
ArgTypes arg_types_set_device = {Type::getInt32Ty(c)};
make_function(cusan_set_device, arg_types_set_device);

// void* device
ArgTypes arg_types_choose_device = {Type::getInt32Ty(c)->getPointerTo()};
make_function(cusan_choose_device, arg_types_choose_device);

// u8 evenType, u32 returnValue
ArgTypes arg_types_sync_callback = {Type::getInt8Ty(c), void_ptr, Type::getInt32Ty(c)};
make_function(cusan_sync_callback, arg_types_sync_callback);
}

} // namespace cusan::callback
4 changes: 3 additions & 1 deletion lib/pass/FunctionDecl.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ struct FunctionDecl {
CusanFunction cusan_host_register{"_cusan_host_register"};
CusanFunction cusan_host_unregister{"_cusan_host_unregister"};
CusanFunction cusan_device_alloc{"_cusan_device_alloc"};
CusanFunction cusan_set_device{"_cusan_set_device"};
CusanFunction cusan_choose_device{"_cusan_choose_device"};
CusanFunction cusan_device_free{"_cusan_device_free"};
CusanFunction cusan_stream_query{"_cusan_stream_query"};
CusanFunction cusan_event_query{"_cusan_event_query"};

CusanFunction cusan_sync_callback{"cusan_sync_callback"};
void initialize(llvm::Module& m);
};

Expand Down
Loading