Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .ci/docker/common/install_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ else
arch_path='sbsa'
fi

NVSHMEM_VERSION=3.3.24
NVSHMEM_VERSION=3.4.5

function install_cuda {
version=$1
Expand Down Expand Up @@ -178,10 +178,10 @@ function install_130 {
NCCL_VERSION=v2.27.7-1
CUSPARSELT_VERSION=0.8.0.4_cuda13

echo "Installing CUDA 12.8.1, cuDNN ${CUDNN_VERSION}, NCCL ${NCCL_VERSION}, NVSHMEM ${NVSHMEM_VERSION} and cuSparseLt ${CUSPARSELT_VERSION}"
echo "Installing CUDA 13.0.2, cuDNN ${CUDNN_VERSION}, NCCL ${NCCL_VERSION}, NVSHMEM ${NVSHMEM_VERSION} and cuSparseLt ${CUSPARSELT_VERSION}"

# install CUDA 13.0 in the same container
install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
install_cuda 13.0.2 cuda_13.0.2_580.95.05_linux

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
install_cudnn 13 $CUDNN_VERSION
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/create_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
fail-fast: false
matrix:
python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
cuda: ["12.6", "12.8", "12.9"]
cuda: ["12.6", "12.8", "13.0"]
torch: ["2.8.0"]
runs-on: [self-hosted, linux]
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish_devel_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
fail-fast: false
matrix:
cuda: ["12.6", "12.8", "12.9", "13.0"]
cuda: ["12.6", "12.8", "13.0"]
gcc: ["12"]
runs-on: [self-hosted, linux]
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish_manylinux_2_28_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
fail-fast: false
matrix:
cuda: ["12.6", "12.8", "12.9"]
cuda: ["12.6", "12.8", "13.0"]
runs-on: [self-hosted, linux]
steps:
- name: Checkout repository
Expand Down
98 changes: 54 additions & 44 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ option(USE_MANYLINUX "Build for manylinux" OFF)
option(BUILD_NVBENCH "Build the nvbench binary" OFF)
option(INSTALL_PY_MODULE "Install python module to scalellm directory" OFF)

option(BUILD_KERNEL_ONLY "Build only the CUDA kernel library" OFF)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
Expand Down Expand Up @@ -92,6 +94,9 @@ message(STATUS "TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}")

# configure vcpkg
# have to set CMAKE_TOOLCHAIN_FILE before first project call.
if (NOT BUILD_KERNEL_ONLY)
set(VCPKG_MANIFEST_FEATURES service)
endif()
if (DEFINED ENV{VCPKG_ROOT} AND NOT DEFINED CMAKE_TOOLCHAIN_FILE)
set(CMAKE_TOOLCHAIN_FILE "$ENV{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
CACHE STRING "Vcpkg toolchain file")
Expand Down Expand Up @@ -121,64 +126,71 @@ project(
LANGUAGES C CXX CUDA
)

find_package(CUDAToolkit REQUIRED)

# setup CMake module path, defines path for include() and find_package()
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
enable_language(Rust)
find_package(Rust REQUIRED)

# include custom cmake modules
include(static_analyzers)
# TODO: can't use sanitizers with CUDA for now.
# include(sanitizers)

if(UNIX)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
endif()
# include current and third_party paths
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src)
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/third_party)

find_package(Boost REQUIRED)
find_package(Threads REQUIRED)
# find all dependencies from vcpkg
find_package(fmt CONFIG REQUIRED GLOBAL)
# find required common packages
find_package(CUDAToolkit REQUIRED)
find_package(glog CONFIG REQUIRED)
find_package(gflags CONFIG REQUIRED)
find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
find_package(re2 CONFIG REQUIRED)
find_package(folly CONFIG REQUIRED)
find_package(GTest CONFIG REQUIRED)
find_package(benchmark CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(prometheus-cpp CONFIG REQUIRED)
find_package(RapidJSON CONFIG REQUIRED)

# find packages for service build
if (NOT BUILD_KERNEL_ONLY)
enable_language(Rust)
find_package(Rust REQUIRED)

# include custom cmake modules
include(static_analyzers)
# TODO: can't use sanitizers with CUDA for now.
# include(sanitizers)

if(UNIX)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
endif()

find_package(Boost REQUIRED)
find_package(Threads REQUIRED)
# find all dependencies from vcpkg
find_package(fmt CONFIG REQUIRED GLOBAL)
find_package(gflags CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)
find_package(re2 CONFIG REQUIRED)
find_package(folly CONFIG REQUIRED)
find_package(benchmark CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(prometheus-cpp CONFIG REQUIRED)
find_package(RapidJSON CONFIG REQUIRED)

find_package(NCCL REQUIRED)

find_package(Jemalloc)
if(Jemalloc_FOUND)
link_libraries(Jemalloc::jemalloc)
endif()
endif()

if (USE_MANYLINUX)
# manylinux doesn't ship Development.Embed
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
else()
find_package(Python REQUIRED COMPONENTS Interpreter Development)
endif()

find_package(NCCL REQUIRED)

find_package(Jemalloc)
if(Jemalloc_FOUND)
link_libraries(Jemalloc::jemalloc)
endif()

# Important Note: Always invoke find_package for other dependencies
# before including libtorch, as doing so afterwards may lead to
# unexpected linker errors.
if (DEFINED ENV{LIBTORCH_ROOT})
find_package(Torch REQUIRED HINTS "$ENV{LIBTORCH_ROOT}")
message(STATUS "Using libtorch at $ENV{LIBTORCH_ROOT}")
else()
SET(TORCH_VERSION "2.8.0")
SET(TORCH_VERSION "2.9.0")
include(FetchContent)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.9)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu129/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcu129.zip")
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13.0)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu130/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcu130.zip")
elseif (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu128/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcu128.zip")
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.6)
Expand Down Expand Up @@ -235,12 +247,10 @@ message(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
include(CTest)
include(GoogleTest)

# include current path
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src)
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/third_party)

# add subdirectories
add_subdirectory(proto)
add_subdirectory(src)
add_subdirectory(third_party)
add_subdirectory(scalellm)
add_subdirectory(src)
if (NOT BUILD_KERNEL_ONLY)
add_subdirectory(proto)
add_subdirectory(scalellm)
endif()
38 changes: 21 additions & 17 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
add_subdirectory(chat_template)
add_subdirectory(common)
add_subdirectory(handlers)
add_subdirectory(gtest_main)
add_subdirectory(kernels)
add_subdirectory(tokenizer)
add_subdirectory(layers)
add_subdirectory(models)
add_subdirectory(model_loader)
add_subdirectory(model_parallel)
add_subdirectory(sampling)
add_subdirectory(request)
add_subdirectory(memory)
add_subdirectory(scheduler)
add_subdirectory(speculative)
add_subdirectory(engine)
add_subdirectory(server)
add_subdirectory(benchmark)
add_subdirectory(huggingface)

if (NOT BUILD_KERNEL_ONLY)
add_subdirectory(chat_template)
add_subdirectory(common)
add_subdirectory(handlers)
add_subdirectory(tokenizer)
add_subdirectory(layers)
add_subdirectory(models)
add_subdirectory(model_loader)
add_subdirectory(model_parallel)
add_subdirectory(sampling)
add_subdirectory(request)
add_subdirectory(memory)
add_subdirectory(scheduler)
add_subdirectory(speculative)
add_subdirectory(engine)
add_subdirectory(server)
add_subdirectory(benchmark)
add_subdirectory(huggingface)
endif()
18 changes: 2 additions & 16 deletions src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ include(cc_library)
include(cc_test)

cc_library(
NAME
NAME
common
HDRS
macros.h
Expand All @@ -28,21 +28,8 @@ cc_library(
glog::glog
)

cc_library(
TESTONLY
NAME
gtest_main
SRCS
gtest_main.cpp
DEPS
GTest::gtest
CUDA::toolkit
LINKOPTS
cudart
)

cc_test(
NAME
NAME
common_test
SRCS
range_test.cpp
Expand All @@ -55,4 +42,3 @@ cc_test(
:gtest_main
torch
)

14 changes: 14 additions & 0 deletions src/gtest_main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
include(cc_library)

# Declares the `gtest_main` library through the project's cc_library() helper
# (brought in by include(cc_library) at the top of this file).
#   TESTONLY - flag passed to cc_library(); NOTE(review): presumably restricts
#              the target to test builds - confirm in cmake/cc_library.cmake.
#   SRCS     - built from gtest_main.cpp.
#   DEPS     - depends on GTest::gtest and CUDA::toolkit.
#   LINKOPTS - passes `cudart` as a link option.
# NOTE(review): this looks like the shared test entry point that cc_test()
# targets reference as `:gtest_main`; verify against the cc_test helper.
cc_library(
TESTONLY
NAME
gtest_main
SRCS
gtest_main.cpp
DEPS
GTest::gtest
CUDA::toolkit
LINKOPTS
cudart
)
File renamed without changes.
34 changes: 13 additions & 21 deletions src/huggingface/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// A simple C wrapper for the safetensors and tokenizers libraries
// adapted from https://github.com/huggingface/safetensors/tree/c_bindings

// #[repr(C)]: use C language’s data layout
// extern "C": tells Rust to use C ABI conventions
// #[no_mangle]: prevent Rust from renaming the function

// Import the needed libraries
use core::ffi::c_uint;
use core::str::Utf8Error;
Expand Down Expand Up @@ -415,7 +419,6 @@ unsafe fn _get_tensor(
Ok(())
}


// A simple C wrapper of the hf-tokenizers library
// ported from https://github.com/mlc-ai/tokenizers-cpp

Expand All @@ -433,12 +436,8 @@ pub struct TokenizerWrapper {
impl TokenizerWrapper {
pub fn encode(&mut self, text: &str, add_special_tokens: bool) {
// Encode the text and store the ids
self.encode_ids = Vec::from(
self.tokenizer
.encode(text, add_special_tokens)
.unwrap()
.get_ids(),
);
let encoded = self.tokenizer.encode(text, add_special_tokens).unwrap();
self.encode_ids = encoded.get_ids().to_vec();
}

pub fn decode(&mut self, ids: Vec<u32>, skip_special_tokens: bool) {
Expand All @@ -453,11 +452,8 @@ impl TokenizerWrapper {

#[no_mangle]
extern "C" fn tokenizer_from_file(path: *const c_char) -> *mut TokenizerWrapper {
let c_str = unsafe { CStr::from_ptr(path) };
let path_str = match c_str.to_str() {
Ok(s) => s,
Err(_) => panic!("Failed to convert C string to Rust string"),
};
let c_str = unsafe { CStr::from_ptr(path) }; // borrowed C string
let path_str = c_str.to_str().unwrap(); // convert CString to &str

let boxed = Box::new(TokenizerWrapper {
tokenizer: Tokenizer::from_file(path_str).unwrap().into(),
Expand All @@ -466,6 +462,7 @@ extern "C" fn tokenizer_from_file(path: *const c_char) -> *mut TokenizerWrapper
id_to_token_result: String::new(),
});

// Convert into a raw pointer: *mut TokenizerWrapper
Box::into_raw(boxed)
}

Expand Down Expand Up @@ -527,11 +524,7 @@ extern "C" fn tokenizer_free(wrapper: *mut TokenizerWrapper) {
}

#[no_mangle]
extern "C" fn tokenizer_token_to_id(
handle: *mut TokenizerWrapper,
token: *const u8,
len: usize
) {
extern "C" fn tokenizer_token_to_id(handle: *mut TokenizerWrapper, token: *const u8, len: usize) {
unsafe {
let token: &str = std::str::from_utf8(std::slice::from_raw_parts(token, len)).unwrap();
let id = (*handle).tokenizer.token_to_id(token);
Expand Down Expand Up @@ -564,8 +557,7 @@ extern "C" fn tokenizer_id_to_token(
#[no_mangle]
extern "C" fn tokenizer_get_vocab_size(
handle: *mut TokenizerWrapper,
with_added_tokens: bool) -> usize {
unsafe {
(*handle).get_vocab_size(with_added_tokens)
}
with_added_tokens: bool,
) -> usize {
unsafe { (*handle).get_vocab_size(with_added_tokens) }
}
1 change: 1 addition & 0 deletions src/kernels/attention/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ cc_library(
DEPS
:attention.template
glog::glog
torch
)

cc_test(
Expand Down
Loading