Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/target
Cargo.lock
/examples/**/target/
13 changes: 13 additions & 0 deletions examples/simple_ptx/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "simple"
version = "0.1.0"
edition = "2024"

[lib]
crate-type = ["dylib"]

[build-dependencies]
bindgen_cuda = { version = "0.1.5", path = "../.." }

[dev-dependencies]
cudarc = { version = "0.15.2", default-features = false, features = ["cuda-version-from-build-system", "dynamic-linking", "driver", "runtime"] }
5 changes: 5 additions & 0 deletions examples/simple_ptx/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
fn main() {
let builder = bindgen_cuda::Builder::default();
let bindings = builder.build_ptx().unwrap();
bindings.write("src/kernel.rs").unwrap();
}
6 changes: 6 additions & 0 deletions examples/simple_ptx/src/cuda.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extern "C" __global__ void sin_kernel(float *out, const float *inp, const size_t numel) {
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < numel) {
out[i] = sin(inp[i]);
}
}
1 change: 1 addition & 0 deletions examples/simple_ptx/src/kernel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub const CUDA: &str = include_str!(concat!(env!("OUT_DIR"), "/cuda.ptx"));
39 changes: 39 additions & 0 deletions examples/simple_ptx/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
mod kernel;

#[cfg(test)]
mod tests {
use cudarc::driver::{DriverError, LaunchConfig, PushKernelArg};

#[test]
fn test_simple() -> Result<(), DriverError> {
let data: Vec<f32> = (0..100).map(|u| u as f32).collect();
// Get a stream for GPU 0
let ctx = cudarc::driver::CudaContext::new(0)?;
let stream = ctx.default_stream();

// copy a rust slice to the device
let inp = stream.memcpy_stod(&data)?;

// or allocate directly
let mut out = stream.alloc_zeros::<f32>(100)?;
// Dynamically load it into the device
let ptx = cudarc::nvrtc::Ptx::from_src(crate::kernel::CUDA);
let module = ctx.load_module(ptx)?;
let sin_kernel = module.load_function("sin_kernel")?;
let mut builder = stream.launch_builder(&sin_kernel);
builder.arg(&mut out);
builder.arg(&inp);
builder.arg(&100usize);
unsafe { builder.launch(LaunchConfig::for_num_elems(100)) }?;
let out_host: Vec<f32> = stream.memcpy_dtov(&out)?;

assert_eq!(out_host.len(), data.len());
// Only approximations can be asserted
let expected: Vec<_> = data.into_iter().map(f32::sin).collect();
for (i, (l, r)) in out_host.into_iter().zip(expected.into_iter()).enumerate() {
let diff = (l - r).abs() / (l + 1e-10);
assert!(diff < 1e-3, "{l} != {r} (diff = {diff:?}, location = {i})");
}
Ok(())
}
}
13 changes: 13 additions & 0 deletions examples/simple_static/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "simple"
version = "0.1.0"
edition = "2024"

[lib]
crate-type = ["dylib"]

[build-dependencies]
bindgen_cuda = { version = "0.1.5", path = "../.." }

[dependencies]
cudarc = { version = "0.13", default-features = false, features = ["cuda-version-from-build-system", "dynamic-linking", "driver", "runtime"] }
6 changes: 6 additions & 0 deletions examples/simple_static/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
fn main() {
let builder = bindgen_cuda::Builder::default();
builder.build_lib("libsin.a");
println!("cargo:rustc-link-lib=sin");
println!("cargo:rustc-link-search=native={}", ".");
}
Binary file added examples/simple_static/libsin.a
Binary file not shown.
14 changes: 14 additions & 0 deletions examples/simple_static/src/cuda.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "cuda.h"
#include "stdio.h"

extern "C" __global__ void sin_kernel(float *out, const float *inp, const int32_t numel) {
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < numel) {
out[i] = sin(inp[i]);
}
}

extern "C" int launch_sin2(float *out, const float *inp, int32_t n, cudaStream_t stream) {
sin_kernel<<<n, 1, 0>>>(out, inp, n);
return 0;
}
39 changes: 39 additions & 0 deletions examples/simple_static/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use std::ffi::{c_int, c_void};

use cudarc::driver::sys::CUstream_st;

unsafe extern "C" {
pub fn launch_sin2(out: *mut c_void, inp: *const c_void, n: c_int, stream: &*mut CUstream_st);
}

#[cfg(test)]
mod tests {
use cudarc::driver::{DevicePtr, DevicePtrMut, DriverError};

#[test]
fn test_simple() -> Result<(), DriverError> {
let data: Vec<f32> = (0..100).map(|u| u as f32).collect();
// Get a stream for GPU 0
let dev = cudarc::driver::CudaDevice::new(0)?;

// copy a rust slice to the device
let inp = dev.htod_copy(data.clone())?;
let mut out = dev.alloc_zeros::<f32>(100)?;

let out_ptr = *out.device_ptr_mut() as *mut core::ffi::c_void;
let inp_ptr = *inp.device_ptr() as *const core::ffi::c_void;
unsafe { super::launch_sin2(out_ptr, inp_ptr, 100, dev.cu_stream()) };

let out_host: Vec<f32> = dev.dtoh_sync_copy(&out)?;
assert_eq!(out_host.len(), data.len());
// Only approximations can be asserted
let expected: Vec<_> = data.into_iter().map(f32::sin).collect();
println!("Expect {expected:?}");
println!("Got {out_host:?}");
for (i, (l, r)) in out_host.into_iter().zip(expected.into_iter()).enumerate() {
let diff = (l - r).abs() / (l + 1e-10);
assert!(diff < 1e-3, "{l} != {r} (diff = {diff:?}, location = {i})");
}
Ok(())
}
}
27 changes: 27 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
};

outputs =
{ nixpkgs, ... }:
let
forAllSystems = nixpkgs.lib.genAttrs [
"aarch64-linux"
"x86_64-linux"
"aarch64-darwin"
];
in
{
devShells = forAllSystems (
system:
let
pkgs = import nixpkgs {
inherit system;
config.allowUnfree = true;
};
in
with pkgs;
{
default = pkgs.mkShell {
nativeBuildInputs = [ pkg-config ];
buildInputs = [
rustup
cudaPackages.cudatoolkit
cudaPackages.cuda_nvcc
];
CUDA_ROOT = "${pkgs.cudaPackages.cudatoolkit}";
LD_LIBRARY_PATH = "/run/opengl-driver/lib";
};

}
);
};
}
7 changes: 0 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,6 @@ impl Builder {
include_paths.sort();
include_paths.dedup();

#[allow(unused)]
let mut include_options: Vec<String> = include_paths
.into_iter()
.map(|s| {
Expand Down Expand Up @@ -441,7 +440,6 @@ fn cuda_include_dir() -> Option<PathBuf> {
"CUDA_TOOLKIT_ROOT_DIR",
"CUDNN_LIB",
];
#[allow(unused)]
let env_vars = env_vars
.into_iter()
.map(std::env::var)
Expand All @@ -459,13 +457,8 @@ fn cuda_include_dir() -> Option<PathBuf> {

println!("cargo:info={roots:?}");

#[allow(unused)]
let roots = roots.into_iter().map(Into::<PathBuf>::into);

#[cfg(feature = "ci-check")]
let root: PathBuf = "ci".into();

#[cfg(not(feature = "ci-check"))]
env_vars
.chain(roots)
.find(|path| path.join("include").join("cuda.h").is_file())
Expand Down