diff --git a/.gitignore b/.gitignore index 96ef6c0..3b1b994 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target Cargo.lock +/examples/**/target/ diff --git a/examples/simple_ptx/Cargo.toml b/examples/simple_ptx/Cargo.toml new file mode 100644 index 0000000..3543328 --- /dev/null +++ b/examples/simple_ptx/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "simple" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["dylib"] + +[build-dependencies] +bindgen_cuda = { version = "0.1.5", path = "../.." } + +[dev-dependencies] +cudarc = { version = "0.15.2", default-features = false, features = ["cuda-version-from-build-system", "dynamic-linking", "driver", "runtime"] } diff --git a/examples/simple_ptx/build.rs b/examples/simple_ptx/build.rs new file mode 100644 index 0000000..53d842a --- /dev/null +++ b/examples/simple_ptx/build.rs @@ -0,0 +1,5 @@ +fn main() { + let builder = bindgen_cuda::Builder::default(); + let bindings = builder.build_ptx().unwrap(); + bindings.write("src/kernel.rs").unwrap(); +} diff --git a/examples/simple_ptx/src/cuda.cu b/examples/simple_ptx/src/cuda.cu new file mode 100644 index 0000000..eb51677 --- /dev/null +++ b/examples/simple_ptx/src/cuda.cu @@ -0,0 +1,6 @@ +extern "C" __global__ void sin_kernel(float *out, const float *inp, const size_t numel) { + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < numel) { + out[i] = sin(inp[i]); + } +} diff --git a/examples/simple_ptx/src/kernel.rs b/examples/simple_ptx/src/kernel.rs new file mode 100644 index 0000000..4784915 --- /dev/null +++ b/examples/simple_ptx/src/kernel.rs @@ -0,0 +1 @@ +pub const CUDA: &str = include_str!(concat!(env!("OUT_DIR"), "/cuda.ptx")); diff --git a/examples/simple_ptx/src/lib.rs b/examples/simple_ptx/src/lib.rs new file mode 100644 index 0000000..a1c4108 --- /dev/null +++ b/examples/simple_ptx/src/lib.rs @@ -0,0 +1,39 @@ +mod kernel; + +#[cfg(test)] +mod tests { + use cudarc::driver::{DriverError, LaunchConfig, PushKernelArg}; + + #[test] + fn test_simple() -> Result<(), DriverError> { + let data: Vec = (0..100).map(|u| u as f32).collect(); + // Get a stream for GPU 0 + let ctx = cudarc::driver::CudaContext::new(0)?; + let stream = ctx.default_stream(); + + // copy a rust slice to the device + let inp = stream.memcpy_stod(&data)?; + + // or allocate directly + let mut out = stream.alloc_zeros::(100)?; + // Dynamically load it into the device + let ptx = cudarc::nvrtc::Ptx::from_src(crate::kernel::CUDA); + let module = ctx.load_module(ptx)?; + let sin_kernel = module.load_function("sin_kernel")?; + let mut builder = stream.launch_builder(&sin_kernel); + builder.arg(&mut out); + builder.arg(&inp); + builder.arg(&100usize); + unsafe { builder.launch(LaunchConfig::for_num_elems(100)) }?; + let out_host: Vec = stream.memcpy_dtov(&out)?; + + assert_eq!(out_host.len(), data.len()); + // Only approximations can be asserted + let expected: Vec<_> = data.into_iter().map(f32::sin).collect(); + for (i, (l, r)) in out_host.into_iter().zip(expected.into_iter()).enumerate() { + let diff = (l - r).abs() / (l + 1e-10); + assert!(diff < 1e-3, "{l} != {r} (diff = {diff:?}, location = {i})"); + } + Ok(()) + } +} diff --git a/examples/simple_static/Cargo.toml b/examples/simple_static/Cargo.toml new file mode 100644 index 0000000..2d5128f --- /dev/null +++ b/examples/simple_static/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "simple" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["dylib"] + +[build-dependencies] +bindgen_cuda = { version = "0.1.5", path = "../.." } + +[dependencies] +cudarc = { version = "0.13", default-features = false, features = ["cuda-version-from-build-system", "dynamic-linking", "driver", "runtime"] } diff --git a/examples/simple_static/build.rs b/examples/simple_static/build.rs new file mode 100644 index 0000000..2ed4abf --- /dev/null +++ b/examples/simple_static/build.rs @@ -0,0 +1,6 @@ +fn main() { + let builder = bindgen_cuda::Builder::default(); + builder.build_lib("libsin.a"); + println!("cargo:rustc-link-lib=sin"); + println!("cargo:rustc-link-search=native={}", "."); +} diff --git a/examples/simple_static/libsin.a b/examples/simple_static/libsin.a new file mode 100644 index 0000000..06e8652 Binary files /dev/null and b/examples/simple_static/libsin.a differ diff --git a/examples/simple_static/src/cuda.cu b/examples/simple_static/src/cuda.cu new file mode 100644 index 0000000..4673af5 --- /dev/null +++ b/examples/simple_static/src/cuda.cu @@ -0,0 +1,14 @@ +#include "cuda.h" +#include "stdio.h" + +extern "C" __global__ void sin_kernel(float *out, const float *inp, const int32_t numel) { + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < numel) { + out[i] = sin(inp[i]); + } +} + +extern "C" int launch_sin2(float *out, const float *inp, int32_t n, cudaStream_t stream) { + sin_kernel<<>>(out, inp, n); + return 0; +} diff --git a/examples/simple_static/src/lib.rs b/examples/simple_static/src/lib.rs new file mode 100644 index 0000000..63a7255 --- /dev/null +++ b/examples/simple_static/src/lib.rs @@ -0,0 +1,39 @@ +use std::ffi::{c_int, c_void}; + +use cudarc::driver::sys::CUstream_st; + +unsafe extern "C" { + pub fn launch_sin2(out: *mut c_void, inp: *const c_void, n: c_int, stream: &*mut CUstream_st); +} + +#[cfg(test)] +mod tests { + use cudarc::driver::{DevicePtr, DevicePtrMut, DriverError}; + + #[test] + fn test_simple() -> Result<(), DriverError> { + let data: Vec = (0..100).map(|u| u as f32).collect(); + // Get a stream for GPU 0 + let dev = cudarc::driver::CudaDevice::new(0)?; + + // copy a rust slice to the device + let inp = dev.htod_copy(data.clone())?; + let mut out = dev.alloc_zeros::(100)?; + + let out_ptr = *out.device_ptr_mut() as *mut core::ffi::c_void; + let inp_ptr = *inp.device_ptr() as *const core::ffi::c_void; + unsafe { super::launch_sin2(out_ptr, inp_ptr, 100, dev.cu_stream()) }; + + let out_host: Vec = dev.dtoh_sync_copy(&out)?; + assert_eq!(out_host.len(), data.len()); + // Only approximations can be asserted + let expected: Vec<_> = data.into_iter().map(f32::sin).collect(); + println!("Expect {expected:?}"); + println!("Got {out_host:?}"); + for (i, (l, r)) in out_host.into_iter().zip(expected.into_iter()).enumerate() { + let diff = (l - r).abs() / (l + 1e-10); + assert!(diff < 1e-3, "{l} != {r} (diff = {diff:?}, location = {i})"); + } + Ok(()) + } +} diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..a44ef7a --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1730531603, + "narHash": "sha256-Dqg6si5CqIzm87sp57j5nTaeBbWhHFaVyG7V6L8k3lY=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "7ffd9ae656aec493492b44d0ddfb28e79a1ea25d", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..dad1ba8 --- /dev/null +++ b/flake.nix @@ -0,0 +1,40 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + }; + + outputs = + { nixpkgs, ... }: + let + forAllSystems = nixpkgs.lib.genAttrs [ + "aarch64-linux" + "x86_64-linux" + "aarch64-darwin" + ]; + in + { + devShells = forAllSystems ( + system: + let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + }; + in + with pkgs; + { + default = pkgs.mkShell { + nativeBuildInputs = [ pkg-config ]; + buildInputs = [ + rustup + cudaPackages.cudatoolkit + cudaPackages.cuda_nvcc + ]; + CUDA_ROOT = "${pkgs.cudaPackages.cudatoolkit}"; + LD_LIBRARY_PATH = "/run/opengl-driver/lib"; + }; + + } + ); + }; +} diff --git a/src/lib.rs b/src/lib.rs index 6078c5a..0695c19 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -325,7 +325,6 @@ impl Builder { include_paths.sort(); include_paths.dedup(); - #[allow(unused)] let mut include_options: Vec = include_paths .into_iter() .map(|s| { @@ -441,7 +440,6 @@ fn cuda_include_dir() -> Option { "CUDA_TOOLKIT_ROOT_DIR", "CUDNN_LIB", ]; - #[allow(unused)] let env_vars = env_vars .into_iter() .map(std::env::var) @@ -459,13 +457,8 @@ fn cuda_include_dir() -> Option { println!("cargo:info={roots:?}"); - #[allow(unused)] let roots = roots.into_iter().map(Into::::into); - #[cfg(feature = "ci-check")] - let root: PathBuf = "ci".into(); - - #[cfg(not(feature = "ci-check"))] env_vars .chain(roots) .find(|path| path.join("include").join("cuda.h").is_file())