From 2cf88b34f160b6f72decc7f21d2a194ea65f704f Mon Sep 17 00:00:00 2001
From: Manish Kumar
Date: Fri, 9 Jan 2026 09:50:27 +0000
Subject: [PATCH] Refactor test cases

---
 bolt.sh                                  |  21 +
 tachyon/compute/tests/math_eval_tests.rs | 558 +++++------------------
 tachyon/compute/tests/test_utils.rs      | 249 ++++++++++
 tachyon/gpu/src/ffi/kernels/limits.cuh   |  16 +-
 tachyon/gpu/src/ffi/kernels/math.cuh     |  42 +-
 tachyon/gpu/src/ffi/kernels/types.cuh    |  48 +-
 6 files changed, 441 insertions(+), 493 deletions(-)
 create mode 100644 tachyon/compute/tests/test_utils.rs

diff --git a/bolt.sh b/bolt.sh
index c0dfeff..2bddcd1 100755
--- a/bolt.sh
+++ b/bolt.sh
@@ -65,6 +65,23 @@ check_rust() {
   echo "[OK] Clippy checks passed!"
 }
 
+check_miri() {
+  echo "[INFO] Running cargo miri..."
+  rustup +nightly component add miri
+  export MIRIFLAGS="-Zmiri-disable-isolation"
+  if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; then
+    echo "[INFO] CUDA GPU detected — but miri does not support FFI (unsupported operation: can't call foreign function \`cudaMalloc\` on OS \`linux\`)..."
+    # cargo +nightly miri nextest run --features gpu --no-fail-fast --test-threads=4
+  else
+    echo "[INFO] Running CPU tests with miri..."
+    cargo +nightly miri nextest run --no-default-features --no-fail-fast --test-threads=4
+    echo "[WARN] CUDA GPU not detected — skipping GPU tests."
+    echo "[INFO] (CUDA toolkit may be installed, but no working GPU/driver found)"
+  fi
+
+  echo "[OK] Miri checks passed!"
+}
+
 check_cpp() {
   echo "[INFO] Checking CPP"
   files=$(find . -type f \( -name "*.cpp" -o -name "*.cc" -o -name "*.cxx" -o -name "*.hpp" -o -name "*.h" -o -name "*.cu" -o -name "*.cuh" \))
@@ -129,6 +146,7 @@ help() {
   echo "  check    - Run cargo check, fmt, and clippy"
   echo "  build    - Only build the workspace (runs check first)"
   echo "  test     - Only run tests"
+  echo "  miri     - Run miri checks"
   echo "  coverage - Run coverage"
   echo "  all      - Run check, build, and test"
   echo "  help     - Show this help message"
@@ -152,6 +170,9 @@ main() {
   test)
     test
     ;;
+  miri)
+    check_miri
+    ;;
   coverage)
     coverage
     ;;
diff --git a/tachyon/compute/tests/math_eval_tests.rs b/tachyon/compute/tests/math_eval_tests.rs
index b7cc3fc..3db72fd 100644
--- a/tachyon/compute/tests/math_eval_tests.rs
+++ b/tachyon/compute/tests/math_eval_tests.rs
@@ -1,252 +1,8 @@
-use std::fmt::Debug;
-use std::sync::Once;
-
-use arrow::datatypes::{
-    ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type,
-    Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
-};
+mod test_utils;
+use compute::operator::Operator;
 use half::f16;
-use tracing_subscriber;
-
-pub trait ArrowMapper {
-    type ArrowType: ArrowPrimitiveType;
-}
-
-macro_rules! arrow_mapper {
-    ($t:ty, $arrow_type:ty) => {
-        impl ArrowMapper for $t {
-            type ArrowType = $arrow_type;
-        }
-    };
-}
-
-static TRACING: Once = Once::new();
-
-fn init_tracing() {
-    TRACING.call_once(|| {
-        tracing_subscriber::fmt()
-            .with_max_level(tracing::Level::DEBUG)
-            .with_test_writer()
-            .try_init()
-            .ok();
-    });
-}
-
-arrow_mapper!(i8, Int8Type);
-arrow_mapper!(i16, Int16Type);
-arrow_mapper!(i32, Int32Type);
-arrow_mapper!(i64, Int64Type);
-arrow_mapper!(u8, UInt8Type);
-arrow_mapper!(u16, UInt16Type);
-arrow_mapper!(u32, UInt32Type);
-arrow_mapper!(u64, UInt64Type);
-arrow_mapper!(f16, Float16Type);
-arrow_mapper!(f32, Float32Type);
-arrow_mapper!(f64, Float64Type);
-
-macro_rules!
impl_numeric_cast { - ($target:ty, $($source:ty)*) => { - $( - impl CastTo<$target> for $source { - fn cast(self) -> $target { - self as $target - } - } - )* - }; -} - -trait CastTo { - fn cast(self) -> T; -} - -impl_numeric_cast!(f64, u8 u16 u32 u64 usize i8 i16 i32 i64 isize f32 f64); -impl_numeric_cast!(f32, u8 u16 u32 u64 usize i8 i16 i32 i64 isize f32); -impl_numeric_cast!(u64, u8 u16 u32 u64 i8 i16 i32); -impl_numeric_cast!(u32, u8 u16 u32 i8 i16); -impl_numeric_cast!(u16, u8 u16 i8); -impl_numeric_cast!(u8, u8); -impl_numeric_cast!(i64, i8 i16 i32 i64 isize u8 u16 u32); -impl_numeric_cast!(i32, i8 i16 i32 u8 u16); -impl_numeric_cast!(i16, i8 i16 u8); -impl_numeric_cast!(i8, i8); - -// Manual implementation for specialized types like f16 -impl CastTo for f16 { - fn cast(self) -> f64 { - f64::from(f32::from(self)) - } -} -impl CastTo for f16 { - fn cast(self) -> f32 { - f32::from(self) - } -} - -impl CastTo for f16 { - fn cast(self) -> f16 { - self - } -} - -impl CastTo for u8 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} -impl CastTo for u16 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} - -impl CastTo for u32 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} - -impl CastTo for u64 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} - -impl CastTo for i8 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} - -impl CastTo for i16 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} - -impl CastTo for i32 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} - -impl CastTo for i64 { - fn cast(self) -> f16 { - f16::from_f32(self as f32) - } -} -trait TypeTestRange: Sized + Copy + Debug { - fn test_range() -> (Self, Self); -} -macro_rules! impl_test_range { - ($($t:ty)*) => { - $( - impl TypeTestRange for $t { - fn test_range() -> (Self, Self) { - (<$t>::MIN, <$t>::MAX) - } - } - )* - }; -} -impl_test_range!(u8 u16 u32 u64 usize i8 i16 i32 i64 isize f16); - -impl TypeTestRange for f32 { - fn test_range() -> (Self, Self) { - (f32::MIN / 2.0, f32::MAX / 2.0) - } -} - -impl TypeTestRange for f64 { - fn test_range() -> (Self, Self) { - (f64::MIN / 2.0, f64::MAX / 2.0) - } -} - -#[macro_export] -macro_rules! random_num { - ($min:expr, $max:expr) => {{ - use rand; - use rand::Rng; - let mut rng = rand::rng(); - let num: usize = rng.random_range($min..$max); - num - }}; -} - -#[macro_export] -macro_rules! random_vec { - ($size:expr, $ty:ty, $min:expr, $max:expr) => {{ - use rand::Rng; - let mut rng = rand::rng(); - (0..$size).map(|_| rng.random_range($min..$max)).collect::>() - }}; -} - -#[macro_export] -macro_rules! 
random_bit_vec { - ($size:expr, $ty:ty) => {{ - use compute::bit_vector::BitVector; - use rand; - use rand::Rng; - let mut rng = rand::rng(); - const BITS: usize = std::mem::size_of::<$ty>() * 8; - let num_blocks = $size.div_ceil(BITS); - let mut bits: Vec<$ty> = Vec::with_capacity(num_blocks); - - for _ in 0..(num_blocks.saturating_sub(1)) { - let random_block: $ty = rng.random_range(0..=<$ty>::MAX); - bits.push(random_block); - } - - if num_blocks > 0 { - let last_idx = num_blocks - 1; - let total_used_bits = last_idx * BITS; - let valid_bits_in_last_block = $size - total_used_bits; - let last_block: $ty = rng.random_range(0..=<$ty>::MAX); - - if valid_bits_in_last_block < BITS { - let low_bits_mask = !(<$ty>::MAX << valid_bits_in_last_block); - bits.push(last_block & low_bits_mask); - } else { - bits.push(last_block); - } - } - - BitVector::new(bits, $size) - }}; -} - -#[macro_export] -macro_rules! create_arrow_array { - ($vec:expr, $bit_vec:expr, $native_type:ty) => {{ - let arrow_vec: Vec> = $vec - .iter() - .enumerate() - .map(|(i, &x)| { - if $bit_vec.is_valid(i) { - let y: $native_type = x.cast(); - Some(y) - } else { - None - } - }) - .collect(); - PrimitiveArray::<<$native_type as ArrowMapper>::ArrowType>::from(arrow_vec) - }}; -} - -#[macro_export] -macro_rules! create_column { - ($vec:expr, $bit_vec:expr, $name:expr, $data_type:expr) => {{ - use std::sync::Arc; - - use compute::column::{Column, VecArray}; - let arr = Arc::new(VecArray { data: $vec.clone(), datatype: $data_type }); - Column::new($name, arr, $bit_vec) - }}; -} +use crate::test_utils::{ArrowMapper, CastTo, TypeTestRange, init_tracing}; macro_rules! test_eval_binary_matrix { ( @@ -368,6 +124,10 @@ macro_rules! test_eval_binary_fn { } } }; + if actual.is_nan() && expected.is_nan() { + //Treat it as equal + continue; + } assert!( diff <= epsilon as f64, "Mismatch at index {}: expected {} op {} = {}, got {}, diff {}", @@ -396,7 +156,7 @@ macro_rules! test_eval_binary_cmp_matrix { $size_max:expr, [ $( - ( $test_name:ident, $native_type:ty, $data_type:expr) + ( $test_name:ident, $native_type1:ty, $data_type1:expr, $native_type2:ty, $data_type2:expr) ),* $(,)? ] ) => { @@ -406,8 +166,10 @@ macro_rules! test_eval_binary_cmp_matrix { $test_name, $operator, $error_mode, - $native_type, - $data_type, + $native_type1, + $data_type1, + $native_type2, + $data_type2, $size_min, $size_max, ); @@ -421,8 +183,10 @@ macro_rules! test_eval_binary_cmp_fn { $test_name:ident, $operator:expr, $error_mode:expr, - $native_type:ty, - $data_type:expr, + $native_type1:ty, + $data_type1:expr, + $native_type2:ty, + $data_type2:expr, $size_min:expr, $size_max:expr, ) => { @@ -436,15 +200,16 @@ macro_rules! 
test_eval_binary_cmp_fn { use compute::operator::Operator; init_tracing(); let size = random_num!($size_min, $size_max); - let value_range = <$native_type>::test_range(); - let a_vec: Vec<$native_type> = random_vec!(size, $native_type, value_range.0, value_range.1); - let b_vec: Vec<$native_type> = random_vec!(size, $native_type, value_range.0, value_range.1); + let value_range1 = <$native_type1>::test_range(); + let value_range2 = <$native_type2>::test_range(); + let a_vec: Vec<$native_type1> = random_vec!(size, $native_type1, value_range1.0, value_range1.1); + let b_vec: Vec<$native_type2> = random_vec!(size, $native_type2, value_range2.0, value_range2.1); let a_bit_vec = random_bit_vec!(size, u32); let b_bit_vec = random_bit_vec!(size, u32); - let col_a = create_column!(a_vec, Some(a_bit_vec.clone()), "a", $data_type); - let col_b = create_column!(b_vec, Some(b_bit_vec.clone()), "b", $data_type); + let col_a = create_column!(a_vec, Some(a_bit_vec.clone()), "a", $data_type1); + let col_b = create_column!(b_vec, Some(b_bit_vec.clone()), "b", $data_type2); let expr = Expr::binary($operator, Expr::col("a"), Expr::col("b")); @@ -458,14 +223,14 @@ macro_rules! test_eval_binary_cmp_fn { if a_bit_vec.is_null(i) || b_bit_vec.is_null(i) { assert!(bit_vec.is_null(i)); } else { - let actual = output[i]; - let expected = &a_vec[i] $op &b_vec[i]; - assert_eq!( - actual, expected, - "Mismatch at index {}: expected {} op {} = {}, got {}", - i, &a_vec[i], &b_vec[i], expected, actual, - ); - } + let actual = output[i]; + let expected = &a_vec[i] $op &b_vec[i]; + assert_eq!( + actual, expected, + "Mismatch at index {}: expected {} op {} = {}, got {}", + i, &a_vec[i], &b_vec[i], expected, actual, + ); + } } } }; @@ -1444,16 +1209,17 @@ test_eval_binary_cmp_matrix!( 100, 200_000, [ - (test_eq_i8, i8, DataType::I8), - (test_eq_u8, u8, DataType::U8), - (test_eq_i16, i16, DataType::I16), - (test_eq_u16, u16, DataType::U16), - (test_eq_i32, i32, DataType::I32), - (test_eq_u32, u32, DataType::U32), - (test_eq_i64, i64, DataType::I64), - (test_eq_u64, u64, DataType::U64), - (test_eq_f32, f32, DataType::F32), - (test_eq_f64, f64, DataType::F64), + (test_eq_i8_i8, i8, DataType::I8, i8, DataType::I8), + (test_eq_u8_u8, u8, DataType::U8, u8, DataType::U8), + (test_eq_i16_i16, i16, DataType::I16, i16, DataType::I16), + (test_eq_u16_u16, u16, DataType::U16, u16, DataType::U16), + (test_eq_i32_i32, i32, DataType::I32, i32, DataType::I32), + (test_eq_u32_u32, u32, DataType::U32, u32, DataType::U32), + (test_eq_i64_i64, i64, DataType::I64, i64, DataType::I64), + (test_eq_u64_u64, u64, DataType::U64, u64, DataType::U64), + (test_eq_f16_f16, f16, DataType::F16, f16, DataType::F16), + (test_eq_f32_f32, f32, DataType::F32, f32, DataType::F32), + (test_eq_f64_f64, f64, DataType::F64, f64, DataType::F64), ] ); @@ -1464,16 +1230,17 @@ test_eval_binary_cmp_matrix!( 100, 400_000, [ - (test_neq_i8, i8, DataType::I8), - (test_neq_u8, u8, DataType::U8), - (test_neq_i16, i16, DataType::I16), - (test_neq_u16, u16, DataType::U16), - (test_neq_i32, i32, DataType::I32), - (test_neq_u32, u32, DataType::U32), - (test_neq_i64, i64, DataType::I64), - (test_neq_u64, u64, DataType::U64), - (test_neq_f32, f32, DataType::F32), - (test_neq_f64, f64, DataType::F64), + (test_neq_i8_i8, i8, DataType::I8, i8, DataType::I8), + (test_neq_u8_u8, u8, DataType::U8, u8, DataType::U8), + (test_neq_i16_i16, i16, DataType::I16, i16, DataType::I16), + (test_neq_u16_u16, u16, DataType::U16, u16, DataType::U16), + (test_neq_i32_i32, i32, DataType::I32, i32, 
DataType::I32), + (test_neq_u32_u32, u32, DataType::U32, u32, DataType::U32), + (test_neq_i64_i64, i64, DataType::I64, i64, DataType::I64), + (test_neq_u64_u64, u64, DataType::U64, u64, DataType::U64), + (test_neq_f16_f16, f16, DataType::F16, f16, DataType::F16), + (test_neq_f32_f32, f32, DataType::F32, f32, DataType::F32), + (test_neq_f64_f64, f64, DataType::F64, f64, DataType::F64), ] ); @@ -1484,16 +1251,17 @@ test_eval_binary_cmp_matrix!( 200, 500_000, [ - (test_gt_i8, i8, DataType::I8), - (test_gt_u8, u8, DataType::U8), - (test_gt_i16, i16, DataType::I16), - (test_gt_u16, u16, DataType::U16), - (test_gt_i32, i32, DataType::I32), - (test_gt_u32, u32, DataType::U32), - (test_gt_i64, i64, DataType::I64), - (test_gt_u64, u64, DataType::U64), - (test_gt_f32, f32, DataType::F32), - (test_gt_f64, f64, DataType::F64), + (test_gt_i8_i8, i8, DataType::I8, i8, DataType::I8), + (test_gt_u8_u8, u8, DataType::U8, u8, DataType::U8), + (test_gt_i16_i16, i16, DataType::I16, i16, DataType::I16), + (test_gt_u16_u16, u16, DataType::U16, u16, DataType::U16), + (test_gt_i32_i32, i32, DataType::I32, i32, DataType::I32), + (test_gt_u32_u32, u32, DataType::U32, u32, DataType::U32), + (test_gt_i64_i64, i64, DataType::I64, i64, DataType::I64), + (test_gt_u64_u64, u64, DataType::U64, u64, DataType::U64), + (test_gt_f16_f16, f16, DataType::F16, f16, DataType::F16), + (test_gt_f32_f32, f32, DataType::F32, f32, DataType::F32), + (test_gt_f64_f64, f64, DataType::F64, f64, DataType::F64), ] ); @@ -1504,16 +1272,17 @@ test_eval_binary_cmp_matrix!( 128, 512_000, [ - (test_gteq_i8, i8, DataType::I8), - (test_gteq_u8, u8, DataType::U8), - (test_gteq_i16, i16, DataType::I16), - (test_gteq_u16, u16, DataType::U16), - (test_gteq_i32, i32, DataType::I32), - (test_gteq_u32, u32, DataType::U32), - (test_gteq_i64, i64, DataType::I64), - (test_gteq_u64, u64, DataType::U64), - (test_gteq_f32, f32, DataType::F32), - (test_gteq_f64, f64, DataType::F64), + (test_gteq_i8_i8, i8, DataType::I8, i8, DataType::I8), + (test_gteq_u8_u8, u8, DataType::U8, u8, DataType::U8), + (test_gteq_i16_i16, i16, DataType::I16, i16, DataType::I16), + (test_gteq_u16_u16, u16, DataType::U16, u16, DataType::U16), + (test_gteq_i32_i32, i32, DataType::I32, i32, DataType::I32), + (test_gteq_u32_u32, u32, DataType::U32, u32, DataType::U32), + (test_gteq_i64_i64, i64, DataType::I64, i64, DataType::I64), + (test_gteq_u64_u64, u64, DataType::U64, u64, DataType::U64), + (test_gteq_f16_f16, f16, DataType::F16, f16, DataType::F16), + (test_gteq_f32_f32, f32, DataType::F32, f32, DataType::F32), + (test_gteq_f64_f64, f64, DataType::F64, f64, DataType::F64), ] ); @@ -1524,16 +1293,17 @@ test_eval_binary_cmp_matrix!( 10, 100_000, [ - (test_lt_i8, i8, DataType::I8), - (test_lt_u8, u8, DataType::U8), - (test_lt_i16, i16, DataType::I16), - (test_lt_u16, u16, DataType::U16), - (test_lt_i32, i32, DataType::I32), - (test_lt_u32, u32, DataType::U32), - (test_lt_i64, i64, DataType::I64), - (test_lt_u64, u64, DataType::U64), - (test_lt_f32, f32, DataType::F32), - (test_lt_f64, f64, DataType::F64), + (test_lt_i8_i8, i8, DataType::I8, i8, DataType::I8), + (test_lt_u8_u8, u8, DataType::U8, u8, DataType::U8), + (test_lt_i16_i16, i16, DataType::I16, i16, DataType::I16), + (test_lt_u16_u16, u16, DataType::U16, u16, DataType::U16), + (test_lt_i32_i32, i32, DataType::I32, i32, DataType::I32), + (test_lt_u32_u32, u32, DataType::U32, u32, DataType::U32), + (test_lt_i64_i64, i64, DataType::I64, i64, DataType::I64), + (test_lt_u64_u64, u64, DataType::U64, u64, DataType::U64), + 
(test_lt_f16_f16, f16, DataType::F16, f16, DataType::F16), + (test_lt_f32_f32, f32, DataType::F32, f32, DataType::F32), + (test_lt_f64_f64, f64, DataType::F64, f64, DataType::F64), ] ); @@ -1544,16 +1314,17 @@ test_eval_binary_cmp_matrix!( 100, 250_000, [ - (test_lteq_i8, i8, DataType::I8), - (test_lteq_u8, u8, DataType::U8), - (test_lteq_i16, i16, DataType::I16), - (test_lteq_u16, u16, DataType::U16), - (test_lteq_i32, i32, DataType::I32), - (test_lteq_u32, u32, DataType::U32), - (test_lteq_i64, i64, DataType::I64), - (test_lteq_u64, u64, DataType::U64), - (test_lteq_f32, f32, DataType::F32), - (test_lteq_f64, f64, DataType::F64), + (test_lteq_i8_i8, i8, DataType::I8, i8, DataType::I8), + (test_lteq_u8_u8, u8, DataType::U8, u8, DataType::U8), + (test_lteq_i16_i16, i16, DataType::I16, i16, DataType::I16), + (test_lteq_u16_u16, u16, DataType::U16, u16, DataType::U16), + (test_lteq_i32_i32, i32, DataType::I32, i32, DataType::I32), + (test_lteq_u32_u32, u32, DataType::U32, u32, DataType::U32), + (test_lteq_i64_i64, i64, DataType::I64, i64, DataType::I64), + (test_lteq_u64_u64, u64, DataType::U64, u64, DataType::U64), + (test_lteq_f16_f16, f16, DataType::F16, f16, DataType::F16), + (test_lteq_f32_f32, f32, DataType::F32, f32, DataType::F32), + (test_lteq_f64_f64, f64, DataType::F64, f64, DataType::F64), ] ); @@ -1796,162 +1567,52 @@ async fn test_cmp_different_types() { #[cfg(feature = "gpu")] #[tokio::test] async fn test_cmp_nan_eq() { - use compute::bit_vector::BitVector; - use compute::data_type::DataType; - use compute::error::ErrorMode; - use compute::evaluate::{Device, evaluate}; - use compute::expr::Expr; - use compute::operator::Operator; - init_tracing(); - let a_vec: Vec = - vec![f32::NAN, 2.0, 3.0, f32::NAN, 5.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY]; - let b_vec: Vec = - vec![1.0, f64::NAN, 3.0, f64::NAN, 6.0, f64::INFINITY, f64::NAN, f64::INFINITY]; - - let a_bit_vec = BitVector::::new_all_valid(a_vec.len()); - let b_bit_vec = BitVector::::new_all_valid(b_vec.len()); - - let col_a = create_column!(a_vec, Some(a_bit_vec), "a", DataType::F32); - let col_b = create_column!(b_vec, Some(b_bit_vec), "b", DataType::F64); - - let expr = Expr::binary(Operator::Eq, Expr::col("a"), Expr::col("b")); - - let result = evaluate(Device::GPU, ErrorMode::Tachyon, &expr, &vec![col_a, col_b]).await; - let result = result.unwrap(); - assert!(result[0].data_as_slice::().is_some()); - let output = result[0].data_as_slice::().unwrap(); + let output = evaluate_cmp(Operator::Eq).await; assert_eq!(output, vec![false, false, true, true, false, false, false, true]); //Nan == Nan for Databases/Dataframe, different than language } #[cfg(feature = "gpu")] #[tokio::test] async fn test_cmp_nan_neq() { - use compute::bit_vector::BitVector; - use compute::data_type::DataType; - use compute::error::ErrorMode; - use compute::evaluate::{Device, evaluate}; - use compute::expr::Expr; - use compute::operator::Operator; - init_tracing(); - let a_vec: Vec = - vec![f32::NAN, 2.0, 3.0, f32::NAN, 5.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY]; - let b_vec: Vec = - vec![1.0, f64::NAN, 3.0, f64::NAN, 6.0, f64::INFINITY, f64::NAN, f64::INFINITY]; - - let a_bit_vec = BitVector::::new_all_valid(a_vec.len()); - let b_bit_vec = BitVector::::new_all_valid(b_vec.len()); - - let col_a = create_column!(a_vec, Some(a_bit_vec), "a", DataType::F32); - let col_b = create_column!(b_vec, Some(b_bit_vec), "b", DataType::F64); - - let expr = Expr::binary(Operator::NotEq, Expr::col("a"), Expr::col("b")); - - let result = 
evaluate(Device::GPU, ErrorMode::Tachyon, &expr, &vec![col_a, col_b]).await; - let result = result.unwrap(); - assert!(result[0].data_as_slice::().is_some()); - let output = result[0].data_as_slice::().unwrap(); + let output = evaluate_cmp(Operator::NotEq).await; assert_eq!(output, vec![true, true, false, false, true, true, true, false]); //Nan == Nan for Databases/Dataframe, different than language } #[cfg(feature = "gpu")] #[tokio::test] async fn test_cmp_nan_lt() { - use compute::bit_vector::BitVector; - use compute::data_type::DataType; - use compute::error::ErrorMode; - use compute::evaluate::{Device, evaluate}; - use compute::expr::Expr; - use compute::operator::Operator; - init_tracing(); - let a_vec: Vec = - vec![f32::NAN, 2.0, 3.0, f32::NAN, 5.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY]; - let b_vec: Vec = - vec![1.0, f64::NAN, 3.0, f64::NAN, 6.0, f64::INFINITY, f64::NAN, f64::INFINITY]; - - let a_bit_vec = BitVector::::new_all_valid(a_vec.len()); - let b_bit_vec = BitVector::::new_all_valid(b_vec.len()); - - let col_a = create_column!(a_vec, Some(a_bit_vec), "a", DataType::F32); - let col_b = create_column!(b_vec, Some(b_bit_vec), "b", DataType::F64); - - let expr = Expr::binary(Operator::Lt, Expr::col("a"), Expr::col("b")); - - let result = evaluate(Device::GPU, ErrorMode::Tachyon, &expr, &vec![col_a, col_b]).await; - let result = result.unwrap(); - assert!(result[0].data_as_slice::().is_some()); - let output = result[0].data_as_slice::().unwrap(); + let output = evaluate_cmp(Operator::Lt).await; assert_eq!(output, vec![false, true, false, false, true, false, true, false]); //Nan < Nan for Databases/Dataframe, different than language } #[cfg(feature = "gpu")] #[tokio::test] async fn test_cmp_nan_lteq() { - use compute::bit_vector::BitVector; - use compute::data_type::DataType; - use compute::error::ErrorMode; - use compute::evaluate::{Device, evaluate}; - use compute::expr::Expr; - use compute::operator::Operator; - init_tracing(); - let a_vec: Vec = - vec![f32::NAN, 2.0, 3.0, f32::NAN, 5.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY]; - let b_vec: Vec = - vec![1.0, f64::NAN, 3.0, f64::NAN, 6.0, f64::INFINITY, f64::NAN, f64::INFINITY]; - - let a_bit_vec = BitVector::::new_all_valid(a_vec.len()); - let b_bit_vec = BitVector::::new_all_valid(b_vec.len()); - - let col_a = create_column!(a_vec, Some(a_bit_vec), "a", DataType::F32); - let col_b = create_column!(b_vec, Some(b_bit_vec), "b", DataType::F64); - - let expr = Expr::binary(Operator::LtEq, Expr::col("a"), Expr::col("b")); - - let result = evaluate(Device::GPU, ErrorMode::Tachyon, &expr, &vec![col_a, col_b]).await; - let result = result.unwrap(); - assert!(result[0].data_as_slice::().is_some()); - let output = result[0].data_as_slice::().unwrap(); + let output = evaluate_cmp(Operator::LtEq).await; assert_eq!(output, vec![false, true, true, true, true, false, true, true]); // any_number <= Nan for Databases/Dataframe, different than language } #[cfg(feature = "gpu")] #[tokio::test] async fn test_cmp_nan_gt() { - use compute::bit_vector::BitVector; - use compute::data_type::DataType; - use compute::error::ErrorMode; - use compute::evaluate::{Device, evaluate}; - use compute::expr::Expr; - use compute::operator::Operator; - init_tracing(); - let a_vec: Vec = - vec![f32::NAN, 2.0, 3.0, f32::NAN, 5.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY]; - let b_vec: Vec = - vec![1.0, f64::NAN, 3.0, f64::NAN, 6.0, f64::INFINITY, f64::NAN, f64::INFINITY]; - - let a_bit_vec = BitVector::::new_all_valid(a_vec.len()); - let b_bit_vec = 
BitVector::::new_all_valid(b_vec.len()); - - let col_a = create_column!(a_vec, Some(a_bit_vec), "a", DataType::F32); - let col_b = create_column!(b_vec, Some(b_bit_vec), "b", DataType::F64); - - let expr = Expr::binary(Operator::Gt, Expr::col("a"), Expr::col("b")); - - let result = evaluate(Device::GPU, ErrorMode::Tachyon, &expr, &vec![col_a, col_b]).await; - let result = result.unwrap(); - assert!(result[0].data_as_slice::().is_some()); - let output = result[0].data_as_slice::().unwrap(); + let output = evaluate_cmp(Operator::Gt).await; assert_eq!(output, vec![true, false, false, false, false, true, false, false]); //Nan > Nan for Databases/Dataframe, different than language } #[cfg(feature = "gpu")] #[tokio::test] async fn test_cmp_nan_gteq() { + let output = evaluate_cmp(Operator::GtEq).await; + assert_eq!(output, vec![true, false, true, true, false, true, false, true]); //Nan >= Nan for Databases/Dataframe, different than language +} + +async fn evaluate_cmp(op: Operator) -> Vec { use compute::bit_vector::BitVector; use compute::data_type::DataType; use compute::error::ErrorMode; use compute::evaluate::{Device, evaluate}; use compute::expr::Expr; - use compute::operator::Operator; + init_tracing(); let a_vec: Vec = vec![f32::NAN, 2.0, 3.0, f32::NAN, 5.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY]; @@ -1964,11 +1625,12 @@ async fn test_cmp_nan_gteq() { let col_a = create_column!(a_vec, Some(a_bit_vec), "a", DataType::F32); let col_b = create_column!(b_vec, Some(b_bit_vec), "b", DataType::F64); - let expr = Expr::binary(Operator::GtEq, Expr::col("a"), Expr::col("b")); + let expr = Expr::binary(op, Expr::col("a"), Expr::col("b")); let result = evaluate(Device::GPU, ErrorMode::Tachyon, &expr, &vec![col_a, col_b]).await; let result = result.unwrap(); assert!(result[0].data_as_slice::().is_some()); let output = result[0].data_as_slice::().unwrap(); - assert_eq!(output, vec![true, false, true, true, false, true, false, true]); //Nan >= Nan for Databases/Dataframe, different than language + + return output.to_vec(); } diff --git a/tachyon/compute/tests/test_utils.rs b/tachyon/compute/tests/test_utils.rs new file mode 100644 index 0000000..cd37322 --- /dev/null +++ b/tachyon/compute/tests/test_utils.rs @@ -0,0 +1,249 @@ +use std::fmt::Debug; +use std::sync::Once; + +use arrow::datatypes::{ + ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, + Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type, +}; +use half::f16; +use tracing_subscriber; + +static TRACING: Once = Once::new(); + +pub fn init_tracing() { + TRACING.call_once(|| { + tracing_subscriber::fmt() + .with_max_level(tracing::Level::DEBUG) + .with_test_writer() + .try_init() + .ok(); + }); +} + +pub trait ArrowMapper { + type ArrowType: ArrowPrimitiveType; +} + +macro_rules! arrow_mapper { + ($t:ty, $arrow_type:ty) => { + impl ArrowMapper for $t { + type ArrowType = $arrow_type; + } + }; +} + +arrow_mapper!(i8, Int8Type); +arrow_mapper!(i16, Int16Type); +arrow_mapper!(i32, Int32Type); +arrow_mapper!(i64, Int64Type); +arrow_mapper!(u8, UInt8Type); +arrow_mapper!(u16, UInt16Type); +arrow_mapper!(u32, UInt32Type); +arrow_mapper!(u64, UInt64Type); +arrow_mapper!(f16, Float16Type); +arrow_mapper!(f32, Float32Type); +arrow_mapper!(f64, Float64Type); + +macro_rules! 
impl_numeric_cast { + ($target:ty, $($source:ty)*) => { + $( + impl CastTo<$target> for $source { + fn cast(self) -> $target { + self as $target + } + } + )* + }; +} + +pub trait CastTo { + fn cast(self) -> T; +} + +impl_numeric_cast!(f64, u8 u16 u32 u64 usize i8 i16 i32 i64 isize f32 f64); +impl_numeric_cast!(f32, u8 u16 u32 u64 usize i8 i16 i32 i64 isize f32); +impl_numeric_cast!(u64, u8 u16 u32 u64 i8 i16 i32); +impl_numeric_cast!(u32, u8 u16 u32 i8 i16); +impl_numeric_cast!(u16, u8 u16 i8); +impl_numeric_cast!(u8, u8); +impl_numeric_cast!(i64, i8 i16 i32 i64 isize u8 u16 u32); +impl_numeric_cast!(i32, i8 i16 i32 u8 u16); +impl_numeric_cast!(i16, i8 i16 u8); +impl_numeric_cast!(i8, i8); + +impl CastTo for f16 { + fn cast(self) -> f64 { + f64::from(f32::from(self)) + } +} +impl CastTo for f16 { + fn cast(self) -> f32 { + f32::from(self) + } +} + +impl CastTo for f16 { + fn cast(self) -> f16 { + self + } +} + +impl CastTo for u8 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} +impl CastTo for u16 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +impl CastTo for u32 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +impl CastTo for u64 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +impl CastTo for i8 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +impl CastTo for i16 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +impl CastTo for i32 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +impl CastTo for i64 { + fn cast(self) -> f16 { + f16::from_f32(self as f32) + } +} + +pub trait TypeTestRange: Sized + Copy + Debug { + fn test_range() -> (Self, Self); +} + +macro_rules! impl_test_range { + ($($t:ty)*) => { + $( + impl TypeTestRange for $t { + fn test_range() -> (Self, Self) { + (<$t>::MIN, <$t>::MAX) + } + } + )* + }; +} +impl_test_range!(u8 u16 u32 u64 usize i8 i16 i32 i64 isize f16); + +impl TypeTestRange for f32 { + fn test_range() -> (Self, Self) { + (f32::MIN / 2.0, f32::MAX / 2.0) + } +} + +impl TypeTestRange for f64 { + fn test_range() -> (Self, Self) { + (f64::MIN / 2.0, f64::MAX / 2.0) + } +} + +#[macro_export] +macro_rules! random_num { + ($min:expr, $max:expr) => {{ + use rand; + use rand::Rng; + let mut rng = rand::rng(); + let num: usize = rng.random_range($min..$max); + num + }}; +} + +#[macro_export] +macro_rules! random_vec { + ($size:expr, $ty:ty, $min:expr, $max:expr) => {{ + use rand::Rng; + let mut rng = rand::rng(); + (0..$size).map(|_| rng.random_range($min..$max)).collect::>() + }}; +} + +#[macro_export] +macro_rules! random_bit_vec { + ($size:expr, $ty:ty) => {{ + use compute::bit_vector::BitVector; + use rand; + use rand::Rng; + let mut rng = rand::rng(); + const BITS: usize = std::mem::size_of::<$ty>() * 8; + let num_blocks = $size.div_ceil(BITS); + let mut bits: Vec<$ty> = Vec::with_capacity(num_blocks); + + for _ in 0..(num_blocks.saturating_sub(1)) { + let random_block: $ty = rng.random_range(0..=<$ty>::MAX); + bits.push(random_block); + } + + if num_blocks > 0 { + let last_idx = num_blocks - 1; + let total_used_bits = last_idx * BITS; + let valid_bits_in_last_block = $size - total_used_bits; + let last_block: $ty = rng.random_range(0..=<$ty>::MAX); + + if valid_bits_in_last_block < BITS { + let low_bits_mask = !(<$ty>::MAX << valid_bits_in_last_block); + bits.push(last_block & low_bits_mask); + } else { + bits.push(last_block); + } + } + + BitVector::new(bits, $size) + }}; +} + +#[macro_export] +macro_rules! 
create_arrow_array { + ($vec:expr, $bit_vec:expr, $native_type:ty) => {{ + let arrow_vec: Vec> = $vec + .iter() + .enumerate() + .map(|(i, &x)| { + if $bit_vec.is_valid(i) { + let y: $native_type = x.cast(); + Some(y) + } else { + None + } + }) + .collect(); + PrimitiveArray::<<$native_type as ArrowMapper>::ArrowType>::from(arrow_vec) + }}; +} + +#[macro_export] +macro_rules! create_column { + ($vec:expr, $bit_vec:expr, $name:expr, $data_type:expr) => {{ + use std::sync::Arc; + + use compute::column::{Column, VecArray}; + let arr = Arc::new(VecArray { data: $vec.clone(), datatype: $data_type }); + Column::new($name, arr, $bit_vec) + }}; +} diff --git a/tachyon/gpu/src/ffi/kernels/limits.cuh b/tachyon/gpu/src/ffi/kernels/limits.cuh index 9fdb1a3..92ab323 100644 --- a/tachyon/gpu/src/ffi/kernels/limits.cuh +++ b/tachyon/gpu/src/ffi/kernels/limits.cuh @@ -23,7 +23,7 @@ typedef unsigned long long uint64_t; typedef __nv_bfloat16 bfloat16; typedef __half float16; -namespace std { +namespace cuda_utils { template class numeric_limits { public: @@ -208,4 +208,16 @@ template <> struct is_signed { template struct is_unsigned { static const bool value = !is_signed::value; }; -} // namespace std + +template __device__ inline bool is_nan(const T v) { + return isnan(v); +} + +template <> __device__ inline bool is_nan(const float16 value) { + return __hisnan(value); +} + +template <> __device__ inline bool is_nan(const bfloat16 value) { + return __hisnan(value); +} +} // namespace cuda_utils diff --git a/tachyon/gpu/src/ffi/kernels/math.cuh b/tachyon/gpu/src/ffi/kernels/math.cuh index 202a6e0..2f70100 100644 --- a/tachyon/gpu/src/ffi/kernels/math.cuh +++ b/tachyon/gpu/src/ffi/kernels/math.cuh @@ -15,7 +15,7 @@ template __device__ __forceinline__ bool __check_add_overflow(T a, T b, T *res) { *res = a + b; - if constexpr (std::is_unsigned::value) { + if constexpr (cuda_utils::is_unsigned::value) { return *res < a; } else { return ((a ^ *res) & (b ^ *res)) < 0; @@ -47,7 +47,7 @@ __device__ __forceinline__ T add(C *__restrict__ ctx, const T &a, const T &b) { template __device__ __forceinline__ bool __check_sub_overflow(T a, T b, T *res) { *res = a - b; - if constexpr (std::is_unsigned::value) { + if constexpr (cuda_utils::is_unsigned::value) { return *res > a; } else { return ((a ^ b) & (a ^ *res)) < 0; @@ -80,19 +80,19 @@ __device__ __forceinline__ T sub(C *__restrict__ ctx, const T &a, const T &b) { template __device__ __forceinline__ bool __check_mul_overflow(T a, T b, T *res) { if constexpr (sizeof(T) <= 4) { - if constexpr (std::is_unsigned::value) { + if constexpr (cuda_utils::is_unsigned::value) { unsigned long long wide_a = a; unsigned long long wide_b = b; unsigned long long wide_res = wide_a * wide_b; *res = (T)wide_res; - return wide_res > std::numeric_limits::max(); + return wide_res > cuda_utils::numeric_limits::max(); } else { long long wide_a = a; long long wide_b = b; long long wide_res = wide_a * wide_b; *res = (T)wide_res; - return wide_res < std::numeric_limits::min() || - wide_res > std::numeric_limits::max(); + return wide_res < cuda_utils::numeric_limits::min() || + wide_res > cuda_utils::numeric_limits::max(); } } else { if (a == 0 || b == 0) { @@ -108,26 +108,26 @@ __device__ __forceinline__ bool __check_mul_overflow(T a, T b, T *res) { return false; } - if constexpr (std::is_unsigned::value) { - if (a > std::numeric_limits::max() / b) { + if constexpr (cuda_utils::is_unsigned::value) { + if (a > cuda_utils::numeric_limits::max() / b) { *res = a * b; return true; } *res = a * b; 
return false; } else { - if (a == -1 && b == std::numeric_limits::min()) { - *res = std::numeric_limits::min(); + if (a == -1 && b == cuda_utils::numeric_limits::min()) { + *res = cuda_utils::numeric_limits::min(); return true; } - if (b == -1 && a == std::numeric_limits::min()) { - *res = std::numeric_limits::min(); + if (b == -1 && a == cuda_utils::numeric_limits::min()) { + *res = cuda_utils::numeric_limits::min(); return true; } long long abs_a = (a < 0) ? -(long long)a : a; long long abs_b = (b < 0) ? -(long long)b : b; - if (abs_a > std::numeric_limits::max() / abs_b) { + if (abs_a > cuda_utils::numeric_limits::max() / abs_b) { *res = a * b; return true; } @@ -199,7 +199,8 @@ __device__ __forceinline__ Bool eq(C *__restrict__ _ctx, const T1 &lhs, // Databases/dataframes behavior for Nan == Nan is different than pure // programming languages if constexpr (T1::is_floating && T2::is_floating) { - if (isnan(lhs.value) && isnan(rhs.value)) { + if (cuda_utils::is_nan(lhs.value) && cuda_utils::is_nan(rhs.value)) + [[unlikely]] { result.value = true; } else { result.value = lhs.value == rhs.value; @@ -222,7 +223,8 @@ __device__ __forceinline__ Bool neq(C *__restrict__ _ctx, const T1 &lhs, // Databases/dataframes behavior for Nan != Nan is different than pure // programming languages if constexpr (T1::is_floating && T2::is_floating) { - if (isnan(lhs.value) && isnan(rhs.value)) { + if (cuda_utils::is_nan(lhs.value) && cuda_utils::is_nan(rhs.value)) + [[unlikely]] { result.value = false; } else { result.value = lhs.value != rhs.value; @@ -245,7 +247,8 @@ __device__ __forceinline__ Bool lt(C *__restrict__ _ctx, const T1 &lhs, // Databases/dataframes behavior for valid_number < Nan is different than // pure programming languages if constexpr (T1::is_floating && T2::is_floating) { - if (!isnan(lhs) && isnan(rhs)) { + if (!cuda_utils::is_nan(lhs.value) && cuda_utils::is_nan(rhs.value)) + [[unlikely]] { result.value = true; } else { result.value = lhs.value < rhs.value; @@ -268,7 +271,7 @@ __device__ __forceinline__ Bool lteq(C *__restrict__ _ctx, const T1 &lhs, // Databases/dataframes behavior for valid_number <= Nan // is different // than pure programming languages if constexpr (T1::is_floating && T2::is_floating) { - if (isnan(rhs)) { + if (cuda_utils::is_nan(rhs.value)) [[unlikely]] { result.value = true; } else { result.value = lhs.value <= rhs.value; @@ -291,7 +294,8 @@ __device__ __forceinline__ Bool gt(C *__restrict__ _ctx, const T1 &lhs, // Databases/dataframes behavior for Nan > valid_number is different than // pure programming languages if constexpr (T1::is_floating && T2::is_floating) { - if (isnan(lhs) && !isnan(rhs)) { + if (cuda_utils::is_nan(lhs.value) && !cuda_utils::is_nan(rhs.value)) + [[unlikely]] { result.value = true; } else { result.value = lhs.value > rhs.value; @@ -314,7 +318,7 @@ __device__ __forceinline__ Bool gteq(C *__restrict__ _ctx, const T1 &lhs, // Databases/dataframes behavior for Nan >= valid_number is different than // pure programming languages if constexpr (T1::is_floating && T2::is_floating) { - if (isnan(lhs)) { + if (cuda_utils::is_nan(lhs.value)) [[unlikely]] { result.value = true; } else { result.value = lhs.value >= rhs.value; diff --git a/tachyon/gpu/src/ffi/kernels/types.cuh b/tachyon/gpu/src/ffi/kernels/types.cuh index 4e85d32..b1f8c6a 100644 --- a/tachyon/gpu/src/ffi/kernels/types.cuh +++ b/tachyon/gpu/src/ffi/kernels/types.cuh @@ -63,41 +63,41 @@ template struct TypeTraits; DEFINE_TYPE(Bool, bool, sizeof(bool), false, false, false, true, false) 
 DEFINE_TYPE(Int8, int8_t, sizeof(int8_t), true, false,
-            std::numeric_limits<int8_t>::min(),
-            std::numeric_limits<int8_t>::max(), 0)
+            cuda_utils::numeric_limits<int8_t>::min(),
+            cuda_utils::numeric_limits<int8_t>::max(), 0)
 DEFINE_TYPE(UInt8, uint8_t, sizeof(uint8_t), false, false,
-            std::numeric_limits<uint8_t>::min(),
-            std::numeric_limits<uint8_t>::max(), 0)
+            cuda_utils::numeric_limits<uint8_t>::min(),
+            cuda_utils::numeric_limits<uint8_t>::max(), 0)
 DEFINE_TYPE(Int16, int16_t, sizeof(int16_t), true, false,
-            std::numeric_limits<int16_t>::min(),
-            std::numeric_limits<int16_t>::max(), 0)
+            cuda_utils::numeric_limits<int16_t>::min(),
+            cuda_utils::numeric_limits<int16_t>::max(), 0)
 DEFINE_TYPE(UInt16, uint16_t, sizeof(uint16_t), false, false,
-            std::numeric_limits<uint16_t>::min(),
-            std::numeric_limits<uint16_t>::max(), 0)
+            cuda_utils::numeric_limits<uint16_t>::min(),
+            cuda_utils::numeric_limits<uint16_t>::max(), 0)
 DEFINE_TYPE(Int32, int32_t, sizeof(int32_t), true, false,
-            std::numeric_limits<int32_t>::min(),
-            std::numeric_limits<int32_t>::max(), 0)
+            cuda_utils::numeric_limits<int32_t>::min(),
+            cuda_utils::numeric_limits<int32_t>::max(), 0)
 DEFINE_TYPE(UInt32, uint32_t, sizeof(uint32_t), false, false,
-            std::numeric_limits<uint32_t>::min(),
-            std::numeric_limits<uint32_t>::max(), 0)
+            cuda_utils::numeric_limits<uint32_t>::min(),
+            cuda_utils::numeric_limits<uint32_t>::max(), 0)
 DEFINE_TYPE(Int64, int64_t, sizeof(int64_t), true, false,
-            std::numeric_limits<int64_t>::min(),
-            std::numeric_limits<int64_t>::max(), 0)
+            cuda_utils::numeric_limits<int64_t>::min(),
+            cuda_utils::numeric_limits<int64_t>::max(), 0)
 DEFINE_TYPE(UInt64, uint64_t, sizeof(uint64_t), false, false,
-            std::numeric_limits<uint64_t>::min(),
-            std::numeric_limits<uint64_t>::max(), 0)
+            cuda_utils::numeric_limits<uint64_t>::min(),
+            cuda_utils::numeric_limits<uint64_t>::max(), 0)
 DEFINE_TYPE(BFloat16, bfloat16, sizeof(bfloat16), true, true,
-            std::numeric_limits<bfloat16>::min(),
-            std::numeric_limits<bfloat16>::max(), 0.0)
+            cuda_utils::numeric_limits<bfloat16>::min(),
+            cuda_utils::numeric_limits<bfloat16>::max(), 0.0)
 DEFINE_TYPE(Float16, float16, sizeof(float16), true, true,
-            std::numeric_limits<float16>::min(),
-            std::numeric_limits<float16>::max(), 0.0)
+            cuda_utils::numeric_limits<float16>::min(),
+            cuda_utils::numeric_limits<float16>::max(), 0.0)
 DEFINE_TYPE(Float32, float, sizeof(float), true, true,
-            std::numeric_limits<float>::min(),
-            std::numeric_limits<float>::max(), 0.0)
+            cuda_utils::numeric_limits<float>::min(),
+            cuda_utils::numeric_limits<float>::max(), 0.0)
 DEFINE_TYPE(Float64, double, sizeof(double), true, true,
-            std::numeric_limits<double>::min(),
-            std::numeric_limits<double>::max(), 0.0)
+            cuda_utils::numeric_limits<double>::min(),
+            cuda_utils::numeric_limits<double>::max(), 0.0)
 
 #undef DEFINE_TYPE
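
Note on the NaN ordering asserted by the tests in this patch: the comparison kernels treat NaN as equal to NaN and greater than every non-NaN value (the usual database/dataframe total order), which is why, for example, 2.0 < NaN and NaN > INFINITY both evaluate to true in the expected vectors. The following is a minimal, self-contained Rust sketch of that ordering for illustration only; db_cmp is a hypothetical helper, not part of this patch or the compute crate.

use std::cmp::Ordering;

// Hypothetical illustration of the comparison semantics encoded in math.cuh:
// NaN == NaN, and NaN sorts above every non-NaN value (including +INFINITY).
fn db_cmp(a: f64, b: f64) -> Ordering {
    match (a.is_nan(), b.is_nan()) {
        (true, true) => Ordering::Equal,                // NaN == NaN
        (true, false) => Ordering::Greater,             // NaN > any non-NaN
        (false, true) => Ordering::Less,                // any non-NaN < NaN
        (false, false) => a.partial_cmp(&b).unwrap(),   // safe: neither side is NaN
    }
}

fn main() {
    // Mirrors rows of the expected vectors in test_cmp_nan_eq / _gt / _lt above.
    assert_eq!(db_cmp(f64::NAN, f64::NAN), Ordering::Equal);
    assert_eq!(db_cmp(f64::NAN, 1.0), Ordering::Greater);
    assert_eq!(db_cmp(2.0, f64::NAN), Ordering::Less);
    assert_eq!(db_cmp(f64::NAN, f64::INFINITY), Ordering::Greater);
}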