diff --git a/c/parallel/test/test_segmented_sort.cpp b/c/parallel/test/test_segmented_sort.cpp index 90b31ff88b1..b817852a937 100644 --- a/c/parallel/test/test_segmented_sort.cpp +++ b/c/parallel/test/test_segmented_sort.cpp @@ -13,6 +13,7 @@ #include #include // std::optional #include +#include #include #include @@ -72,8 +73,14 @@ auto& get_cache() return fixture::get_or_create().get_value(); } +template struct segmented_sort_build { + static bool should_check_sass(int cc_major) + { + return !(DisableSassCheckOnSm120 && cc_major >= 12); + } + CUresult operator()( BuildResultT* build_ptr, cccl_sort_order_t sort_order, @@ -144,7 +151,9 @@ struct segmented_sort_run } }; -template +template void segmented_sort( cccl_sort_order_t sort_order, cccl_iterator_t keys_in, @@ -160,7 +169,12 @@ void segmented_sort( std::optional& cache, const std::optional& lookup_key) { - AlgorithmExecute( + AlgorithmExecute, + segmented_sort_cleanup, + segmented_sort_run, + BuildCache, + KeyT>( cache, lookup_key, sort_order, @@ -186,10 +200,11 @@ C2H_TEST("segmented_sort can sort keys-only", "[segmented_sort][keys_only]", tes using T = c2h::get<0, TestType>; using key_t = typename T::KeyT; - constexpr auto this_test_params = T(); - constexpr bool is_descending = this_test_params.is_descending(); - constexpr auto order = is_descending ? CCCL_DESCENDING : CCCL_ASCENDING; - constexpr bool is_overwrite_okay = this_test_params.is_overwrite_okay(); + constexpr auto this_test_params = T(); + constexpr bool is_descending = this_test_params.is_descending(); + constexpr auto order = is_descending ? CCCL_DESCENDING : CCCL_ASCENDING; + constexpr bool is_overwrite_okay = this_test_params.is_overwrite_okay(); + constexpr bool disable_sass_check_on_sm120 = std::is_same_v>; const std::size_t n_segments = GENERATE(0, 13, take(2, random(1 << 10, 1 << 12))); const std::size_t segment_size = GENERATE(1, 12, take(2, random(1 << 10, 1 << 12))); @@ -272,7 +287,7 @@ C2H_TEST("segmented_sort can sort keys-only", "[segmented_sort][keys_only]", tes int selector = -1; - segmented_sort( + segmented_sort( order, keys_in_ptr, keys_out_ptr, @@ -315,10 +330,11 @@ C2H_TEST("segmented_sort can sort key-value pairs", "[segmented_sort][key_value] using T = c2h::get<0, TestType>; using key_t = typename T::KeyT; - constexpr auto this_test_params = T(); - constexpr bool is_descending = this_test_params.is_descending(); - constexpr auto order = is_descending ? CCCL_DESCENDING : CCCL_ASCENDING; - constexpr bool is_overwrite_okay = this_test_params.is_overwrite_okay(); + constexpr auto this_test_params = T(); + constexpr bool is_descending = this_test_params.is_descending(); + constexpr auto order = is_descending ? CCCL_DESCENDING : CCCL_ASCENDING; + constexpr bool is_overwrite_okay = this_test_params.is_overwrite_okay(); + constexpr bool disable_sass_check_on_sm120 = !std::is_same_v>; const std::size_t n_segments = GENERATE(0, 13, take(2, random(1 << 10, 1 << 12))); const std::size_t segment_size = GENERATE(1, 12, take(2, random(1 << 10, 1 << 12))); @@ -371,7 +387,7 @@ C2H_TEST("segmented_sort can sort key-value pairs", "[segmented_sort][key_value] int selector = -1; - segmented_sort( + segmented_sort( order, keys_in_ptr, keys_out_ptr, @@ -583,6 +599,8 @@ C2H_TEST("SegmentedSort works with variable segment sizes", "[segmented_sort][va constexpr bool is_descending = this_test_params.is_descending(); constexpr auto order = is_descending ? CCCL_DESCENDING : CCCL_ASCENDING; constexpr bool is_overwrite_okay = this_test_params.is_overwrite_okay(); + constexpr bool disable_sass_check_on_sm120 = + std::is_same_v> || std::is_same_v>; const std::size_t n_segments = GENERATE(20, 600); @@ -644,7 +662,7 @@ C2H_TEST("SegmentedSort works with variable segment sizes", "[segmented_sort][va int selector = -1; - segmented_sort( + segmented_sort( order, keys_in_ptr, keys_out_ptr, diff --git a/c/parallel/test/test_three_way_partition.cpp b/c/parallel/test/test_three_way_partition.cpp index d10972129c4..cf36105ed19 100644 --- a/c/parallel/test/test_three_way_partition.cpp +++ b/c/parallel/test/test_three_way_partition.cpp @@ -49,7 +49,7 @@ auto& get_cache() return fixture::get_or_create().get_value(); } -template +template struct three_way_partition_build { template @@ -77,9 +77,9 @@ struct three_way_partition_build rest...); } - static constexpr bool should_check_sass(int) + static constexpr bool should_check_sass(int cc_major) { - return !DisableSassCheck; + return !DisableSassCheck && !(DisableSassCheckOnSm120 && cc_major >= 12); } }; @@ -200,7 +200,12 @@ std_partition(FirstPartSelectionOp first_selector, SecondPartSelectionOp second_ return result; } -template +template three_way_partition_result_t c_parallel_partition(OperationT first_selector, OperationT second_selector, const std::vector& input) { @@ -215,7 +220,7 @@ c_parallel_partition(OperationT first_selector, OperationT second_selector, cons auto& build_cache = get_cache(); const auto& test_key = make_key(); - three_way_partition( + three_way_partition( input_ptr, first_part_output_ptr, second_part_output_ptr, @@ -241,9 +246,10 @@ c_parallel_partition(OperationT first_selector, OperationT second_selector, cons num_items - num_selected[0] - num_selected[1]); } -template +template void three_way_partition( cccl_iterator_t d_in, cccl_iterator_t d_first_part_out, @@ -257,7 +263,7 @@ void three_way_partition( const std::optional& lookup_key) { AlgorithmExecute, + three_way_partition_build, three_way_partition_cleanup, three_way_partition_run, BuildCache, @@ -362,7 +368,9 @@ extern "C" __device__ void greater_or_equal_op(void* state_ptr, void* x_ptr, voi c_parallel_partition, key_t, num_selected_t, - ThreeWayPartition_StatefulOperations_Fixture_Tag>(less_op, greater_or_equal_op, input); + ThreeWayPartition_StatefulOperations_Fixture_Tag, + false, + true>(less_op, greater_or_equal_op, input); auto std_result = std_partition(less_than_t{key_t{21}}, greater_or_equal_t{key_t{21}}, input); REQUIRE(c_parallel_result == std_result); @@ -491,7 +499,7 @@ C2H_TEST("ThreeWayPartition works with iterators", "[three_way_partition]") auto& build_cache = get_cache(); const auto& test_key = make_key(); - three_way_partition( + three_way_partition( input_it, first_part_output_it, second_part_output_it, diff --git a/ci/matrix.yaml b/ci/matrix.yaml index cee1f205e7e..237f6a2f681 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -61,6 +61,8 @@ workflows: # c.parallel -- pinned to gcc13 on Linux to match python - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} + # RTX PRO 6000 coverage (limited due to small number of runners): + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13'], gpu: ['rtxpro6000']} # c.experimental.stf-- pinned to gcc13 to match python - {jobs: ['test'], project: 'cccl_c_stf', ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: 'cccl_c_stf', ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} @@ -182,6 +184,8 @@ workflows: # c.parallel -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} + # RTX PRO 6000 coverage (limited due to small number of runners): + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13'], gpu: ['rtxpro6000']} # c.experimental.stf -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']} @@ -267,6 +271,8 @@ workflows: # c.parallel -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '12.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_parallel'], ctk: '13.X', cxx: ['gcc13', 'msvc'], gpu: ['rtx2080', 'l4', 'h100']} + # RTX PRO 6000 coverage (limited due to small number of runners): + - {jobs: ['test'], project: 'cccl_c_parallel', ctk: '13.X', cxx: ['gcc13'], gpu: ['rtxpro6000']} # c.experimental.stf -- pinned to gcc13 to match python - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '12.X', cxx: 'gcc13', gpu: ['rtx2080']} - {jobs: ['test'], project: ['cccl_c_stf'], ctk: '13.X', cxx: 'gcc13', gpu: ['rtx2080', 'l4', 'h100']}