-
Notifications
You must be signed in to change notification settings - Fork 9
Dynamic loading + NVRTC #93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Co-authored-by: Guillaume Fraux <guillaume.fraux@epfl.ch>
…partial periodicity.
Resolved conflicts in mic_neighbourlist.cu by keeping the NVRTC-compatible flat array implementation (gpulite) instead of the double3 types (main). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Fixes MinGW build failure where int32_t was not declared. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
macos-13 runners are being retired. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
macos-15 is ARM64 only and cannot build x86_64 wheels. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
macos-13 is retired, macos-15-intel is the replacement for Intel builds. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This reverts commit 7ec8311.
This reverts commit bbfbb9b.
This reverts commit 54852da.
e796f6c to
f975dce
Compare
Luthaf
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a lot of changes in vesin/src/mic_neighbourlist.cu that seems to undo b5559d1. Is there a specific reason for it?
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| not (HAS_CUPY and CUDA_AVAILABLE), reason="CuPy not available or CUDA not available" | ||
| ) | ||
| @pytest.mark.parametrize("full_list", [False, True]) | ||
| def test_cupy_large_box_small_cutoff(full_list): | ||
| """Test CuPy with synthetic data - large box and small cutoff""" | ||
| # Use synthetic data with large box to avoid CUDA cutoff <= cell/2 limitation | ||
| points_np = np.array( | ||
| [ | ||
| [0.0, 0.0, 0.0], | ||
| [0.0, 2.0, 0.0], | ||
| [0.0, 0.0, 2.0], | ||
| [-6.0, 0.0, 0.0], | ||
| [-6.0, -2.0, 0.0], | ||
| [-6.0, 0.0, -2.0], | ||
| ], | ||
| dtype=np.float64, | ||
| ) | ||
|
|
||
| box_np = np.array( | ||
| [ | ||
| [54.0, 0.0, 0.0], | ||
| [0.0, 54.0, 0.0], | ||
| [0.0, 0.0, 54.0], | ||
| ], | ||
| dtype=np.float64, | ||
| ) | ||
|
|
||
| # NumPy computation | ||
| calculator_cpu = vesin.NeighborList(cutoff=2.1, full_list=full_list, sorted=True) | ||
| i_np, j_np, d_np, S_np = calculator_cpu.compute( | ||
| points=points_np, | ||
| box=box_np, | ||
| periodic=True, | ||
| quantities="ijdS", | ||
| ) | ||
|
|
||
| # CuPy computation | ||
| calculator_gpu = vesin.NeighborList(cutoff=2.1, full_list=full_list, sorted=True) | ||
| points_cp = cp.asarray(points_np, dtype=cp.float64) | ||
| box_cp = cp.asarray(box_np, dtype=cp.float64) | ||
|
|
||
| i_cp, j_cp, d_cp, S_cp = calculator_gpu.compute( | ||
| points=points_cp, | ||
| box=box_cp, | ||
| periodic=True, | ||
| quantities="ijdS", | ||
| ) | ||
|
|
||
| # Verify outputs are CuPy arrays | ||
| assert isinstance(i_cp, cp.ndarray) | ||
| assert isinstance(j_cp, cp.ndarray) | ||
| assert isinstance(d_cp, cp.ndarray) | ||
| assert isinstance(S_cp, cp.ndarray) | ||
|
|
||
| # Verify expected pairs based on full_list | ||
| pairs_np = list(zip(i_np.tolist(), j_np.tolist())) | ||
| if full_list: | ||
| expected_pairs = sorted( | ||
| [ | ||
| (0, 1), | ||
| (0, 2), | ||
| (1, 0), | ||
| (2, 0), | ||
| (3, 4), | ||
| (3, 5), | ||
| (4, 3), | ||
| (5, 3), | ||
| ] | ||
| ) | ||
| else: | ||
| expected_pairs = sorted( | ||
| [ | ||
| (0, 1), | ||
| (0, 2), | ||
| (3, 4), | ||
| (3, 5), | ||
| ] | ||
| ) | ||
| assert sorted(pairs_np) == expected_pairs | ||
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| not (HAS_CUPY and CUDA_AVAILABLE), reason="CuPy not available or CUDA not available" | ||
| ) | ||
| def test_cupy_no_neighbors(): | ||
| """Test CuPy when there are no neighbors""" | ||
| points_np = np.array([[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]], dtype=np.float64) | ||
| box_np = np.eye(3, dtype=np.float64) | ||
|
|
||
| calculator = vesin.NeighborList(cutoff=0.1, full_list=True) | ||
|
|
||
| # NumPy | ||
| i_np, j_np, S_np, D_np = calculator.compute( | ||
| points_np, box_np, True, quantities="ijSD" | ||
| ) | ||
|
|
||
| # CuPy | ||
| calculator_gpu = vesin.NeighborList(cutoff=0.1, full_list=True) | ||
| points_cp = cp.asarray(points_np) | ||
| box_cp = cp.asarray(box_np) | ||
| i_cp, j_cp, S_cp, D_cp = calculator_gpu.compute( | ||
| points_cp, box_cp, True, quantities="ijSD" | ||
| ) | ||
|
|
||
| # Both should have no neighbors | ||
| assert len(i_np) == 0 | ||
| assert len(i_cp) == 0 | ||
| assert isinstance(i_cp, cp.ndarray) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| @pytest.mark.skipif( | |
| not (HAS_CUPY and CUDA_AVAILABLE), reason="CuPy not available or CUDA not available" | |
| ) | |
| @pytest.mark.parametrize("full_list", [False, True]) | |
| def test_cupy_large_box_small_cutoff(full_list): | |
| """Test CuPy with synthetic data - large box and small cutoff""" | |
| # Use synthetic data with large box to avoid CUDA cutoff <= cell/2 limitation | |
| points_np = np.array( | |
| [ | |
| [0.0, 0.0, 0.0], | |
| [0.0, 2.0, 0.0], | |
| [0.0, 0.0, 2.0], | |
| [-6.0, 0.0, 0.0], | |
| [-6.0, -2.0, 0.0], | |
| [-6.0, 0.0, -2.0], | |
| ], | |
| dtype=np.float64, | |
| ) | |
| box_np = np.array( | |
| [ | |
| [54.0, 0.0, 0.0], | |
| [0.0, 54.0, 0.0], | |
| [0.0, 0.0, 54.0], | |
| ], | |
| dtype=np.float64, | |
| ) | |
| # NumPy computation | |
| calculator_cpu = vesin.NeighborList(cutoff=2.1, full_list=full_list, sorted=True) | |
| i_np, j_np, d_np, S_np = calculator_cpu.compute( | |
| points=points_np, | |
| box=box_np, | |
| periodic=True, | |
| quantities="ijdS", | |
| ) | |
| # CuPy computation | |
| calculator_gpu = vesin.NeighborList(cutoff=2.1, full_list=full_list, sorted=True) | |
| points_cp = cp.asarray(points_np, dtype=cp.float64) | |
| box_cp = cp.asarray(box_np, dtype=cp.float64) | |
| i_cp, j_cp, d_cp, S_cp = calculator_gpu.compute( | |
| points=points_cp, | |
| box=box_cp, | |
| periodic=True, | |
| quantities="ijdS", | |
| ) | |
| # Verify outputs are CuPy arrays | |
| assert isinstance(i_cp, cp.ndarray) | |
| assert isinstance(j_cp, cp.ndarray) | |
| assert isinstance(d_cp, cp.ndarray) | |
| assert isinstance(S_cp, cp.ndarray) | |
| # Verify expected pairs based on full_list | |
| pairs_np = list(zip(i_np.tolist(), j_np.tolist())) | |
| if full_list: | |
| expected_pairs = sorted( | |
| [ | |
| (0, 1), | |
| (0, 2), | |
| (1, 0), | |
| (2, 0), | |
| (3, 4), | |
| (3, 5), | |
| (4, 3), | |
| (5, 3), | |
| ] | |
| ) | |
| else: | |
| expected_pairs = sorted( | |
| [ | |
| (0, 1), | |
| (0, 2), | |
| (3, 4), | |
| (3, 5), | |
| ] | |
| ) | |
| assert sorted(pairs_np) == expected_pairs | |
| @pytest.mark.skipif( | |
| not (HAS_CUPY and CUDA_AVAILABLE), reason="CuPy not available or CUDA not available" | |
| ) | |
| def test_cupy_no_neighbors(): | |
| """Test CuPy when there are no neighbors""" | |
| points_np = np.array([[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]], dtype=np.float64) | |
| box_np = np.eye(3, dtype=np.float64) | |
| calculator = vesin.NeighborList(cutoff=0.1, full_list=True) | |
| # NumPy | |
| i_np, j_np, S_np, D_np = calculator.compute( | |
| points_np, box_np, True, quantities="ijSD" | |
| ) | |
| # CuPy | |
| calculator_gpu = vesin.NeighborList(cutoff=0.1, full_list=True) | |
| points_cp = cp.asarray(points_np) | |
| box_cp = cp.asarray(box_np) | |
| i_cp, j_cp, S_cp, D_cp = calculator_gpu.compute( | |
| points_cp, box_cp, True, quantities="ijSD" | |
| ) | |
| # Both should have no neighbors | |
| assert len(i_np) == 0 | |
| assert len(i_cp) == 0 | |
| assert isinstance(i_cp, cp.ndarray) |
This is already in test_cuda.py
| try: | ||
| import cupy as cp | ||
|
|
||
| HAS_CUPY = True | ||
| # Check if CUDA is available | ||
| try: | ||
| cp.cuda.Device(0).compute_capability | ||
| CUDA_AVAILABLE = True | ||
| except cp.cuda.runtime.CUDARuntimeError: | ||
| CUDA_AVAILABLE = False | ||
| except ImportError: | ||
| HAS_CUPY = False | ||
| CUDA_AVAILABLE = False | ||
| cp = None | ||
|
|
||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| try: | |
| import cupy as cp | |
| HAS_CUPY = True | |
| # Check if CUDA is available | |
| try: | |
| cp.cuda.Device(0).compute_capability | |
| CUDA_AVAILABLE = True | |
| except cp.cuda.runtime.CUDARuntimeError: | |
| CUDA_AVAILABLE = False | |
| except ImportError: | |
| HAS_CUPY = False | |
| CUDA_AVAILABLE = False | |
| cp = None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please undo the changes on this file, they have been integrated differently
| # Handle single bool, numpy bool, or 0-dim array | ||
| if is_cupy: | ||
| periodic = cp.asarray(periodic, dtype=cp.bool_) | ||
| # For CuPy arrays, handle conversion differently | ||
| if isinstance(periodic, cp.ndarray): | ||
| if periodic.ndim == 0: | ||
| val = bool(periodic.get()) | ||
| periodic = cp.array([val, val, val], dtype=cp.bool_) | ||
| else: | ||
| periodic = cp.asarray(periodic, dtype=cp.bool_) | ||
| else: | ||
| # periodic is a Python bool or similar | ||
| periodic_arr = np.asarray(periodic) | ||
| if periodic_arr.ndim == 0: | ||
| val = bool(periodic_arr) | ||
| periodic = cp.array([val, val, val], dtype=cp.bool_) | ||
| else: | ||
| periodic = cp.asarray(periodic_arr, dtype=cp.bool_) | ||
| else: | ||
| periodic = np.asarray(periodic, dtype=np.bool_) | ||
| periodic_arr = np.asarray(periodic) | ||
| if periodic_arr.ndim == 0: | ||
| val = bool(periodic_arr) | ||
| periodic = np.array([val, val, val], dtype=np.bool_) | ||
| else: | ||
| periodic = np.asarray(periodic_arr, dtype=np.bool_) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see why this change is required? Is this a leftover from previous version of the PR?
| [lint] | ||
| select = ["E", "F", "B", "I"] | ||
| ignore = ["B018", "B904"] | ||
| ignore = ["B018", "B904", "B905"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ignore = ["B018", "B904", "B905"] | |
| ignore = ["B018", "B904"] |
| #include "cuda_cache.hpp" | ||
| #include "dynamic_cuda.hpp" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should these be hidden behind a gpulite header/directory?
| #include "cuda_cache.hpp" | |
| #include "dynamic_cuda.hpp" | |
| #include <gpulite/cuda_cache.hpp> | |
| #include <gpulite/dynamic_cuda.hpp> |
or even
| #include "cuda_cache.hpp" | |
| #include "dynamic_cuda.hpp" | |
| #include <gpulite.hpp> |
| #include "cuda_cache.hpp" | |
| #include "dynamic_cuda.hpp" | |
| #include <gpulite/gpulite.hpp> |
| if (neighbors.vectors && is_device_ptr(get_ptr_attributes(neighbors.vectors), "vectors")) { | ||
| CUDA_CHECK(cudaFree(neighbors.vectors)); | ||
| if (neighbors.vectors && is_device_ptr(getPtrAttributes(neighbors.vectors), "vectors")) { | ||
| CUDART_SAFE_CALL(CUDART_INSTANCE.cudaFree(neighbors.vectors)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
random API suggestion: should this be
| CUDART_SAFE_CALL(CUDART_INSTANCE.cudaFree(neighbors.vectors)); | |
| CUDART_CALL(cudaFree(neighbors.vectors)); |
(or some other name like CUDART_DYNAMIC_CALL, GPULITE_CUDART_CALL, …)?
Basically hiding the global instance from users of gpu-lite
| neighbors.shifts = nullptr; | ||
| neighbors.distances = nullptr; | ||
| neighbors.vectors = nullptr; | ||
| extras->length_ptr = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this required? I would think that the default constructor below would handle it
| // Vector math helpers using flat array indexing | ||
| __device__ inline double dot3(const double* a, const double* b) { | ||
| return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why undo these changes?
| double* vector, | ||
| int32_t* shift, | ||
| const double* box, | ||
| const double* inv_box, | ||
| const bool* periodic, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here, why use pointers instead of richer cuda types?
No description provided.