Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/src/spark_rapids_ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,6 @@ def _single_fit(init_parameters: Dict[str, Any]) -> Dict[str, Any]:
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

logistic_regression.fit(
Expand Down
2 changes: 0 additions & 2 deletions python/src/spark_rapids_ml/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ def _cuml_fit(
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

kmeans_object._fit(
Expand Down Expand Up @@ -997,7 +996,6 @@ def _cuml_fit(
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

# Set out_dtype to 64bit to get larger indexType in cuML for avoiding overflow
Expand Down
1 change: 0 additions & 1 deletion python/src/spark_rapids_ml/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,6 @@ async def do_allGather() -> List[str]:
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

res_tuple: Tuple[List[np.ndarray], List[np.ndarray]] = nn_object.kneighbors(
Expand Down
1 change: 0 additions & 1 deletion python/src/spark_rapids_ml/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ def _single_fit(rf: cuRf) -> Dict[str, Any]:
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

# Fit a random forest model on the dataset (X, y)
Expand Down
2 changes: 0 additions & 2 deletions python/src/spark_rapids_ml/umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,6 @@ def _cuml_fit(
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

umap_model = umap_object.fit(concated, y=labels)
Expand All @@ -1054,7 +1053,6 @@ def _cuml_fit(
cuda_managed_mem_enabled,
cuda_system_mem_enabled,
cuda_system_mem_headroom,
force_sam_headroom=True,
)

# Call unsupervised fit
Expand Down
25 changes: 21 additions & 4 deletions python/src/spark_rapids_ml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,17 +163,29 @@ def _get_gpu_id(task_context: TaskContext) -> int:
return gpu_id


# When changing default rmm memory resources we retain the old ones
# in this global array singleton so that any (C++) allocations using them can
# invoke the corresponding deallocate methods. They will get cleaned up only when
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that cause the memory already allocated within the default RMM mem resources not to be freed?

# the process exits. This avoids a segfault in the case of creating a new
# SAM resource with a smaller headroom.
_old_memory_resources = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: potential thread safety issue if _configure_memory_resource is called concurrently from multiple threads. list.append() is atomic in CPython due to GIL, but consider using threading.Lock if multi-threaded scenarios are possible

Suggested change
_old_memory_resources = []
import threading
_old_memory_resources: List[Any] = []
_memory_resource_lock = threading.Lock()


# keep track of last headroom to check if new sam mr is needed.
_last_sam_headroom_size = None


def _configure_memory_resource(
uvm_enabled: bool = False,
sam_enabled: bool = False,
sam_headroom: Optional[int] = None,
force_sam_headroom: bool = False,
) -> None:
import cupy as cp
import rmm
from cuda.bindings import runtime
from rmm.allocators.cupy import rmm_cupy_allocator

global _last_sam_headroom_size

_SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute(
runtime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess,
rmm._cuda.gpu.getDevice(),
Expand All @@ -193,19 +205,24 @@ def _configure_memory_resource(
if not type(rmm.mr.get_current_device_resource()) == type(
rmm.mr.SystemMemoryResource()
):
_old_memory_resources.append(rmm.mr.get_current_device_resource())
_last_sam_headroom_size = None
mr = rmm.mr.SystemMemoryResource()
rmm.mr.set_current_device_resource(mr)
elif sam_enabled and sam_headroom is not None:
if force_sam_headroom or not type(rmm.mr.get_current_device_resource()) == type(
rmm.mr.SamHeadroomMemoryResource(headroom=sam_headroom)
):
if sam_headroom != _last_sam_headroom_size or not type(
rmm.mr.get_current_device_resource()
) == type(rmm.mr.SamHeadroomMemoryResource(headroom=sam_headroom)):
_old_memory_resources.append(rmm.mr.get_current_device_resource())
_last_sam_headroom_size = sam_headroom
mr = rmm.mr.SamHeadroomMemoryResource(headroom=sam_headroom)
rmm.mr.set_current_device_resource(mr)

if uvm_enabled:
if not type(rmm.mr.get_current_device_resource()) == type(
rmm.mr.ManagedMemoryResource()
):
_old_memory_resources.append(rmm.mr.get_current_device_resource())
rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())

if sam_enabled or uvm_enabled:
Expand Down