From d96155d922a74e056fa862e36450dff40929def6 Mon Sep 17 00:00:00 2001
From: skirui-source
Date: Fri, 9 Jun 2023 15:19:35 -0700
Subject: [PATCH 1/3] replace pynvml with nvidia-ml-py in codebase

---
 source/cloud/azure/azure-vm-multi.md | 6 +++---
 source/guides/azure/infiniband.md    | 2 +-
 source/guides/mig.md                 | 2 +-
 source/tools/dask-cuda.md            | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/source/cloud/azure/azure-vm-multi.md b/source/cloud/azure/azure-vm-multi.md
index 11584292..5ea8105f 100644
--- a/source/cloud/azure/azure-vm-multi.md
+++ b/source/cloud/azure/azure-vm-multi.md
@@ -54,10 +54,10 @@ client = Client(cluster)
 
 
 def get_gpu_model():
-    import pynvml
+    import nvidia-ml-py
 
-    pynvml.nvmlInit()
-    return pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(0))
+    nvidia-ml-py.nvmlInit()
+    return nvidia-ml-py.nvmlDeviceGetName(nvidia-ml-py.nvmlDeviceGetHandleByIndex(0))
 
 
 client.submit(get_gpu_model).result()

diff --git a/source/guides/azure/infiniband.md b/source/guides/azure/infiniband.md
index daca2391..a65840e3 100644
--- a/source/guides/azure/infiniband.md
+++ b/source/guides/azure/infiniband.md
@@ -252,7 +252,7 @@ Accept the default and allow conda init to run. Then start a new shell.
 Create a conda environment (see [UCX-Py](https://ucx-py.readthedocs.io/en/latest/install.html) docs)
 
 ```shell
-mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest pynvml -y
+mamba create -n ucxpy {{ rapids_conda_channels }} {{ rapids_conda_packages }} ipython ucx-proc=*=gpu ucx ucx-py dask distributed numpy cupy pytest nvidia-ml-py -y
 mamba activate ucxpy
 ```

diff --git a/source/guides/mig.md b/source/guides/mig.md
index f2e3ad6d..57a5ca38 100644
--- a/source/guides/mig.md
+++ b/source/guides/mig.md
@@ -30,7 +30,7 @@ GPU 0: NVIDIA A100-PCIE-40GB (UUID: GPU-84fd49f2-48ad-50e8-9f2e-3bf0dfd47ccb)
 In the example case above the system has one NVIDIA A100 with 3 x 10GB MIG instances. In the next sections we will see how to use the instance names to startup a Dask cluster composed of MIG GPUs. Please note that once a GPU is partitioned, the physical GPU (named `GPU-84fd49f2-48ad-50e8-9f2e-3bf0dfd47ccb` above) is inaccessible for CUDA compute and cannot be used as part of a Dask cluster.
 
-Alternatively, MIG instance names can be obtained programatically using [NVML](https://developer.nvidia.com/nvidia-management-library-nvml) or [PyNVML](https://github.com/gpuopenanalytics/pynvml). Please refer to the [NVML API](https://docs.nvidia.com/deploy/nvml-api/) to write appropriate utilities for that purpose.
+Alternatively, MIG instance names can be obtained programatically using [NVML](https://developer.nvidia.com/nvidia-management-library-nvml) or [Nvidia-ml-py](https://github.com/conda-forge/nvidia-ml-py-feedstock). Please refer to the [NVML API](https://docs.nvidia.com/deploy/nvml-api/) to write appropriate utilities for that purpose.
 
 ### LocalCUDACluster
diff --git a/source/tools/dask-cuda.md b/source/tools/dask-cuda.md
index 5f0c383d..c16f987f 100644
--- a/source/tools/dask-cuda.md
+++ b/source/tools/dask-cuda.md
@@ -48,10 +48,10 @@ cluster = LocalCUDACluster()
 client = Client(cluster)
 
 
 def get_gpu_model():
-    import pynvml
+    import nvidia-ml-py
 
-    pynvml.nvmlInit()
-    return pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(0))
+    nvidia-ml-py.nvmlInit()
+    return nvidia-ml-py.nvmlDeviceGetName(nvidia-ml-py.nvmlDeviceGetHandleByIndex(0))
 
 
 client.submit(get_gpu_model).result()

From 66b1127060e3ec3946810aa17508d6de9165436e Mon Sep 17 00:00:00 2001
From: skirui-source
Date: Mon, 12 Jun 2023 14:28:46 -0700
Subject: [PATCH 2/3] revert changes, link to PyPI for nvidia-ml-py

---
 source/cloud/azure/azure-vm-multi.md | 6 +++---
 source/guides/mig.md                 | 2 +-
 source/tools/dask-cuda.md            | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/source/cloud/azure/azure-vm-multi.md b/source/cloud/azure/azure-vm-multi.md
index 5ea8105f..11584292 100644
--- a/source/cloud/azure/azure-vm-multi.md
+++ b/source/cloud/azure/azure-vm-multi.md
@@ -54,10 +54,10 @@ client = Client(cluster)
 
 
 def get_gpu_model():
-    import nvidia-ml-py
+    import pynvml
 
-    nvidia-ml-py.nvmlInit()
-    return nvidia-ml-py.nvmlDeviceGetName(nvidia-ml-py.nvmlDeviceGetHandleByIndex(0))
+    pynvml.nvmlInit()
+    return pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(0))
 
 
 client.submit(get_gpu_model).result()

diff --git a/source/guides/mig.md b/source/guides/mig.md
index 57a5ca38..4d69bec6 100644
--- a/source/guides/mig.md
+++ b/source/guides/mig.md
@@ -30,7 +30,7 @@ GPU 0: NVIDIA A100-PCIE-40GB (UUID: GPU-84fd49f2-48ad-50e8-9f2e-3bf0dfd47ccb)
 In the example case above the system has one NVIDIA A100 with 3 x 10GB MIG instances. In the next sections we will see how to use the instance names to startup a Dask cluster composed of MIG GPUs. Please note that once a GPU is partitioned, the physical GPU (named `GPU-84fd49f2-48ad-50e8-9f2e-3bf0dfd47ccb` above) is inaccessible for CUDA compute and cannot be used as part of a Dask cluster.
 
-Alternatively, MIG instance names can be obtained programatically using [NVML](https://developer.nvidia.com/nvidia-management-library-nvml) or [Nvidia-ml-py](https://github.com/conda-forge/nvidia-ml-py-feedstock). Please refer to the [NVML API](https://docs.nvidia.com/deploy/nvml-api/) to write appropriate utilities for that purpose.
+Alternatively, MIG instance names can be obtained programatically using [NVML](https://developer.nvidia.com/nvidia-management-library-nvml) or [Nvidia-ml-py](https://pypi.org/project/nvidia-ml-py/). Please refer to the [NVML API](https://docs.nvidia.com/deploy/nvml-api/) to write appropriate utilities for that purpose.
 
 ### LocalCUDACluster
diff --git a/source/tools/dask-cuda.md b/source/tools/dask-cuda.md
index c16f987f..5f0c383d 100644
--- a/source/tools/dask-cuda.md
+++ b/source/tools/dask-cuda.md
@@ -48,10 +48,10 @@ cluster = LocalCUDACluster()
 client = Client(cluster)
 
 
 def get_gpu_model():
-    import nvidia-ml-py
+    import pynvml
 
-    nvidia-ml-py.nvmlInit()
-    return nvidia-ml-py.nvmlDeviceGetName(nvidia-ml-py.nvmlDeviceGetHandleByIndex(0))
+    pynvml.nvmlInit()
+    return pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(0))
 
 
 client.submit(get_gpu_model).result()

From 6ec0c70e8f8e8ca40ac8532fdc96b45d7bf30a23 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Wed, 21 Jun 2023 11:50:04 -0700
Subject: [PATCH 3/3] Update source/guides/mig.md

Co-authored-by: Peter Andreas Entschev
---
 source/guides/mig.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/guides/mig.md b/source/guides/mig.md
index 4d69bec6..cb01d5ae 100644
--- a/source/guides/mig.md
+++ b/source/guides/mig.md
@@ -30,7 +30,7 @@ GPU 0: NVIDIA A100-PCIE-40GB (UUID: GPU-84fd49f2-48ad-50e8-9f2e-3bf0dfd47ccb)
 In the example case above the system has one NVIDIA A100 with 3 x 10GB MIG instances. In the next sections we will see how to use the instance names to startup a Dask cluster composed of MIG GPUs. Please note that once a GPU is partitioned, the physical GPU (named `GPU-84fd49f2-48ad-50e8-9f2e-3bf0dfd47ccb` above) is inaccessible for CUDA compute and cannot be used as part of a Dask cluster.
 
-Alternatively, MIG instance names can be obtained programatically using [NVML](https://developer.nvidia.com/nvidia-management-library-nvml) or [Nvidia-ml-py](https://pypi.org/project/nvidia-ml-py/). Please refer to the [NVML API](https://docs.nvidia.com/deploy/nvml-api/) to write appropriate utilities for that purpose.
+Alternatively, MIG instance names can be obtained programatically using [NVML](https://developer.nvidia.com/nvidia-management-library-nvml) or [nvidia-ml-py](https://pypi.org/project/nvidia-ml-py/). Please refer to the [NVML API](https://docs.nvidia.com/deploy/nvml-api/) to write appropriate utilities for that purpose.
 
 ### LocalCUDACluster
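Note: the `nvidia-ml-py` package linked above is NVIDIA's distribution of the Python NVML bindings and still installs the importable `pynvml` module, which is why the code examples in these diffs keep `import pynvml` (`import nvidia-ml-py` is not valid Python syntax). As a rough illustration of the kind of NVML utility the mig.md paragraph defers to, a minimal sketch of listing MIG instance names with `pynvml` could look like the following; the MIG-related calls assume a MIG-capable GPU and a reasonably recent driver and pynvml release, and the exact return type of the UUID call (bytes vs. str) varies between versions.

```python
# Illustrative sketch: enumerate MIG instance UUIDs with pynvml,
# the module installed by the nvidia-ml-py package.
import pynvml

pynvml.nvmlInit()
try:
    for i in range(pynvml.nvmlDeviceGetCount()):
        gpu = pynvml.nvmlDeviceGetHandleByIndex(i)
        print(f"GPU {i}: {pynvml.nvmlDeviceGetName(gpu)}")
        try:
            # Maximum number of MIG slots; raises on GPUs/drivers without MIG support.
            max_mig = pynvml.nvmlDeviceGetMaxMigDeviceCount(gpu)
        except pynvml.NVMLError:
            max_mig = 0
        for j in range(max_mig):
            try:
                mig = pynvml.nvmlDeviceGetMigDeviceHandleByIndex(gpu, j)
            except pynvml.NVMLError:
                continue  # slot not populated
            # MIG UUIDs look like "MIG-..." and name the compute instances.
            print("  ", pynvml.nvmlDeviceGetUUID(mig))
finally:
    pynvml.nvmlShutdown()
```

The `MIG-...` UUIDs printed this way are the same names `nvidia-smi -L` reports, and they can be used, for example, in `CUDA_VISIBLE_DEVICES` when starting Dask CUDA workers as described in the mig.md guide.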