From 12ee1e6951f6962e464a67d9e67a18efd6eaf2bb Mon Sep 17 00:00:00 2001 From: james Date: Thu, 13 Nov 2025 14:24:30 +0800 Subject: [PATCH 1/3] docs: add docs for using ascend device in volcano Signed-off-by: james --- .../installation/how-to-use-volcano-ascend.md | 110 ++++++++++++++++++ .../installation/how-to-use-volcano-ascend.md | 105 +++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 docs/installation/how-to-use-volcano-ascend.md create mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md diff --git a/docs/installation/how-to-use-volcano-ascend.md b/docs/installation/how-to-use-volcano-ascend.md new file mode 100644 index 0000000..37a9b34 --- /dev/null +++ b/docs/installation/how-to-use-volcano-ascend.md @@ -0,0 +1,110 @@ +# User Guide for Ascend Devices in Volcano + +## Introduction + +Volcano supports the vNPU feature for both Ascend 310 and Ascend 910 series devices through the `ascend-device-plugin`. It also supports managing heterogeneous Ascend clusters (clusters with multiple Ascend chip types, e.g. 910A, 910B2, 910B3, and 310P). + +**Use cases**: + +- NPU and vNPU cluster for Ascend 910 series +- NPU and vNPU cluster for Ascend 310 series +- Heterogeneous Ascend cluster + +This feature is only available in volcano >= 1.14. + +## Quick Start + +### Prerequisites + +[ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime) + +### Install Volcano + +``` +helm repo add volcano-sh https://volcano-sh.github.io/helm-charts +helm install volcano volcano-sh/volcano -n volcano-system --create-namespace +``` + +Additional installation methods can be found [here](https://github.com/volcano-sh/volcano?tab=readme-ov-file#quick-start-guide). 
+ +### Label the Node with ascend=on + +``` +kubectl label node {ascend-node} ascend=on +``` + +### Deploy `hami-scheduler-device` config map + +``` +kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-configmap.yaml +``` + +### Deploy ascend-device-plugin + +``` +kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-plugin.yaml +``` + +For more information, refer to the [ascend-device-plugin documentation](https://github.com/Project-HAMi/ascend-device-plugin). + +### Scheduler Config Update + +Update the scheduler configuration: + +```shell script +kubectl edit cm -n volcano-system volcano-scheduler-configmap +``` + +```yaml +kind: ConfigMap +apiVersion: v1 +metadata: + name: volcano-scheduler-configmap + namespace: volcano-system +data: + volcano-scheduler.conf: | + actions: "enqueue, allocate, backfill" + tiers: + - plugins: + - name: predicates + - name: deviceshare + arguments: + deviceshare.AscendHAMiVNPUEnable: true # enable ascend vnpu + deviceshare.SchedulePolicy: binpack # scheduling policy. binpack / spread + deviceshare.KnownGeometriesCMNamespace: kube-system + deviceshare.KnownGeometriesCMName: hami-scheduler-device +``` + + **Note:** You may notice that, `volcano-vgpu` has its own `KnownGeometriesCMName` and `KnownGeometriesCMNamespace`, which means if you want to use both vNPU and vGPU in a same volcano cluster, you need to merge the configMap from both sides and set it here. 
+ +## Usage + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: ascend-pod +spec: + schedulerName: volcano + containers: + - name: ubuntu-container + image: swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-pytorch:24.0.RC1-A2-1.11.0-ubuntu20.04 + command: ["sleep"] + args: ["100000"] + resources: + limits: + huawei.com/Ascend310P: "1" + huawei.com/Ascend310P-memory: "4096" + +``` + +The supported Ascend chips and their `ResourceNames` are shown in the following table: + +| ChipName | ResourceName | ResourceMemoryName | +|-------|-------|-------| +| 910A | huawei.com/Ascend910A | huawei.com/Ascend910A-memory | +| 910B2 | huawei.com/Ascend910B2 | huawei.com/Ascend910B2-memory | +| 910B3 | huawei.com/Ascend910B3 | huawei.com/Ascend910B3-memory | +| 910B4 | huawei.com/Ascend910B4 | huawei.com/Ascend910B4-memory | +| 910B4-1 | huawei.com/Ascend910B4-1 | huawei.com/Ascend910B4-1-memory | +| 310P3 | huawei.com/Ascend310P | huawei.com/Ascend310P-memory | \ No newline at end of file diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md b/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md new file mode 100644 index 0000000..046593e --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md @@ -0,0 +1,105 @@ +# Volcano 中 Ascend 设备使用指南 + +## 介绍 + +Volcano 通过 `ascend-device-plugin` 支持 Ascend 310 和 Ascend 910 的 vNPU 功能。同时支持管理异构 Ascend 集群(包含多种 Ascend 类型的集群,例如 910A、910B2、910B3、310p)。 + +**使用场景**: + +- Ascend 910 系列的 NPU 和 vNPU 集群 +- Ascend 310 系列的 NPU 和 vNPU 集群 +- 异构 Ascend 集群 + +此功能仅在Volcano 1.14及以上版本中可用。 + +## 快速开始 + +### 环境要求 + +[ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime) + +### 安装Volcano + +``` +helm repo add volcano-sh https://volcano-sh.github.io/helm-charts +helm install volcano volcano-sh/volcano -n volcano-system --create-namespace +``` + 
+更多安装方式请参考[这里](https://github.com/volcano-sh/volcano?tab=readme-ov-file#quick-start-guide)。 + +### 给 Ascend 设备打上 ascend=on 标签 + +``` +kubectl label node {ascend-node} ascend=on +``` + +### 部署 hami-scheduler-device ConfigMap + +``` +kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-configmap.yaml +``` + +### 部署 ascend-device-plugin + +``` +kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-plugin.yaml +``` +更多信息请参考 [ascend-device-plugin 文档](https://github.com/Project-HAMi/ascend-device-plugin)。 + +### 更新调度器配置 + +```shell script +kubectl edit cm -n volcano-system volcano-scheduler-configmap +``` + +```yaml +kind: ConfigMap +apiVersion: v1 +metadata: + name: volcano-scheduler-configmap + namespace: volcano-system +data: + volcano-scheduler.conf: | + actions: "enqueue, allocate, backfill" + tiers: + - plugins: + - name: predicates + - name: deviceshare + arguments: + deviceshare.AscendHAMiVNPUEnable: true # enable ascend vnpu + deviceshare.SchedulePolicy: binpack # scheduling policy. 
binpack / spread + deviceshare.KnownGeometriesCMNamespace: kube-system + deviceshare.KnownGeometriesCMName: hami-scheduler-device +``` + +**注意:** 您可能会注意到 `volcano-vgpu` 有自己的 `GeometriesCMName` 和 `KnownGeometriesCMNamespace`,这意味着如果要在同一个 Volcano 集群中同时使用 vNPU 和 vGPU,您需要合并两边的 configMap。 + +## 使用方法 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: ascend-pod +spec: + schedulerName: volcano + containers: + - name: ubuntu-container + image: swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-pytorch:24.0.RC1-A2-1.11.0-ubuntu20.04 + command: ["sleep"] + args: ["100000"] + resources: + limits: + huawei.com/Ascend310P: "1" + huawei.com/Ascend310P-memory: "4096" + +``` +支持的 Ascend 芯片及其对应的资源名称如下表所示: +| ChipName | ResourceName | ResourceMemoryName | +|-------|-------|-------| +| 910A | huawei.com/Ascend910A | huawei.com/Ascend910A-memory | +| 910B2 | huawei.com/Ascend910B2 | huawei.com/Ascend910B2-memory | +| 910B3 | huawei.com/Ascend910B3 | huawei.com/Ascend910B3-memory | +| 910B4 | huawei.com/Ascend910B4 | huawei.com/Ascend910B4-memory | +| 910B4-1 | huawei.com/Ascend910B4-1 | huawei.com/Ascend910B4-1-memory | +| 310P3 | huawei.com/Ascend310P | huawei.com/Ascend310P-memory | \ No newline at end of file From 71491c096ecfb63a05f5d1c0e56ea08b99629b21 Mon Sep 17 00:00:00 2001 From: james Date: Thu, 13 Nov 2025 15:39:40 +0800 Subject: [PATCH 2/3] fix: fix comment Signed-off-by: james --- docs/installation/how-to-use-volcano-ascend.md | 16 ++++++++++------ .../installation/how-to-use-volcano-ascend.md | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/docs/installation/how-to-use-volcano-ascend.md b/docs/installation/how-to-use-volcano-ascend.md index 37a9b34..de0cb7b 100644 --- a/docs/installation/how-to-use-volcano-ascend.md +++ b/docs/installation/how-to-use-volcano-ascend.md @@ -20,7 +20,7 @@ This feature is only available in volcano >= 1.14. 
 ### Install Volcano -``` +```shell helm repo add volcano-sh https://volcano-sh.github.io/helm-charts helm install volcano volcano-sh/volcano -n volcano-system --create-namespace ``` @@ -29,19 +29,19 @@ Additional installation methods can be found [here](https://github.com/volcano-s ### Label the Node with ascend=on -``` +```shell kubectl label node {ascend-node} ascend=on ``` ### Deploy `hami-scheduler-device` config map -``` +```shell kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-configmap.yaml ``` ### Deploy ascend-device-plugin -``` +```shell kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-plugin.yaml ``` @@ -51,7 +51,7 @@ For more information, refer to the [ascend-device-plugin documentation](https:// Update the scheduler configuration: -```shell script +```shell kubectl edit cm -n volcano-system volcano-scheduler-configmap ``` @@ -75,7 +75,11 @@ data: deviceshare.KnownGeometriesCMName: hami-scheduler-device ``` - **Note:** You may notice that, `volcano-vgpu` has its own `KnownGeometriesCMName` and `KnownGeometriesCMNamespace`, which means if you want to use both vNPU and vGPU in a same volcano cluster, you need to merge the configMap from both sides and set it here. +:::note + +You may notice that `volcano-vgpu` has its own `KnownGeometriesCMName` and `KnownGeometriesCMNamespace` settings. This means that if you want to use both vNPU and vGPU in the same Volcano cluster, you need to merge the ConfigMaps from both sides and reference the merged ConfigMap here. 
+ +::: ## Usage diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md b/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md index 046593e..aa26150 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/installation/how-to-use-volcano-ascend.md @@ -20,7 +20,7 @@ Volcano 通过 `ascend-device-plugin` 支持 Ascend 310 和 Ascend 910 的 vNPU ### 安装Volcano -``` +```shell helm repo add volcano-sh https://volcano-sh.github.io/helm-charts helm install volcano volcano-sh/volcano -n volcano-system --create-namespace ``` @@ -29,26 +29,26 @@ helm install volcano volcano-sh/volcano -n volcano-system --create-namespace ### 给 Ascend 设备打上 ascend=on 标签 -``` +```shell kubectl label node {ascend-node} ascend=on ``` ### 部署 hami-scheduler-device ConfigMap -``` +```shell kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-configmap.yaml ``` ### 部署 ascend-device-plugin -``` +```shell kubectl apply -f https://raw.githubusercontent.com/Project-HAMi/ascend-device-plugin/refs/heads/main/ascend-device-plugin.yaml ``` 更多信息请参考 [ascend-device-plugin 文档](https://github.com/Project-HAMi/ascend-device-plugin)。 ### 更新调度器配置 -```shell script +```shell kubectl edit cm -n volcano-system volcano-scheduler-configmap ``` @@ -72,7 +72,11 @@ data: deviceshare.KnownGeometriesCMName: hami-scheduler-device ``` -**注意:** 您可能会注意到 `volcano-vgpu` 有自己的 `GeometriesCMName` 和 `KnownGeometriesCMNamespace`,这意味着如果要在同一个 Volcano 集群中同时使用 vNPU 和 vGPU,您需要合并两边的 configMap。 +:::note + +您可能会注意到 `volcano-vgpu` 有自己的 `KnownGeometriesCMName` 和 `KnownGeometriesCMNamespace`,这意味着如果要在同一个 Volcano 集群中同时使用 vNPU 和 vGPU,您需要合并两边的 ConfigMap 并在此处进行设置。 + +::: ## 使用方法 From fd2143da2d3c5853e2ed934fb19fa5d1e1f74ad0 Mon Sep 17 00:00:00 2001 From: james Date: Thu, 13 Nov 2025 17:17:58 +0800 Subject: [PATCH 3/3] fix: add filename to 
sidebar Signed-off-by: james --- sidebars.js | 1 + 1 file changed, 1 insertion(+) diff --git a/sidebars.js b/sidebars.js index 4b3ab3f..f2eab41 100644 --- a/sidebars.js +++ b/sidebars.js @@ -35,6 +35,7 @@ module.exports = { "installation/uninstall", "installation/webui-installation", "installation/how-to-use-volcano-vgpu", + "installation/how-to-use-volcano-ascend", "installation/aws-installation" ] },