From 86aed9716812d39070714a07706a584779c13b46 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 5 Mar 2026 11:15:03 +1100 Subject: [PATCH] fix: use an alternate subnet for the docker bridge network The default subnet of 172.17/16 for the docker bridge interface (docker0) overlaps a physical network subnet used in Azure CycleCloud deployments. To prevent this overlap from causing issues with either the physical network or docker containers, change the docker bridge subnet to use the high end of the private Class B address space range at 172.31/16. Using the high end of the range means that the docker bridge is now unlikely to conflict with any other static network configuration inside the CycleCloud clusters. Typically machines have a maximum of 8 InfiniBand devices, so they should not consume the entire Class B subnet range even if they use a whole /16 subnet per interface. Because the docker config file is in JSON format, inserting a new config line is a lot more complex than just using the lineinfile module. We have to parse the contents of the existing config file, insert new values, then write out the new config file. We also have to handle the cases where the config file doesn't already exist or is empty to prevent the input parsing from failing in these cases. 
This results in the docker0 interface being created with the following IP address configuration: $ ip addr show docker0 5: docker0: mtu 1500 qdisc noqueue state DOWN group default link/ether 6e:a8:94:80:15:d3 brd ff:ff:ff:ff:ff:ff inet 172.31.0.1/16 brd 172.31.255.255 scope global docker0 valid_lft forever preferred_lft forever $ Jira: https://issues.redhat.com/browse/RHELHPC-153 Signed-off-by: Dave Chinner --- README.md | 11 +++++++++++ defaults/main.yml | 1 + tasks/docker-subnet.yml | 36 ++++++++++++++++++++++++++++++++++++ tasks/main.yml | 3 +++ vars/main.yml | 2 ++ 5 files changed, 53 insertions(+) create mode 100644 tasks/docker-subnet.yml diff --git a/README.md b/README.md index fe8b475..e5c472c 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,17 @@ Default: `"{{ hpc_install_nvidia_container_toolkit }}"` Type: `bool` +### hpc_docker_subnet + +The default docker bridge interface address and subnet configuration of 172.17.0.1/16 conflicts with the subnets Azure CycleCloud uses for internal physical cluster networks. + +To avoid this conflict with the Azure CycleCloud networks, the system role will configure the docker interface with a 10.88.0.1/16 address and subnet. +However, if this is inappropriate for the cluster being deployed, the subnet can be customised to any private subnet using this variable. + +Default: `10.88.0.1/16` + +Type: `string` + ### hpc_install_moneo Whether to install the Azure Moneo monitoring tool. 
diff --git a/defaults/main.yml b/defaults/main.yml index 07e9108..1b3f67b 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -28,6 +28,7 @@ hpc_build_openmpi_w_nvidia_gpu_support: true hpc_install_moneo: true hpc_install_nvidia_container_toolkit: true hpc_install_docker: "{{ hpc_install_nvidia_container_toolkit }}" +hpc_docker_subnet: 10.88.0.1/16 hpc_install_azurehpc_health_checks: "{{ hpc_install_nvidia_container_toolkit }}" hpc_tuning: true hpc_sku_customisation: true diff --git a/tasks/docker-subnet.yml b/tasks/docker-subnet.yml new file mode 100644 index 0000000..82133a3 --- /dev/null +++ b/tasks/docker-subnet.yml @@ -0,0 +1,36 @@ +--- +- name: Ensure docker config directory exists + file: + path: "{{ __hpc_docker_conf_dir }}" + state: directory + owner: root + group: root + mode: '0755' + +- name: Check if daemon.json exists + stat: + path: "{{ __hpc_docker_conf_file }}" + register: __hpc_docker_conf_stat + +- name: Slurp file if it exists + slurp: + src: "{{ __hpc_docker_conf_file }}" + register: __hpc_docker_daemon_json_raw + when: __hpc_docker_conf_stat.stat.exists and __hpc_docker_conf_stat.stat.size > 0 + +- name: Write the updated JSON safely + copy: + content: "{{ updated_daemon_config | to_nice_json }}" + dest: "{{ __hpc_docker_conf_file }}" + owner: root + group: root + mode: '0644' + validate: '/usr/bin/python3 -m json.tool %s' + vars: + updated_daemon_config: >- + {{ + (__hpc_docker_daemon_json_raw.content | b64decode | from_json + if (__hpc_docker_daemon_json_raw.content is defined) + else {}) + | combine({"bip": hpc_docker_subnet | default('10.88.0.1/16')}) + }} diff --git a/tasks/main.yml b/tasks/main.yml index e213818..6a72f93 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -941,6 +941,9 @@ register: __hpc_docker_packages_install until: __hpc_docker_packages_install is success + - name: Configure Docker bridge network address range + include_tasks: tasks/docker-subnet.yml + - name: Enable and start Docker service service: name: 
docker diff --git a/vars/main.yml b/vars/main.yml index 860b529..9d31d86 100644 --- a/vars/main.yml +++ b/vars/main.yml @@ -21,6 +21,8 @@ __hpc_microsoft_prod_rpm_key: https://packages.microsoft.com/keys/microsoft.asc __hpc_base_packages: - pssh +__hpc_docker_conf_dir: /etc/docker +__hpc_docker_conf_file: "{{ __hpc_docker_conf_dir }}/daemon.json" __hpc_rdma_rename_path: /usr/lib/udev/rdma_rename __hpc_dkms_packages: