From c7b92517a8527375a0db29269373bbdf15967950 Mon Sep 17 00:00:00 2001 From: Josh Lay Date: Sat, 3 May 2025 19:27:06 -0500 Subject: [PATCH 1/4] packer: -static 'ubuntu' user, use 'packer' (and remove) --- packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl | 14 +++++++------- packer-rocm/ubuntu/user-data-rocm | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl b/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl index 53f8456..f6c6cd7 100644 --- a/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl +++ b/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl @@ -22,9 +22,9 @@ source "qemu" "rocm" { efi_drop_efivars = true # don't place efivars.fd in output artifact format = "raw" # qcow2 may not be converted. if written to drives, can't be read back/won't find 'curtin' headless = var.hidden - shutdown_command = "sudo -S shutdown -P now" + shutdown_command = "sudo -S bash -c \"rm -fv /etc/sudoers.d/packer /etc/sudoers.d/90-cloud-init-users; userdel --remove --force packer; shutdown -P now\"" ssh_handshake_attempts = 500 - ssh_username = "ubuntu" + ssh_username = "packer" ssh_password = "ubuntu" ssh_wait_timeout = "1h" ssh_timeout = "1h" @@ -78,7 +78,7 @@ build { provisioner "ansible" { playbook_file = "${path.root}/../playbooks/os_prep.yml" - user = "ubuntu" + user = "packer" ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] extra_arguments = [ "-e", "ansible_python_interpreter=/usr/bin/python3", # work around Packer/SSH proxy+client limitations @@ -89,7 +89,7 @@ build { provisioner "ansible" { playbook_file = "${path.root}/../playbooks/amdgpu_install.yml" - user = "ubuntu" + user = "packer" ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] extra_arguments = [ "-e", "ansible_python_interpreter=/usr/bin/python3", @@ -106,7 +106,7 @@ build { provisioner "ansible" { playbook_file = "${path.root}/../playbooks/tuned.yml" - user = "ubuntu" + user = "packer" ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] extra_arguments = [ "-e", "ansible_python_interpreter=/usr/bin/python3", @@ -116,7 +116,7 @@ build { provisioner "ansible" { playbook_file = "${path.root}/../playbooks/niccli.yml" - user = "ubuntu" + user = "packer" ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] extra_arguments = [ "-e", "ansible_python_interpreter=/usr/bin/python3", @@ -130,7 +130,7 @@ build { provisioner "ansible" { playbook_file = "${path.root}/../playbooks/tuned.yml" - user = "ubuntu" + user = "packer" ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] extra_arguments = [ "-e", "ansible_python_interpreter=/usr/bin/python3", diff --git a/packer-rocm/ubuntu/user-data-rocm b/packer-rocm/ubuntu/user-data-rocm index c3ab19e..3f149e1 100644 --- a/packer-rocm/ubuntu/user-data-rocm +++ b/packer-rocm/ubuntu/user-data-rocm @@ -3,7 +3,7 @@ autoinstall: version: 1 identity: hostname: ubuntu - username: ubuntu + username: packer password: "$6$5OcQlL5Sfjzsczoq$i8LnaFro3xOZ8mZ258DYtGMeWymBdLHts37F7LS.eV4SGe8cGWGWbcHQ/423aijQgMCGIKLMvEQCHpD2pSPKV0" packages: # support potential use of the ansible-local provisioner @@ -55,6 +55,6 @@ autoinstall: # reset-partition: true # likely impractical with ROCm/amdgpu/etc, multiplies usage late-commands: # 'execute_command' in 'ubuntu-rocm.pkr.hcl' depends on this sudo rule - - echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /target/etc/sudoers.d/ubuntu + - echo 'packer ALL=(ALL) NOPASSWD:ALL' > /target/etc/sudoers.d/packer package_update: true package_upgrade: true From 4d6b4ca4f2d6005bcd488631b8292c3af829c730 Mon Sep 17 00:00:00 2001 From: Josh Lay Date: Sat, 3 May 2025 19:31:05 -0500 Subject: [PATCH 2/4] packer: +'sos' and 'gpu_perf_config' (limits.d) --- packer-rocm/playbooks/limits.yml | 11 +++++++ .../gpu_perf_config/files/10-sre-limits.conf | 4 +++ .../roles/gpu_perf_config/tasks/main.yml | 9 ++++++ packer-rocm/playbooks/roles/sos/README.md | 32 +++++++++++++++++++ .../playbooks/roles/sos/defaults/main.yml | 15 +++++++++ .../playbooks/roles/sos/tasks/main.yml | 28 ++++++++++++++++ packer-rocm/playbooks/sos.yml | 11 +++++++ packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl | 20 ++++++++++++ 8 files changed, 130 insertions(+) create mode 100644 packer-rocm/playbooks/limits.yml create mode 100644 packer-rocm/playbooks/roles/gpu_perf_config/files/10-sre-limits.conf create mode 100644 packer-rocm/playbooks/roles/gpu_perf_config/tasks/main.yml create mode 100644 packer-rocm/playbooks/roles/sos/README.md create mode 100644 packer-rocm/playbooks/roles/sos/defaults/main.yml create mode 100644 packer-rocm/playbooks/roles/sos/tasks/main.yml create mode 100644 packer-rocm/playbooks/sos.yml diff --git a/packer-rocm/playbooks/limits.yml b/packer-rocm/playbooks/limits.yml new file mode 100644 index 0000000..9091e02 --- /dev/null +++ b/packer-rocm/playbooks/limits.yml @@ -0,0 +1,11 @@ +--- +# yamllint disable rule:line-length +# vim: ft=yaml.ansible +- name: "Set limits.conf w/ 'gpu_perf_config' Role" + hosts: all + environment: # may be superfluous for your environment; mapped through Packer HCL with 'ansible_env_vars' + http_proxy: "{{ lookup('ansible.builtin.env', 'http_proxy') | default(omit) }}" + https_proxy: "{{ lookup('ansible.builtin.env', 'https_proxy') | default(omit) }}" + no_proxy: "{{ lookup('ansible.builtin.env', 'no_proxy') | default(omit) }}" + roles: + - { role: gpu_perf_config } diff --git a/packer-rocm/playbooks/roles/gpu_perf_config/files/10-sre-limits.conf b/packer-rocm/playbooks/roles/gpu_perf_config/files/10-sre-limits.conf new file mode 100644 index 0000000..4f143f5 --- /dev/null +++ b/packer-rocm/playbooks/roles/gpu_perf_config/files/10-sre-limits.conf @@ -0,0 +1,4 @@ +* soft memlock unlimited +* hard memlock unlimited +* soft nofile 1048576 +* hard nofile 1048576 diff --git a/packer-rocm/playbooks/roles/gpu_perf_config/tasks/main.yml b/packer-rocm/playbooks/roles/gpu_perf_config/tasks/main.yml new file mode 100644 index 0000000..339f49d --- /dev/null +++ b/packer-rocm/playbooks/roles/gpu_perf_config/tasks/main.yml @@ -0,0 +1,9 @@ +--- +- name: Push configuration files + become: true + ansible.builtin.copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + mode: "{{ item.mode }}" + loop: + - { src: "10-sre-limits.conf", mode: "0644", dest: "/etc/security/limits.d/10-sre-limits.conf" } diff --git a/packer-rocm/playbooks/roles/sos/README.md b/packer-rocm/playbooks/roles/sos/README.md new file mode 100644 index 0000000..eaed282 --- /dev/null +++ b/packer-rocm/playbooks/roles/sos/README.md @@ -0,0 +1,32 @@ +# sos + +This role installs [sosreport](https://github.com/sosreport/sos), +[xsos](https://github.com/ryran/xsos), +and `extras.d` entries. Report _generation/collection_ is left as an admin activity. + +## Variables + +1. `sos_extras`: custom commands or files in `sos` reports. +Default: see the [example playbook](#example) +2. `sos_xsos`: controls [xsos](https://github.com/ryran/xsos) installation. +Default: `true` +3. `sos_xsos_url`: `xsos` installation URL. +[Default](https://github.com/ryran/xsos/raw/master/xsos) + +## Example + +```yaml +--- +- name: "'sos' role" + hosts: all + roles: + - name: Configure 'sos', extras, and 'xsos' + role: sos + vars: + sos_xsos_url: 'https://raw.githubusercontent.com/ryran/xsos/v0.7.33/xsos' + sos_extras: + amdgpu: + - 'rocm-smi -a' + yours: + - ':/some/file/to/read' +``` diff --git a/packer-rocm/playbooks/roles/sos/defaults/main.yml b/packer-rocm/playbooks/roles/sos/defaults/main.yml new file mode 100644 index 0000000..c3e8fa9 --- /dev/null +++ b/packer-rocm/playbooks/roles/sos/defaults/main.yml @@ -0,0 +1,15 @@ +--- +# defaults file for sos +# +# dictionary of sos/related packages to install, mapped by Ansible fact (ansible_os_family) +sos_pkgs: + Debian: # includes derivatives (eg: Ubuntu, Mint, etc) + - sosreport + RedHat: # includes derivatives (eg: Fedora, Alma, etc) + - sos + +# provides '/etc/sos/extras.d' entries. expected to be provided by the user, see README for current example +sos_extras: {} +# sosreport examiner +sos_xsos: true +sos_xsos_url: "https://github.com/ryran/xsos/raw/master/xsos" diff --git a/packer-rocm/playbooks/roles/sos/tasks/main.yml b/packer-rocm/playbooks/roles/sos/tasks/main.yml new file mode 100644 index 0000000..c649cc3 --- /dev/null +++ b/packer-rocm/playbooks/roles/sos/tasks/main.yml @@ -0,0 +1,28 @@ +--- +# tasks file for sos + +- name: Install packages + become: true + tags: ["pkgs", "packages"] + ansible.builtin.package: + name: "{{ sos_pkgs[ansible_os_family] }}" + update_cache: "{{ true if ansible_os_family in ['RedHat', 'Debian'] else omit }}" # these modules behind 'package' will accept this + +- name: Install 'xsos' + become: true + tags: ["xsos"] + when: sos_xsos is truthy(convert_bool=True) + ansible.builtin.get_url: + url: "{{ sos_xsos_url }}" + dest: /usr/local/bin/xsos + mode: "+x" + +- name: Extras + become: true + tags: ["extras", "extras.d"] + loop: "{{ sos_extras | dict2items }}" + loop_control: { loop_var: entry } + ansible.builtin.copy: + dest: "/etc/sos/extras.d/{{ entry.key }}" + content: "{{ entry.value | join('\n') }}" + mode: "0644" # lint; suggested in case umask is restricted diff --git a/packer-rocm/playbooks/sos.yml b/packer-rocm/playbooks/sos.yml new file mode 100644 index 0000000..4d546f9 --- /dev/null +++ b/packer-rocm/playbooks/sos.yml @@ -0,0 +1,11 @@ +--- +# yamllint disable rule:line-length +# vim: ft=yaml.ansible +- name: "SOS" + hosts: all + environment: # may be superfluous for your environment; mapped through Packer HCL with 'ansible_env_vars' + http_proxy: "{{ lookup('ansible.builtin.env', 'http_proxy') | default(omit) }}" + https_proxy: "{{ lookup('ansible.builtin.env', 'https_proxy') | default(omit) }}" + no_proxy: "{{ lookup('ansible.builtin.env', 'no_proxy') | default(omit) }}" + roles: + - { role: sos } diff --git a/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl b/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl index f6c6cd7..688f1c4 100644 --- a/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl +++ b/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl @@ -76,6 +76,26 @@ build { scripts = ["${path.root}/../packer-maas/ubuntu/scripts/curtin.sh", "${path.root}/../packer-maas/ubuntu/scripts/networking.sh", "${path.root}/../packer-maas/ubuntu/scripts/cloudimg/install-custom-kernel.sh"] } + provisioner "ansible" { + playbook_file = "${path.root}/../playbooks/sos.yml" + user = "packer" + ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] + extra_arguments = [ + "-e", "ansible_python_interpreter=/usr/bin/python3", + "--scp-extra-args", "'-O'" + ] + } + + provisioner "ansible" { + playbook_file = "${path.root}/../playbooks/limits.yml" + user = "packer" + ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"] + extra_arguments = [ + "-e", "ansible_python_interpreter=/usr/bin/python3", + "--scp-extra-args", "'-O'" + ] + } + provisioner "ansible" { playbook_file = "${path.root}/../playbooks/os_prep.yml" user = "packer" From c629016af1a5407e393fef6d240bf731e182c867 Mon Sep 17 00:00:00 2001 From: Josh Lay Date: Mon, 5 May 2025 14:19:27 -0500 Subject: [PATCH 3/4] packer-rocm/re-entry: remove 'custom-packages.tar.gz' --- packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl b/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl index 688f1c4..5b3b4e5 100644 --- a/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl +++ b/packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl @@ -36,12 +36,13 @@ source "qemu" "rocm" { build { sources = ["source.qemu.rocm"] - # generate/copy tarball of custom packages; 'packer-maas' will process + # regenerate/copy tarball of custom packages; 'packer-maas' will process provisioner "shell-local" { inline = [ - "tar cvzf ${path.root}/custom-packages.tar.gz -C ${path.root}/packages --overwrite .", + "rm -f ${path.root}/custom-packages.tar.gz", + "tar cvzf ${path.root}/custom-packages.tar.gz -C ${path.root}/packages ." ] - inline_shebang = "/bin/bash -e" + inline_shebang = "/bin/bash" } provisioner "file" { destination = "/tmp/" From 11548c8e9ed6ba4a78fbcfa2eb1b60f8daab273f Mon Sep 17 00:00:00 2001 From: Josh Lay Date: Mon, 5 May 2025 14:26:42 -0500 Subject: [PATCH 4/4] packer-rocm/build: get plugins, run 'packer init' --- packer-rocm/playbooks/build.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packer-rocm/playbooks/build.yml b/packer-rocm/playbooks/build.yml index 0273986..4e55e94 100644 --- a/packer-rocm/playbooks/build.yml +++ b/packer-rocm/playbooks/build.yml @@ -131,6 +131,14 @@ ansible.builtin.set_fact: packer_vars: "{{ packer_rocm_hcl_awk.stdout_lines | replace('\"', '') }}" + - name: "Run 'packer init', ensure plugin coverage" + ansible.builtin.command: + cmd: 'packer init .' + chdir: "{{ (workdir, packer_dist) | path_join }}" + environment: + PACKER_LOG: '1' # wanted as str + changed_when: false # command w/ assumed changes: report 'ok' or failure + - name: "Run 'packer build', create '{{ _creates }}'" ansible.builtin.command: cmd: >