Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions packer-rocm/playbooks/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@
ansible.builtin.set_fact:
packer_vars: "{{ packer_rocm_hcl_awk.stdout_lines | replace('\"', '') }}"

# Runs 'packer init' in the Packer working directory ahead of the build task.
- name: "Run 'packer init', ensure plugin coverage"
  ansible.builtin.command:
    chdir: "{{ (workdir, packer_dist) | path_join }}"
    cmd: packer init .
  environment:
    PACKER_LOG: "1"  # quoted on purpose: the value is wanted as a string
  # a bare command is assumed to change something; report only 'ok' or failure
  changed_when: false

- name: "Run 'packer build', create '{{ _creates }}'"
ansible.builtin.command:
cmd: >
Expand Down
11 changes: 11 additions & 0 deletions packer-rocm/playbooks/limits.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# yamllint disable rule:line-length
# vim: ft=yaml.ansible
# Play: apply the 'gpu_perf_config' Role to every targeted host.
- name: "Set limits.conf w/ 'gpu_perf_config' Role"
  hosts: all
  roles:
    - role: gpu_perf_config
  # Proxy variables are read from the environment Ansible itself runs in.
  # They may be superfluous for your environment; Packer HCL maps them through 'ansible_env_vars'.
  # NOTE(review): the 'env' lookup is documented to return an empty string for unset variables,
  # so 'default(omit)' may never take effect here -- confirm before relying on omission.
  environment:
    http_proxy: "{{ lookup('ansible.builtin.env', 'http_proxy') | default(omit) }}"
    https_proxy: "{{ lookup('ansible.builtin.env', 'https_proxy') | default(omit) }}"
    no_proxy: "{{ lookup('ansible.builtin.env', 'no_proxy') | default(omit) }}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Applies to all users ('*'): soft and hard 'memlock' limits are set to unlimited.
* soft memlock unlimited
* hard memlock unlimited
# Applies to all users ('*'): soft and hard 'nofile' limits are set to 1048576.
* soft nofile 1048576
* hard nofile 1048576
9 changes: 9 additions & 0 deletions packer-rocm/playbooks/roles/gpu_perf_config/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Copies each listed file from the role onto the target with the given destination and mode.
# Runs with privilege escalation because the destination is under /etc.
- name: Push configuration files
  become: true
  loop:
    - src: "10-sre-limits.conf"
      dest: "/etc/security/limits.d/10-sre-limits.conf"
      mode: "0644"
  ansible.builtin.copy:
    src: "{{ item.src }}"
    dest: "{{ item.dest }}"
    mode: "{{ item.mode }}"
32 changes: 32 additions & 0 deletions packer-rocm/playbooks/roles/sos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# sos

This role installs [sosreport](https://github.com/sosreport/sos),
[xsos](https://github.com/ryran/xsos),
and `extras.d` entries. Report _generation/collection_ is left as an admin activity.

## Variables

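1. `sos_extras`: custom commands or files to include in `sos` reports, as a mapping of `extras.d` file name to a list of entries.
Default: `{}` (no extras); see the [example playbook](#example) for typical values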
2. `sos_xsos`: controls [xsos](https://github.com/ryran/xsos) installation.
Default: `true`
3. `sos_xsos_url`: `xsos` installation URL.
[Default](https://github.com/ryran/xsos/raw/master/xsos)

## Example

```yaml
---
- name: "'sos' role"
hosts: all
roles:
- name: Configure 'sos', extras, and 'xsos'
role: sos
vars:
sos_xsos_url: 'https://raw.githubusercontent.com/ryran/xsos/v0.7.33/xsos'
sos_extras:
amdgpu:
- 'rocm-smi -a'
yours:
- ':/some/file/to/read'
```
15 changes: 15 additions & 0 deletions packer-rocm/playbooks/roles/sos/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# defaults file for sos

# Packages to install for sos, keyed by the 'ansible_os_family' fact.
sos_pkgs:
  # Debian and derivatives (eg: Ubuntu, Mint, etc)
  Debian:
    - sosreport
  # RedHat and derivatives (eg: Fedora, Alma, etc)
  RedHat:
    - sos

# Entries for '/etc/sos/extras.d'. Empty by default; users are expected to
# supply their own (the README carries a current example).
sos_extras: {}

# 'xsos', the sosreport examiner: install toggle and download URL.
sos_xsos: true
sos_xsos_url: 'https://github.com/ryran/xsos/raw/master/xsos'
28 changes: 28 additions & 0 deletions packer-rocm/playbooks/roles/sos/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
# tasks file for sos

# Installs the package list that 'sos_pkgs' maps to this host's 'ansible_os_family'.
- name: Install packages
  become: true
  tags:
    - pkgs
    - packages
  ansible.builtin.package:
    # only passed for RedHat/Debian, whose modules behind 'package' accept it; omitted otherwise
    update_cache: "{{ true if ansible_os_family in ['RedHat', 'Debian'] else omit }}"
    name: "{{ sos_pkgs[ansible_os_family] }}"

# Fetches 'xsos' from 'sos_xsos_url' into /usr/local/bin and marks it executable.
# Skipped unless 'sos_xsos' evaluates as truthy (string booleans are converted).
- name: Install 'xsos'
  become: true
  when: sos_xsos is truthy(convert_bool=True)
  tags:
    - xsos
  ansible.builtin.get_url:
    dest: /usr/local/bin/xsos
    url: "{{ sos_xsos_url }}"
    mode: "+x"

# Writes one file per 'sos_extras' key under /etc/sos/extras.d.
# Each file holds that key's list entries joined with newlines.
- name: Extras
  become: true
  tags:
    - extras
    - extras.d
  loop: "{{ sos_extras | dict2items }}"
  loop_control:
    loop_var: entry
  ansible.builtin.copy:
    content: "{{ entry.value | join('\n') }}"
    dest: "/etc/sos/extras.d/{{ entry.key }}"
    mode: "0644"  # explicit for lint; also suggested in case umask is restricted
11 changes: 11 additions & 0 deletions packer-rocm/playbooks/sos.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# yamllint disable rule:line-length
# vim: ft=yaml.ansible
# Play: apply the 'sos' Role to every targeted host.
- name: "SOS"
  hosts: all
  roles:
    - role: sos
  # Proxy variables are read from the environment Ansible itself runs in.
  # They may be superfluous for your environment; Packer HCL maps them through 'ansible_env_vars'.
  # NOTE(review): the 'env' lookup is documented to return an empty string for unset variables,
  # so 'default(omit)' may never take effect here -- confirm before relying on omission.
  environment:
    http_proxy: "{{ lookup('ansible.builtin.env', 'http_proxy') | default(omit) }}"
    https_proxy: "{{ lookup('ansible.builtin.env', 'https_proxy') | default(omit) }}"
    no_proxy: "{{ lookup('ansible.builtin.env', 'no_proxy') | default(omit) }}"
41 changes: 31 additions & 10 deletions packer-rocm/ubuntu/ubuntu-rocm.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ source "qemu" "rocm" {
efi_drop_efivars = true # don't place efivars.fd in output artifact
format = "raw" # qcow2 may not be converted. if written to drives, can't be read back/won't find 'curtin'
headless = var.hidden
shutdown_command = "sudo -S shutdown -P now"
shutdown_command = "sudo -S bash -c \"rm -fv /etc/sudoers.d/packer /etc/sudoers.d/90-cloud-init-users; userdel --remove --force packer; shutdown -P now\""
ssh_handshake_attempts = 500
ssh_username = "ubuntu"
ssh_username = "packer"
ssh_password = "ubuntu"
ssh_wait_timeout = "1h"
ssh_timeout = "1h"
Expand All @@ -36,12 +36,13 @@ source "qemu" "rocm" {
build {
sources = ["source.qemu.rocm"]

# generate/copy tarball of custom packages; 'packer-maas' will process
# regenerate/copy tarball of custom packages; 'packer-maas' will process
provisioner "shell-local" {
inline = [
"tar cvzf ${path.root}/custom-packages.tar.gz -C ${path.root}/packages --overwrite .",
"rm -f ${path.root}/custom-packages.tar.gz",
"tar cvzf ${path.root}/custom-packages.tar.gz -C ${path.root}/packages ."
]
inline_shebang = "/bin/bash -e"
inline_shebang = "/bin/bash"
}
provisioner "file" {
destination = "/tmp/"
Expand Down Expand Up @@ -76,9 +77,29 @@ build {
scripts = ["${path.root}/../packer-maas/ubuntu/scripts/curtin.sh", "${path.root}/../packer-maas/ubuntu/scripts/networking.sh", "${path.root}/../packer-maas/ubuntu/scripts/cloudimg/install-custom-kernel.sh"]
}

# Runs the 'sos' playbook against the build VM, connecting as the 'packer' user.
provisioner "ansible" {
  user          = "packer"
  playbook_file = "${path.root}/../playbooks/sos.yml"

  # proxy settings handed to the ansible-playbook process
  ansible_env_vars = [
    "http_proxy=${var.http_proxy}",
    "https_proxy=${var.https_proxy}",
    "no_proxy=${var.no_proxy}"
  ]

  # NOTE(review): '-O' presumably selects scp's legacy protocol -- confirm against the OpenSSH version in use
  extra_arguments = ["-e", "ansible_python_interpreter=/usr/bin/python3", "--scp-extra-args", "'-O'"]
}

# Runs the 'limits' playbook against the build VM, connecting as the 'packer' user.
provisioner "ansible" {
  user          = "packer"
  playbook_file = "${path.root}/../playbooks/limits.yml"

  # proxy settings handed to the ansible-playbook process
  ansible_env_vars = [
    "http_proxy=${var.http_proxy}",
    "https_proxy=${var.https_proxy}",
    "no_proxy=${var.no_proxy}"
  ]

  # NOTE(review): '-O' presumably selects scp's legacy protocol -- confirm against the OpenSSH version in use
  extra_arguments = ["-e", "ansible_python_interpreter=/usr/bin/python3", "--scp-extra-args", "'-O'"]
}

provisioner "ansible" {
playbook_file = "${path.root}/../playbooks/os_prep.yml"
user = "ubuntu"
user = "packer"
ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"]
extra_arguments = [
"-e", "ansible_python_interpreter=/usr/bin/python3", # work around Packer/SSH proxy+client limitations
Expand All @@ -89,7 +110,7 @@ build {

provisioner "ansible" {
playbook_file = "${path.root}/../playbooks/amdgpu_install.yml"
user = "ubuntu"
user = "packer"
ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"]
extra_arguments = [
"-e", "ansible_python_interpreter=/usr/bin/python3",
Expand All @@ -106,7 +127,7 @@ build {

provisioner "ansible" {
playbook_file = "${path.root}/../playbooks/tuned.yml"
user = "ubuntu"
user = "packer"
ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"]
extra_arguments = [
"-e", "ansible_python_interpreter=/usr/bin/python3",
Expand All @@ -116,7 +137,7 @@ build {

provisioner "ansible" {
playbook_file = "${path.root}/../playbooks/niccli.yml"
user = "ubuntu"
user = "packer"
ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"]
extra_arguments = [
"-e", "ansible_python_interpreter=/usr/bin/python3",
Expand All @@ -130,7 +151,7 @@ build {

provisioner "ansible" {
playbook_file = "${path.root}/../playbooks/tuned.yml"
user = "ubuntu"
user = "packer"
ansible_env_vars = ["http_proxy=${var.http_proxy}", "https_proxy=${var.https_proxy}", "no_proxy=${var.no_proxy}"]
extra_arguments = [
"-e", "ansible_python_interpreter=/usr/bin/python3",
Expand Down
4 changes: 2 additions & 2 deletions packer-rocm/ubuntu/user-data-rocm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ autoinstall:
version: 1
identity:
hostname: ubuntu
username: ubuntu
username: packer
password: "$6$5OcQlL5Sfjzsczoq$i8LnaFro3xOZ8mZ258DYtGMeWymBdLHts37F7LS.eV4SGe8cGWGWbcHQ/423aijQgMCGIKLMvEQCHpD2pSPKV0"
packages:
# support potential use of the ansible-local provisioner
Expand Down Expand Up @@ -55,6 +55,6 @@ autoinstall:
# reset-partition: true # likely impractical with ROCm/amdgpu/etc, multiplies usage
late-commands:
# 'execute_command' in 'ubuntu-rocm.pkr.hcl' depends on this sudo rule
- echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /target/etc/sudoers.d/ubuntu
- echo 'packer ALL=(ALL) NOPASSWD:ALL' > /target/etc/sudoers.d/packer
package_update: true
package_upgrade: true