Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
run: devbox run go test -v ./pkg/...

- name: Run integration tests
run: devbox run go test -v ./tests/integration
run: dist/bloom test tests/integration/step/*/*/bloom.yaml

- name: Run UI tests
run: |
Expand Down
46 changes: 46 additions & 0 deletions pkg/args/args.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"regexp"
"strings"

"github.com/silogen/cluster-bloom/pkg/mockablecmd"
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
"gopkg.in/yaml.v3"
Expand Down Expand Up @@ -202,6 +203,46 @@ func ValidateSkipDiskCheckConsistency(skipDiskCheckStr string) error {
return nil
}

// validatePremountedNotBloomManaged checks that no CLUSTER_PREMOUNTED_DISKS
// path is an active, bloom-managed mount point in /etc/fstab.
//
// diskList holds the configured mount points; surrounding whitespace on each
// entry is ignored. An fstab line counts as bloom-managed when it ends with
// the "# managed by cluster-bloom" tag. Lines that are themselves comments
// (first non-blank character is '#') are ignored, so a commented-out bloom
// entry does not block validation.
//
// It returns an error naming the first matching fstab line (1-based). If
// /etc/fstab cannot be read, a warning is logged and nil is returned.
func validatePremountedNotBloomManaged(diskList []string) error {
	const bloomFstabTag = "# managed by cluster-bloom"

	fstabContent, err := mockablecmd.ReadFile("ValidateArgs.ReadFstab", "/etc/fstab")
	if err != nil {
		// If we can't read fstab, warn but don't fail validation
		log.Warnf("Could not read /etc/fstab to validate CLUSTER_PREMOUNTED_DISKS: %v", err)
		return nil
	}

	// Trim the configured paths once instead of once per fstab line.
	premounted := make([]string, 0, len(diskList))
	for _, disk := range diskList {
		premounted = append(premounted, strings.TrimSpace(disk))
	}

	lines := strings.Split(string(fstabContent), "\n")
	for lineNum, line := range lines {
		trimmedLine := strings.TrimSpace(line)

		// A line starting with '#' is an fstab comment, not an active entry,
		// even if it still carries the bloom tag at the end.
		if strings.HasPrefix(trimmedLine, "#") {
			continue
		}

		// Skip lines that aren't bloom-managed
		if !strings.HasSuffix(trimmedLine, bloomFstabTag) {
			continue
		}

		// Extract mount point (second whitespace-separated field) from the
		// bloom-managed entry
		fields := strings.Fields(trimmedLine)
		if len(fields) < 2 {
			continue
		}

		mountPoint := fields[1]

		// Check if this mount point is in CLUSTER_PREMOUNTED_DISKS
		for _, disk := range premounted {
			if disk == mountPoint {
				return fmt.Errorf("CLUSTER_PREMOUNTED_DISKS contains %s which is tagged '# managed by cluster-bloom' in /etc/fstab at line %d:\n %s\nPlease delete the tag from the fstab line first or use a different mount point", disk, lineNum+1, trimmedLine)
			}
		}
	}

	return nil
}

// ValidateLonghornDisksArg validates CLUSTER_PREMOUNTED_DISKS configuration
func ValidateLonghornDisksArg(disks string) error {
selectedDisks := viper.GetString("CLUSTER_DISKS")
Expand All @@ -222,6 +263,11 @@ func ValidateLonghornDisksArg(disks string) error {
return fmt.Errorf("CLUSTER_PREMOUNTED_DISKS contains a path that does not exist: %s", disk)
}
}

// Check that none of the CLUSTER_PREMOUNTED_DISKS are bloom-managed in /etc/fstab
if err := validatePremountedNotBloomManaged(diskList); err != nil {
return err
}
}

// Both cannot be set
Expand Down
18 changes: 2 additions & 16 deletions pkg/steps.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ var SetupAndCheckRocmStep = Step{
if strings.HasPrefix(trimmedLine, "WARNING:") {
continue
}

parts := strings.Fields(line)
if len(parts) > 0 {
if _, err := strconv.Atoi(parts[0]); err != nil {
Expand All @@ -238,7 +238,7 @@ var SetupAndCheckRocmStep = Step{
}
}
}

if !validLineFound {
LogMessage(Error, "rocm-smi did not return any valid GPU lines: "+string(output))
return StepResult{
Expand Down Expand Up @@ -1111,20 +1111,6 @@ var CleanLonghornMountsStep = Step{
return stepResult
}

// Find /mnt/disk* mount points that contain longhorn-disk.cfg and unmount them
shellCmd := `
for mount_point in /mnt/disk*; do
if [ -d "$mount_point" ] && find "$mount_point" -name "longhorn-disk.cfg" 2>/dev/null | grep -q .; then
echo "Found longhorn-disk.cfg in $mount_point, unmounting..."
sudo umount -lf "$mount_point" 2>/dev/null || true
fi
done
`
stepResult = shellCmdHelper(shellCmd)
if stepResult.Error != nil {
return stepResult
}

// Find and unmount CSI volume mounts
stepResult = shellCmdHelper("sudo umount -Af /var/lib/kubelet/pods/*/volumes/kubernetes.io~csi/pvc-* 2>/dev/null || true")
if stepResult.Error != nil {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
GPU_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_DISKS: /dev/nvme0n1
RKE2_INSTALLATION_URL: https://get.rke2.io
ROCM_BASE_URL: https://repo.radeon.com/amdgpu-install/7.0.2/ubuntu/
ROCM_DEB_PACKAGE: amdgpu-install_7.0.2.70002-1_all.deb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: false
GPU_NODE: false
SERVER_IP: 192.168.1.100
JOIN_TOKEN: K107c7e3e6e3e6e3e6e3e6e3e6e3e6e3e6e3e6::server:abc123def456
CLUSTER_DISKS: /dev/nvme0n1
RKE2_INSTALLATION_URL: https://get.rke2.io
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_DISKS: /dev/nvme0n1
CLUSTER_PREMOUNTED_DISKS: /tmp

expected_error: "CLUSTER_PREMOUNTED_DISKS and CLUSTER_DISKS cannot both be set"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
NO_DISKS_FOR_CLUSTER: false

expected_error: "either CLUSTER_PREMOUNTED_DISKS or CLUSTER_DISKS must be set"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_PREMOUNTED_DISKS: mnt/disk0

expected_error: "CLUSTER_PREMOUNTED_DISKS contains a non-absolute path: mnt/disk0"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_PREMOUNTED_DISKS: /mnt/nonexistent

expected_error: "CLUSTER_PREMOUNTED_DISKS contains a path that does not exist: /mnt/nonexistent"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_PREMOUNTED_DISKS: /tmp

mocks:
# Mock /etc/fstab with bloom-tagged entry
ValidateArgs.ReadFstab:
output: |
# /etc/fstab: static file system information.
UUID=12345 / ext4 errors=remount-ro 0 1
UUID=abc123 /tmp ext4 defaults,nofail 0 2 # managed by cluster-bloom
UUID=def456 /mnt/disk1 ext4 defaults,nofail 0 2

expected_error: "CLUSTER_PREMOUNTED_DISKS contains /tmp which is tagged '# managed by cluster-bloom' in /etc/fstab at line 3"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ENABLED_STEPS: ValidateArgsStep
DISABLED_STEPS: SetupRKE2Step
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_DISKS: /dev/nvme0n1

expected_error: "DISABLED_STEPS and ENABLED_STEPS cannot both be set"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: false
SERVER_IP: not-an-ip
JOIN_TOKEN: K107c7e3e6e3e6e3e6e3e6e3e6e3e6e3e6e3e6::server:abc123def456
CLUSTER_DISKS: /dev/nvme0n1

expected_error: "invalid IP address"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: false
SERVER_IP: 192.168.1.100
JOIN_TOKEN: tooshort
CLUSTER_DISKS: /dev/nvme0n1

expected_error: "JOIN_TOKEN is too short"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
ENABLED_STEPS: ValidateArgsStep
FIRST_NODE: true
DOMAIN: example.com
USE_CERT_MANAGER: false
CERT_OPTION: generate
CLUSTER_PREMOUNTED_DISKS: /tmp

mocks:
# Mock /etc/fstab WITHOUT bloom-tagged entries for /tmp
ValidateArgs.ReadFstab:
output: |
# /etc/fstab: static file system information.
UUID=12345 / ext4 errors=remount-ro 0 1
UUID=abc123 /tmp ext4 defaults,nofail 0 2
UUID=def456 /mnt/disk1 ext4 defaults,nofail 0 2 # managed by cluster-bloom
161 changes: 0 additions & 161 deletions tests/integration/step_test.go

This file was deleted.

Loading