From ce97104a835c6b8f85c3b3ee6358d95471f49bf3 Mon Sep 17 00:00:00 2001 From: Ritesh Harihar Date: Mon, 30 Mar 2026 18:51:29 +0530 Subject: [PATCH 1/3] Added logic to identify child jobs --- internal/runner/job/reconcile.go | 43 +++ internal/runner/job/reconcile_test.go | 412 ++++++++++++++++++++++++++ 2 files changed, 455 insertions(+) create mode 100644 internal/runner/job/reconcile_test.go diff --git a/internal/runner/job/reconcile.go b/internal/runner/job/reconcile.go index ef83df55..c3b6db95 100644 --- a/internal/runner/job/reconcile.go +++ b/internal/runner/job/reconcile.go @@ -5,6 +5,7 @@ package job import ( "fmt" + "strings" "github.com/hashicorp/nomad/api" @@ -100,6 +101,10 @@ func (r *Runner) findStaleJobs(currentJobIDs map[string]struct{}) ([]*api.JobLis var staleJobs []*api.JobListStub for _, stub := range jobStubs { + if stub == nil { + continue + } + // Skip already-stopped jobs — nothing to do. if stub.Status == "dead" { continue @@ -109,6 +114,11 @@ func (r *Runner) findStaleJobs(currentJobIDs map[string]struct{}) ([]*api.JobLis continue } + // Child jobs should not be treated as pack-managed root jobs that can be stopped. + if isChildJobStub(stub) { + continue + } + // If this job is not among the ones just deployed, it is stale. if _, exists := currentJobIDs[stub.ID]; !exists { staleJobs = append(staleJobs, stub) @@ -117,3 +127,36 @@ func (r *Runner) findStaleJobs(currentJobIDs map[string]struct{}) ([]*api.JobLis return staleJobs, nil } + +func isChildJobStub(stub *api.JobListStub) bool { + if stub == nil { + return false + } + + // Preferred signal from Nomad. + if stub.ParentID != "" { + return true + } + + // Metadata-based signal from nomad-pack tags. For child jobs, pack.job points + // to the parent/root job name, which differs from the child ID. + if packJob, ok := stub.Meta[PackJobKey]; ok && packJob != "" { + return packJob != stub.ID + } + + // Fallback only when pack.job metadata is missing. + return isChildStyleJobID(stub.ID) +} + +// isChildStyleJobID is a conservative fallback: child jobs are represented as +// "/", so they always contain a non-leading slash. +func isChildStyleJobID(jobID string) bool { + // Find the last occurrence of "/" to handle job names that might contain slashes + idx := strings.LastIndex(jobID, "/") + if idx <= 0 { + // No slash found, or slash is at the beginning (invalid) + return false + } + + return idx < len(jobID)-1 +} diff --git a/internal/runner/job/reconcile_test.go b/internal/runner/job/reconcile_test.go new file mode 100644 index 00000000..ac4dd2cb --- /dev/null +++ b/internal/runner/job/reconcile_test.go @@ -0,0 +1,412 @@ +// Copyright IBM Corp. 2023, 2026 +// SPDX-License-Identifier: MPL-2.0 + +package job + +import ( + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/shoenig/test/must" +) + +func TestIsChildJobID(t *testing.T) { + testCases := []struct { + desc string + jobID string + expect bool + }{ + { + desc: "empty job ID is not a child job", + jobID: "", + expect: false, + }, + { + desc: "simple job ID without slash is not a child job", + jobID: "my-job", + expect: false, + }, + { + desc: "job ID with slash is treated as child-style fallback", + jobID: "namespace/my-job", + expect: true, + }, + { + desc: "periodic child job is detected", + jobID: "job-requester/periodic-1774607280", + expect: true, + }, + { + desc: "dispatch child job is detected", + jobID: "nuclei/dispatch-1774607281-093f6605", + expect: true, + }, + { + desc: "dispatch child job with longer suffix is detected", + jobID: "my-job/dispatch-1234567890-abcdef123456", + expect: true, + }, + { + desc: "job with periodic in name but not as child pattern is not a child job", + jobID: "periodic-backup", + expect: false, + }, + { + desc: "job with dispatch in name but not as child pattern is not a child job", + jobID: "dispatch-service", + expect: false, + }, + { + desc: "job with slash at the beginning is not a child job", + jobID: "/periodic-123", + expect: false, + }, + { + desc: "job with multiple slashes uses last one for pattern matching", + jobID: "namespace/parent/periodic-1234567890", + expect: true, + }, + { + desc: "job with multiple slashes is treated as child-style fallback", + jobID: "namespace/parent/my-job", + expect: true, + }, + { + desc: "parameterized root job is not a child job", + jobID: "nuclei", + expect: false, + }, + { + desc: "periodic root job is not a child job", + jobID: "job-requester", + expect: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + actual := isChildStyleJobID(tc.jobID) + must.Eq(t, tc.expect, actual) + }) + } +} + +func TestIsChildJobStub_PackJobMetadataPrecedence(t *testing.T) { + t.Run("pack.job equal to ID is root even with slash", func(t *testing.T) { + stub := &api.JobListStub{ + ID: "namespace/my-root-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "namespace/my-root-job", + }, + } + + must.Eq(t, false, isChildJobStub(stub)) + }) + + t.Run("pack.job different from ID is child", func(t *testing.T) { + stub := &api.JobListStub{ + ID: "namespace/my-root-job/dispatch-123", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "namespace/my-root-job", + }, + } + + must.Eq(t, true, isChildJobStub(stub)) + }) +} + +// TestFindStaleJobs_ChildJobsWithParentIDFiltered tests that child jobs with +// ParentID set are correctly filtered out during reconciliation. +func TestFindStaleJobs_ChildJobsWithParentIDFiltered(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "parent-job": {}, + } + nomadJobStubs := []*api.JobListStub{ + { + ID: "parent-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "parent-job", + }, + }, + { + ID: "parent-job/periodic-1774607280", + Status: "running", + ParentID: "parent-job", + Meta: map[string]string{ + PackJobKey: "parent-job", + }, + }, + { + ID: "parent-job/dispatch-1774607281-093f6605", + Status: "running", + ParentID: "parent-job", + Meta: map[string]string{ + PackJobKey: "parent-job", + }, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceEmpty(t, staleJobs, must.Sprint("child jobs should be filtered out")) +} + +// TestFindStaleJobs_ChildJobsWithoutParentIDFiltered tests that child jobs +// without ParentID are filtered by metadata/ID fallback mechanism. +func TestFindStaleJobs_ChildJobsWithoutParentIDFiltered(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "nuclei": {}, + } + nomadJobStubs := []*api.JobListStub{ + { + ID: "nuclei", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "nuclei", + }, + }, + { + ID: "nuclei/dispatch-1774607281-093f6605", + Status: "running", + ParentID: "", // ParentID NOT set (simulating API inconsistency) + Meta: map[string]string{ + PackJobKey: "nuclei", + }, + }, + { + ID: "job-requester/periodic-1774607280", + Status: "running", + ParentID: "", // ParentID NOT set (simulating API inconsistency) + Meta: map[string]string{ + PackJobKey: "job-requester", + }, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceEmpty(t, staleJobs, must.Sprint("child jobs should be filtered")) +} + +// TestFindStaleJobs_ActualStaleJobsIdentified tests that actual stale jobs +// (not in current deployment) are correctly identified. +func TestFindStaleJobs_ActualStaleJobsIdentified(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "new-job": {}, + } + nomadJobStubs := []*api.JobListStub{ + { + ID: "new-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "new-job", + }, + }, + { + ID: "old-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "old-job", + }, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceLen(t, 1, staleJobs) + must.Eq(t, "old-job", staleJobs[0]) +} + +// TestFindStaleJobs_StaleParentNotChildren tests that when a parent job is stale, +// only the parent is identified, not its children. +func TestFindStaleJobs_StaleParentNotChildren(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "new-job": {}, + } + nomadJobStubs := []*api.JobListStub{ + { + ID: "new-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "new-job", + }, + }, + { + ID: "old-parent", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "old-parent", + }, + }, + { + ID: "old-parent/periodic-123", + Status: "running", + ParentID: "old-parent", + Meta: map[string]string{ + PackJobKey: "old-parent", + }, + }, + { + ID: "old-parent/dispatch-456-abc", + Status: "running", + ParentID: "old-parent", + Meta: map[string]string{ + PackJobKey: "old-parent", + }, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceLen(t, 1, staleJobs) + must.Eq(t, "old-parent", staleJobs[0]) +} + +// TestFindStaleJobs_DeadJobsFiltered tests that dead jobs are filtered out. +func TestFindStaleJobs_DeadJobsFiltered(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "active-job": {}, + } + nomadJobStubs := []*api.JobListStub{ + { + ID: "active-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "active-job", + }, + }, + { + ID: "dead-job", + Status: "dead", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "dead-job", + }, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceEmpty(t, staleJobs, must.Sprint("dead jobs should be filtered out")) +} + +// TestFindStaleJobs_EmptyIDsFiltered tests that jobs with empty IDs are filtered out. +func TestFindStaleJobs_EmptyIDsFiltered(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "valid-job": {}, + } + nomadJobStubs := []*api.JobListStub{ + { + ID: "valid-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "valid-job", + }, + }, + { + ID: "", + Status: "running", + ParentID: "", + Meta: map[string]string{}, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceEmpty(t, staleJobs, must.Sprint("empty IDs should be filtered out")) +} + +// TestFindStaleJobs_NilStubFiltered tests that nil stubs are safely ignored. +func TestFindStaleJobs_NilStubFiltered(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "valid-job": {}, + } + nomadJobStubs := []*api.JobListStub{ + nil, + { + ID: "valid-job", + Status: "running", + ParentID: "", + Meta: map[string]string{ + PackJobKey: "valid-job", + }, + }, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceEmpty(t, staleJobs, must.Sprint("nil stubs should be filtered out")) +} + +// TestFindStaleJobs_ComplexScenario tests a complex real-world scenario with +// multiple job types, child jobs, stale jobs, and dead jobs. +func TestFindStaleJobs_ComplexScenario(t *testing.T) { + currentJobIDs := map[string]struct{}{ + "web-service": {}, + "batch-job": {}, + "periodic-cron": {}, + "dispatch-queue": {}, + } + nomadJobStubs := []*api.JobListStub{ + // Current active jobs + {ID: "web-service", Status: "running", Meta: map[string]string{PackJobKey: "web-service"}}, + {ID: "batch-job", Status: "running", Meta: map[string]string{PackJobKey: "batch-job"}}, + {ID: "periodic-cron", Status: "running", Meta: map[string]string{PackJobKey: "periodic-cron"}}, + {ID: "dispatch-queue", Status: "running", Meta: map[string]string{PackJobKey: "dispatch-queue"}}, + + // Child jobs that should be filtered + {ID: "periodic-cron/periodic-1774607280", Status: "running", ParentID: "periodic-cron", Meta: map[string]string{PackJobKey: "periodic-cron"}}, + {ID: "dispatch-queue/dispatch-1774607281-abc123", Status: "running", ParentID: "dispatch-queue", Meta: map[string]string{PackJobKey: "dispatch-queue"}}, + {ID: "orphan-child/dispatch-1774607282-def456", Status: "running", ParentID: "", Meta: map[string]string{PackJobKey: "orphan-child"}}, + + // Actual stale jobs that should be identified + {ID: "old-service", Status: "running", Meta: map[string]string{PackJobKey: "old-service"}}, + {ID: "legacy-batch", Status: "running", Meta: map[string]string{PackJobKey: "legacy-batch"}}, + + // Dead job that should be filtered out + {ID: "dead-service", Status: "dead", Meta: map[string]string{PackJobKey: "dead-service"}}, + + // Invalid entries that should be filtered out + nil, + {ID: "", Status: "running", Meta: map[string]string{}}, + } + + staleJobs := filterStaleJobs(currentJobIDs, nomadJobStubs) + must.SliceLen(t, 2, staleJobs) + must.Eq(t, "old-service", staleJobs[0]) + must.Eq(t, "legacy-batch", staleJobs[1]) +} + +// filterStaleJobs extracts the core filtering logic from findStaleJobs for unit testing. +// This allows us to test the logic without requiring a real Nomad client. +func filterStaleJobs(currentJobIDs map[string]struct{}, jobStubs []*api.JobListStub) []string { + var staleJobs []string + for _, stub := range jobStubs { + if stub == nil { + continue + } + + if stub.Status == "dead" { + continue + } + + if stub.ID == "" { + continue + } + + if isChildJobStub(stub) { + continue + } + + if _, exists := currentJobIDs[stub.ID]; !exists { + staleJobs = append(staleJobs, stub.ID) + } + } + return staleJobs +} From 2ed0ad12aa72230b54c1eff9f43c7b4eaa1b14fe Mon Sep 17 00:00:00 2001 From: Ritesh Harihar Date: Wed, 1 Apr 2026 10:24:27 +0530 Subject: [PATCH 2/3] removed slash check, updated changelog file --- CHANGELOG.md | 1 + internal/runner/job/reconcile.go | 25 ++------ internal/runner/job/reconcile_test.go | 92 ++++----------------------- 3 files changed, 19 insertions(+), 99 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e414cb6e..0df03e85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ IMPROVEMENTS: * cli: Add registry now honors default main/master branch [[GH-843](https://github.com/hashicorp/nomad-pack/pull/843)] + * variable: Variables declaration now supports validation stanza [[GH-841](https://github.com/hashicorp/nomad-pack/pull/841)] ## 0.4.2 (March 16, 2026) diff --git a/internal/runner/job/reconcile.go b/internal/runner/job/reconcile.go index c3b6db95..13520662 100644 --- a/internal/runner/job/reconcile.go +++ b/internal/runner/job/reconcile.go @@ -5,7 +5,6 @@ package job import ( "fmt" - "strings" "github.com/hashicorp/nomad/api" @@ -140,23 +139,13 @@ func isChildJobStub(stub *api.JobListStub) bool { // Metadata-based signal from nomad-pack tags. For child jobs, pack.job points // to the parent/root job name, which differs from the child ID. - if packJob, ok := stub.Meta[PackJobKey]; ok && packJob != "" { - return packJob != stub.ID - } - - // Fallback only when pack.job metadata is missing. - return isChildStyleJobID(stub.ID) -} - -// isChildStyleJobID is a conservative fallback: child jobs are represented as -// "/", so they always contain a non-leading slash. -func isChildStyleJobID(jobID string) bool { - // Find the last occurrence of "/" to handle job names that might contain slashes - idx := strings.LastIndex(jobID, "/") - if idx <= 0 { - // No slash found, or slash is at the beginning (invalid) - return false + if stub.Meta != nil { + if packJob, ok := stub.Meta[PackJobKey]; ok && packJob != "" { + return packJob != stub.ID + } } - return idx < len(jobID)-1 + // If neither ParentID nor pack.job metadata indicates a child job, + // treat it as a root job to avoid false positives. + return false } diff --git a/internal/runner/job/reconcile_test.go b/internal/runner/job/reconcile_test.go index ac4dd2cb..cf0a33ee 100644 --- a/internal/runner/job/reconcile_test.go +++ b/internal/runner/job/reconcile_test.go @@ -10,87 +10,6 @@ import ( "github.com/shoenig/test/must" ) -func TestIsChildJobID(t *testing.T) { - testCases := []struct { - desc string - jobID string - expect bool - }{ - { - desc: "empty job ID is not a child job", - jobID: "", - expect: false, - }, - { - desc: "simple job ID without slash is not a child job", - jobID: "my-job", - expect: false, - }, - { - desc: "job ID with slash is treated as child-style fallback", - jobID: "namespace/my-job", - expect: true, - }, - { - desc: "periodic child job is detected", - jobID: "job-requester/periodic-1774607280", - expect: true, - }, - { - desc: "dispatch child job is detected", - jobID: "nuclei/dispatch-1774607281-093f6605", - expect: true, - }, - { - desc: "dispatch child job with longer suffix is detected", - jobID: "my-job/dispatch-1234567890-abcdef123456", - expect: true, - }, - { - desc: "job with periodic in name but not as child pattern is not a child job", - jobID: "periodic-backup", - expect: false, - }, - { - desc: "job with dispatch in name but not as child pattern is not a child job", - jobID: "dispatch-service", - expect: false, - }, - { - desc: "job with slash at the beginning is not a child job", - jobID: "/periodic-123", - expect: false, - }, - { - desc: "job with multiple slashes uses last one for pattern matching", - jobID: "namespace/parent/periodic-1234567890", - expect: true, - }, - { - desc: "job with multiple slashes is treated as child-style fallback", - jobID: "namespace/parent/my-job", - expect: true, - }, - { - desc: "parameterized root job is not a child job", - jobID: "nuclei", - expect: false, - }, - { - desc: "periodic root job is not a child job", - jobID: "job-requester", - expect: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { - actual := isChildStyleJobID(tc.jobID) - must.Eq(t, tc.expect, actual) - }) - } -} - func TestIsChildJobStub_PackJobMetadataPrecedence(t *testing.T) { t.Run("pack.job equal to ID is root even with slash", func(t *testing.T) { stub := &api.JobListStub{ @@ -117,6 +36,17 @@ func TestIsChildJobStub_PackJobMetadataPrecedence(t *testing.T) { must.Eq(t, true, isChildJobStub(stub)) }) + + t.Run("missing parent and metadata is treated as root", func(t *testing.T) { + stub := &api.JobListStub{ + ID: "namespace/my-root-job", + Status: "running", + ParentID: "", + Meta: map[string]string{}, + } + + must.Eq(t, false, isChildJobStub(stub)) + }) } // TestFindStaleJobs_ChildJobsWithParentIDFiltered tests that child jobs with From 7a9c80ffb933f12feab20f76acfdfe0c1faaa55c Mon Sep 17 00:00:00 2001 From: Ritesh Harihar Date: Wed, 1 Apr 2026 13:58:23 +0530 Subject: [PATCH 3/3] updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0df03e85..2dfd65c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ IMPROVEMENTS: * cli: Add registry now honors default main/master branch [[GH-843](https://github.com/hashicorp/nomad-pack/pull/843)] - +* cli: Fixed stale-job reconciliation so nomad-pack run no longer stops active parameterized/periodic child jobs [[GH-855](https://github.com/hashicorp/nomad-pack/pull/855)] * variable: Variables declaration now supports validation stanza [[GH-841](https://github.com/hashicorp/nomad-pack/pull/841)] ## 0.4.2 (March 16, 2026)