diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 6ef677d..93fa059 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -33,7 +33,7 @@ export default defineConfig({ { text: "Examples", link: "/examples/" }, { text: "Overlay Testing", link: "/overlay/" }, { - text: "v1.1.6", + text: "v1.1.8", items: [{ text: "Changelog", link: "/changelog" }], }, ], diff --git a/docs/api/deployment/rhdh-deployment.md b/docs/api/deployment/rhdh-deployment.md index 3f3fe1c..c7ddec1 100644 --- a/docs/api/deployment/rhdh-deployment.md +++ b/docs/api/deployment/rhdh-deployment.md @@ -94,10 +94,10 @@ Wait for RHDH deployment to be ready. | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `timeout` | `number` | `300000` | Timeout in milliseconds | +| `timeout` | `number` | `500` | Timeout in seconds | ```typescript -await rhdh.waitUntilReady(600000); // 10 minutes +await rhdh.waitUntilReady(600); // 10 minutes ``` ### `rolloutRestart()` diff --git a/docs/api/playwright/base-config.md b/docs/api/playwright/base-config.md index b003f2b..b96586b 100644 --- a/docs/api/playwright/base-config.md +++ b/docs/api/playwright/base-config.md @@ -57,7 +57,7 @@ Raw base configuration object. Use for advanced customization. }, fullyParallel: true, forbidOnly: !!process.env.CI, - retries: process.env.CI ? 2 : 0, + retries: process.env.CI ? 1 : 0, workers: "50%", reporter: [["list"], ["html"]], use: { @@ -91,7 +91,7 @@ export default playwrightDefineConfig({ | `timeout` | `90000` | | `expect.timeout` | `30000` | | `fullyParallel` | `true` | -| `retries` | `2` (CI), `0` (local) | +| `retries` | `1` (CI), `0` (local) | | `workers` | `"50%"` | | `viewport` | `1920x1080` | | `video` | `"on"` | diff --git a/docs/changelog.md b/docs/changelog.md index 930b655..1759ff0 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,15 +2,24 @@ All notable changes to this project will be documented in this file. 
-## [1.1.7] - Current +## [1.1.8] - Current + +### Fixed +- Fixed namespace deletion race condition during test retries +- Improved 404 error detection for different Kubernetes client versions + +### Changed +- Increased default timeouts (300s → 500s) and test timeout (600s) +- Reduced CI retries from 2 to 1 +- Added pod diagnostics logging on timeout and periodic status updates + +## [1.1.7] ### Fixed - **Secrets with control characters**: Fixed `SyntaxError: Bad control character in string literal` when secrets contain newlines or special characters (e.g., GitHub App private keys) - - Replaced `JSON.parse(envsubst(JSON.stringify(obj)))` approach with `lodash.clonedeepwith` for safe environment variable substitution - - Now correctly handles private keys, tabs, carriage returns, and other control characters in secret values ### Dependencies -- Added `lodash.clonedeepwith@^4.5.0` for safe deep object traversal with custom value substitution +- Added `lodash.clonedeepwith@^4.5.0` for safe environment variable substitution ## [1.1.6] diff --git a/docs/guide/core-concepts/playwright-config.md b/docs/guide/core-concepts/playwright-config.md index 3126cb0..ab802db 100644 --- a/docs/guide/core-concepts/playwright-config.md +++ b/docs/guide/core-concepts/playwright-config.md @@ -26,7 +26,7 @@ The `defineConfig` function extends your configuration with sensible defaults fo | `testDir` | `./tests` | Test files location | | `timeout` | 90,000ms | Test timeout | | `expect.timeout` | 10,000ms | Assertion timeout | -| `retries` | 2 (CI), 0 (local) | Test retries | +| `retries` | 1 (CI), 0 (local) | Test retries | | `workers` | 50% of CPUs | Parallel workers | | `outputDir` | `node_modules/.cache/e2e-test-results` | Playwright artifacts | diff --git a/docs/guide/deployment/keycloak-deployment.md b/docs/guide/deployment/keycloak-deployment.md index fe0fc42..6376a28 100644 --- a/docs/guide/deployment/keycloak-deployment.md +++ b/docs/guide/deployment/keycloak-deployment.md @@ 
-210,7 +210,7 @@ await keycloak.teardown(); Wait for Keycloak to be ready: ```typescript -await keycloak.waitUntilReady(300000); // 5 minutes +await keycloak.waitUntilReady(500); // default: 500 seconds (~8 minutes) ``` ## Properties diff --git a/docs/guide/deployment/rhdh-deployment.md b/docs/guide/deployment/rhdh-deployment.md index f0b7305..f5aa06d 100644 --- a/docs/guide/deployment/rhdh-deployment.md +++ b/docs/guide/deployment/rhdh-deployment.md @@ -127,11 +127,11 @@ Any other value will throw an error during deployment. Wait for the RHDH deployment to be ready: ```typescript -// Default timeout: 300000ms (5 minutes) +// Default timeout: 500 seconds (~8 minutes) await deployment.waitUntilReady(); // Custom timeout -await deployment.waitUntilReady(600000); // 10 minutes +await deployment.waitUntilReady(600); // 10 minutes ``` ### `rolloutRestart()` diff --git a/docs/guide/utilities/kubernetes-client.md b/docs/guide/utilities/kubernetes-client.md index 2fbe6b6..f362eb3 100644 --- a/docs/guide/utilities/kubernetes-client.md +++ b/docs/guide/utilities/kubernetes-client.md @@ -105,7 +105,7 @@ Wait for pods to be ready with early failure detection. Unlike `oc rollout statu await k8sClient.waitForPodsWithFailureDetection( "my-namespace", "app.kubernetes.io/instance=my-app", - 300, // timeout in seconds (default: 300) + 500, // timeout in seconds (default: 500) 5000 // poll interval in ms (default: 5000) ); ``` diff --git a/docs/overlay/reference/patterns.md b/docs/overlay/reference/patterns.md index 81c74a5..391c33d 100644 --- a/docs/overlay/reference/patterns.md +++ b/docs/overlay/reference/patterns.md @@ -248,7 +248,7 @@ test.beforeAll(async ({ rhdh }) => { }); ``` -Note: `rhdh.deploy()` already increases the test timeout (500s). If your setup does more work before deploy, set a higher timeout in `beforeAll`. +Note: `rhdh.deploy()` already increases the test timeout (600s / 10 minutes). 
If your setup does more work before deploy, set a higher timeout in `beforeAll`. ## Error Handling Patterns diff --git a/docs/tutorials/ci-cd-integration.md b/docs/tutorials/ci-cd-integration.md index 8214338..8716cd0 100644 --- a/docs/tutorials/ci-cd-integration.md +++ b/docs/tutorials/ci-cd-integration.md @@ -96,7 +96,7 @@ oc create token e2e-tests -n default --duration=8760h The `CI` environment variable enables: - Auto-cleanup of namespaces -- Increased retries (2 instead of 0) +- Increased retries (1 instead of 0) - Non-interactive mode ## Parallel Jobs diff --git a/package.json b/package.json index 679892e..1979c4b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "rhdh-e2e-test-utils", - "version": "1.1.7", + "version": "1.1.8", "description": "Test utilities for RHDH E2E tests", "license": "Apache-2.0", "type": "module", diff --git a/src/deployment/keycloak/deployment.ts b/src/deployment/keycloak/deployment.ts index 18805cc..92ba5bb 100644 --- a/src/deployment/keycloak/deployment.ts +++ b/src/deployment/keycloak/deployment.ts @@ -353,7 +353,7 @@ export class KeycloakHelper { /** * Wait for Keycloak to be ready */ - async waitUntilReady(timeout: number = 300): Promise { + async waitUntilReady(timeout: number = 500): Promise { this._log(`Waiting for Keycloak to be ready...`); const labelSelector = `app.kubernetes.io/instance=${this.deploymentConfig.releaseName}`; await this.k8sClient.waitForPodsWithFailureDetection( @@ -423,7 +423,7 @@ spec: private async _waitForKeycloak(): Promise { this._log("Waiting for Keycloak API to be ready..."); - const timeout = 300; + const timeout = 500; const startTime = Date.now(); while (true) { @@ -432,7 +432,7 @@ spec: } if ((Date.now() - startTime) / 1000 >= timeout) { - throw new Error("Keycloak API not ready after 5 minutes"); + throw new Error(`Keycloak API not ready after ${timeout} seconds`); } await new Promise((resolve) => setTimeout(resolve, 5000)); diff --git a/src/deployment/rhdh/deployment.ts 
b/src/deployment/rhdh/deployment.ts index a71e2c6..8e7bdf5 100644 --- a/src/deployment/rhdh/deployment.ts +++ b/src/deployment/rhdh/deployment.ts @@ -39,7 +39,7 @@ export class RHDHDeployment { async deploy(): Promise { this._log("Starting RHDH deployment..."); - test.setTimeout(500_000); + test.setTimeout(600_000); await this.k8sClient.createNamespaceIfNotExists( this.deploymentConfig.namespace, @@ -256,7 +256,7 @@ export class RHDHDeployment { await $`oc scale deployment -l 'app.kubernetes.io/instance in (redhat-developer-hub,developer-hub)' --replicas=1 -n ${namespace}`; } - async waitUntilReady(timeout: number = 300): Promise { + async waitUntilReady(timeout: number = 500): Promise { this._log( `Waiting for RHDH deployment to be ready in namespace ${this.deploymentConfig.namespace}...`, ); diff --git a/src/playwright/base-config.ts b/src/playwright/base-config.ts index 3e61285..3760938 100644 --- a/src/playwright/base-config.ts +++ b/src/playwright/base-config.ts @@ -11,7 +11,7 @@ import { resolve } from "path"; export const baseConfig: PlaywrightTestConfig = { testDir: "./tests", forbidOnly: !!process.env.CI, - retries: process.env.CI ? 2 : 0, + retries: process.env.CI ? 
1 : 0,
   workers: "50%",
   outputDir: "node_modules/.cache/e2e-test-results",
   timeout: 90_000,
diff --git a/src/utils/kubernetes-client.ts b/src/utils/kubernetes-client.ts
index 17c72c0..ca1c1c4 100644
--- a/src/utils/kubernetes-client.ts
+++ b/src/utils/kubernetes-client.ts
@@ -311,27 +311,30 @@ class KubernetesClientHelper {
   }
 
   /**
-   * Delete a namespace
+   * Delete a namespace and, by default, wait for it to be fully terminated.
+   *
+   * A namespace that is already gone (404) counts as successfully deleted.
+   *
+   * @param namespace - Name of the namespace to delete
+   * @param waitForDeletion - Wait until the namespace no longer exists (default: true)
+   * @param timeoutSeconds - Maximum wait for termination, in seconds (default: 180)
+   * @throws Any non-404 error from the delete request, or a timeout error while waiting
    */
-  async deleteNamespace(namespace: string): Promise<void> {
+  async deleteNamespace(
+    namespace: string,
+    waitForDeletion: boolean = true,
+    timeoutSeconds: number = 180,
+  ): Promise<void> {
     try {
       await this._k8sApi.deleteNamespace({ name: namespace });
-      console.log(`✓ Deleted namespace ${namespace}`);
+      console.log(`[K8sHelper] Deleting namespace ${namespace}...`);
     } catch (error) {
       // Ignore if namespace doesn't exist (already deleted), but throw other errors
-      const err = error as {
-        body?: { code?: number };
-        response?: { statusCode?: number };
-        statusCode?: number;
-      };
-      if (
-        err.body?.code === 404 ||
-        err.response?.statusCode === 404 ||
-        err.statusCode === 404
-      ) {
+      if (this._isNotFoundError(error)) {
         console.log(
           `✓ Namespace ${namespace} already deleted or doesn't exist`,
         );
+        return;
       } else {
         console.error(
           `✗ Failed to delete namespace ${namespace}:`,
@@ -340,6 +343,99 @@ class KubernetesClientHelper {
         throw error;
       }
     }
+
+    if (waitForDeletion) {
+      await this._waitForNamespaceDeletion(namespace, timeoutSeconds);
+    }
+  }
+
+  /**
+   * Wait for a namespace to be fully deleted.
+   *
+   * Polls the namespace every 3 seconds; a "not found" (404) error from the
+   * read is the success signal.
+   *
+   * @param namespace - Name of the namespace being deleted
+   * @param timeoutSeconds - Maximum time to wait, in seconds (default: 180)
+   * @throws Error when the namespace still exists after `timeoutSeconds`;
+   *   non-404 errors from the read are rethrown unchanged
+   */
+  private async _waitForNamespaceDeletion(
+    namespace: string,
+    timeoutSeconds: number = 180,
+  ): Promise<void> {
+    const startTime = Date.now();
+    const timeoutMs = timeoutSeconds * 1000;
+    const pollIntervalMs = 3000;
+    const logIntervalMs = 10_000;
+    let lastLogTime = startTime;
+
+    while (Date.now() - startTime < timeoutMs) {
+      try {
+        const ns = await this._k8sApi.readNamespace({ name: namespace });
+        // Namespace still exists. Report progress at most once per
+        // logIntervalMs, measured from the previous report: polls drift, so
+        // testing `elapsed % 10 === 0` would only fire by coincidence.
+        if (
+          ns.status?.phase === "Terminating" &&
+          Date.now() - lastLogTime >= logIntervalMs
+        ) {
+          lastLogTime = Date.now();
+          const elapsed = Math.round((lastLogTime - startTime) / 1000);
+          console.log(
+            `[K8sHelper] Namespace ${namespace} still terminating (${elapsed}s)...`,
+          );
+        }
+        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+      } catch (error) {
+        // Check for 404 in various error formats from different k8s client versions
+        if (this._isNotFoundError(error)) {
+          console.log(`✓ Namespace ${namespace} fully deleted`);
+          return;
+        }
+        throw error;
+      }
+    }
+
+    throw new Error(
+      `Timeout waiting for namespace ${namespace} to be deleted after ${timeoutSeconds}s`,
+    );
+  }
+
+  /**
+   * Check if an error is a "not found" (404) error.
+   * Handles different error formats from various k8s client versions.
+   *
+   * @param error - Value caught from a Kubernetes client call
+   * @returns true when the message or a known status field indicates 404
+   */
+  private _isNotFoundError(error: unknown): boolean {
+    if (!error) return false;
+
+    // Check error message for a standalone "404" or "not found". The word
+    // boundary keeps numbers that merely contain 404 (e.g. "4040ms") from
+    // being mistaken for a status code.
+    if (error instanceof Error) {
+      const msg = error.message.toLowerCase();
+      if (/\b404\b/.test(msg) || msg.includes("not found")) {
+        return true;
+      }
+    }
+
+    // Check various object properties for 404 status codes
+    const err = error as {
+      body?: { code?: number };
+      response?: { statusCode?: number };
+      statusCode?: number;
+      code?: number;
+    };
+
+    return (
+      err.body?.code === 404 ||
+      err.response?.statusCode === 404 ||
+      err.statusCode === 404 ||
+      err.code === 404
+    );
   }
 
   /**
@@ -482,7 +578,9 @@ class KubernetesClientHelper {
   async waitForPodsWithFailureDetection(
     namespace: string,
     labelSelector: string,
-    timeoutSeconds: number = 300,
+    timeoutSeconds: number = 500,
     pollIntervalMs: number = 5000,
   ): Promise<void> {
+    // Time of the last periodic pod-status report (see the polling loop)
+    let lastStatusLogTime = Date.now();
     const startTime = Date.now();
@@ -541,9 +639,40 @@ class KubernetesClientHelper {
         return;
       }
 
+      // Log pod status every 20 seconds, measured from the previous report:
+      // polls drift past exact multiples of 20s, so `elapsedSec % 20 === 0`
+      // would skip most reports.
+      if (Date.now() - lastStatusLogTime >= 20_000) {
+        lastStatusLogTime = Date.now();
+        try {
+          await $`oc get pods -n ${namespace} -l ${labelSelector}`;
+        } catch {
+          // Ignore errors
+        }
+      }
+
       await new Promise((r) => setTimeout(r, pollIntervalMs));
     }
 
+    // Timeout reached - collect diagnostic info before throwing
+    console.log(`\n[K8sHelper] ═══ Pod Diagnostics (timeout reached) ═══`);
+    try {
+      console.log(`\n[K8sHelper] ─── Pod Status ───`);
+      await $`oc get pods -n ${namespace} -l ${labelSelector} -o wide`;
+
+      console.log(`\n[K8sHelper] ─── Pod Details ───`);
+      await $`oc describe pods -n ${namespace} -l ${labelSelector}`;
+
+      console.log(`\n[K8sHelper] ─── Namespace Events ───`);
+      await $`oc get events -n ${namespace} --sort-by='.lastTimestamp'`;
+
+      console.log(`\n[K8sHelper] ─── Pod Logs ───`);
+      await $`oc logs -n ${namespace} -l ${labelSelector} --all-containers --tail=100 2>&1 || true`;
+    } catch {
+      // Ignore errors from diagnostic commands
+    }
+    console.log(`\n[K8sHelper] ═══ End Pod Diagnostics ═══\n`);
+
     throw new Error(
       `Timeout waiting for pods (${labelSelector}) after ${timeoutSeconds}s`,
     );