Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
stopContainers,
cleanup,
preserveIptablesAudit,
fastKillAgentContainer,
} from './docker-manager';
import {
ensureFirewallNetwork,
Expand Down Expand Up @@ -1898,13 +1899,25 @@ program
};

// Register signal handlers
// Fast-kill the agent container immediately so it cannot outlive the awf
// process. GH Actions sends SIGTERM then SIGKILL ~10 s later; the full
// docker compose down in performCleanup() is too slow to finish in that
// window, leaving the container running as an orphan.
/* istanbul ignore next -- signal handlers cannot be unit-tested */
process.on('SIGINT', async () => {
if (containersStarted && !config.keepContainers) {
await fastKillAgentContainer();
}
await performCleanup('SIGINT');
console.error(`Process exiting with code: 130`);
process.exit(130); // Standard exit code for SIGINT
});

/* istanbul ignore next -- signal handlers cannot be unit-tested */
process.on('SIGTERM', async () => {
if (containersStarted && !config.keepContainers) {
await fastKillAgentContainer();
}
await performCleanup('SIGTERM');
console.error(`Process exiting with code: 143`);
process.exit(143); // Standard exit code for SIGTERM
Expand Down
82 changes: 80 additions & 2 deletions src/docker-manager.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { generateDockerCompose, subnetsOverlap, writeConfigs, startContainers, stopContainers, cleanup, runAgentCommand, validateIdNotInSystemRange, getSafeHostUid, getSafeHostGid, getRealUserHome, extractGhHostFromServerUrl, readGitHubPathEntries, mergeGitHubPathEntries, readEnvFile, MIN_REGULAR_UID, ACT_PRESET_BASE_IMAGE } from './docker-manager';
import { generateDockerCompose, subnetsOverlap, writeConfigs, startContainers, stopContainers, fastKillAgentContainer, isAgentExternallyKilled, resetAgentExternallyKilled, AGENT_CONTAINER_NAME, cleanup, runAgentCommand, validateIdNotInSystemRange, getSafeHostUid, getSafeHostGid, getRealUserHome, extractGhHostFromServerUrl, readGitHubPathEntries, mergeGitHubPathEntries, readEnvFile, MIN_REGULAR_UID, ACT_PRESET_BASE_IMAGE } from './docker-manager';
import { WrapperConfig } from './types';
import * as fs from 'fs';
import * as path from 'path';
Expand Down Expand Up @@ -1726,7 +1726,7 @@ describe('docker-manager', () => {
// Verify working_dir is set
expect(result.services.agent.working_dir).toBe('/custom/workdir');
// Verify other config is still present
expect(result.services.agent.container_name).toBe('awf-agent');
expect(result.services.agent.container_name).toBe(AGENT_CONTAINER_NAME);
expect(result.services.agent.cap_add).toContain('SYS_CHROOT');
});

Expand Down Expand Up @@ -2950,12 +2950,65 @@ describe('docker-manager', () => {
});
});

describe('fastKillAgentContainer', () => {
beforeEach(() => {
jest.clearAllMocks();
resetAgentExternallyKilled();
});

it('should call docker stop with default 3-second timeout', async () => {
mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any);

await fastKillAgentContainer();

expect(mockExecaFn).toHaveBeenCalledWith(
'docker',
['stop', '-t', '3', AGENT_CONTAINER_NAME],
{ reject: false, timeout: 8000 }
);
});

it('should accept a custom stop timeout', async () => {
mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any);

await fastKillAgentContainer(5);

expect(mockExecaFn).toHaveBeenCalledWith(
'docker',
['stop', '-t', '5', AGENT_CONTAINER_NAME],
{ reject: false, timeout: 10000 }
);
});

it('should not throw when docker stop fails', async () => {
mockExecaFn.mockRejectedValueOnce(new Error('docker not found'));

await expect(fastKillAgentContainer()).resolves.toBeUndefined();
});

it('should set the externally-killed flag', async () => {
mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any);

expect(isAgentExternallyKilled()).toBe(false);
await fastKillAgentContainer();
expect(isAgentExternallyKilled()).toBe(true);
});

it('should set the externally-killed flag even when docker stop fails', async () => {
mockExecaFn.mockRejectedValueOnce(new Error('docker not found'));

await fastKillAgentContainer();
expect(isAgentExternallyKilled()).toBe(true);
});
});

describe('runAgentCommand', () => {
let testDir: string;

beforeEach(() => {
testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'awf-test-'));
jest.clearAllMocks();
resetAgentExternallyKilled();
});

afterEach(() => {
Expand Down Expand Up @@ -3132,6 +3185,31 @@ describe('docker-manager', () => {

jest.useRealTimers();
});

it('should skip post-run analysis when agent was externally killed', async () => {
// Create access.log with denied entries — these should be ignored
const squidLogsDir = path.join(testDir, 'squid-logs');
fs.mkdirSync(squidLogsDir, { recursive: true });
fs.writeFileSync(
path.join(squidLogsDir, 'access.log'),
'1760994429.358 172.30.0.20:36274 blocked.com:443 -:- 1.1 CONNECT 403 TCP_DENIED:HIER_NONE blocked.com:443 "curl/7.81.0"\n'
);

// Simulate fastKillAgentContainer having been called
mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); // fastKill docker stop
await fastKillAgentContainer();

// Mock docker logs -f
mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any);
// Mock docker wait — container was stopped externally, returns 143
mockExecaFn.mockResolvedValueOnce({ stdout: '143', stderr: '', exitCode: 0 } as any);

const result = await runAgentCommand(testDir, ['github.com']);

// Should return 143 and skip log analysis (empty blockedDomains)
expect(result.exitCode).toBe(143);
expect(result.blockedDomains).toEqual([]);
});
});

describe('cleanup', () => {
Expand Down
84 changes: 74 additions & 10 deletions src/docker-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@ import { DEFAULT_DNS_SERVERS } from './dns-resolver';

const SQUID_PORT = 3128;

/**
* Container names used in Docker Compose and referenced by docker CLI commands.
* Extracted as constants so that generateDockerCompose() and helpers like
* fastKillAgentContainer() stay in sync.
*/
export const AGENT_CONTAINER_NAME = 'awf-agent';
const SQUID_CONTAINER_NAME = 'awf-squid';
const IPTABLES_INIT_CONTAINER_NAME = 'awf-iptables-init';
const API_PROXY_CONTAINER_NAME = 'awf-api-proxy';
const DOH_PROXY_CONTAINER_NAME = 'awf-doh-proxy';

/**
* Flag set by fastKillAgentContainer() to signal runAgentCommand() that
* the container was externally stopped. When true, runAgentCommand() skips
* its own docker wait / log collection to avoid racing with the signal handler.
*/
let agentExternallyKilled = false;

// When bundled with esbuild, this global is replaced at build time with the
// JSON content of containers/agent/seccomp-profile.json. In normal (tsc)
// builds the identifier remains undeclared, so the typeof check below is safe.
Expand Down Expand Up @@ -418,7 +436,7 @@ export function generateDockerCompose(

// Squid service configuration
const squidService: any = {
container_name: 'awf-squid',
container_name: SQUID_CONTAINER_NAME,
networks: {
'awf-net': {
ipv4_address: networkConfig.squidIp,
Expand Down Expand Up @@ -1143,7 +1161,7 @@ export function generateDockerCompose(

// Agent service configuration
const agentService: any = {
container_name: 'awf-agent',
container_name: AGENT_CONTAINER_NAME,
networks: {
'awf-net': {
ipv4_address: networkConfig.agentIp,
Expand Down Expand Up @@ -1316,7 +1334,7 @@ export function generateDockerCompose(
// that shares the agent's network namespace but NEVER gives NET_ADMIN to the agent.
// This eliminates the window where the agent holds NET_ADMIN during startup.
const iptablesInitService: any = {
container_name: 'awf-iptables-init',
container_name: IPTABLES_INIT_CONTAINER_NAME,
// Share agent's network namespace so iptables rules apply to agent's traffic
network_mode: 'service:agent',
// Only mount the init signal volume and the iptables setup script
Expand Down Expand Up @@ -1379,7 +1397,7 @@ export function generateDockerCompose(
// Add Node.js API proxy sidecar if enabled
if (config.enableApiProxy && networkConfig.proxyIp) {
const proxyService: any = {
container_name: 'awf-api-proxy',
container_name: API_PROXY_CONTAINER_NAME,
networks: {
'awf-net': {
ipv4_address: networkConfig.proxyIp,
Expand Down Expand Up @@ -1513,7 +1531,7 @@ export function generateDockerCompose(
// Add DNS-over-HTTPS proxy sidecar if enabled
if (config.dnsOverHttps && networkConfig.dohProxyIp) {
const dohService: any = {
container_name: 'awf-doh-proxy',
container_name: DOH_PROXY_CONTAINER_NAME,
image: 'cloudflare/cloudflared:latest',
networks: {
'awf-net': {
Expand Down Expand Up @@ -1918,7 +1936,7 @@ export async function startContainers(workDir: string, allowedDomains: string[],
// This handles orphaned containers from failed/interrupted previous runs
logger.debug('Removing any existing containers with conflicting names...');
try {
await execa('docker', ['rm', '-f', 'awf-squid', 'awf-agent', 'awf-iptables-init', 'awf-api-proxy'], {
await execa('docker', ['rm', '-f', SQUID_CONTAINER_NAME, AGENT_CONTAINER_NAME, IPTABLES_INIT_CONTAINER_NAME, API_PROXY_CONTAINER_NAME], {
reject: false,
});
} catch {
Expand Down Expand Up @@ -2011,7 +2029,7 @@ export async function runAgentCommand(workDir: string, allowedDomains: string[],
try {
// Stream logs in real-time using docker logs -f (follow mode)
// Run this in the background and wait for the container to exit separately
const logsProcess = execa('docker', ['logs', '-f', 'awf-agent'], {
const logsProcess = execa('docker', ['logs', '-f', AGENT_CONTAINER_NAME], {
stdio: 'inherit',
reject: false,
});
Expand All @@ -2023,7 +2041,7 @@ export async function runAgentCommand(workDir: string, allowedDomains: string[],
logger.info(`Agent timeout: ${agentTimeoutMinutes} minutes`);

// Race docker wait against a timeout
const waitPromise = execa('docker', ['wait', 'awf-agent']).then(result => ({
const waitPromise = execa('docker', ['wait', AGENT_CONTAINER_NAME]).then(result => ({
type: 'completed' as const,
exitCodeStr: result.stdout,
}));
Expand All @@ -2038,7 +2056,7 @@ export async function runAgentCommand(workDir: string, allowedDomains: string[],
if (raceResult.type === 'timeout') {
logger.warn(`Agent command timed out after ${agentTimeoutMinutes} minutes, stopping container...`);
// Stop the container gracefully (10 second grace period before SIGKILL)
await execa('docker', ['stop', '-t', '10', 'awf-agent'], { reject: false });
await execa('docker', ['stop', '-t', '10', AGENT_CONTAINER_NAME], { reject: false });
exitCode = 124; // Standard timeout exit code (same as coreutils timeout)
} else {
// Clear the timeout timer so it doesn't keep the event loop alive
Expand All @@ -2047,13 +2065,21 @@ export async function runAgentCommand(workDir: string, allowedDomains: string[],
}
} else {
// No timeout - wait indefinitely
const { stdout: exitCodeStr } = await execa('docker', ['wait', 'awf-agent']);
const { stdout: exitCodeStr } = await execa('docker', ['wait', AGENT_CONTAINER_NAME]);
exitCode = parseInt(exitCodeStr.trim(), 10);
}

// Wait for the logs process to finish (it should exit automatically when container stops)
await logsProcess;

// If the container was killed externally (e.g. by fastKillAgentContainer in a
// signal handler), skip the remaining log analysis — the container state is
// unreliable and the signal handler will drive the rest of the shutdown.
if (agentExternallyKilled) {
logger.debug('Agent was externally killed, skipping post-run analysis');
return { exitCode: exitCode || 143, blockedDomains: [] };
}

logger.debug(`Agent exit code: ${exitCode}`);

// Small delay to ensure Squid logs are flushed to disk
Expand Down Expand Up @@ -2108,6 +2134,44 @@ export async function runAgentCommand(workDir: string, allowedDomains: string[],
}
}

/**
* Fast-kills the agent container with a short grace period.
* Used in signal handlers (SIGTERM/SIGINT) to ensure the agent cannot outlive
* the awf process — e.g. when GH Actions sends SIGTERM followed by SIGKILL
* after ~10 seconds. The full `docker compose down -v` in stopContainers() is
* too slow to reliably complete in that window.
*
* @param stopTimeoutSeconds - Grace period before SIGKILL (default: 3)
*/
export async function fastKillAgentContainer(stopTimeoutSeconds = 3): Promise<void> {
agentExternallyKilled = true;
try {
await execa('docker', ['stop', '-t', String(stopTimeoutSeconds), AGENT_CONTAINER_NAME], {
reject: false,
timeout: (stopTimeoutSeconds + 5) * 1000, // hard deadline on the stop command itself
});
} catch {
// Best-effort — if docker CLI is unavailable or hangs, we still proceed
// to performCleanup which will attempt docker compose down.
}
}

/**
* Returns whether the agent was externally killed via fastKillAgentContainer().
* @internal Exported for testing.
*/
export function isAgentExternallyKilled(): boolean {
return agentExternallyKilled;
}

/**
* Resets the externally-killed flag. Only used in tests.
* @internal Exported for testing.
*/
export function resetAgentExternallyKilled(): void {
agentExternallyKilled = false;
}

/**
* Stops and removes Docker Compose services
*/
Expand Down
Loading