|
| 1 | +/** |
| 2 | + * Input Validation for Agent Hallucination Hardening |
| 3 | + * |
| 4 | + * Reusable validators that defend against malformed inputs from AI agents. |
| 5 | + * Agents hallucinate differently than humans typo — they embed query params |
| 6 | + * in resource IDs, double-encode URLs, and inject path traversals. |
| 7 | + * |
| 8 | + * These validators are applied at the CLI argument parsing layer so all |
| 9 | + * commands benefit automatically. The Sentry API handles most of these |
| 10 | + * server-side (404, 400), but client-side validation provides: |
| 11 | + * - Better error messages with actionable suggestions |
| 12 | + * - Faster failure without a network round-trip |
| 13 | + * - Defense-in-depth against unexpected URL manipulation |
| 14 | + * |
| 15 | + * @see https://github.com/getsentry/cli/issues/350 |
| 16 | + */ |
| 17 | + |
| 18 | +import { ValidationError } from "./errors.js"; |
| 19 | + |
| 20 | +/** |
| 21 | + * Characters that are never valid in Sentry resource identifiers (slugs, IDs). |
| 22 | + * These would cause URL injection if interpolated into API paths. |
| 23 | + * |
| 24 | + * - `?` — query string injection |
| 25 | + * - `#` — fragment injection |
| 26 | + * - `%` — pre-URL-encoded values (double-encoding risk) |
| 27 | + * - whitespace — breaks URL structure |
| 28 | + */ |
| 29 | +const RESOURCE_ID_FORBIDDEN = /[?#%\s]/; |
| 30 | + |
| 31 | +/** |
| 32 | + * Matches `%XX` hex-encoded sequences (e.g., `%2F`, `%20`, `%3A`). |
| 33 | + * Used to detect pre-encoded strings that would get double-encoded. |
| 34 | + */ |
| 35 | +const PRE_ENCODED_PATTERN = /%[0-9a-fA-F]{2}/; |
| 36 | + |
| 37 | +/** |
| 38 | + * Matches ASCII control characters (code points 0x00–0x1F). |
| 39 | + * These are invisible and have no valid use in CLI string inputs. |
| 40 | + * |
| 41 | + * Built via RegExp constructor to avoid lint warnings about literal |
| 42 | + * control characters in regex patterns. |
| 43 | + */ |
| 44 | +// biome-ignore lint/suspicious/noControlCharactersInRegex: intentionally matching control chars for input validation |
| 45 | +const CONTROL_CHAR_PATTERN = /[\x00-\x1f]/; |
| 46 | + |
| 47 | +/** |
| 48 | + * Matches `..` path segments that could be used for path traversal. |
| 49 | + * Anchored to segment boundaries (start/end of string or `/`). |
| 50 | + */ |
| 51 | +const PATH_TRAVERSAL_PATTERN = /(^|\/)\.\.(\/|$)/; |
| 52 | + |
| 53 | +/** |
| 54 | + * Human-readable descriptions for the first forbidden character found. |
| 55 | + * Provides clear context in error messages. |
| 56 | + */ |
| 57 | +function describeForbiddenChar(char: string): string { |
| 58 | + const code = char.charCodeAt(0); |
| 59 | + |
| 60 | + if (char === "?") { |
| 61 | + return '"?" (query string)'; |
| 62 | + } |
| 63 | + if (char === "#") { |
| 64 | + return '"#" (URL fragment)'; |
| 65 | + } |
| 66 | + if (char === "%") { |
| 67 | + return '"%" (URL encoding)'; |
| 68 | + } |
| 69 | + if (char === " ") { |
| 70 | + return "a space"; |
| 71 | + } |
| 72 | + if (char === "\t") { |
| 73 | + return "a tab character"; |
| 74 | + } |
| 75 | + if (char === "\n") { |
| 76 | + return "a newline"; |
| 77 | + } |
| 78 | + if (char === "\r") { |
| 79 | + return "a carriage return"; |
| 80 | + } |
| 81 | + if (char === "\0") { |
| 82 | + return "a null byte"; |
| 83 | + } |
| 84 | + // Generic control character |
| 85 | + if (code < 0x20) { |
| 86 | + return `control character (0x${code.toString(16).padStart(2, "0")})`; |
| 87 | + } |
| 88 | + // Other whitespace |
| 89 | + return `whitespace character (U+${code.toString(16).padStart(4, "0")})`; |
| 90 | +} |
| 91 | + |
| 92 | +/** |
| 93 | + * Reject ASCII control characters (below 0x20) in any string input. |
| 94 | + * |
| 95 | + * Control characters are invisible and have no valid use in CLI arguments. |
| 96 | + * An agent could embed them to manipulate downstream processing. |
| 97 | + * |
| 98 | + * @param input - String to validate |
| 99 | + * @param label - Human-readable name for error messages (e.g., "organization slug") |
| 100 | + * @throws {ValidationError} When input contains control characters |
| 101 | + */ |
| 102 | +export function rejectControlChars(input: string, label: string): void { |
| 103 | + const match = CONTROL_CHAR_PATTERN.exec(input); |
| 104 | + if (match) { |
| 105 | + const capitalizedLabel = label.charAt(0).toUpperCase() + label.slice(1); |
| 106 | + throw new ValidationError( |
| 107 | + `Invalid ${label}: contains ${describeForbiddenChar(match[0])}.\n` + |
| 108 | + ` ${capitalizedLabel} must not contain control characters.` |
| 109 | + ); |
| 110 | + } |
| 111 | +} |
| 112 | + |
| 113 | +/** |
| 114 | + * Reject pre-URL-encoded sequences (`%XX`) in resource identifiers. |
| 115 | + * |
| 116 | + * Resource IDs (slugs, issue IDs) should always be plain strings. If they |
| 117 | + * contain `%XX` patterns, they were likely pre-encoded by an agent and |
| 118 | + * would get double-encoded when interpolated into API URLs. |
| 119 | + * |
| 120 | + * @param input - String to validate |
| 121 | + * @param label - Human-readable name for error messages (e.g., "project slug") |
| 122 | + * @throws {ValidationError} When input contains percent-encoded sequences |
| 123 | + */ |
| 124 | +export function rejectPreEncoded(input: string, label: string): void { |
| 125 | + const match = PRE_ENCODED_PATTERN.exec(input); |
| 126 | + if (match) { |
| 127 | + throw new ValidationError( |
| 128 | + `Invalid ${label}: contains URL-encoded sequence "${match[0]}".\n` + |
| 129 | + ` Use plain text instead of percent-encoding (e.g., "my project" not "my%20project").` |
| 130 | + ); |
| 131 | + } |
| 132 | +} |
| 133 | + |
| 134 | +/** |
| 135 | + * Validate a resource identifier (org slug, project slug, or issue ID component). |
| 136 | + * |
| 137 | + * Rejects characters that would cause URL injection when the identifier |
| 138 | + * is interpolated into API endpoint paths. This is the primary defense |
| 139 | + * against agent-hallucinated inputs like: |
| 140 | + * - `my-org?query=foo` (query injection) |
| 141 | + * - `my-project#anchor` (fragment injection) |
| 142 | + * - `CLI-G%20extra` (pre-encoded space → double encoding) |
| 143 | + * - `my-org\tother` (control character injection) |
| 144 | + * |
| 145 | + * @param input - Resource identifier to validate |
| 146 | + * @param label - Human-readable name for error messages (e.g., "organization slug") |
| 147 | + * @throws {ValidationError} When input contains forbidden characters |
| 148 | + */ |
| 149 | +export function validateResourceId(input: string, label: string): void { |
| 150 | + // Check control characters first (subset of the broader check) |
| 151 | + rejectControlChars(input, label); |
| 152 | + |
| 153 | + // Check for URL-significant characters and whitespace |
| 154 | + const match = RESOURCE_ID_FORBIDDEN.exec(input); |
| 155 | + if (match) { |
| 156 | + throw new ValidationError( |
| 157 | + `Invalid ${label}: contains ${describeForbiddenChar(match[0])}.\n` + |
| 158 | + " Slugs and IDs must contain only letters, numbers, hyphens, and underscores." |
| 159 | + ); |
| 160 | + } |
| 161 | +} |
| 162 | + |
| 163 | +/** |
| 164 | + * Validate an API endpoint path for path traversal attacks. |
| 165 | + * |
| 166 | + * Rejects `..` path segments that could be used to escape the API prefix: |
| 167 | + * ``` |
| 168 | + * sentry api "../../admin/settings/" → rejected |
| 169 | + * sentry api "organizations/../admin" → rejected |
| 170 | + * ``` |
| 171 | + * |
| 172 | + * While the Sentry API server handles this safely (404), rejecting on the |
| 173 | + * client provides better error messages and defense-in-depth. |
| 174 | + * |
| 175 | + * Also validates that the endpoint doesn't contain control characters. |
| 176 | + * |
| 177 | + * @param endpoint - API endpoint path to validate |
| 178 | + * @throws {ValidationError} When endpoint contains path traversal or control characters |
| 179 | + */ |
| 180 | +export function validateEndpoint(endpoint: string): void { |
| 181 | + rejectControlChars(endpoint, "API endpoint"); |
| 182 | + |
| 183 | + if (PATH_TRAVERSAL_PATTERN.test(endpoint)) { |
| 184 | + throw new ValidationError( |
| 185 | + 'Invalid API endpoint: contains ".." path traversal.\n' + |
| 186 | + " Use absolute API paths (e.g., /api/0/organizations/my-org/issues/)." |
| 187 | + ); |
| 188 | + } |
| 189 | +} |
0 commit comments