From d44aab1463f6f6d19ff4f7bdc38f00b79db8fb86 Mon Sep 17 00:00:00 2001 From: Megha C Date: Fri, 28 Nov 2025 15:40:21 +0530 Subject: [PATCH] url validation functions Signed-off-by: Megha C --- README.md | 8 ++ src/constants/url.ts | 21 +++ src/index.ts | 17 ++- src/test-fixtures/url.ts | 59 ++++++++ src/types/url.ts | 54 ++++++++ src/urlUtils.ts | 292 +++++++++++++++++++++++++++++++++++++++ test/url.spec.ts | 58 ++++++++ 7 files changed, 507 insertions(+), 2 deletions(-) create mode 100644 src/constants/url.ts create mode 100644 src/test-fixtures/url.ts create mode 100644 src/types/url.ts create mode 100644 src/urlUtils.ts create mode 100644 test/url.spec.ts diff --git a/README.md b/README.md index 1458c54..7e5bc10 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,14 @@ This package exports two methods: * verifyCredential * verifyPresentation +* parseAndValidateUrl +* checkUrlSafety + +### URL utilities + +`parseAndValidateUrl(value, options)` normalizes and validates URLs for use inside LCW or Verifier Plus. It enforces `http/https` schemes by default, optional TLD requirements, rejects private/localhost hosts, and lets you provide block/allow lists so a single rule-set can be shared across applications. + +`checkUrlSafety(value, options)` builds on the validator and lets you plug in blocklists or external reputation matchers (for example Google Safe Browsing). The helper returns `{ status: 'safe' | 'suspicious' | 'blocked', reasons: [...] }` so callers can disable links or show warnings while reusing the same policy surface in multiple repos. ### verifyCredential diff --git a/src/constants/url.ts b/src/constants/url.ts new file mode 100644 index 0000000..2903210 --- /dev/null +++ b/src/constants/url.ts @@ -0,0 +1,21 @@ +/*! + * Copyright (c) 2025 Digital Credentials Consortium. + * All rights reserved. + */ + +export const DEFAULT_ALLOWED_SCHEMES = ['http', 'https'] as const; +export const DEFAULT_MAX_URL_LENGTH = 2048; + +export const RESERVED_HOST_SUFFIXES = [ + '.localhost', + '.local', + '.internal', + '.home', + '.invalid', + '.test', + '.example' +]; + +export const RESERVED_HOSTNAMES = ['localhost']; + + diff --git a/src/index.ts b/src/index.ts index 0595abb..403229d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,18 @@ /*! * Copyright (c) 2022 Digital Credentials Consortium. All rights reserved. */ -export { verifyCredential, verifyPresentation - } from './Verify.js' +export { verifyCredential, verifyPresentation } from './Verify.js'; +export { + parseAndValidateUrl, + checkUrlSafety +} from './urlUtils.js'; +export type { + UrlSafetyMatcher, + UrlSafetyMatcherResult, + UrlSafetyOptions, + UrlSafetyResult, + UrlSafetyStatus, + UrlValidationOptions, + UrlValidationReason, + UrlValidationResult +} from './types/url.js'; diff --git a/src/test-fixtures/url.ts b/src/test-fixtures/url.ts new file mode 100644 index 0000000..b78e64d --- /dev/null +++ b/src/test-fixtures/url.ts @@ -0,0 +1,59 @@ +/*! + * Copyright (c) 2025 Digital Credentials Consortium. + * All rights reserved. + */ + +import type { UrlValidationOptions } from '../types/url.js'; + +export type ValidUrlFixture = { + value: string; + hostname?: string; + options?: UrlValidationOptions; +}; + +export type InvalidUrlFixture = { + value: string; + reason: string; + options?: UrlValidationOptions; +}; + +export const validUrlFixtures: ValidUrlFixture[] = [ + { + value: 'https://example.org/path?foo=bar#hash', + hostname: 'example.org' + }, + { + value: 'http://sub.example.com/resource', + hostname: 'sub.example.com' + }, + { + value: 'https://10.0.0.5/data', + hostname: '10.0.0.5', + options: { allowPrivateHosts: true, requireTld: false } + } +]; + +export const invalidUrlFixtures: InvalidUrlFixture[] = [ + { + value: '', + reason: 'empty' + }, + { + value: 'ftp://example.org', + reason: 'disallowed_scheme' + }, + { + value: 'https://192.168.1.20', + reason: 'private_host' + }, + { + value: 'https://intranet', + reason: 'missing_tld' + }, + { + value: 'https://danger.example.com', + reason: 'blocked_host', + options: { blockedHosts: ['danger.example.com'] } + } +]; + diff --git a/src/types/url.ts b/src/types/url.ts new file mode 100644 index 0000000..04cfcc2 --- /dev/null +++ b/src/types/url.ts @@ -0,0 +1,54 @@ +/*! + * Copyright (c) 2025 Digital Credentials Consortium. + * All rights reserved. + */ + +export type UrlValidationReason = + | 'empty' + | 'too_long' + | 'invalid_format' + | 'disallowed_scheme' + | 'missing_hostname' + | 'missing_tld' + | 'private_host' + | 'blocked_host'; + +export interface UrlValidationOptions { + allowedSchemes?: string[]; + maxLength?: number; + requireTld?: boolean; + allowPrivateHosts?: boolean; + blockedHosts?: string[]; + allowedHosts?: string[]; +} + +export interface UrlValidationResult { + ok: boolean; + normalizedUrl?: string; + hostname?: string; + reason?: UrlValidationReason; +} + +export type UrlSafetyStatus = 'safe' | 'suspicious' | 'blocked'; + +export interface UrlSafetyResult { + url: string; + status: UrlSafetyStatus; + reasons: string[]; +} + +export type UrlSafetyMatcher = + | ((url: URL) => Promise) + | ((url: URL) => UrlSafetyMatcherResult | null); + +export interface UrlSafetyMatcherResult { + status: UrlSafetyStatus; + reason: string; +} + +export interface UrlSafetyOptions { + blocklist?: Iterable; + allowlist?: Iterable; + matchers?: UrlSafetyMatcher[]; +} + diff --git a/src/urlUtils.ts b/src/urlUtils.ts new file mode 100644 index 0000000..75f9737 --- /dev/null +++ b/src/urlUtils.ts @@ -0,0 +1,292 @@ +/*! + * Copyright (c) 2025 Digital Credentials Consortium. + * All rights reserved. + */ + +import { + DEFAULT_ALLOWED_SCHEMES, + DEFAULT_MAX_URL_LENGTH, + RESERVED_HOSTNAMES, + RESERVED_HOST_SUFFIXES +} from './constants/url.js'; +import type { + UrlSafetyMatcherResult, + UrlSafetyOptions, + UrlSafetyResult, + UrlSafetyStatus, + UrlValidationOptions, + UrlValidationReason, + UrlValidationResult +} from './types/url.js'; + +const URL_PATTERN = /^[a-z][a-z0-9+.-]*:\/\/[^\s/$.?#].[^\s]*$/i; +const HOSTNAME_TLD_PATTERN = /[a-z\u00a1-\uffff]{2,}$/i; +const HOST_LABEL_PATTERN = /^[a-z0-9-]+$/i; +const IPV4_PATTERN = /^(?:\d{1,3}\.){3}\d{1,3}$/; +const IPV6_SEGMENT = '[0-9a-fA-F]{1,4}'; +const IPV6_PATTERN = new RegExp( + '^(' + + `(?:${IPV6_SEGMENT}:){7}${IPV6_SEGMENT}|` + + `(?:${IPV6_SEGMENT}:){1,7}:|` + + `(?:${IPV6_SEGMENT}:){1,6}:${IPV6_SEGMENT}|` + + `(?:${IPV6_SEGMENT}:){1,5}(?::${IPV6_SEGMENT}){1,2}|` + + `(?:${IPV6_SEGMENT}:){1,4}(?::${IPV6_SEGMENT}){1,3}|` + + `(?:${IPV6_SEGMENT}:){1,3}(?::${IPV6_SEGMENT}){1,4}|` + + `(?:${IPV6_SEGMENT}:){1,2}(?::${IPV6_SEGMENT}){1,5}|` + + `${IPV6_SEGMENT}:(?::${IPV6_SEGMENT}){1,6}|` + + ':(?::' + IPV6_SEGMENT + '){1,7}' + + ')(%[0-9a-zA-Z]{1,})?$' +); + +const STATUS_RANK: Record = { + safe: 0, + suspicious: 1, + blocked: 2 +}; + +/** + * Normalize hostnames for comparisons. + */ +function normalizeHost(hostname: string): string { + return hostname.trim().replace(/\.+$/, '').toLowerCase(); +} + +function hasValidTld(hostname: string): boolean { + const labels = hostname.split('.'); + if (labels.length < 2) { + return false; + } + + const tld = labels[labels.length - 1]; + return HOSTNAME_TLD_PATTERN.test(tld); +} + +function isExplicitlyBlocked( + hostname: string, + { blockedHosts, allowedHosts }: UrlValidationOptions +): UrlValidationReason | null { + if (allowedHosts?.some(host => normalizeHost(host) === hostname)) { + return null; + } + if (blockedHosts?.some(host => normalizeHost(host) === hostname)) { + return 'blocked_host'; + } + return null; +} + +function isPrivateHostname(hostname: string): boolean { + if (RESERVED_HOSTNAMES.includes(hostname)) { + return true; + } + + if (RESERVED_HOST_SUFFIXES.some(suffix => hostname.endsWith(suffix))) { + return true; + } + + if (isIPv4Address(hostname)) { + return isPrivateIPv4(hostname); + } + + if (isIPv6Address(hostname)) { + return isPrivateIPv6(hostname); + } + + return false; +} + +function isIPv4Address(value: string): boolean { + if (!IPV4_PATTERN.test(value)) { + return false; + } + + return value.split('.').every(segment => { + const numeric = Number(segment); + return numeric >= 0 && numeric <= 255; + }); +} + +function isIPv6Address(value: string): boolean { + return IPV6_PATTERN.test(value); +} + +function isPrivateIPv4(ip: string): boolean { + const [aStr, bStr, cStr] = ip.split('.'); + const a = Number(aStr); + const b = Number(bStr); + const c = Number(cStr); + + if (a === 10) return true; + if (a === 127) return true; + if (a === 169 && b === 254) return true; + if (a === 172 && b >= 16 && b <= 31) return true; + if (a === 192 && b === 168) return true; + if (a === 198 && (c === 0 || c === 1)) return true; + + return false; +} + +function isPrivateIPv6(ip: string): boolean { + const lower = ip.toLowerCase(); + if (lower === '::1') { + return true; + } + + const cleaned = lower.split('%')[0]; + const firstHextet = cleaned.split(':')[0] || '0'; + const first = parseInt(firstHextet, 16); + + if ((first & 0xfe00) === 0xfc00) { + return true; + } + + if ((first & 0xffc0) === 0xfe80) { + return true; + } + + return false; +} + +export function parseAndValidateUrl( + value: string, + options: UrlValidationOptions = {} +): UrlValidationResult { + const trimmed = value?.trim(); + if (!trimmed) { + return { ok: false, reason: 'empty' }; + } + + const { + allowedSchemes = [...DEFAULT_ALLOWED_SCHEMES], + maxLength = DEFAULT_MAX_URL_LENGTH, + requireTld = true, + allowPrivateHosts = false + } = options; + + if (trimmed.length > maxLength) { + return { ok: false, reason: 'too_long' }; + } + + if (!URL_PATTERN.test(trimmed)) { + return { ok: false, reason: 'invalid_format' }; + } + + let parsed: URL; + try { + parsed = new URL(trimmed); + } catch { + return { ok: false, reason: 'invalid_format' }; + } + + const scheme = parsed.protocol.replace(/:$/, ''); + + if (!allowedSchemes.includes(scheme)) { + return { ok: false, reason: 'disallowed_scheme' }; + } + + const hostname = normalizeHost(parsed.hostname); + if (!hostname) { + return { ok: false, reason: 'missing_hostname' }; + } + + if (hostname.split('.').some(label => label.length === 0 || !HOST_LABEL_PATTERN.test(label))) { + return { ok: false, reason: 'invalid_format' }; + } + + if (!allowPrivateHosts && isPrivateHostname(hostname)) { + return { ok: false, reason: 'private_host' }; + } + + if (requireTld && !hasValidTld(hostname)) { + return { ok: false, reason: 'missing_tld' }; + } + + const blockedReason = isExplicitlyBlocked(hostname, options); + if (blockedReason) { + return { ok: false, reason: blockedReason }; + } + + return { + ok: true, + normalizedUrl: parsed.toString(), + hostname + }; +} + +export async function checkUrlSafety( + urlInput: string, + options: UrlSafetyOptions = {} +): Promise { + const validation = parseAndValidateUrl(urlInput, { + allowPrivateHosts: true, + requireTld: false + }); + + if (!validation.ok || !validation.normalizedUrl) { + return { + url: urlInput, + status: 'blocked', + reasons: [`validation:${validation.reason ?? 'invalid_format'}`] + }; + } + + const normalizedHost = validation.hostname ?? new URL(validation.normalizedUrl).hostname; + let reasons: string[] = []; + let status: UrlSafetyStatus = 'safe'; + + if (matchesList(options.allowlist, normalizedHost)) { + return { url: validation.normalizedUrl, status: 'safe', reasons: ['allowlist'] }; + } + + if (matchesList(options.blocklist, normalizedHost)) { + return { url: validation.normalizedUrl, status: 'blocked', reasons: ['blocklist'] }; + } + + if (options.matchers?.length) { + for (const matcher of options.matchers) { + const result = await matcher(new URL(validation.normalizedUrl)); + if (!result) continue; + ({ status, reasons } = pickStronger(status, reasons, result)); + if (status === 'blocked') { + break; + } + } + } + + return { url: validation.normalizedUrl, status, reasons }; +} + +function matchesList(list: Iterable | undefined, hostname: string): boolean { + if (!list) { + return false; + } + + for (const entry of list) { + const candidate = entry.trim().toLowerCase(); + if (!candidate) { + continue; + } + + if (hostname === candidate || hostname.endsWith(`.${candidate}`)) { + return true; + } + } + + return false; +} + +function pickStronger( + currentStatus: UrlSafetyStatus, + reasons: string[], + update: UrlSafetyMatcherResult +): { status: UrlSafetyStatus; reasons: string[] } { + if (STATUS_RANK[update.status] > STATUS_RANK[currentStatus]) { + return { status: update.status, reasons: [update.reason] }; + } + + if (STATUS_RANK[update.status] === STATUS_RANK[currentStatus]) { + return { status: currentStatus, reasons: [...reasons, update.reason] }; + } + + return { status: currentStatus, reasons }; +} + diff --git a/test/url.spec.ts b/test/url.spec.ts new file mode 100644 index 0000000..8e06aba --- /dev/null +++ b/test/url.spec.ts @@ -0,0 +1,58 @@ +/*! + * Copyright (c) 2025 Digital Credentials Consortium. + * All rights reserved. + */ + +import { expect } from 'chai'; + +import { checkUrlSafety, parseAndValidateUrl } from '../src/index.js'; +import { invalidUrlFixtures, validUrlFixtures } from '../src/test-fixtures/url.js'; + +describe('URL utilities', () => { + describe('parseAndValidateUrl', () => { + validUrlFixtures.forEach(({ value, hostname, options }) => { + it(`accepts ${value}`, () => { + const result = parseAndValidateUrl(value, options as any); + expect(result.ok).to.be.true; + if (hostname) { + expect(result.hostname).to.equal(hostname); + } + }); + }); + + invalidUrlFixtures.forEach(({ value, reason, options }) => { + it(`rejects ${value || ''}`, () => { + const result = parseAndValidateUrl(value, options as any); + expect(result.ok).to.be.false; + expect(result.reason).to.equal(reason); + }); + }); + }); + + describe('checkUrlSafety', () => { + it('respects allowlists before other checks', async () => { + const result = await checkUrlSafety('https://safe.example.edu', { + allowlist: ['example.edu'], + blocklist: ['malicious.com'] + }); + expect(result.status).to.equal('safe'); + expect(result.reasons).to.deep.equal(['allowlist']); + }); + + it('applies custom matchers', async () => { + const result = await checkUrlSafety('https://flagged.example.com', { + matchers: [ + async url => { + if (url.hostname === 'flagged.example.com') { + return { status: 'suspicious', reason: 'demo_matcher' }; + } + return null; + } + ] + }); + expect(result.status).to.equal('suspicious'); + expect(result.reasons).to.deep.equal(['demo_matcher']); + }); + }); +}); +