From 1ad3f90cb9956861b80352adf41f6037eee40204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Sun, 12 Oct 2025 14:01:47 +0200 Subject: [PATCH 1/5] Add direct_url model and validator --- src/packaging/direct_url.py | 263 ++++++++++++++++++++++++++++++++++++ tests/test_direct_url.py | 205 ++++++++++++++++++++++++++++ 2 files changed, 468 insertions(+) create mode 100644 src/packaging/direct_url.py create mode 100644 tests/test_direct_url.py diff --git a/src/packaging/direct_url.py b/src/packaging/direct_url.py new file mode 100644 index 00000000..0f37ad74 --- /dev/null +++ b/src/packaging/direct_url.py @@ -0,0 +1,263 @@ +from __future__ import annotations + +import dataclasses +from collections.abc import Mapping +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Protocol, TypeVar + +if TYPE_CHECKING: # pragma: no cover + import sys + + if sys.version_info >= (3, 11): + from typing import Self + else: + from typing_extensions import Self + +__all__ = [ + "ArchiveInfo", + "DirInfo", + "DirectUrl", + "DirectUrlValidationError", + "VcsInfo", +] + +_T = TypeVar("_T") + + +class _FromMappingProtocol(Protocol): # pragma: no cover + @classmethod + def _from_dict(cls, d: Mapping[str, Any]) -> Self: ... 
+ + +_FromMappingProtocolT = TypeVar("_FromMappingProtocolT", bound=_FromMappingProtocol) + + +def _json_dict_factory(data: list[tuple[str, Any]]) -> dict[str, Any]: + return {key: value for key, value in data if value is not None} + + +def _get(d: Mapping[str, Any], expected_type: type[_T], key: str) -> _T | None: + """Get a value from the dictionary and verify it's the expected type.""" + if (value := d.get(key)) is None: + return None + if not isinstance(value, expected_type): + raise DirectUrlValidationError( + f"Unexpected type {type(value).__name__} " + f"(expected {expected_type.__name__})", + context=key, + ) + return value + + +def _get_required(d: Mapping[str, Any], expected_type: type[_T], key: str) -> _T: + """Get a required value from the dictionary and verify it's the expected type.""" + if (value := _get(d, expected_type, key)) is None: + raise _DirectUrlRequiredKeyError(key) + return value + + +def _get_object( + d: Mapping[str, Any], target_type: type[_FromMappingProtocolT], key: str +) -> _FromMappingProtocolT | None: + """Get a dictionary value from the dictionary and convert it to a dataclass.""" + if (value := _get(d, Mapping, key)) is None: # type: ignore[type-abstract] + return None + try: + return target_type._from_dict(value) + except Exception as e: + raise DirectUrlValidationError(e, context=key) from e + + +class DirectUrlValidationError(Exception): + """Raised when when input data is not spec-compliant.""" + + context: str | None = None + message: str + + def __init__( + self, + cause: str | Exception, + *, + context: str | None = None, + ) -> None: + if isinstance(cause, DirectUrlValidationError): + if cause.context: + self.context = ( + f"{context}.{cause.context}" if context else cause.context + ) + else: + self.context = context # pragma: no cover + self.message = cause.message + else: + self.context = context + self.message = str(cause) + + def __str__(self) -> str: + if self.context: + return f"{self.message} in {self.context!r}" 
+ return self.message + + +class _DirectUrlRequiredKeyError(DirectUrlValidationError): + def __init__(self, key: str) -> None: + super().__init__("Missing required value", context=key) + + +@dataclass(frozen=True, init=False) +class VcsInfo: + vcs: str + commit_id: str + requested_revision: str | None = None + + def __init__( + self, + *, + vcs: str, + commit_id: str, + requested_revision: str | None = None, + ) -> None: + object.__setattr__(self, "vcs", vcs) + object.__setattr__(self, "commit_id", commit_id) + object.__setattr__(self, "requested_revision", requested_revision) + + @classmethod + def _from_dict(cls, d: Mapping[str, Any]) -> Self: + # We can't validate the vcs value because the set of VCS names is not closed. + return cls( + vcs=_get_required(d, str, "vcs"), + requested_revision=_get(d, str, "requested_revision"), + commit_id=_get_required(d, str, "commit_id"), + ) + + +@dataclass(frozen=True, init=False) +class ArchiveInfo: + hashes: Mapping[str, str] | None = None + hash: str | None = None # Deprecated, use `hashes` instead + + def __init__( + self, + *, + hashes: Mapping[str, str] | None = None, + hash: str | None = None, + ) -> None: + object.__setattr__(self, "hashes", hashes) + object.__setattr__(self, "hash", hash) + + @classmethod + def _from_dict(cls, d: Mapping[str, Any]) -> Self: + archive_info = cls( + hashes=_get(d, Mapping, "hashes"), # type: ignore[type-abstract] + hash=_get(d, str, "hash"), + ) + hashes = archive_info.hashes or {} + if not all(isinstance(h, str) for h in hashes.values()): + raise DirectUrlValidationError( + "Hash values must be strings", context="hashes" + ) + if archive_info.hash is not None: + if "=" not in archive_info.hash: + raise DirectUrlValidationError( + "Invalid hash format (expected '=')", + context="hash", + ) + if archive_info.hashes is not None: + # if `hashes` are present, the legacy `hash` must match one of them + hash_algorithm, hash_value = archive_info.hash.split("=", 1) + if hash_algorithm not in hashes: + raise 
DirectUrlValidationError( + f"Algorithm {hash_algorithm!r} used in hash field " + f"is not present in hashes field", + context="hashes", + ) + if hashes[hash_algorithm] != hash_value: + raise DirectUrlValidationError( + f"Algorithm {hash_algorithm!r} used in hash field " + f"has different value in hashes field", + context="hash", + ) + return archive_info + + +@dataclass(frozen=True, init=False) +class DirInfo: + editable: bool | None = None + + def __init__( + self, + *, + editable: bool | None = None, + ) -> None: + object.__setattr__(self, "editable", editable) + + @classmethod + def _from_dict(cls, d: Mapping[str, Any]) -> Self: + return cls( + editable=_get(d, bool, "editable"), + ) + + +@dataclass(frozen=True, init=False) +class DirectUrl: + url: str + archive_info: ArchiveInfo | None = None + vcs_info: VcsInfo | None = None + dir_info: DirInfo | None = None + subdirectory: str | None = None # XXX Path or str? + + def __init__( + self, + *, + url: str, + archive_info: ArchiveInfo | None = None, + vcs_info: VcsInfo | None = None, + dir_info: DirInfo | None = None, + subdirectory: str | None = None, + ) -> None: + object.__setattr__(self, "url", url) + object.__setattr__(self, "archive_info", archive_info) + object.__setattr__(self, "vcs_info", vcs_info) + object.__setattr__(self, "dir_info", dir_info) + object.__setattr__(self, "subdirectory", subdirectory) + + @classmethod + def _from_dict(cls, d: Mapping[str, Any]) -> Self: + direct_url = cls( + url=_get_required(d, str, "url"), + archive_info=_get_object(d, ArchiveInfo, "archive_info"), + vcs_info=_get_object(d, VcsInfo, "vcs_info"), + dir_info=_get_object(d, DirInfo, "dir_info"), + subdirectory=_get(d, str, "subdirectory"), + ) + if ( + bool(direct_url.vcs_info) + + bool(direct_url.archive_info) + + bool(direct_url.dir_info) + ) != 1: + raise DirectUrlValidationError( + "Exactly one of vcs_info, archive_info, dir_info must be present" + ) + if direct_url.dir_info is not None and not 
direct_url.url.startswith("file://"): + raise DirectUrlValidationError( + "URL scheme must be file:// when dir_info is present", + context="url", + ) + # XXX subdirectory must be relative, can we, should we validate that here? + # XXX url MUST be stripped of any sensitive authentication information. + # We can't validate it here because it MAY contain git or other non security + # sensitive auth strings. + return direct_url + + @classmethod + def from_dict(cls, d: Mapping[str, Any], /) -> Self: + return cls._from_dict(d) + + def to_dict(self) -> Mapping[str, Any]: + return dataclasses.asdict(self, dict_factory=_json_dict_factory) + + def validate(self) -> None: + """Validate the DirectUrl instance against the specification. + + Raises :class:`DirectUrlValidationError` otherwise. + """ + self.from_dict(self.to_dict()) diff --git a/tests/test_direct_url.py b/tests/test_direct_url.py new file mode 100644 index 00000000..845e95d1 --- /dev/null +++ b/tests/test_direct_url.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +import pytest + +from packaging.direct_url import DirectUrl, DirectUrlValidationError, DirInfo + + +@pytest.mark.parametrize( + "direct_url_dict", + [ + { + "url": "file:///projects/myproject", + "dir_info": {}, + }, + { + "url": "file:///projects/myproject", + "dir_info": {"editable": True}, + }, + { + "url": "file:///projects/myproject", + "dir_info": {"editable": False}, + }, + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hashes": {"sha256": "f" * 40}, + "hash": f"sha256={'f' * 40}", + }, + }, + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hashes": {"sha256": "f" * 40}, + }, + }, + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hash": f"sha256={'f' * 40}", + }, + }, + { + "url": "https://g.c/user/repo.git", + "vcs_info": { + "vcs": "git", + "commit_id": "a" * 40, + "requested_revision": "main", + }, + }, + ], +) +def test_direct_url_round_trips(direct_url_dict: dict[str, 
object]) -> None: + assert DirectUrl.from_dict(direct_url_dict).to_dict() == direct_url_dict + + +def test_validate_archive_info_hashes() -> None: + with pytest.raises( + DirectUrlValidationError, + match=r"Hash values must be strings in 'archive_info.hashes'", + ): + DirectUrl.from_dict( + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hashes": {"md5": 12345}, + }, + } + ) + + +def test_validate_archive_info_hash_invalid_format() -> None: + with pytest.raises( + DirectUrlValidationError, + match=( + r"Invalid hash format \(expected '='\) " + r"in 'archive_info.hash'" + ), + ): + DirectUrl.from_dict( + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hash": "md5:12345", + }, + } + ) + + +def test_validate_archive_info_hash_missing_in_hashes() -> None: + with pytest.raises( + DirectUrlValidationError, + match=r"Algorithm 'md5' used in hash field is not present in hashes field", + ): + DirectUrl.from_dict( + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hashes": {"sha256": "f" * 40}, + "hash": "md5=12345", + }, + } + ) + + +def test_validate_archive_info_hash_different_in_hashes() -> None: + with pytest.raises( + DirectUrlValidationError, + match=( + r"Algorithm 'md5' used in hash field has different value in hashes field " + r"in 'archive_info.hash'" + ), + ): + DirectUrl.from_dict( + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hashes": {"md5": "123456"}, + "hash": "md5=12345", + }, + } + ) + + +@pytest.mark.parametrize( + "direct_url_dict", + [ + { + "url": "file:///projects/myproject", + }, + { + "url": "https://example.com/archive.zip", + "archive_info": {}, + "dir_info": {}, + }, + { + "url": "https://g.c/user/repo.git", + "vcs_info": { + "vcs": "git", + "commit_id": "a" * 40, + }, + "archive_info": {}, + }, + ], +) +def test_one_info_field(direct_url_dict: dict[str, object]) -> None: + with pytest.raises( + DirectUrlValidationError, + match=r"Exactly one of vcs_info, 
archive_info, dir_info must be present", + ): + DirectUrl.from_dict(direct_url_dict) + + +def test_dir_info_url_scheme_file() -> None: + DirectUrl.from_dict( + { + "url": "file:///home/myproject", + "dir_info": {}, + } + ) + with pytest.raises( + DirectUrlValidationError, + match=r"URL scheme must be file:// when dir_info is present", + ): + DirectUrl.from_dict( + { + "url": "https://example.com/projects/myproject", + "dir_info": {}, + } + ) + + +def test_missing_url() -> None: + with pytest.raises( + DirectUrlValidationError, + match=r"Missing required value in 'url'", + ): + DirectUrl.from_dict( + { + "dir_info": {}, + } + ) + + +def test_commit_id_type() -> None: + with pytest.raises( + DirectUrlValidationError, + match=r"Unexpected type int \(expected str\) in 'vcs_info.commit_id'", + ): + DirectUrl.from_dict( + { + "url": "https://g.c/user/repo.git", + "vcs_info": {"vcs": "git", "commit_id": 12345}, + } + ) + + +def test_validate() -> None: + direct_url = DirectUrl(url="file:///projects/myproject", dir_info=DirInfo()) + direct_url.validate() + + +def test_validate_error() -> None: + direct_url = DirectUrl(url="file:///projects/myproject") + with pytest.raises(DirectUrlValidationError): + direct_url.validate() From b059a94ab2e679cf3165b3fc26bbe0a14ada5061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Sun, 22 Feb 2026 11:30:26 +0100 Subject: [PATCH 2/5] direct_url: better handling of legacy hashes --- src/packaging/direct_url.py | 33 ++++++++-------- tests/test_direct_url.py | 76 ++++++++++++++++++++++++++++++------- 2 files changed, 79 insertions(+), 30 deletions(-) diff --git a/src/packaging/direct_url.py b/src/packaging/direct_url.py index 0f37ad74..e3f97f46 100644 --- a/src/packaging/direct_url.py +++ b/src/packaging/direct_url.py @@ -133,37 +133,34 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: @dataclass(frozen=True, init=False) class ArchiveInfo: hashes: Mapping[str, str] | None = None - hash: str | None = None # 
Deprecated, use `hashes` instead def __init__( self, *, hashes: Mapping[str, str] | None = None, - hash: str | None = None, ) -> None: object.__setattr__(self, "hashes", hashes) - object.__setattr__(self, "hash", hash) @classmethod def _from_dict(cls, d: Mapping[str, Any]) -> Self: - archive_info = cls( - hashes=_get(d, Mapping, "hashes"), # type: ignore[type-abstract] - hash=_get(d, str, "hash"), - ) - hashes = archive_info.hashes or {} - if not all(isinstance(h, str) for h in hashes.values()): + hashes = _get(d, Mapping, "hashes") # type: ignore[type-abstract] + if hashes is not None and not all(isinstance(h, str) for h in hashes.values()): raise DirectUrlValidationError( "Hash values must be strings", context="hashes" ) - if archive_info.hash is not None: - if "=" not in archive_info.hash: + legacy_hash = _get(d, str, "hash") + if legacy_hash is not None: + if "=" not in legacy_hash: raise DirectUrlValidationError( "Invalid hash format (expected '=')", context="hash", ) - if archive_info.hashes is not None: + hash_algorithm, hash_value = legacy_hash.split("=", 1) + if hashes is None: + # if `hashes` are not present, we can derive it from the legacy `hash` + hashes = {hash_algorithm: hash_value} + else: # if `hashes` are present, the legacy `hash` must match one of them - hash_algorithm, hash_value = archive_info.hash.split("=", 1) if hash_algorithm not in hashes: raise DirectUrlValidationError( f"Algorithm {hash_algorithm!r} used in hash field " @@ -176,7 +173,7 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: f"has different value in hashes field", context="hash", ) - return archive_info + return cls(hashes=hashes) @dataclass(frozen=True, init=False) @@ -252,8 +249,12 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: def from_dict(cls, d: Mapping[str, Any], /) -> Self: return cls._from_dict(d) - def to_dict(self) -> Mapping[str, Any]: - return dataclasses.asdict(self, dict_factory=_json_dict_factory) + def to_dict(self, *, generate_legacy_hash: bool = 
False) -> Mapping[str, Any]: + res = dataclasses.asdict(self, dict_factory=_json_dict_factory) + if generate_legacy_hash and self.archive_info and self.archive_info.hashes: + hash_algorithm, hash_value = next(iter(self.archive_info.hashes.items())) + res["archive_info"]["hash"] = f"{hash_algorithm}={hash_value}" + return res def validate(self) -> None: """Validate the DirectUrl instance against the specification. diff --git a/tests/test_direct_url.py b/tests/test_direct_url.py index 845e95d1..7a7a65ae 100644 --- a/tests/test_direct_url.py +++ b/tests/test_direct_url.py @@ -2,7 +2,12 @@ import pytest -from packaging.direct_url import DirectUrl, DirectUrlValidationError, DirInfo +from packaging.direct_url import ( + ArchiveInfo, + DirectUrl, + DirectUrlValidationError, + DirInfo, +) @pytest.mark.parametrize( @@ -24,19 +29,6 @@ "url": "https://example.com/archive.zip", "archive_info": { "hashes": {"sha256": "f" * 40}, - "hash": f"sha256={'f' * 40}", - }, - }, - { - "url": "https://example.com/archive.zip", - "archive_info": { - "hashes": {"sha256": "f" * 40}, - }, - }, - { - "url": "https://example.com/archive.zip", - "archive_info": { - "hash": f"sha256={'f' * 40}", }, }, { @@ -53,6 +45,50 @@ def test_direct_url_round_trips(direct_url_dict: dict[str, object]) -> None: assert DirectUrl.from_dict(direct_url_dict).to_dict() == direct_url_dict +def test_legacy_hash_populates_hashes() -> None: + direct_url = DirectUrl.from_dict( + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hash": "sha256=" + "f" * 40, + }, + } + ) + assert direct_url.archive_info + assert direct_url.archive_info.hashes == {"sha256": "f" * 40} + + +def test_to_dict_generate_legacy_hash() -> None: + direct_url = DirectUrl( + url="https://example.com/archive.zip", + archive_info=ArchiveInfo(hashes={"sha256": "f" * 40}), + ) + assert "hash" not in direct_url.to_dict()["archive_info"] + assert ( + direct_url.to_dict(generate_legacy_hash=True)["archive_info"]["hash"] + == "sha256=" + 
"f" * 40 + ) + + +def test_to_dict_generate_legacy_hash_no_hashes() -> None: + direct_url = DirectUrl( + url="https://example.com/archive.zip", + archive_info=ArchiveInfo(), + ) + assert "hash" not in direct_url.to_dict(generate_legacy_hash=True)["archive_info"] + + +def test_to_dict_generate_legacy_hash_multiple_hashes() -> None: + direct_url = DirectUrl( + url="https://example.com/archive.zip", + archive_info=ArchiveInfo(hashes={"sha256": "f" * 40, "md5": "1" * 32}), + ) + assert ( + direct_url.to_dict(generate_legacy_hash=True)["archive_info"]["hash"] + == "sha256=" + "f" * 40 + ) + + def test_validate_archive_info_hashes() -> None: with pytest.raises( DirectUrlValidationError, @@ -121,6 +157,18 @@ def test_validate_archive_info_hash_different_in_hashes() -> None: ) +def test_validate_archive_info_hash_same_in_hashes() -> None: + DirectUrl.from_dict( + { + "url": "https://example.com/archive.zip", + "archive_info": { + "hashes": {"md5": "123456"}, + "hash": "md5=123456", + }, + } + ) + + @pytest.mark.parametrize( "direct_url_dict", [ From e0f0adf973ad496f5de8e3746b7443dde626ef4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Sun, 22 Feb 2026 14:07:44 +0100 Subject: [PATCH 3/5] direct_url: strip url in to_dict --- src/packaging/direct_url.py | 65 ++++++++++++++++++++++++++++++++++--- tests/test_direct_url.py | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 5 deletions(-) diff --git a/src/packaging/direct_url.py b/src/packaging/direct_url.py index e3f97f46..4639a135 100644 --- a/src/packaging/direct_url.py +++ b/src/packaging/direct_url.py @@ -1,12 +1,15 @@ from __future__ import annotations import dataclasses +import re +import urllib.parse from collections.abc import Mapping from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Protocol, TypeVar if TYPE_CHECKING: # pragma: no cover import sys + from collections.abc import Collection if sys.version_info >= (3, 11): from typing import Self @@ 
-68,6 +71,40 @@ def _get_object( raise DirectUrlValidationError(e, context=key) from e +_PEP610_USER_PASS_ENV_VARS_REGEX = re.compile( + r"^\$\{[A-Za-z0-9-_]+\}(:\$\{[A-Za-z0-9-_]+\})?$" +) + + +def _strip_auth_from_netloc(netloc: str, safe_user_passwords: Collection[str]) -> str: + if "@" not in netloc: + return netloc + user_pass, netloc_no_user_pass = netloc.split("@", 1) + if user_pass in safe_user_passwords: + return netloc + if _PEP610_USER_PASS_ENV_VARS_REGEX.match(user_pass): + return netloc + return netloc_no_user_pass + + +def _strip_url(url: str, safe_user_passwords: Collection[str]) -> str: + """url with user:password part removed unless it is formed with + environment variables as specified in PEP 610, or it is a safe user:password + such as `git`. + """ + parsed_url = urllib.parse.urlsplit(url) + netloc = _strip_auth_from_netloc(parsed_url.netloc, safe_user_passwords) + return urllib.parse.urlunsplit( + ( + parsed_url.scheme, + netloc, + parsed_url.path, + parsed_url.query, + parsed_url.fragment, + ) + ) + + class DirectUrlValidationError(Exception): """Raised when when input data is not spec-compliant.""" @@ -240,25 +277,43 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: context="url", ) # XXX subdirectory must be relative, can we, should we validate that here? - # XXX url MUST be stripped of any sensitive authentication information. - # We can't validate it here because it MAY contain git or other non security - # sensitive auth strings. return direct_url @classmethod def from_dict(cls, d: Mapping[str, Any], /) -> Self: return cls._from_dict(d) - def to_dict(self, *, generate_legacy_hash: bool = False) -> Mapping[str, Any]: + def to_dict( + self, + *, + generate_legacy_hash: bool = False, + strip_user_password: bool = True, + safe_user_passwords: Collection[str] = ("git",), + ) -> Mapping[str, Any]: + """Convert the DirectUrl instance to a dictionary suitable for JSON + serialization. 
+ + :param generate_legacy_hash: If True, include a legacy `hash` field in + `archive_info` for backward compatibility with tools that don't + support the `hashes` field. + :param strip_user_password: If True, strip user:password from the URL + unless it is formed with environment variables as specified in PEP + 610, or it is a safe user:password such as `git`. + :param safe_user_passwords: A collection of user:password strings that + should not be stripped from the URL even if `strip_user_password` is + True. + """ res = dataclasses.asdict(self, dict_factory=_json_dict_factory) if generate_legacy_hash and self.archive_info and self.archive_info.hashes: hash_algorithm, hash_value = next(iter(self.archive_info.hashes.items())) res["archive_info"]["hash"] = f"{hash_algorithm}={hash_value}" + if strip_user_password: + res["url"] = _strip_url(self.url, safe_user_passwords) return res def validate(self) -> None: """Validate the DirectUrl instance against the specification. - Raises :class:`DirectUrlValidationError` otherwise. + Raises :class:`DirectUrlValidationError` if invalid. 
""" self.from_dict(self.to_dict()) diff --git a/tests/test_direct_url.py b/tests/test_direct_url.py index 7a7a65ae..ab20b90d 100644 --- a/tests/test_direct_url.py +++ b/tests/test_direct_url.py @@ -7,6 +7,8 @@ DirectUrl, DirectUrlValidationError, DirInfo, + VcsInfo, + _strip_url, ) @@ -251,3 +253,65 @@ def test_validate_error() -> None: direct_url = DirectUrl(url="file:///projects/myproject") with pytest.raises(DirectUrlValidationError): direct_url.validate() + + +@pytest.mark.parametrize( + ("url", "safe_user_passwords", "expected_url"), + [ + ("https://g.c/user/repo.git", ["git"], "https://g.c/user/repo.git"), + ("https://user:pass@g.c/user/repo.git", ["git"], "https://g.c/user/repo.git"), + ("ssh://git@g.c/user/repo.git", [], "ssh://g.c/user/repo.git"), + ("ssh://git@g.c/user/repo.git", ["git"], "ssh://git@g.c/user/repo.git"), + ("ssh://cvs@g.c/user/repo.git", ["git"], "ssh://g.c/user/repo.git"), + ("ssh://cvs@g.c/user/repo.git", ["git", "cvs"], "ssh://cvs@g.c/user/repo.git"), + ( + "https://${USER}:${PASSWORD}@g.c/user/repo.git", + ["git"], + "https://${USER}:${PASSWORD}@g.c/user/repo.git", + ), + ( + "https://user:${PASSWORD}@g.c/user/repo.git", + ["git"], + "https://g.c/user/repo.git", + ), + ( + "https://git:${PASSWORD}@g.c/user/repo.git", + ["git"], + "https://g.c/user/repo.git", + ), + ( + "https://${TOKEN}@g.c/user/repo.git", + ["git"], + "https://${TOKEN}@g.c/user/repo.git", + ), + ], +) +def test_strip_url(url: str, safe_user_passwords: list[str], expected_url: str) -> None: + assert _strip_url(url, safe_user_passwords) == expected_url + + +def test_to_dict_strip_url() -> None: + direct_url = DirectUrl( + url="https://user:pass@g.c/user/repo.git", + vcs_info=VcsInfo(vcs="git", commit_id="a" * 40), + ) + assert direct_url.to_dict()["url"] == "https://g.c/user/repo.git" + + +def test_to_dict_no_strip_url() -> None: + direct_url = DirectUrl( + url="https://user:pass@g.c/user/repo.git", + vcs_info=VcsInfo(vcs="git", commit_id="a" * 40), + ) + assert ( + 
direct_url.to_dict(strip_user_password=False)["url"] + == "https://user:pass@g.c/user/repo.git" + ) + + +def test_to_dict_strip_url_git_safe_default() -> None: + direct_url = DirectUrl( + url="ssh://git@g.c/user/repo.git", + vcs_info=VcsInfo(vcs="git", commit_id="a" * 40), + ) + assert direct_url.to_dict()["url"] == "ssh://git@g.c/user/repo.git" From 74d09a2e5ce32e0f717a7c78538971b6f511c5bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Sun, 22 Feb 2026 14:35:55 +0100 Subject: [PATCH 4/5] direct_url: add docs --- docs/direct_url.rst | 77 +++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + src/packaging/direct_url.py | 6 ++- 3 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 docs/direct_url.rst diff --git a/docs/direct_url.rst b/docs/direct_url.rst new file mode 100644 index 00000000..3442ba7f --- /dev/null +++ b/docs/direct_url.rst @@ -0,0 +1,77 @@ +Direct URLs +=========== + +.. currentmodule:: packaging.direct_url + +Parse and validate `direct_url.json files `_. + +Usage +----- + +.. code-block:: python + + import json + from pathlib import Path + + from packaging.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo + + # A VCS direct URL + vcs_direct_url = DirectUrl( + url="https://git.example.com/repo.git", + vcs_info=VcsInfo( + vcs="git", + commit_id="2df7bdd8dfef7b879390b9fc4016f02af2f118d4", + requested_revision="1.1.0", + ), + ) + + # An archive direct URL + archive_direct_url = DirectUrl( + url="https://example.com/archive.tar.gz", + archive_info=ArchiveInfo( + hashes={ + "sha256": "dc321a1c18a37b5438424ef3714524229dab5f4f78b297671359426fef51be6c" + } + ), + ) + + # A local editable direct URL + archive_direct_url = DirectUrl( + url="file:///home/project/example", + dir_info=DirInfo( + editable=True, + ), + ) + + # Serialization to JSON + Path("/tmp/direct_url.json").write_text( + json.dumps(vcs_direct_url.to_dict()), encoding="utf-8" + ) + + # Load from JSON. 
The resulting DirectUrl object is validated against the + # specification, else a DirectUrlValidationError is raised + direct_url = DirectUrl.from_dict( + json.loads(Path("/tmp/direct_url.json").read_text(encoding="utf-8")) + ) + + # You can validate a manually constructed DirectUrl class + vcs_direct_url.validate() + + +Reference +--------- + +.. autoclass:: DirectUrl + :members: from_dict, to_dict, validate + :exclude-members: __init__, __new__ + +.. class:: ArchiveInfo + +.. class:: DirInfo + +.. class:: VcsInfo + +The following exception may be raised by this module: + +.. autoexception:: DirectUrlValidationError + :exclude-members: __init__, __new__ diff --git a/docs/index.rst b/docs/index.rst index cc9dca61..d82dbf0f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -31,6 +31,7 @@ The ``packaging`` library uses calendar-based versioning (``YY.N``). metadata tags pylock + direct_url utils .. toctree:: diff --git a/src/packaging/direct_url.py b/src/packaging/direct_url.py index 4639a135..89e30fe9 100644 --- a/src/packaging/direct_url.py +++ b/src/packaging/direct_url.py @@ -233,6 +233,8 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: @dataclass(frozen=True, init=False) class DirectUrl: + """A class representing a direct URL.""" + url: str archive_info: ArchiveInfo | None = None vcs_info: VcsInfo | None = None @@ -281,6 +283,7 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: @classmethod def from_dict(cls, d: Mapping[str, Any], /) -> Self: + """Create and validate a DirectUrl instance from a JSON dictionary.""" return cls._from_dict(d) def to_dict( @@ -290,8 +293,7 @@ def to_dict( strip_user_password: bool = True, safe_user_passwords: Collection[str] = ("git",), ) -> Mapping[str, Any]: - """Convert the DirectUrl instance to a dictionary suitable for JSON - serialization. + """Convert the DirectUrl instance to a JSON dictionary. 
:param generate_legacy_hash: If True, include a legacy `hash` field in `archive_info` for backward compatibility with tools that don't From 062f297d5c60459af7861d1287fb5d69f7f68c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Tue, 24 Feb 2026 10:31:20 +0100 Subject: [PATCH 5/5] direct_url: handle review comments --- docs/direct_url.rst | 2 +- src/packaging/direct_url.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/direct_url.rst b/docs/direct_url.rst index 3442ba7f..caa59f78 100644 --- a/docs/direct_url.rst +++ b/docs/direct_url.rst @@ -3,7 +3,7 @@ Direct URLs .. currentmodule:: packaging.direct_url -Parse and validate `direct_url.json files `_. +Parse and validate `direct_url.json files `_. Usage ----- diff --git a/src/packaging/direct_url.py b/src/packaging/direct_url.py index 89e30fe9..5d1c56ca 100644 --- a/src/packaging/direct_url.py +++ b/src/packaging/direct_url.py @@ -4,7 +4,6 @@ import re import urllib.parse from collections.abc import Mapping -from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Protocol, TypeVar if TYPE_CHECKING: # pragma: no cover @@ -24,6 +23,11 @@ "VcsInfo", ] + +def __dir__() -> list[str]: + return __all__ + + _T = TypeVar("_T") @@ -140,7 +144,7 @@ def __init__(self, key: str) -> None: super().__init__("Missing required value", context=key) -@dataclass(frozen=True, init=False) +@dataclasses.dataclass(frozen=True, init=False) class VcsInfo: vcs: str commit_id: str @@ -167,7 +171,7 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: ) -@dataclass(frozen=True, init=False) +@dataclasses.dataclass(frozen=True, init=False) class ArchiveInfo: hashes: Mapping[str, str] | None = None @@ -213,7 +217,7 @@ def _from_dict(cls, d: Mapping[str, Any]) -> Self: return cls(hashes=hashes) -@dataclass(frozen=True, init=False) +@dataclasses.dataclass(frozen=True, init=False) class DirInfo: editable: bool | None = None @@ -231,7 +235,7 @@ def _from_dict(cls, d: 
Mapping[str, Any]) -> Self: ) -@dataclass(frozen=True, init=False) +@dataclasses.dataclass(frozen=True, init=False) class DirectUrl: """A class representing a direct URL."""