Skip to content

Commit c8ced39

Browse files
committed
Fixes
1 parent 8704c93 commit c8ced39

10 files changed

Lines changed: 86 additions & 41 deletions

File tree

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
5151
- name: Update latest tag
5252
if: ${{ steps.release_info.outputs.tag == 'latest' }}
53-
uses: EndBug/latest-tag@fabb56bc8d15d5937c76719060da2226f5c3ffa8
53+
uses: EndBug/latest-tag@fabb56bc8d15d5937c76719060da2226f5c3ffa8
5454
with:
5555
ref: latest
5656
description: Last state in main

dfetch/manifest/manifest.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ class ManifestDict(TypedDict, total=True): # pylint: disable=too-many-ancestors
9797

9898
version: int | str
9999
remotes: NotRequired[Sequence[RemoteDict | Remote]]
100-
projects: Sequence[ProjectEntryDict | ProjectEntry | dict[str, str | list[str]]]
100+
projects: Sequence[
101+
ProjectEntryDict | ProjectEntry | dict[str, str | list[str] | dict[str, str]]
102+
]
101103

102104

103105
class Manifest:
@@ -140,14 +142,16 @@ def __init__(
140142
def _init_projects(
141143
self,
142144
projects: Sequence[
143-
ProjectEntryDict | ProjectEntry | dict[str, str | list[str]]
145+
ProjectEntryDict
146+
| ProjectEntry
147+
| dict[str, str | list[str] | dict[str, str]]
144148
],
145149
) -> dict[str, ProjectEntry]:
146150
"""Iterate over projects from manifest and initialize ProjectEntries from it.
147151
148152
Args:
149153
projects (Sequence[
150-
Union[ProjectEntryDict, ProjectEntry, Dict[str, Union[str, list[str]]]]
154+
Union[ProjectEntryDict, ProjectEntry, Dict[str, Union[str, list[str], dict[str, str]]]]
151155
]): Iterable with projects
152156
153157
Raises:
@@ -304,9 +308,11 @@ def _as_dict(self) -> dict[str, ManifestDict]:
304308
if len(remotes) == 1:
305309
remotes[0].pop("default", None)
306310

307-
projects: list[dict[str, str | list[str]]] = []
311+
projects: list[dict[str, str | list[str] | dict[str, str]]] = []
308312
for project in self.projects:
309-
project_yaml: dict[str, str | list[str]] = project.as_yaml()
313+
project_yaml: dict[str, str | list[str] | dict[str, str]] = (
314+
project.as_yaml()
315+
)
310316
if len(remotes) == 1:
311317
project_yaml.pop("remote", None)
312318
projects.append(project_yaml)

dfetch/manifest/project.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def as_yaml(self) -> dict[str, str]:
370370
"repo-path": str,
371371
"vcs": str,
372372
"ignore": Sequence[str],
373-
"integrity": dict,
373+
"integrity": dict[str, str],
374374
"default_remote": str,
375375
},
376376
total=False,
@@ -398,7 +398,7 @@ def __init__(self, kwargs: ProjectEntryDict) -> None:
398398
self._tag: str = kwargs.get("tag", "")
399399
self._vcs: str = kwargs.get("vcs", "")
400400
self._ignore: Sequence[str] = kwargs.get("ignore", [])
401-
integrity_data: dict = kwargs.get("integrity", {})
401+
integrity_data: dict[str, str] = kwargs.get("integrity", {})
402402
self._integrity = Integrity(hash=integrity_data.get("hash", ""))
403403

404404
if not self._remote and not self._url:
@@ -407,7 +407,7 @@ def __init__(self, kwargs: ProjectEntryDict) -> None:
407407
@classmethod
408408
def from_yaml(
409409
cls,
410-
yamldata: dict[str, str | list[str]] | ProjectEntryDict,
410+
yamldata: dict[str, str | list[str] | dict[str, str]] | ProjectEntryDict,
411411
default_remote: str = "",
412412
) -> "ProjectEntry":
413413
"""Create a Project Entry from yaml data.

dfetch/project/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
from dfetch.project.svnsuperproject import SvnSuperProject
1616
from dfetch.util.util import resolve_absolute_path
1717

18-
SUPPORTED_SUBPROJECT_TYPES = [ArchiveSubProject, GitSubProject, SvnSubProject]
18+
SUPPORTED_SUBPROJECT_TYPES: list[
19+
type[ArchiveSubProject] | type[GitSubProject] | type[SvnSubProject]
20+
] = [ArchiveSubProject, GitSubProject, SvnSubProject]
1921
SUPPORTED_SUPERPROJECT_TYPES = [GitSuperProject, SvnSuperProject]
2022

2123
logger = get_logger(__name__)

dfetch/project/archivesubproject.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,22 @@
4444
import os
4545
import pathlib
4646
import tempfile
47+
import urllib.request as _ur
4748

4849
from dfetch.log import get_logger
4950
from dfetch.manifest.project import ProjectEntry
5051
from dfetch.manifest.version import Version
5152
from dfetch.project.subproject import SubProject
53+
from dfetch.vcs.archive import (
54+
_safe_compare_hex, # private helper, intentionally imported for internal use
55+
)
56+
from dfetch.vcs.archive import (
57+
_suffix_for_url, # private helper, intentionally imported for internal use
58+
)
5259
from dfetch.vcs.archive import (
5360
SUPPORTED_HASH_ALGORITHMS,
5461
ArchiveLocalRepo,
5562
ArchiveRemote,
56-
_safe_compare_hex, # private helper, intentionally imported for internal use
57-
_suffix_for_url, # private helper, intentionally imported for internal use
5863
compute_hash,
5964
is_archive_url,
6065
)
@@ -94,8 +99,6 @@ def revision_is_enough() -> bool:
9499
@staticmethod
95100
def list_tool_info() -> None:
96101
"""Log information about the archive fetching tool (Python's urllib)."""
97-
import urllib.request as _ur # noqa: PLC0415
98-
99102
SubProject._log_tool("urllib", _ur.__doc__ or "built-in")
100103

101104
def get_default_branch(self) -> str:

dfetch/project/subproject.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,9 @@ def freeze_project(self, project: ProjectEntry) -> str | None:
409409
return None
410410
if on_disk_version:
411411
project.version = on_disk_version
412-
return on_disk_version.revision or on_disk_version.tag or str(on_disk_version)
412+
return (
413+
on_disk_version.revision or on_disk_version.tag or str(on_disk_version)
414+
)
413415
return None
414416

415417
@staticmethod

dfetch/reporting/sbom_reporter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,11 @@
109109
from cyclonedx.schema import OutputFormat, SchemaVersion
110110

111111
import dfetch.util.purl
112-
from dfetch.util.purl import DFETCH_TO_CDX_HASH_ALGORITHM
113112
from dfetch.manifest.manifest import Manifest
114113
from dfetch.manifest.project import ProjectEntry
115114
from dfetch.reporting.reporter import Reporter
116115
from dfetch.util.license import License
116+
from dfetch.util.purl import DFETCH_TO_CDX_HASH_ALGORITHM
117117

118118
# PyRight is pedantic with decorators see https://github.com/madpah/serializable/issues/8
119119
# It might be fixable with https://github.com/microsoft/pyright/discussions/4426, would prefer

dfetch/vcs/archive.py

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,21 @@
3131
import tarfile
3232
import tempfile
3333
import urllib.error
34+
import urllib.parse
3435
import urllib.request
3536
import zipfile
3637
from collections.abc import Sequence
3738

39+
from dfetch.log import get_logger
40+
from dfetch.project.subproject import SubProject
41+
from dfetch.util.util import find_matching_files, safe_rm
42+
3843
#: Archive file extensions recognised by DFetch.
39-
#: Defined before any intra-package imports to avoid partial-initialisation
40-
#: issues when other modules (e.g. dfetch.util.purl) import this symbol while
41-
#: the module is still being initialised.
4244
ARCHIVE_EXTENSIONS = (".tar.gz", ".tgz", ".tar.bz2", ".tar.xz", ".zip")
4345

4446
#: Hash algorithms supported by the ``integrity.hash`` manifest field.
4547
SUPPORTED_HASH_ALGORITHMS = ("sha256",)
4648

47-
from dfetch.log import get_logger # noqa: E402
48-
from dfetch.project.subproject import SubProject # noqa: E402
49-
from dfetch.util.util import find_matching_files, safe_rm # noqa: E402
50-
5149
logger = get_logger(__name__)
5250

5351
# Safety limits applied during extraction to prevent decompression bombs.
@@ -112,12 +110,15 @@ def is_accessible(self) -> bool:
112110
* ``http``/``https`` URLs first try a ``HEAD`` request. If the server
113111
rejects it (405/501) a partial ``GET`` (``Range: bytes=0-0``) is
114112
attempted instead. Returns *False* on any final failure.
113+
* Any other URL scheme returns *False*.
115114
"""
116-
from urllib.parse import urlparse as _urlparse # noqa: PLC0415
115+
parsed = urllib.parse.urlparse(self.url)
116+
117+
if parsed.scheme == "file":
118+
return os.path.exists(parsed.path)
117119

118-
if _urlparse(self.url).scheme == "file":
119-
path = _urlparse(self.url).path
120-
return os.path.exists(path)
120+
if parsed.scheme not in ("http", "https"):
121+
return False
121122

122123
for method, headers in [
123124
("HEAD", {}),
@@ -142,14 +143,27 @@ def download(self, dest_path: str) -> None:
142143
dest_path: Local file path to write the archive to.
143144
144145
Raises:
145-
RuntimeError: On download failure.
146+
RuntimeError: On download failure or unsupported URL scheme.
146147
"""
147-
try:
148-
urllib.request.urlretrieve(self.url, dest_path)
149-
except (urllib.error.URLError, OSError) as exc:
148+
parsed = urllib.parse.urlparse(self.url)
149+
if parsed.scheme == "file":
150+
try:
151+
shutil.copy(parsed.path, dest_path)
152+
except OSError as exc:
153+
raise RuntimeError(
154+
f"'{self.url}' is not a valid URL or unreachable: {exc}"
155+
) from exc
156+
elif parsed.scheme in ("http", "https"):
157+
try:
158+
urllib.request.urlretrieve(self.url, dest_path)
159+
except (urllib.error.URLError, OSError) as exc:
160+
raise RuntimeError(
161+
f"'{self.url}' is not a valid URL or unreachable: {exc}"
162+
) from exc
163+
else:
150164
raise RuntimeError(
151-
f"'{self.url}' is not a valid URL or unreachable: {exc}"
152-
) from exc
165+
f"'{self.url}' uses unsupported scheme '{parsed.scheme}'."
166+
)
153167

154168

155169
class ArchiveLocalRepo:
@@ -223,9 +237,13 @@ def _check_archive_limits(member_count: int, total_bytes: int) -> None:
223237
)
224238

225239
@staticmethod
226-
def _check_zip_members(zf: zipfile.ZipFile) -> None:
240+
def check_zip_members(zf: zipfile.ZipFile) -> list[zipfile.ZipInfo]:
227241
"""Validate all ZIP member paths against path-traversal attacks.
228242
243+
Returns:
244+
The validated list of members, safe to pass to
245+
:meth:`zipfile.ZipFile.extract`.
246+
229247
Raises:
230248
RuntimeError: When any member contains an absolute path, a ``..``
231249
component, or when the archive exceeds the size/count limits.
@@ -242,6 +260,7 @@ def _check_zip_members(zf: zipfile.ZipFile) -> None:
242260
raise RuntimeError(
243261
f"Archive contains an unsafe member path: {info.filename!r}"
244262
)
263+
return members
245264

246265
@staticmethod
247266
def _check_tar_members(tf: tarfile.TarFile) -> None:
@@ -289,13 +308,13 @@ def _extract_raw(archive_path: str, dest_dir: str) -> None:
289308
with tarfile.open(archive_path, "r:*") as tf:
290309
ArchiveLocalRepo._check_tar_members(tf)
291310
if sys.version_info >= (3, 11, 4):
292-
tf.extractall(dest_dir, filter="tar")
311+
tf.extractall(dest_dir, filter="tar") # nosec B202
293312
else:
294-
tf.extractall(dest_dir) # noqa: S202
313+
tf.extractall(dest_dir) # nosec B202
295314
elif lower.endswith(".zip") or zipfile.is_zipfile(archive_path):
296315
with zipfile.ZipFile(archive_path) as zf:
297316
ArchiveLocalRepo._check_zip_members(zf)
298-
zf.extractall(dest_dir)
317+
zf.extractall(dest_dir) # nosec B202
299318
else:
300319
raise RuntimeError(
301320
f"Unsupported archive format: '{archive_path}'. "
@@ -319,6 +338,8 @@ def _copy_with_src(
319338
shutil.copy2(s, d)
320339
elif os.path.isfile(src_path):
321340
shutil.copy2(src_path, os.path.join(dest_dir, os.path.basename(src_path)))
341+
else:
342+
raise RuntimeError(f"src {src!r} was not found in archive")
322343

323344
if keep_licenses:
324345
for item in os.listdir(extract_root):

tests/test_archive.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ def test_compute_hash_empty_file():
3737
try:
3838
digest = compute_hash(path, "sha256")
3939
# SHA-256 of empty string
40-
assert digest == "e3b0c44298fc1c149afbf4c8996fb924" "27ae41e4649b934ca495991b7852b855"
40+
assert (
41+
digest == "e3b0c44298fc1c149afbf4c8996fb924"
42+
"27ae41e4649b934ca495991b7852b855"
43+
)
4144
finally:
4245
os.remove(path)
4346

tests/test_integrity.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from dfetch.manifest.project import Integrity, ProjectEntry
44

5-
65
# ---------------------------------------------------------------------------
76
# Integrity dataclass
87
# ---------------------------------------------------------------------------
@@ -54,7 +53,14 @@ def test_projectentry_integrity_falsy_without_hash():
5453

5554
def test_projectentry_as_yaml_includes_integrity():
5655
h = "sha256:" + "d" * 64
57-
project = ProjectEntry({"name": "lib", "url": "https://example.com/lib.tar.gz", "vcs": "archive", "integrity": {"hash": h}})
56+
project = ProjectEntry(
57+
{
58+
"name": "lib",
59+
"url": "https://example.com/lib.tar.gz",
60+
"vcs": "archive",
61+
"integrity": {"hash": h},
62+
}
63+
)
5864
yaml_data = project.as_yaml()
5965
assert yaml_data["integrity"] == {"hash": h}
6066

@@ -66,7 +72,9 @@ def test_projectentry_as_yaml_omits_empty_integrity():
6672

6773

6874
def test_projectentry_hash_setter():
69-
project = ProjectEntry({"name": "lib", "url": "https://example.com/lib.tar.gz", "vcs": "archive"})
75+
project = ProjectEntry(
76+
{"name": "lib", "url": "https://example.com/lib.tar.gz", "vcs": "archive"}
77+
)
7078
h = "sha256:" + "e" * 64
7179
project.hash = h
7280
assert project.hash == h

0 commit comments

Comments
 (0)