diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16a591b159..8e7c152acd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -69,12 +69,18 @@ END_UNRELEASED_TEMPLATE
Other changes:
* (pypi) Update dependencies used for `compile_pip_requirements`, building
sdists in the `whl_library` rule and fetching wheels using `pip`.
-* (pypi) We will set `allow_fail` to `False` if the
- {attr}`experimental_index_url_overrides` is set
- to a non-empty value. This means that failures will be no-longer cached in
- this particular case.
- ([#3260](https://github.com/bazel-contrib/rules_python/issues/3260) and
- [#2632](https://github.com/bazel-contrib/rules_python/issues/2632))
+* (pypi) Before using the bazel downloader to fetch the PyPI package metadata
+  we now first fetch the list of available packages from each index. The
+  resulting package mappings are written as facts to the `MODULE.bazel.lock`
+  file on supported bazel versions, so this is done at most once. As a result,
+  per-package {obj}`experimental_index_url_overrides` is no longer needed if the index URLs are
+  passed to `pip.parse` via `experimental_index_url` and `experimental_extra_index_urls`.
+  Furthermore, the `--index_url` and `--extra_index_urls` flags now behave more in
+  line with how they work in `uv` and `pip`, i.e. we fall back to `--index_url` if the package is
+  not found in `--extra_index_urls`.
+  Fixes
+  [#3260](https://github.com/bazel-contrib/rules_python/issues/3260) and
+  [#2632](https://github.com/bazel-contrib/rules_python/issues/2632).
{#v0-0-0-fixed}
### Fixed
diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel
index 6b4822333c..869be4705a 100644
--- a/python/private/pypi/BUILD.bazel
+++ b/python/private/pypi/BUILD.bazel
@@ -244,6 +244,7 @@ bzl_library(
srcs = ["parse_simpleapi_html.bzl"],
deps = [
":version_from_filename_bzl",
+ "//python/private:normalize_name_bzl",
],
)
@@ -424,8 +425,6 @@ bzl_library(
":urllib_bzl",
"//python/private:auth_bzl",
"//python/private:normalize_name_bzl",
- "//python/private:text_util_bzl",
- "@bazel_features//:features",
],
)
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index 563130791e..7f0d2776d7 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -16,16 +16,20 @@
Parse SimpleAPI HTML in Starlark.
"""
+load("//python/private:normalize_name.bzl", "normalize_name")
load(":version_from_filename.bzl", "version_from_filename")
-def parse_simpleapi_html(*, content):
+def parse_simpleapi_html(*, content, parse_index = False):
"""Get the package URLs for given shas by parsing the Simple API HTML.
Args:
- content(str): The Simple API HTML content.
+ content: {type}`str` The Simple API HTML content.
+ parse_index: {type}`bool` whether to parse the content as the root index page of the PyPI index,
+ e.g. `https://pypi.org/simple/`. That page only lists the URLs of the individual packages.
Returns:
- A list of structs with:
+ If parsing the index page, a map from package name to the URL it can be queried from.
+ Otherwise, a list of structs with:
* filename: {type}`str` The filename of the artifact.
* version: {type}`str` The version of the artifact.
* url: {type}`str` The URL to download the artifact.
@@ -59,6 +63,8 @@ def parse_simpleapi_html(*, content):
# https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api
fail("Unsupported API version: {}".format(api_version))
+ packages = {}
+
# 2. Iterate using find() to avoid huge list allocations from .split("
- tag_end = content.find(">", start_tag)
- end_tag = content.find("", tag_end)
- if tag_end == -1 or end_tag == -1:
+ # Find the closing tag first, then find the end of the opening
+ # tag using rfind. This correctly handles attributes that
+ # contain > characters, e.g. data-requires-python=">=3.6".
+ end_tag = content.find("", start_tag)
+ if end_tag == -1:
break
+ tag_end = content.rfind(">", start_tag, end_tag)
+ if tag_end == -1 or tag_end <= start_tag:
+ cursor = end_tag + 4
+ continue
# Extract only the necessary slices
- attr_part = content[start_tag + 3:tag_end]
filename = content[tag_end + 1:end_tag].strip()
+ attr_part = content[start_tag + 3:tag_end]
# Update cursor for next iteration
cursor = end_tag + 4
- # 3. Efficient Attribute Parsing
attrs = _parse_attrs(attr_part)
href = attrs.get("href", "")
if not href:
continue
+ if parse_index:
+ pkg_name = filename
+ packages[normalize_name(pkg_name)] = href
+ continue
+
+ # 3. Efficient Attribute Parsing
dist_url, _, sha256 = href.partition("#sha256=")
# Handle Yanked status
@@ -121,6 +137,9 @@ def parse_simpleapi_html(*, content):
else:
sdists[sha256] = dist
+ if parse_index:
+ return packages
+
return struct(
sdists = sdists,
whls = whls,
diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl
index 28c6cbeafb..7b24102263 100644
--- a/python/private/pypi/pypi_cache.bzl
+++ b/python/private/pypi/pypi_cache.bzl
@@ -89,6 +89,11 @@ def _pypi_cache_get(self, key):
if not cached and versions:
# Could not get from in-memory, read from lockfile facts
cached = self._facts.get(index_url, versions)
+ else:
+ # We might be using something from memory that is not yet stored in facts (e.g. we processed
+ # the requirements.txt for one Python version and the deps got cached, but a new Python
+ # version means different deps, which may add extras).
+ self._facts.setdefault(index_url, cached)
return cached
@@ -122,6 +127,13 @@ def _filter_packages(dists, requested_versions):
if dists == None or not requested_versions:
return dists
+ if type(dists) == "dict":
+ return {
+ pkg: url
+ for pkg, url in dists.items()
+ if pkg in requested_versions
+ }
+
sha256s_by_version = {}
whls = {}
sdists = {}
@@ -193,6 +205,12 @@ def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_ver
# cannot trust known facts, different version that we know how to parse
return None
+ if type(requested_versions) == "dict":
+ return _filter_packages(
+ dists = known_facts.get("index_urls", {}).get(index_url, {}),
+ requested_versions = requested_versions,
+ )
+
known_sources = {}
root_url, _, distribution = index_url.rstrip("/").rpartition("/")
@@ -266,10 +284,46 @@ def _store_facts(facts, fact_version, index_url, value):
facts["fact_version"] = fact_version
+ if type(value) == "dict":
+ # facts: {
+ # "index_urls": {
+ # "": {
+ # "": "",
+ # },
+ # },
+ # },
+ for pkg, url in value.items():
+ facts.setdefault("index_urls", {}).setdefault(index_url, {})[pkg] = url
+ return value
+
root_url, _, distribution = index_url.rstrip("/").rpartition("/")
distribution = distribution.rstrip("/")
root_url = root_url.rstrip("/")
+ # The schema is
+ # facts: {
+ # "dist_hashes": {
+ # "": {
+ # "": {
+ # "": "",
+ # },
+ # },
+ # },
+ # "dist_filenames": {
+ # "": {
+ # "": {
+ # "": "", # if it is different from the URL
+ # },
+ # },
+ # },
+ # "dist_yanked": {
+ # "": {
+ # "": {
+ # "": "", # if the package is yanked
+ # },
+ # },
+ # },
+ # },
for sha256, d in (value.sdists | value.whls).items():
facts.setdefault("dist_hashes", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, sha256)
if not d.url.endswith(d.filename):
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 20d79ba9b4..2171e8b56a 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -16,11 +16,9 @@
A file that houses private functions used in the `bzlmod` extension with the same name.
"""
-load("@bazel_features//:features.bzl", "bazel_features")
load("//python/private:auth.bzl", _get_auth = "get_auth")
load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
-load("//python/private:text_util.bzl", "render")
load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")
load(":urllib.bzl", "urllib")
@@ -35,15 +33,22 @@ def simpleapi_download(
_fail = fail):
"""Download Simple API HTML.
+ First it queries all of the indexes for available packages and then it downloads the contents of
+ the per-package URLs and sha256 values. This enables us to use bazel_downloader with
+ `requirements.txt` files. As a side effect we are also able to "cross-compile" by fetching the
+ right wheel for the right target platform from the information that we retrieve here.
+
Args:
ctx: The module_ctx or repository_ctx.
attr: Contains the parameters for the download. They are grouped into a
struct for better clarity. It must have attributes:
- * index_url: str, the index.
+ * index_url: str, the index, or if `extra_index_urls` are passed, the default index.
* index_url_overrides: dict[str, str], the index overrides for
separate packages.
- * extra_index_urls: Extra index URLs that will be looked up after
- the main is looked up.
+ * extra_index_urls: Will be looked at in the order they are defined and the first match
+ wins. This is similar to what uv does, see
+ https://docs.astral.sh/uv/concepts/indexes/#searching-across-multiple-indexes.
+ PRs for implementing other strategies are welcome.
* sources: list[str], the sources to download things for. Each value is
the contents of requirements files.
* envsubst: list[str], the envsubst vars for performing substitution in index url.
@@ -70,111 +75,119 @@ def simpleapi_download(
normalize_name(p): i
for p, i in (attr.index_url_overrides or {}).items()
}
+ sources = {
+ normalize_name(pkg): versions
+ for pkg, versions in attr.sources.items()
+ }
- # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
- # to replicate how `pip` would handle this case.
- contents = {}
- index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
- download_kwargs = {}
- if bazel_features.external_deps.download_has_block_param:
- download_kwargs["block"] = not parallel_download
+ ctx.report_progress("Fetch package lists from PyPI index")
- if len(index_urls) == 1 or index_url_overrides:
- download_kwargs["allow_fail"] = False
- else:
- download_kwargs["allow_fail"] = True
+ # NOTE: we are not merging results from multiple indexes to replicate how `pip` would
+ # handle this case. What we do is we select a particular index to download the packages
+ dist_urls = _get_dist_urls(
+ ctx,
+ default_index = attr.index_url,
+ index_urls = attr.extra_index_urls,
+ index_url_overrides = index_url_overrides,
+ sources = sources,
+ read_simpleapi = read_simpleapi,
+ cache = cache,
+ get_auth = get_auth,
+ attr = attr,
+ block = not parallel_download,
+ _fail = _fail,
+ )
- input_sources = attr.sources
+ ctx.report_progress("Fetching package URLs from PyPI index")
- found_on_index = {}
- warn_overrides = False
- ctx.report_progress("Fetch package lists from PyPI index")
- for i, index_url in enumerate(index_urls):
- if i != 0:
- # Warn the user about a potential fix for the overrides
- warn_overrides = True
-
- async_downloads = {}
- sources = {pkg: versions for pkg, versions in input_sources.items() if pkg not in found_on_index}
- for pkg, versions in sources.items():
- pkg_normalized = normalize_name(pkg)
- url = urllib.strip_empty_path_segments("{index_url}/{distribution}/".format(
- index_url = index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
- distribution = pkg,
- ))
- result = read_simpleapi(
- ctx = ctx,
- attr = attr,
- versions = versions,
- url = url,
- cache = cache,
- get_auth = get_auth,
- **download_kwargs
- )
- if hasattr(result, "wait"):
- # We will process it in a separate loop:
- async_downloads[pkg] = struct(
- pkg_normalized = pkg_normalized,
- wait = result.wait,
- url = url,
- )
- elif result.success:
- contents[pkg_normalized] = _with_index_url(url, result.output)
- found_on_index[pkg] = index_url
-
- if not async_downloads:
- continue
+ downloads = {}
+ contents = {}
+ for pkg, url in dist_urls.items():
+ result = read_simpleapi(
+ ctx = ctx,
+ attr = attr,
+ url = url,
+ cache = cache,
+ versions = sources[pkg],
+ get_auth = get_auth,
+ block = not parallel_download,
+ parse_index = False,
+ )
+ if hasattr(result, "wait"):
+ # We will process it in a separate loop:
+ downloads[pkg] = result
+ else:
+ contents[pkg] = _with_index_url(url, result.output)
+ for pkg, d in downloads.items():
# If we use `block` == False, then we need to have a second loop that is
# collecting all of the results as they were being downloaded in parallel.
- for pkg, download in async_downloads.items():
- result = download.wait()
-
- if result.success:
- contents[download.pkg_normalized] = _with_index_url(download.url, result.output)
- found_on_index[pkg] = index_url
-
- failed_sources = [pkg for pkg in input_sources if pkg not in found_on_index]
- if failed_sources:
- pkg_index_urls = {
- pkg: index_url_overrides.get(
- normalize_name(pkg),
- index_urls,
- )
- for pkg in failed_sources
- }
-
- _fail(
- """
-Failed to download metadata of the following packages from urls:
-{pkg_index_urls}
-
-If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={failed_sources}' to your 'pip.parse' call.
-""".format(
- pkg_index_urls = render.dict(pkg_index_urls),
- failed_sources = render.list(failed_sources),
- ),
+ contents[pkg] = _with_index_url(dist_urls[pkg], d.wait().output)
+
+ return contents
+
+def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs):
+ downloads = {}
+ results = {}
+ for extra in index_url_overrides.values():
+ if extra not in index_urls:
+ index_urls.append(extra)
+
+ index_urls = index_urls or []
+ if default_index not in index_urls:
+ index_urls.append(default_index)
+
+ for index_url in index_urls:
+ download = read_simpleapi(
+ ctx = ctx,
+ attr = attr,
+ url = urllib.strip_empty_path_segments("{index_url}/".format(
+ index_url = index_url,
+ )),
+ parse_index = True,
+ versions = {pkg: None for pkg in sources},
+ block = block,
+ **kwargs
)
- return None
-
- if warn_overrides:
- index_url_overrides = {
- pkg: found_on_index[pkg]
- for pkg in attr.sources
- if found_on_index[pkg] != attr.index_url
- }
-
- if index_url_overrides:
- # buildifier: disable=print
- print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
- render.dict(index_url_overrides),
+ if hasattr(download, "wait"):
+ downloads[index_url] = download
+ else:
+ results[index_url] = download
+
+ for index_url, download in downloads.items():
+ results[index_url] = download.wait()
+
+ found_on_index = {}
+ for index_url, result in results.items():
+ for pkg in sources:
+ if pkg in found_on_index:
+ # We have already found the package, skip searching for it in
+ # other indexes.
+ #
+ # If we wanted to merge results from all of the indexes, we would have to continue
+ # here and then merge the per-index results in the outer function.
+ continue
+
+ if index_url_overrides.get(pkg, index_url) != index_url:
+ # we should not use this index for the package
+ continue
+
+ found = result.output.get(pkg)
+ if not found:
+ continue
+
+ # Ignore the URL here because we know how to construct it.
+
+ found_on_index[pkg] = urllib.strip_empty_path_segments("{}/{}/".format(
+ index_url,
+ pkg.replace("_", "-"), # Use the official normalization for URLs
))
- return contents
+ return found_on_index
-def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download_kwargs):
+def _read_simpleapi(ctx, url, attr, cache, versions, parse_index, get_auth = None, **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -189,6 +202,8 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
cache: {type}`struct` the `pypi_cache` instance.
versions: {type}`list[str] The versions that have been requested.
get_auth: A function to get auth information. Used in tests.
+ parse_index: {type}`bool` Whether to parse the content as a root index page
+ (e.g. `/simple/`) instead of a package-specific page.
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
@@ -196,11 +211,6 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
A similar object to what `download` would return except that in result.out
will be the parsed simple api contents.
"""
- # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
- # the whl location and we cannot handle multiple URLs at once by passing
- # them to ctx.download if we want to correctly handle the relative URLs.
- # TODO: Add a test that env subbed index urls do not leak into the lock file.
-
real_url = urllib.strip_empty_path_segments(envsubst(url, attr.envsubst, ctx.getenv))
cache_key = (url, real_url, versions)
@@ -242,6 +252,7 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
output = output,
cache = cache,
cache_key = cache_key,
+ parse_index = parse_index,
),
)
@@ -251,15 +262,16 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
output = output,
cache = cache,
cache_key = cache_key,
+ parse_index = parse_index,
)
-def _read_index_result(ctx, *, result, output, cache, cache_key):
+def _read_index_result(ctx, *, result, output, cache, cache_key, parse_index):
if not result.success:
return struct(success = False)
content = ctx.read(output)
- output = parse_simpleapi_html(content = content)
+ output = parse_simpleapi_html(content = content, parse_index = parse_index)
if output:
cache.setdefault(cache_key, output)
return struct(success = True, output = output)
diff --git a/python/private/pypi/urllib.bzl b/python/private/pypi/urllib.bzl
index ca6ded76b1..ea4cd32cc9 100644
--- a/python/private/pypi/urllib.bzl
+++ b/python/private/pypi/urllib.bzl
@@ -3,7 +3,7 @@
def _get_root_directory(url):
scheme_end = url.find("://")
if scheme_end == -1:
- fail("Invalid URL format")
+ fail("Invalid URL format: '{}'".format(url))
scheme = url[:scheme_end]
host_end = url.find("/", scheme_end + 3)
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index 637c7881c2..31a41f6af5 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -247,12 +247,19 @@ def _test_simple_extras_vs_no_extras(env):
_tests.append(_test_simple_extras_vs_no_extras)
def _test_simple_extras_vs_no_extras_simpleapi(env):
- def mockread_simpleapi(*_, **__):
+ def mockread_simpleapi(*_, parse_index, **__):
+ if parse_index:
+ content = """\
+ simple-0.0.1-py3-none-any.whl
+"""
return struct(
output = parse_simpleapi_html(
- content = """\
- simple-0.0.1-py3-none-any.whl
-""",
+ content = content,
+ parse_index = parse_index,
),
success = True,
)
@@ -489,10 +496,13 @@ def _test_simple_with_markers(env):
_tests.append(_test_simple_with_markers)
def _test_torch_experimental_index_url(env):
- def mockread_simpleapi(*_, **__):
- return struct(
- output = parse_simpleapi_html(
- content = """\
+ def mockread_simpleapi(*_, parse_index, **__):
+ if parse_index:
+ content = """\
+ torch
+"""
+ else:
+ content = """\
torch-2.4.1+cpu-cp310-cp310-linux_x86_64.whl
torch-2.4.1+cpu-cp310-cp310-win_amd64.whl
torch-2.4.1+cpu-cp311-cp311-linux_x86_64.whl
@@ -513,7 +523,12 @@ def _test_torch_experimental_index_url(env):
torch-2.4.1-cp38-none-macosx_11_0_arm64.whl
torch-2.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
torch-2.4.1-cp39-none-macosx_11_0_arm64.whl
-""",
+"""
+
+ return struct(
+ output = parse_simpleapi_html(
+ content = content,
+ parse_index = parse_index,
),
success = True,
)
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index f72d61371c..c84140f459 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -42,6 +42,29 @@ def _generate_html(*items):
]),
)
+def _test_index(env):
+ # buildifier: disable=unsorted-dict-items
+ tests = [
+ (
+ [
+ struct(attrs = ['href="/simple/foo/"'], filename = "foo"),
+ struct(attrs = ['href="./b-ar/"'], filename = "b-._.-aR"),
+ ],
+ {
+ "b_ar": "./b-ar/",
+ "foo": "/simple/foo/",
+ },
+ ),
+ ]
+
+ for (input, want) in tests:
+ html = _generate_html(*input)
+ got = parse_simpleapi_html(content = html, parse_index = True)
+
+ env.expect.that_dict(got).contains_exactly(want)
+
+_tests.append(_test_index)
+
def _test_sdist(env):
# buildifier: disable=unsorted-dict-items
tests = [
@@ -65,7 +88,7 @@ def _test_sdist(env):
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python=">=3.7"',
"data-yanked",
],
filename = "foo-0.0.1.tar.gz",
@@ -82,7 +105,7 @@ def _test_sdist(env):
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python="<=3.7"',
"data-yanked=\"Something
with "quotes"
over two lines\"",
],
filename = "foo-0.0.1.tar.gz",
diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl
index 7b6168ce7b..3cf01c7450 100644
--- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl
+++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl
@@ -155,6 +155,39 @@ def _test_pypi_cache_writes_to_facts(env):
"fact_version": "v1", # Facts version
})
+ # When we get the other items cached in memory, they get written to facts
+ got = cache.get((key[0], key[1], ["1.1.0"]))
+ got.whls().contains_exactly({
+ "sha_whl_2": fake_result.whls["sha_whl_2"],
+ })
+ got.sdists().contains_exactly({})
+ got.sha256s_by_version().contains_exactly({
+ "1.1.0": fake_result.sha256s_by_version["1.1.0"],
+ })
+
+ # Then when we get facts at the end
+ cache.get_facts().contains_exactly({
+ "dist_hashes": {
+ # We are not using the real index URL, because we may have credentials in here
+ "https://{PYPI_INDEX_URL}": {
+ "pkg": {
+ "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl": "sha_whl",
+ "https://pypi.org/files/pkg-1.0.0.tar.gz": "sha_sdist",
+ "https://pypi.org/files/pkg-1.1.0-py3-none-any.whl": "sha_whl_2",
+ },
+ },
+ },
+ "dist_yanked": {
+ "https://{PYPI_INDEX_URL}": {
+ "pkg": {
+ "sha_sdist": "",
+ "sha_whl": "Security issue",
+ },
+ },
+ },
+ "fact_version": "v1", # Facts version
+ })
+
_tests.append(_test_pypi_cache_writes_to_facts)
def _test_pypi_cache_reads_from_facts(env):
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 9a6b7ca5af..55439c2593 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -23,26 +23,30 @@ _tests = []
def _test_simple(env):
calls = []
- def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, allow_fail):
- _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
- env.expect.that_bool(block).equals(False)
- env.expect.that_bool(allow_fail).equals(True)
- calls.append(url)
- if "foo" in url and "main" in url:
- return struct(
- output = "",
- success = False,
- )
- else:
+ def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, parse_index):
+ if parse_index:
return struct(
- output = struct(
- sdists = {"deadbeef": url.strip("/").split("/")[-1]},
- whls = {"deadb33f": url.strip("/").split("/")[-1]},
- sha256s_by_version = {"fizz": url.strip("/").split("/")[-1]},
- ),
success = True,
+ output = {
+ "bar": "/bar/",
+ "baz": "/baz/",
+ } if "main" in url else {
+ "foo": "/foo/",
+ },
)
+ _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
+ env.expect.that_bool(block).equals(False)
+ calls.append(url)
+ return struct(
+ output = struct(
+ sdists = {"deadbeef": url.strip("/").split("/")[-1]},
+ whls = {"deadb33f": url.strip("/").split("/")[-1]},
+ sha256s_by_version = {"fizz": url.strip("/").split("/")[-1]},
+ ),
+ success = True,
+ )
+
contents = simpleapi_download(
ctx = struct(
getenv = {}.get,
@@ -50,8 +54,8 @@ def _test_simple(env):
),
attr = struct(
index_url_overrides = {},
- index_url = "main",
- extra_index_urls = ["extra"],
+ index_url = "https://main.com",
+ extra_index_urls = ["https://extra.com"],
sources = {"bar": None, "baz": None, "foo": None},
envsubst = [],
),
@@ -61,26 +65,25 @@ def _test_simple(env):
)
env.expect.that_collection(calls).contains_exactly([
- "extra/foo/",
- "main/bar/",
- "main/baz/",
- "main/foo/",
+ "https://extra.com/foo/",
+ "https://main.com/bar/",
+ "https://main.com/baz/",
])
env.expect.that_dict(contents).contains_exactly({
"bar": struct(
- index_url = "main/bar/",
+ index_url = "https://main.com/bar/",
sdists = {"deadbeef": "bar"},
sha256s_by_version = {"fizz": "bar"},
whls = {"deadb33f": "bar"},
),
"baz": struct(
- index_url = "main/baz/",
+ index_url = "https://main.com/baz/",
sdists = {"deadbeef": "baz"},
sha256s_by_version = {"fizz": "baz"},
whls = {"deadb33f": "baz"},
),
"foo": struct(
- index_url = "extra/foo/",
+ index_url = "https://extra.com/foo/",
sdists = {"deadbeef": "foo"},
sha256s_by_version = {"fizz": "foo"},
whls = {"deadb33f": "foo"},
@@ -89,85 +92,26 @@ def _test_simple(env):
_tests.append(_test_simple)
-def _test_fail(env):
+def _test_index_overrides(env):
calls = []
fails = []
- def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, allow_fail):
- _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
- env.expect.that_bool(block).equals(False)
- env.expect.that_bool(allow_fail).equals(True)
- calls.append(url)
- if "foo" in url:
- return struct(
- output = "",
- success = False,
- )
- if "bar" in url:
- return struct(
- output = "",
- success = False,
- )
- else:
+ def read_simpleapi(ctx, *, url, versions, attr, cache, get_auth, block, parse_index):
+ if parse_index:
return struct(
- output = struct(
- sdists = {},
- whls = {},
- sha256s_by_version = {},
- ),
success = True,
+ output = {
+ # normalized
+ "ba_z": "/ba-z/",
+ "bar": "/bar/",
+ "foo": "/foo-should-fail/",
+ } if "main" in url else {
+ "foo": "/foo/",
+ },
)
- simpleapi_download(
- ctx = struct(
- getenv = {}.get,
- report_progress = lambda _: None,
- ),
- attr = struct(
- index_url_overrides = {},
- index_url = "main",
- extra_index_urls = ["extra"],
- sources = {"bar": None, "baz": None, "foo": None},
- envsubst = [],
- ),
- cache = pypi_cache(),
- parallel_download = True,
- read_simpleapi = read_simpleapi,
- _fail = fails.append,
- )
-
- env.expect.that_collection(fails).contains_exactly([
- """
-Failed to download metadata of the following packages from urls:
-{
- "bar": ["main", "extra"],
- "foo": ["main", "extra"],
-}
-
-If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
- "bar",
- "foo",
-]' to your 'pip.parse' call.
-""",
- ])
- env.expect.that_collection(calls).contains_exactly([
- "main/foo/",
- "main/bar/",
- "main/baz/",
- "extra/foo/",
- "extra/bar/",
- ])
-
-_tests.append(_test_fail)
-
-def _test_allow_fail_single_index(env):
- calls = []
- fails = []
-
- def read_simpleapi(ctx, *, url, versions, attr, cache, get_auth, block, allow_fail):
_ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
env.expect.that_bool(block).equals(False)
- env.expect.that_bool(allow_fail).equals(False)
calls.append(url)
return struct(
output = struct(
@@ -185,11 +129,11 @@ def _test_allow_fail_single_index(env):
),
attr = struct(
index_url_overrides = {
- "foo": "extra",
+ "foo": "https://extra.com",
},
- index_url = "main",
+ index_url = "https://main.com",
extra_index_urls = [],
- sources = {"bar": None, "baz": None, "foo": None},
+ sources = {"ba_z": None, "bar": None, "foo": None},
envsubst = [],
),
cache = pypi_cache(),
@@ -200,35 +144,46 @@ def _test_allow_fail_single_index(env):
env.expect.that_collection(fails).contains_exactly([])
env.expect.that_collection(calls).contains_exactly([
- "main/bar/",
- "main/baz/",
- "extra/foo/",
+ "https://main.com/bar/",
+ "https://main.com/ba-z/",
+ "https://extra.com/foo/",
])
env.expect.that_dict(contents).contains_exactly({
+ "ba_z": struct(
+ index_url = "https://main.com/ba-z/",
+ sdists = {"deadbeef": "ba-z"},
+ sha256s_by_version = {"fizz": "ba-z"},
+ whls = {"deadb33f": "ba-z"},
+ ),
"bar": struct(
- index_url = "main/bar/",
+ index_url = "https://main.com/bar/",
sdists = {"deadbeef": "bar"},
sha256s_by_version = {"fizz": "bar"},
whls = {"deadb33f": "bar"},
),
- "baz": struct(
- index_url = "main/baz/",
- sdists = {"deadbeef": "baz"},
- sha256s_by_version = {"fizz": "baz"},
- whls = {"deadb33f": "baz"},
- ),
"foo": struct(
- index_url = "extra/foo/",
+ index_url = "https://extra.com/foo/",
sdists = {"deadbeef": "foo"},
sha256s_by_version = {"fizz": "foo"},
whls = {"deadb33f": "foo"},
),
})
-_tests.append(_test_allow_fail_single_index)
+_tests.append(_test_index_overrides)
def _test_download_url(env):
downloads = {}
+ reads = [
+ # The first read is the index which seeds the downloads later
+ """
+ bar
+ baz
+ foo
+ """,
+ "",
+ "",
+ "",
+ ]
def download(url, output, **kwargs):
_ = kwargs # buildifier: disable=unused-variable
@@ -240,14 +195,16 @@ def _test_download_url(env):
getenv = {}.get,
download = download,
report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
+ # We will first add a download to the list, so this is a poor man's `next(foo)`
+ # implementation
+ read = lambda i: reads[len(downloads) - 1],
path = lambda i: "path/for/" + i,
),
attr = struct(
index_url_overrides = {},
index_url = "https://example.com/main/simple/",
extra_index_urls = [],
- sources = {"bar": None, "baz": None, "foo": None},
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = [],
),
cache = pypi_cache(),
@@ -256,6 +213,7 @@ def _test_download_url(env):
)
env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/": "path/for/https___example_com_main_simple.html",
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
@@ -265,6 +223,18 @@ _tests.append(_test_download_url)
def _test_download_url_parallel(env):
downloads = {}
+ reads = [
+ # The first read is the index which seeds the downloads later
+ """
+ bar
+ baz
+ foo
+ """,
+ "",
+ "",
+ "",
+ "",
+ ]
def download(url, output, **kwargs):
_ = kwargs # buildifier: disable=unused-variable
@@ -276,13 +246,15 @@ def _test_download_url_parallel(env):
getenv = {}.get,
download = download,
report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
+ # We will first add a download to the list, so this is a poor man's `next(foo)`
+ # implementation. We use 2 because we will enqueue 2 downloads in parallel.
+ read = lambda i: reads[len(downloads) - 2],
path = lambda i: "path/for/" + i,
),
attr = struct(
index_url_overrides = {},
- index_url = "https://example.com/main/simple/",
- extra_index_urls = [],
+ index_url = "https://example.com/default/simple/",
+ extra_index_urls = ["https://example.com/extra/simple/"],
sources = {"bar": None, "baz": None, "foo": None},
envsubst = [],
),
@@ -292,15 +264,28 @@ def _test_download_url_parallel(env):
)
env.expect.that_dict(downloads).contains_exactly({
- "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
- "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
- "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+ "https://example.com/default/simple/": "path/for/https___example_com_default_simple.html",
+ "https://example.com/extra/simple/": "path/for/https___example_com_extra_simple.html",
+ "https://example.com/extra/simple/bar/": "path/for/https___example_com_extra_simple_bar.html",
+ "https://example.com/extra/simple/baz/": "path/for/https___example_com_extra_simple_baz.html",
+ "https://example.com/extra/simple/foo/": "path/for/https___example_com_extra_simple_foo.html",
})
_tests.append(_test_download_url_parallel)
def _test_download_envsubst_url(env):
downloads = {}
+ reads = [
+ # The first read is the index which seeds the downloads later
+ """
+ bar
+ baz
+ foo
+ """,
+ "",
+ "",
+ "",
+ ]
def download(url, output, **kwargs):
_ = kwargs # buildifier: disable=unused-variable
@@ -312,7 +297,9 @@ def _test_download_envsubst_url(env):
getenv = {"INDEX_URL": "https://example.com/main/simple/"}.get,
download = download,
report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
+ # We will first add a download to the list, so this is a poor man's `next(foo)`
+ # implementation
+ read = lambda i: reads[len(downloads) - 1],
path = lambda i: "path/for/" + i,
),
attr = struct(
@@ -328,6 +315,7 @@ def _test_download_envsubst_url(env):
)
env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/": "path/for/~index_url~.html",
"https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
"https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
"https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",