diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16a591b159..8e7c152acd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -69,12 +69,18 @@ END_UNRELEASED_TEMPLATE
Other changes:
* (pypi) Update dependencies used for `compile_pip_requirements`, building
sdists in the `whl_library` rule and fetching wheels using `pip`.
-* (pypi) We will set `allow_fail` to `False` if the
- {attr}`experimental_index_url_overrides` is set
- to a non-empty value. This means that failures will be no-longer cached in
- this particular case.
- ([#3260](https://github.com/bazel-contrib/rules_python/issues/3260) and
- [#2632](https://github.com/bazel-contrib/rules_python/issues/2632))
+* (pypi) Before using the bazel downloader to fetch the PyPI package metadata
+  we now first fetch the list of available packages from each index. The
+  resulting package mappings are written as facts to the `MODULE.bazel.lock`
+  file on supported bazel versions, so this is done at most once. As a result,
+  per-package {obj}`experimental_index_url_overrides` is no longer needed if the index URLs are
+  passed to `pip.parse` via `experimental_index_url` and `experimental_extra_index_urls`.
+  Furthermore, the `--index_url` and `--extra_index_urls` flags now behave more in
+  line with how they work in `uv` and `pip`, i.e. we fall back to `--index_url` if the package is
+  not found in `--extra_index_urls`.
+  Fixes
+  [#3260](https://github.com/bazel-contrib/rules_python/issues/3260) and
+  [#2632](https://github.com/bazel-contrib/rules_python/issues/2632).
{#v0-0-0-fixed}
### Fixed
diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel
index 6b4822333c..869be4705a 100644
--- a/python/private/pypi/BUILD.bazel
+++ b/python/private/pypi/BUILD.bazel
@@ -244,6 +244,7 @@ bzl_library(
srcs = ["parse_simpleapi_html.bzl"],
deps = [
":version_from_filename_bzl",
+ "//python/private:normalize_name_bzl",
],
)
@@ -424,8 +425,6 @@ bzl_library(
":urllib_bzl",
"//python/private:auth_bzl",
"//python/private:normalize_name_bzl",
- "//python/private:text_util_bzl",
- "@bazel_features//:features",
],
)
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index 563130791e..7f0d2776d7 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -16,16 +16,20 @@
Parse SimpleAPI HTML in Starlark.
"""
+load("//python/private:normalize_name.bzl", "normalize_name")
load(":version_from_filename.bzl", "version_from_filename")
-def parse_simpleapi_html(*, content):
+def parse_simpleapi_html(*, content, parse_index = False):
"""Get the package URLs for given shas by parsing the Simple API HTML.
Args:
- content(str): The Simple API HTML content.
+ content: {type}`str` The Simple API HTML content.
+ parse_index: {type}`bool` whether to parse the content as the root index page of the PyPI index,
+ e.g. `https://pypi.org/simple/`. That page only lists the URLs of the individual packages.
Returns:
- A list of structs with:
+ If parsing the index page, a map from package name to the URL it can be queried from.
+ Otherwise, a list of structs with:
* filename: {type}`str` The filename of the artifact.
* version: {type}`str` The version of the artifact.
* url: {type}`str` The URL to download the artifact.
@@ -59,6 +63,8 @@ def parse_simpleapi_html(*, content):
# https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api
fail("Unsupported API version: {}".format(api_version))
+ packages = {}
+
# 2. Iterate using find() to avoid huge list allocations from .split("
- tag_end = content.find(">", start_tag)
- end_tag = content.find("", tag_end)
- if tag_end == -1 or end_tag == -1:
+ # Find the closing tag first, then find the end of the opening
+ # tag using rfind. This correctly handles attributes that
+ # contain > characters, e.g. data-requires-python=">=3.6".
+ end_tag = content.find("", start_tag)
+ if end_tag == -1:
break
+ tag_end = content.rfind(">", start_tag, end_tag)
+ if tag_end == -1 or tag_end <= start_tag:
+ cursor = end_tag + 4
+ continue
# Extract only the necessary slices
- attr_part = content[start_tag + 3:tag_end]
filename = content[tag_end + 1:end_tag].strip()
+ attr_part = content[start_tag + 3:tag_end]
# Update cursor for next iteration
cursor = end_tag + 4
- # 3. Efficient Attribute Parsing
attrs = _parse_attrs(attr_part)
href = attrs.get("href", "")
if not href:
continue
+ if parse_index:
+ pkg_name = filename
+ packages[normalize_name(pkg_name)] = href
+ continue
+
+ # 3. Efficient Attribute Parsing
dist_url, _, sha256 = href.partition("#sha256=")
# Handle Yanked status
@@ -121,6 +137,9 @@ def parse_simpleapi_html(*, content):
else:
sdists[sha256] = dist
+ if parse_index:
+ return packages
+
return struct(
sdists = sdists,
whls = whls,
diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl
index 28c6cbeafb..7b24102263 100644
--- a/python/private/pypi/pypi_cache.bzl
+++ b/python/private/pypi/pypi_cache.bzl
@@ -89,6 +89,11 @@ def _pypi_cache_get(self, key):
if not cached and versions:
# Could not get from in-memory, read from lockfile facts
cached = self._facts.get(index_url, versions)
+ else:
+ # We might be using something from memory that is not yet stored in facts (e.g. we processed
+ # the requirements.txt for one Python version and the deps got cached, but a new Python
+ # version means different deps, which may add extras).
+ self._facts.setdefault(index_url, cached)
return cached
@@ -122,6 +127,13 @@ def _filter_packages(dists, requested_versions):
if dists == None or not requested_versions:
return dists
+ if type(dists) == "dict":
+ return {
+ pkg: url
+ for pkg, url in dists.items()
+ if pkg in requested_versions
+ }
+
sha256s_by_version = {}
whls = {}
sdists = {}
@@ -193,6 +205,12 @@ def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_ver
# cannot trust known facts, different version that we know how to parse
return None
+ if type(requested_versions) == "dict":
+ return _filter_packages(
+ dists = known_facts.get("index_urls", {}).get(index_url, {}),
+ requested_versions = requested_versions,
+ )
+
known_sources = {}
root_url, _, distribution = index_url.rstrip("/").rpartition("/")
@@ -266,10 +284,46 @@ def _store_facts(facts, fact_version, index_url, value):
facts["fact_version"] = fact_version
+ if type(value) == "dict":
+ # facts: {
+ # "index_urls": {
+ # "": {
+ # "": "",
+ # },
+ # },
+ # },
+ for pkg, url in value.items():
+ facts.setdefault("index_urls", {}).setdefault(index_url, {})[pkg] = url
+ return value
+
root_url, _, distribution = index_url.rstrip("/").rpartition("/")
distribution = distribution.rstrip("/")
root_url = root_url.rstrip("/")
+ # The schema is
+ # facts: {
+ # "dist_hashes": {
+ # "": {
+ # "": {
+ # "": "",
+ # },
+ # },
+ # },
+ # "dist_filenames": {
+ # "": {
+ # "": {
+ # "": "", # if it is different from the URL
+ # },
+ # },
+ # },
+ # "dist_yanked": {
+ # "": {
+ # "": {
+ # "": "", # if the package is yanked
+ # },
+ # },
+ # },
+ # },
for sha256, d in (value.sdists | value.whls).items():
facts.setdefault("dist_hashes", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, sha256)
if not d.url.endswith(d.filename):
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 20d79ba9b4..2171e8b56a 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -16,11 +16,9 @@
A file that houses private functions used in the `bzlmod` extension with the same name.
"""
-load("@bazel_features//:features.bzl", "bazel_features")
load("//python/private:auth.bzl", _get_auth = "get_auth")
load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
-load("//python/private:text_util.bzl", "render")
load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")
load(":urllib.bzl", "urllib")
@@ -35,15 +33,22 @@ def simpleapi_download(
_fail = fail):
"""Download Simple API HTML.
+ First it queries all of the indexes for available packages and then it downloads the contents of
+ the per-package URLs and sha256 values. This enables us to use bazel_downloader with
+ `requirements.txt` files. As a side effect we are also able to "cross-compile" by fetching the
+ right wheel for the right target platform from the information that we retrieve here.
+
Args:
ctx: The module_ctx or repository_ctx.
attr: Contains the parameters for the download. They are grouped into a
struct for better clarity. It must have attributes:
- * index_url: str, the index.
+ * index_url: str, the index, or if `extra_index_urls` are passed, the default index.
* index_url_overrides: dict[str, str], the index overrides for
separate packages.
- * extra_index_urls: Extra index URLs that will be looked up after
- the main is looked up.
+ * extra_index_urls: Will be looked at in the order they are defined and the first match
+ wins. This is similar to what uv does, see
+ https://docs.astral.sh/uv/concepts/indexes/#searching-across-multiple-indexes.
+ PRs for implementing other strategies are welcome.
* sources: list[str], the sources to download things for. Each value is
the contents of requirements files.
* envsubst: list[str], the envsubst vars for performing substitution in index url.
@@ -70,111 +75,119 @@ def simpleapi_download(
normalize_name(p): i
for p, i in (attr.index_url_overrides or {}).items()
}
+ sources = {
+ normalize_name(pkg): versions
+ for pkg, versions in attr.sources.items()
+ }
- # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
- # to replicate how `pip` would handle this case.
- contents = {}
- index_urls = [attr.index_url] + attr.extra_index_urls
read_simpleapi = read_simpleapi or _read_simpleapi
- download_kwargs = {}
- if bazel_features.external_deps.download_has_block_param:
- download_kwargs["block"] = not parallel_download
+ ctx.report_progress("Fetch package lists from PyPI index")
- if len(index_urls) == 1 or index_url_overrides:
- download_kwargs["allow_fail"] = False
- else:
- download_kwargs["allow_fail"] = True
+ # NOTE: we are not merging results from multiple indexes to replicate how `pip` would
+ # handle this case. What we do is we select a particular index to download the packages
+ dist_urls = _get_dist_urls(
+ ctx,
+ default_index = attr.index_url,
+ index_urls = attr.extra_index_urls,
+ index_url_overrides = index_url_overrides,
+ sources = sources,
+ read_simpleapi = read_simpleapi,
+ cache = cache,
+ get_auth = get_auth,
+ attr = attr,
+ block = not parallel_download,
+ _fail = _fail,
+ )
- input_sources = attr.sources
+ ctx.report_progress("Fetching package URLs from PyPI index")
- found_on_index = {}
- warn_overrides = False
- ctx.report_progress("Fetch package lists from PyPI index")
- for i, index_url in enumerate(index_urls):
- if i != 0:
- # Warn the user about a potential fix for the overrides
- warn_overrides = True
-
- async_downloads = {}
- sources = {pkg: versions for pkg, versions in input_sources.items() if pkg not in found_on_index}
- for pkg, versions in sources.items():
- pkg_normalized = normalize_name(pkg)
- url = urllib.strip_empty_path_segments("{index_url}/{distribution}/".format(
- index_url = index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
- distribution = pkg,
- ))
- result = read_simpleapi(
- ctx = ctx,
- attr = attr,
- versions = versions,
- url = url,
- cache = cache,
- get_auth = get_auth,
- **download_kwargs
- )
- if hasattr(result, "wait"):
- # We will process it in a separate loop:
- async_downloads[pkg] = struct(
- pkg_normalized = pkg_normalized,
- wait = result.wait,
- url = url,
- )
- elif result.success:
- contents[pkg_normalized] = _with_index_url(url, result.output)
- found_on_index[pkg] = index_url
-
- if not async_downloads:
- continue
+ downloads = {}
+ contents = {}
+ for pkg, url in dist_urls.items():
+ result = read_simpleapi(
+ ctx = ctx,
+ attr = attr,
+ url = url,
+ cache = cache,
+ versions = sources[pkg],
+ get_auth = get_auth,
+ block = not parallel_download,
+ parse_index = False,
+ )
+ if hasattr(result, "wait"):
+ # We will process it in a separate loop:
+ downloads[pkg] = result
+ else:
+ contents[pkg] = _with_index_url(url, result.output)
+ for pkg, d in downloads.items():
# If we use `block` == False, then we need to have a second loop that is
# collecting all of the results as they were being downloaded in parallel.
- for pkg, download in async_downloads.items():
- result = download.wait()
-
- if result.success:
- contents[download.pkg_normalized] = _with_index_url(download.url, result.output)
- found_on_index[pkg] = index_url
-
- failed_sources = [pkg for pkg in input_sources if pkg not in found_on_index]
- if failed_sources:
- pkg_index_urls = {
- pkg: index_url_overrides.get(
- normalize_name(pkg),
- index_urls,
- )
- for pkg in failed_sources
- }
-
- _fail(
- """
-Failed to download metadata of the following packages from urls:
-{pkg_index_urls}
-
-If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={failed_sources}' to your 'pip.parse' call.
-""".format(
- pkg_index_urls = render.dict(pkg_index_urls),
- failed_sources = render.list(failed_sources),
- ),
+ contents[pkg] = _with_index_url(dist_urls[pkg], d.wait().output)
+
+ return contents
+
+def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs):
+ downloads = {}
+ results = {}
+ for extra in index_url_overrides.values():
+ if extra not in index_urls:
+ index_urls.append(extra)
+
+ index_urls = index_urls or []
+ if default_index not in index_urls:
+ index_urls.append(default_index)
+
+ for index_url in index_urls:
+ download = read_simpleapi(
+ ctx = ctx,
+ attr = attr,
+ url = urllib.strip_empty_path_segments("{index_url}/".format(
+ index_url = index_url,
+ )),
+ parse_index = True,
+ versions = {pkg: None for pkg in sources},
+ block = block,
+ **kwargs
)
- return None
-
- if warn_overrides:
- index_url_overrides = {
- pkg: found_on_index[pkg]
- for pkg in attr.sources
- if found_on_index[pkg] != attr.index_url
- }
-
- if index_url_overrides:
- # buildifier: disable=print
- print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
- render.dict(index_url_overrides),
+ if hasattr(download, "wait"):
+ downloads[index_url] = download
+ else:
+ results[index_url] = download
+
+ for index_url, download in downloads.items():
+ results[index_url] = download.wait()
+
+ found_on_index = {}
+ for index_url, result in results.items():
+ for pkg in sources:
+ if pkg in found_on_index:
+ # We have already found the package, skip searching for it in
+ # other indexes.
+ #
+ # If we wanted to merge results from all of the indexes, we would have to continue
+ # here and then merge the per-index results in the outer function.
+ continue
+
+ if index_url_overrides.get(pkg, index_url) != index_url:
+ # we should not use this index for the package
+ continue
+
+ found = result.output.get(pkg)
+ if not found:
+ continue
+
+ # Ignore the URL here because we know how to construct it.
+
+ found_on_index[pkg] = urllib.strip_empty_path_segments("{}/{}/".format(
+ index_url,
+ pkg.replace("_", "-"), # Use the official normalization for URLs
))
- return contents
+ return found_on_index
-def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download_kwargs):
+def _read_simpleapi(ctx, url, attr, cache, versions, parse_index, get_auth = None, **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -189,6 +202,8 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
cache: {type}`struct` the `pypi_cache` instance.
versions: {type}`list[str] The versions that have been requested.
get_auth: A function to get auth information. Used in tests.
+ parse_index: {type}`bool` Whether to parse the content as a root index page
+ (e.g. `/simple/`) instead of a package-specific page.
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
@@ -196,11 +211,6 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
A similar object to what `download` would return except that in result.out
will be the parsed simple api contents.
"""
- # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
- # the whl location and we cannot handle multiple URLs at once by passing
- # them to ctx.download if we want to correctly handle the relative URLs.
- # TODO: Add a test that env subbed index urls do not leak into the lock file.
-
real_url = urllib.strip_empty_path_segments(envsubst(url, attr.envsubst, ctx.getenv))
cache_key = (url, real_url, versions)
@@ -242,6 +252,7 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
output = output,
cache = cache,
cache_key = cache_key,
+ parse_index = parse_index,
),
)
@@ -251,15 +262,16 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download
output = output,
cache = cache,
cache_key = cache_key,
+ parse_index = parse_index,
)
-def _read_index_result(ctx, *, result, output, cache, cache_key):
+def _read_index_result(ctx, *, result, output, cache, cache_key, parse_index):
if not result.success:
return struct(success = False)
content = ctx.read(output)
- output = parse_simpleapi_html(content = content)
+ output = parse_simpleapi_html(content = content, parse_index = parse_index)
if output:
cache.setdefault(cache_key, output)
return struct(success = True, output = output)
diff --git a/python/private/pypi/urllib.bzl b/python/private/pypi/urllib.bzl
index ca6ded76b1..ea4cd32cc9 100644
--- a/python/private/pypi/urllib.bzl
+++ b/python/private/pypi/urllib.bzl
@@ -3,7 +3,7 @@
def _get_root_directory(url):
scheme_end = url.find("://")
if scheme_end == -1:
- fail("Invalid URL format")
+ fail("Invalid URL format: '{}'".format(url))
scheme = url[:scheme_end]
host_end = url.find("/", scheme_end + 3)
diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl
index 637c7881c2..31a41f6af5 100644
--- a/tests/pypi/hub_builder/hub_builder_tests.bzl
+++ b/tests/pypi/hub_builder/hub_builder_tests.bzl
@@ -247,12 +247,19 @@ def _test_simple_extras_vs_no_extras(env):
_tests.append(_test_simple_extras_vs_no_extras)
def _test_simple_extras_vs_no_extras_simpleapi(env):
- def mockread_simpleapi(*_, **__):
+ def mockread_simpleapi(*_, parse_index, **__):
+ if parse_index:
+ content = """\
+ simple-0.0.1-py3-none-any.whl
+"""
return struct(
output = parse_simpleapi_html(
- content = """\
- simple-0.0.1-py3-none-any.whl
-""",
+ content = content,
+ parse_index = parse_index,
),
success = True,
)
@@ -489,10 +496,13 @@ def _test_simple_with_markers(env):
_tests.append(_test_simple_with_markers)
def _test_torch_experimental_index_url(env):
- def mockread_simpleapi(*_, **__):
- return struct(
- output = parse_simpleapi_html(
- content = """\
+ def mockread_simpleapi(*_, parse_index, **__):
+ if parse_index:
+ content = """\
+ torch
+"""
+ else:
+ content = """\
torch-2.4.1+cpu-cp310-cp310-linux_x86_64.whl
torch-2.4.1+cpu-cp310-cp310-win_amd64.whl
torch-2.4.1+cpu-cp311-cp311-linux_x86_64.whl
@@ -513,7 +523,12 @@ def _test_torch_experimental_index_url(env):
torch-2.4.1-cp38-none-macosx_11_0_arm64.whl
torch-2.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
torch-2.4.1-cp39-none-macosx_11_0_arm64.whl
-""",
+"""
+
+ return struct(
+ output = parse_simpleapi_html(
+ content = content,
+ parse_index = parse_index,
),
success = True,
)
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index f72d61371c..c84140f459 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -42,6 +42,29 @@ def _generate_html(*items):
]),
)
+def _test_index(env):
+ # buildifier: disable=unsorted-dict-items
+ tests = [
+ (
+ [
+ struct(attrs = ['href="/simple/foo/"'], filename = "foo"),
+ struct(attrs = ['href="./b-ar/"'], filename = "b-._.-aR"),
+ ],
+ {
+ "b_ar": "./b-ar/",
+ "foo": "/simple/foo/",
+ },
+ ),
+ ]
+
+ for (input, want) in tests:
+ html = _generate_html(*input)
+ got = parse_simpleapi_html(content = html, parse_index = True)
+
+ env.expect.that_dict(got).contains_exactly(want)
+
+_tests.append(_test_index)
+
def _test_sdist(env):
# buildifier: disable=unsorted-dict-items
tests = [
@@ -65,7 +88,7 @@ def _test_sdist(env):
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python=">=3.7"',
"data-yanked",
],
filename = "foo-0.0.1.tar.gz",
@@ -82,7 +105,7 @@ def _test_sdist(env):
struct(
attrs = [
'href="https://example.org/full-url/foo-0.0.1.tar.gz#sha256=deadbeefasource"',
- 'data-requires-python=">=3.7"',
+ 'data-requires-python="<=3.7"',
"data-yanked=\"Something
with "quotes"
over two lines\"",
],
filename = "foo-0.0.1.tar.gz",
diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl
index 7b6168ce7b..3cf01c7450 100644
--- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl
+++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl
@@ -155,6 +155,39 @@ def _test_pypi_cache_writes_to_facts(env):
"fact_version": "v1", # Facts version
})
+ # When we get the other items cached in memory, they get written to facts
+ got = cache.get((key[0], key[1], ["1.1.0"]))
+ got.whls().contains_exactly({
+ "sha_whl_2": fake_result.whls["sha_whl_2"],
+ })
+ got.sdists().contains_exactly({})
+ got.sha256s_by_version().contains_exactly({
+ "1.1.0": fake_result.sha256s_by_version["1.1.0"],
+ })
+
+ # Then when we get facts at the end
+ cache.get_facts().contains_exactly({
+ "dist_hashes": {
+ # We are not using the real index URL, because we may have credentials in here
+ "https://{PYPI_INDEX_URL}": {
+ "pkg": {
+ "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl": "sha_whl",
+ "https://pypi.org/files/pkg-1.0.0.tar.gz": "sha_sdist",
+ "https://pypi.org/files/pkg-1.1.0-py3-none-any.whl": "sha_whl_2",
+ },
+ },
+ },
+ "dist_yanked": {
+ "https://{PYPI_INDEX_URL}": {
+ "pkg": {
+ "sha_sdist": "",
+ "sha_whl": "Security issue",
+ },
+ },
+ },
+ "fact_version": "v1", # Facts version
+ })
+
_tests.append(_test_pypi_cache_writes_to_facts)
def _test_pypi_cache_reads_from_facts(env):
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 9a6b7ca5af..55439c2593 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -23,26 +23,30 @@ _tests = []
def _test_simple(env):
calls = []
- def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, allow_fail):
- _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
- env.expect.that_bool(block).equals(False)
- env.expect.that_bool(allow_fail).equals(True)
- calls.append(url)
- if "foo" in url and "main" in url:
- return struct(
- output = "",
- success = False,
- )
- else:
+ def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, parse_index):
+ if parse_index:
return struct(
- output = struct(
- sdists = {"deadbeef": url.strip("/").split("/")[-1]},
- whls = {"deadb33f": url.strip("/").split("/")[-1]},
- sha256s_by_version = {"fizz": url.strip("/").split("/")[-1]},
- ),
success = True,
+ output = {
+ "bar": "/bar/",
+ "baz": "/baz/",
+ } if "main" in url else {
+ "foo": "/foo/",
+ },
)
+ _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
+ env.expect.that_bool(block).equals(False)
+ calls.append(url)
+ return struct(
+ output = struct(
+ sdists = {"deadbeef": url.strip("/").split("/")[-1]},
+ whls = {"deadb33f": url.strip("/").split("/")[-1]},
+ sha256s_by_version = {"fizz": url.strip("/").split("/")[-1]},
+ ),
+ success = True,
+ )
+
contents = simpleapi_download(
ctx = struct(
getenv = {}.get,
@@ -50,8 +54,8 @@ def _test_simple(env):
),
attr = struct(
index_url_overrides = {},
- index_url = "main",
- extra_index_urls = ["extra"],
+ index_url = "https://main.com",
+ extra_index_urls = ["https://extra.com"],
sources = {"bar": None, "baz": None, "foo": None},
envsubst = [],
),
@@ -61,26 +65,25 @@ def _test_simple(env):
)
env.expect.that_collection(calls).contains_exactly([
- "extra/foo/",
- "main/bar/",
- "main/baz/",
- "main/foo/",
+ "https://extra.com/foo/",
+ "https://main.com/bar/",
+ "https://main.com/baz/",
])
env.expect.that_dict(contents).contains_exactly({
"bar": struct(
- index_url = "main/bar/",
+ index_url = "https://main.com/bar/",
sdists = {"deadbeef": "bar"},
sha256s_by_version = {"fizz": "bar"},
whls = {"deadb33f": "bar"},
),
"baz": struct(
- index_url = "main/baz/",
+ index_url = "https://main.com/baz/",
sdists = {"deadbeef": "baz"},
sha256s_by_version = {"fizz": "baz"},
whls = {"deadb33f": "baz"},
),
"foo": struct(
- index_url = "extra/foo/",
+ index_url = "https://extra.com/foo/",
sdists = {"deadbeef": "foo"},
sha256s_by_version = {"fizz": "foo"},
whls = {"deadb33f": "foo"},
@@ -89,85 +92,26 @@ def _test_simple(env):
_tests.append(_test_simple)
-def _test_fail(env):
+def _test_index_overrides(env):
calls = []
fails = []
- def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, allow_fail):
- _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
- env.expect.that_bool(block).equals(False)
- env.expect.that_bool(allow_fail).equals(True)
- calls.append(url)
- if "foo" in url:
- return struct(
- output = "",
- success = False,
- )
- if "bar" in url:
- return struct(
- output = "",
- success = False,
- )
- else:
+ def read_simpleapi(ctx, *, url, versions, attr, cache, get_auth, block, parse_index):
+ if parse_index:
return struct(
- output = struct(
- sdists = {},
- whls = {},
- sha256s_by_version = {},
- ),
success = True,
+ output = {
+ # normalized
+ "ba_z": "/ba-z/",
+ "bar": "/bar/",
+ "foo": "/foo-should-fail/",
+ } if "main" in url else {
+ "foo": "/foo/",
+ },
)
- simpleapi_download(
- ctx = struct(
- getenv = {}.get,
- report_progress = lambda _: None,
- ),
- attr = struct(
- index_url_overrides = {},
- index_url = "main",
- extra_index_urls = ["extra"],
- sources = {"bar": None, "baz": None, "foo": None},
- envsubst = [],
- ),
- cache = pypi_cache(),
- parallel_download = True,
- read_simpleapi = read_simpleapi,
- _fail = fails.append,
- )
-
- env.expect.that_collection(fails).contains_exactly([
- """
-Failed to download metadata of the following packages from urls:
-{
- "bar": ["main", "extra"],
- "foo": ["main", "extra"],
-}
-
-If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[
- "bar",
- "foo",
-]' to your 'pip.parse' call.
-""",
- ])
- env.expect.that_collection(calls).contains_exactly([
- "main/foo/",
- "main/bar/",
- "main/baz/",
- "extra/foo/",
- "extra/bar/",
- ])
-
-_tests.append(_test_fail)
-
-def _test_allow_fail_single_index(env):
- calls = []
- fails = []
-
- def read_simpleapi(ctx, *, url, versions, attr, cache, get_auth, block, allow_fail):
_ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable
env.expect.that_bool(block).equals(False)
- env.expect.that_bool(allow_fail).equals(False)
calls.append(url)
return struct(
output = struct(
@@ -185,11 +129,11 @@ def _test_allow_fail_single_index(env):
),
attr = struct(
index_url_overrides = {
- "foo": "extra",
+ "foo": "https://extra.com",
},
- index_url = "main",
+ index_url = "https://main.com",
extra_index_urls = [],
- sources = {"bar": None, "baz": None, "foo": None},
+ sources = {"ba_z": None, "bar": None, "foo": None},
envsubst = [],
),
cache = pypi_cache(),
@@ -200,35 +144,46 @@ def _test_allow_fail_single_index(env):
env.expect.that_collection(fails).contains_exactly([])
env.expect.that_collection(calls).contains_exactly([
- "main/bar/",
- "main/baz/",
- "extra/foo/",
+ "https://main.com/bar/",
+ "https://main.com/ba-z/",
+ "https://extra.com/foo/",
])
env.expect.that_dict(contents).contains_exactly({
+ "ba_z": struct(
+ index_url = "https://main.com/ba-z/",
+ sdists = {"deadbeef": "ba-z"},
+ sha256s_by_version = {"fizz": "ba-z"},
+ whls = {"deadb33f": "ba-z"},
+ ),
"bar": struct(
- index_url = "main/bar/",
+ index_url = "https://main.com/bar/",
sdists = {"deadbeef": "bar"},
sha256s_by_version = {"fizz": "bar"},
whls = {"deadb33f": "bar"},
),
- "baz": struct(
- index_url = "main/baz/",
- sdists = {"deadbeef": "baz"},
- sha256s_by_version = {"fizz": "baz"},
- whls = {"deadb33f": "baz"},
- ),
"foo": struct(
- index_url = "extra/foo/",
+ index_url = "https://extra.com/foo/",
sdists = {"deadbeef": "foo"},
sha256s_by_version = {"fizz": "foo"},
whls = {"deadb33f": "foo"},
),
})
-_tests.append(_test_allow_fail_single_index)
+_tests.append(_test_index_overrides)
def _test_download_url(env):
downloads = {}
+ reads = [
+ # The first read is the index which seeds the downloads later
+ """
+ bar
+ baz
+ foo
+ """,
+ "",
+ "",
+ "",
+ ]
def download(url, output, **kwargs):
_ = kwargs # buildifier: disable=unused-variable
@@ -240,14 +195,16 @@ def _test_download_url(env):
getenv = {}.get,
download = download,
report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
+ # We will first add a download to the list, so this is a poor man's `next(foo)`
+ # implementation
+ read = lambda i: reads[len(downloads) - 1],
path = lambda i: "path/for/" + i,
),
attr = struct(
index_url_overrides = {},
index_url = "https://example.com/main/simple/",
extra_index_urls = [],
- sources = {"bar": None, "baz": None, "foo": None},
+ sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]},
envsubst = [],
),
cache = pypi_cache(),
@@ -256,6 +213,7 @@ def _test_download_url(env):
)
env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/": "path/for/https___example_com_main_simple.html",
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
@@ -265,6 +223,18 @@ _tests.append(_test_download_url)
def _test_download_url_parallel(env):
downloads = {}
+ reads = [
+ # The first read is the index which seeds the downloads later
+ """
+ bar
+ baz
+ foo
+ """,
+ "",
+ "",
+ "",
+ "",
+ ]
def download(url, output, **kwargs):
_ = kwargs # buildifier: disable=unused-variable
@@ -276,13 +246,15 @@ def _test_download_url_parallel(env):
getenv = {}.get,
download = download,
report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
+ # We will first add a download to the list, so this is a poor man's `next(foo)`
+ # implementation. We use 2 because we will enqueue 2 downloads in parallel.
+ read = lambda i: reads[len(downloads) - 2],
path = lambda i: "path/for/" + i,
),
attr = struct(
index_url_overrides = {},
- index_url = "https://example.com/main/simple/",
- extra_index_urls = [],
+ index_url = "https://example.com/default/simple/",
+ extra_index_urls = ["https://example.com/extra/simple/"],
sources = {"bar": None, "baz": None, "foo": None},
envsubst = [],
),
@@ -292,15 +264,28 @@ def _test_download_url_parallel(env):
)
env.expect.that_dict(downloads).contains_exactly({
- "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
- "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
- "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+ "https://example.com/default/simple/": "path/for/https___example_com_default_simple.html",
+ "https://example.com/extra/simple/": "path/for/https___example_com_extra_simple.html",
+ "https://example.com/extra/simple/bar/": "path/for/https___example_com_extra_simple_bar.html",
+ "https://example.com/extra/simple/baz/": "path/for/https___example_com_extra_simple_baz.html",
+ "https://example.com/extra/simple/foo/": "path/for/https___example_com_extra_simple_foo.html",
})
_tests.append(_test_download_url_parallel)
def _test_download_envsubst_url(env):
downloads = {}
+ reads = [
+ # The first read is the index which seeds the downloads later
+ """
+ bar
+ baz
+ foo
+ """,
+ "",
+ "",
+ "",
+ ]
def download(url, output, **kwargs):
_ = kwargs # buildifier: disable=unused-variable
@@ -312,7 +297,9 @@ def _test_download_envsubst_url(env):
getenv = {"INDEX_URL": "https://example.com/main/simple/"}.get,
download = download,
report_progress = lambda _: None,
- read = lambda i: "contents of " + i,
+ # We will first add a download to the list, so this is a poor man's `next(foo)`
+ # implementation
+ read = lambda i: reads[len(downloads) - 1],
path = lambda i: "path/for/" + i,
),
attr = struct(
@@ -328,6 +315,7 @@ def _test_download_envsubst_url(env):
)
env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/": "path/for/~index_url~.html",
"https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
"https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
"https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",