diff --git a/CHANGELOG.md b/CHANGELOG.md index 9776a7f4c2..a41ac20103 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -108,6 +108,14 @@ END_UNRELEASED_TEMPLATE {#v0-0-0-added} ### Added * (toolchains) `3.9.25` Python toolchain from [20251031] release. +* (pypi) API to tell `pip.parse` which platforms users care about. This is very useful to ensure + that when users do `bazel query` for their deps, they don't have to download all of the + dependencies for all of the available wheels. Torch wheels can be up to 1GB and it takes a lot + of time to download those, which is unnecessary if only host platform builds need + to be performed. This is mainly for backwards/forwards compatibility whilst rolling out + `RULES_PYTHON_ENABLE_PIPSTAR=1` by default. Users of `experimental_index_url` that perform + cross-builds should add {obj}`target_platforms` to their `pip.parse` invocations, which will + become mandatory if any cross-builds are required from the next release. [20251031]: https://github.com/astral-sh/python-build-standalone/releases/tag/20251031 {#v1-7-0} diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index be1a8e4d03..eaa6c0d428 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -667,6 +667,26 @@ EXPERIMENTAL: this may be removed without notice. :::{versionadded} 1.4.0 ::: +""", + ), + "target_platforms": attr.string_list( + default = ["{os}_{arch}"], + doc = """\ +The list of platforms for which we would evaluate the requirements files. If you need to be able to +only evaluate for a particular platform (e.g. "linux_x86_64"), then put it in here. + +If you want `freethreaded` variant, then you can use `_freethreaded` suffix as `rules_python` is +defining target platforms for these variants in its `MODULE.bazel` file. The identifiers for this +function in general are the same as used in the {obj}`pip.default.platform` attribute. 
+ +If you only care for the host platform and do not have a usecase to cross-build, then you can put in +a string `"{os}_{arch}"` as the value here. You could also use `"{os}_{arch}_freethreaded"` as well. + +:::{include} /_includes/experimental_api.md +::: + +:::{versionadded} VERSION_NEXT_FEATURE +::: """, ), "whl_modifications": attr.label_keyed_string_dict( diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index 1378e2f122..3a1a3b07fe 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -2,6 +2,7 @@ load("//python/private:full_version.bzl", "full_version") load("//python/private:normalize_name.bzl", "normalize_name") +load("//python/private:repo_utils.bzl", "repo_utils") load("//python/private:version.bzl", "version") load("//python/private:version_label.bzl", "version_label") load(":attrs.bzl", "use_isolated") @@ -135,11 +136,15 @@ def _pip_parse(self, module_ctx, pip_attr): )) return + default_cross_setup = _set_get_index_urls(self, pip_attr) self._platforms[python_version] = _platforms( + module_ctx, python_version = full_python_version, config = self._config, + # FIXME @aignas 2025-12-06: should we have this behaviour? + # TODO @aignas 2025-12-06: use target_platforms always even when the get_index_urls is set. 
+ target_platforms = [] if default_cross_setup else pip_attr.target_platforms, ) - _set_get_index_urls(self, pip_attr) _add_group_map(self, pip_attr.experimental_requirement_cycles) _add_extra_aliases(self, pip_attr.extra_hub_aliases) _create_whl_repos( @@ -249,7 +254,7 @@ def _set_get_index_urls(self, pip_attr): # parallel_download is set to True by default, so we are not checking/validating it # here - return + return False python_version = pip_attr.python_version self._use_downloader.setdefault(python_version, {}).update({ @@ -275,6 +280,7 @@ def _set_get_index_urls(self, pip_attr): cache = self._simpleapi_cache, parallel_download = pip_attr.parallel_download, ) + return True def _detect_interpreter(self, pip_attr): python_interpreter_target = pip_attr.python_interpreter_target @@ -301,14 +307,25 @@ def _detect_interpreter(self, pip_attr): path = pip_attr.python_interpreter, ) -def _platforms(*, python_version, config): +def _platforms(module_ctx, *, python_version, config, target_platforms): platforms = {} python_version = version.parse( python_version, strict = True, ) + target_platforms = sorted({ + p.format( + os = repo_utils.get_platforms_os_name(module_ctx), + arch = repo_utils.get_platforms_cpu_name(module_ctx), + ): None + for p in target_platforms + }) + for platform, values in config.platforms.items(): + if target_platforms and platform not in target_platforms: + continue + # TODO @aignas 2025-07-07: this is probably doing the parsing of the version too # many times. 
abi = "{}{}{}.{}".format( diff --git a/python/private/pypi/requirements_files_by_platform.bzl b/python/private/pypi/requirements_files_by_platform.bzl index 356bd4416e..2027b41594 100644 --- a/python/private/pypi/requirements_files_by_platform.bzl +++ b/python/private/pypi/requirements_files_by_platform.bzl @@ -140,9 +140,10 @@ def requirements_files_by_platform( platforms_from_args = _platforms_from_args(extra_pip_args) if logger: - logger.debug(lambda: "Platforms from pip args: {}".format(platforms_from_args)) + logger.debug(lambda: "Platforms from pip args: {} (from {})".format(platforms_from_args, extra_pip_args)) - default_platforms = platforms + input_platforms = platforms + default_platforms = [_platform(p, python_version) for p in platforms] if platforms_from_args: lock_files = [ @@ -174,6 +175,7 @@ def requirements_files_by_platform( platform for filter_or_platform in specifier.split(",") for platform in (_default_platforms(filter = filter_or_platform, platforms = platforms) if filter_or_platform.endswith("*") else [filter_or_platform]) + if _platform(platform, python_version) in default_platforms ] for file, specifier in requirements_by_platform.items() }.items() @@ -227,9 +229,10 @@ def requirements_files_by_platform( configured_platforms[p] = file elif logger: - logger.warn(lambda: "File {} will be ignored because there are no configured platforms: {}".format( + logger.info(lambda: "File {} will be ignored because there are no configured platforms: {} out of {}".format( file, default_platforms, + input_platforms, )) continue diff --git a/tests/pypi/extension/pip_parse.bzl b/tests/pypi/extension/pip_parse.bzl index 21569cf04e..edac12e344 100644 --- a/tests/pypi/extension/pip_parse.bzl +++ b/tests/pypi/extension/pip_parse.bzl @@ -27,6 +27,7 @@ def pip_parse( requirements_linux = None, requirements_lock = None, requirements_windows = None, + target_platforms = [], simpleapi_skip = [], timeout = 600, whl_modifications = {}, @@ -41,7 +42,9 @@ def pip_parse( 
envsubst = envsubst, experimental_index_url = experimental_index_url, experimental_requirement_cycles = experimental_requirement_cycles, + # TODO @aignas 2025-12-02: decide on a single attr - should we reuse this? experimental_target_platforms = experimental_target_platforms, + target_platforms = target_platforms, extra_hub_aliases = extra_hub_aliases, extra_pip_args = extra_pip_args, hub_name = hub_name, diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index 414ad1250e..e267f4ca34 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -25,12 +25,12 @@ load("//tests/pypi/extension:pip_parse.bzl", _parse = "pip_parse") _tests = [] -def _mock_mctx(environ = {}, read = None): +def _mock_mctx(os = "unittest", arch = "exotic", environ = {}, read = None): return struct( os = struct( environ = environ, - name = "unittest", - arch = "exotic", + name = os, + arch = arch, ), read = read or (lambda _: """\ simple==0.0.1 \ @@ -723,6 +723,10 @@ simple==0.0.3 \ "requirements.linux_x86_64.txt": "linux_x86_64", "requirements.osx_aarch64.txt": "osx_aarch64", }, + target_platforms = [ + "linux_x86_64", + "osx_aarch64", + ], ), ) pypi = builder.build() @@ -1221,6 +1225,73 @@ optimum[onnxruntime-gpu]==1.17.1 ; sys_platform == 'linux' _tests.append(_test_pipstar_platforms) +def _test_pipstar_platforms_limit(env): + builder = hub_builder( + env, + enable_pipstar = True, + config = struct( + enable_pipstar = True, + netrc = None, + auth_patterns = {}, + platforms = { + "my{}{}".format(os, cpu): _plat( + name = "my{}{}".format(os, cpu), + os_name = os, + arch_name = cpu, + marker = "python_version ~= \"3.13\"", + config_settings = [ + "@platforms//os:{}".format(os), + "@platforms//cpu:{}".format(cpu), + ], + ) + for os, cpu in [ + ("linux", "x86_64"), + ("osx", "aarch64"), + ] + }, + ), + ) + builder.pip_parse( + _mock_mctx( + os = "linux", + arch = "amd64", + read = lambda x: 
{ + "universal.txt": """\ +optimum[onnxruntime]==1.17.1 ; sys_platform == 'darwin' +optimum[onnxruntime-gpu]==1.17.1 ; sys_platform == 'linux' +""", + }[x], + ), + _parse( + hub_name = "pypi", + python_version = "3.15", + requirements_lock = "universal.txt", + target_platforms = ["my{os}{arch}"], + ), + ) + pypi = builder.build() + + pypi.exposed_packages().contains_exactly(["optimum"]) + pypi.group_map().contains_exactly({}) + pypi.whl_map().contains_exactly({ + "optimum": { + "pypi_315_optimum": [ + whl_config_setting(version = "3.15"), + ], + }, + }) + pypi.whl_libraries().contains_exactly({ + "pypi_315_optimum": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "optimum[onnxruntime-gpu]==1.17.1", + }, + }) + pypi.extra_aliases().contains_exactly({}) + +_tests.append(_test_pipstar_platforms_limit) + def hub_builder_test_suite(name): """Create the test suite. diff --git a/tests/pypi/requirements_files_by_platform/requirements_files_by_platform_tests.bzl b/tests/pypi/requirements_files_by_platform/requirements_files_by_platform_tests.bzl index 6688d72ffe..d6aaf3ca99 100644 --- a/tests/pypi/requirements_files_by_platform/requirements_files_by_platform_tests.bzl +++ b/tests/pypi/requirements_files_by_platform/requirements_files_by_platform_tests.bzl @@ -115,6 +115,12 @@ def _test_simple_limited(env): }, platforms = ["linux_x86_64", "osx_x86_64"], ), + requirements_files_by_platform( + requirements_by_platform = { + "requirements_lock": "linux_x86_64,osx_aarch64,osx_x86_64", + }, + platforms = ["linux_x86_64", "osx_x86_64", "windows_x86_64"], + ), ]: env.expect.that_dict(got).contains_exactly({ "requirements_lock": [ @@ -219,6 +225,17 @@ def _test_os_arch_requirements_with_default(env): "requirements_linux": "linux_x86_64,linux_aarch64", }, requirements_lock = "requirements_lock", + platforms = [ + "linux_super_exotic", + "linux_x86_64", + 
"linux_aarch64", + "linux_arm", + "linux_ppc", + "linux_s390x", + "osx_aarch64", + "osx_x86_64", + "windows_x86_64", + ], ) env.expect.that_dict(got).contains_exactly({ "requirements_exotic": ["linux_super_exotic"],