From fcdc05e94032513113b1827846bf98fe3ef413f8 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 8 Dec 2025 19:31:26 -0800 Subject: [PATCH 1/5] add impl --- python/private/venv_runfiles.bzl | 188 +++++++++++++++++-------------- 1 file changed, 102 insertions(+), 86 deletions(-) diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl index eeedda4555..2adca2275a 100644 --- a/python/private/venv_runfiles.bzl +++ b/python/private/venv_runfiles.bzl @@ -3,7 +3,6 @@ load("@bazel_skylib//lib:paths.bzl", "paths") load( ":common.bzl", - "PYTHON_FILE_EXTENSIONS", "is_file", "relative_path", "runfiles_root_path", @@ -235,109 +234,126 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): # Append slash to prevent incorrect prefix-string matches site_packages_root += "/" - # We have to build a list of (runfiles path, site-packages path) pairs of the files to - # create in the consuming binary's venv site-packages directory. To minimize the number of - # files to create, we just return the paths to the directories containing the code of - # interest. - # - # However, namespace packages complicate matters: multiple distributions install in the - # same directory in site-packages. This works out because they don't overlap in their - # files. Typically, they install to different directories within the namespace package - # directory. We also need to ensure that we can handle a case where the main package (e.g. - # airflow) has directories only containing data files and then namespace packages coming - # along and being next to it. - # - # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions) - # is just a single Python file. - - dir_symlinks = {} # dirname -> runfile path - venv_symlinks = [] - - # Sort so order is top-down all_files = sorted(files, key = lambda f: f.short_path) + # We want to minimize the number of files symlinked. Ideally only + # the top-level directories under site-packages are symlinked. + # Unfortunately, shared libraries complicate matters: if a + # shared library's directory is linked, then the dynamic linker + # computes the wrong search path. To fix, we have to directly link + # shared libraries. This then means that all the parent directories of + # the shared library can't be linked directly. + # So what we do is identify the shared libraries, mark all their + # parent directories as "cannot be directly linked", then compute + # the files that can be linked with that constraint. + + # venv paths that cannot be directly linked. Dict acting as set. + cannot_be_linked_directly = {} + + # dict[str path, VenvSymlinkEntry] + # Where path is the venv path (i.e. relative to site_packages_prefix) + venv_symlinks = {} + + # List of (File, str venv_path) tuples + files_left_to_link = [] + + repo_runfiles_dirname = None for src in all_files: - path = _repo_relative_short_path(src.short_path) - if not path.startswith(site_packages_root): + if src.owner.repo_name != ctx.label.repo_name: + # Files in other repos complicate symlink optimization, so + # skip them for now. continue - path = path.removeprefix(site_packages_root) - dir_name, _, filename = path.rpartition("/") - runfiles_dir_name, _, _ = runfiles_root_path(ctx, src.short_path).partition("/") - + repo_rel_path = _repo_relative_short_path(src.short_path) + if not repo_rel_path.startswith(site_packages_root): + continue + venv_path = repo_rel_path.removeprefix(site_packages_root) + filename = paths.basename(venv_path) if _is_linker_loaded_library(filename): - entry = VenvSymlinkEntry( + venv_symlinks[venv_path] = VenvSymlinkEntry( kind = VenvSymlinkKind.LIB, - link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename), + link_to_path = runfiles_root_path(ctx, src.short_path), link_to_file = src, package = package, version = version_str, - venv_path = path, files = depset([src]), + venv_path = venv_path, ) - venv_symlinks.append(entry) - continue - - if dir_name in dir_symlinks: - # we already have this dir, this allows us to short-circuit since most of the - # ctx.files.data might share the same directories as ctx.files.srcs + parent = paths.dirname(venv_path) + for _ in range(len(venv_path) + 1): # Iterate enough times to traverse up + if parent and parent != ".": + cannot_be_linked_directly[parent] = True + parent = paths.dirname(parent) + else: + break + else: + files_left_to_link.append((src, venv_path)) + + # At this point, venv_symlinks has entries for the shared libraries + # and cannot_be_linked_directly has the directories that cannot be + # directly linked. Next, we loop over the remaining files and group + # them into the highest level directory that can be linked. + + # dict[str venv_path, list[File]] + optimized_groups = {} + + for src, venv_path in files_left_to_link: + parent = paths.dirname(venv_path) + if not parent or parent == ".": + # File in root, must be linked directly + optimized_groups.setdefault(venv_path, []) + optimized_groups[venv_path].append(src) continue - if dir_name: - # This can be either: - # * a directory with libs (e.g. numpy.libs, created by auditwheel) - # * a directory with `__init__.py` file that potentially also needs to be - # symlinked. - # * `.dist-info` directory - # - # This could be also regular files, that just need to be symlinked, so we will - # add the directory here. - dir_symlinks[dir_name] = runfiles_dir_name - elif src.extension in PYTHON_FILE_EXTENSIONS: - # This would be files that do not have directories and we just need to add - # direct symlinks to them as is, we only allow Python files in here - entry = VenvSymlinkEntry( - kind = VenvSymlinkKind.LIB, - link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename), - link_to_file = src, - package = package, - version = version_str, - venv_path = path, - files = depset([src]), - ) - venv_symlinks.append(entry) - - # Sort so that we encounter `foo` before `foo/bar`. This ensures we - # see the top-most explicit package first. - dirnames = sorted(dir_symlinks.keys()) - first_level_explicit_packages = [] - for d in dirnames: - is_sub_package = False - for existing in first_level_explicit_packages: - # Suffix with / to prevent foo matching foobar - if d.startswith(existing + "/"): - is_sub_package = True - break - if not is_sub_package: - first_level_explicit_packages.append(d) - - for dirname in first_level_explicit_packages: - prefix = dir_symlinks[dirname] - link_to_path = paths.join(prefix, site_packages_root, dirname) - entry = VenvSymlinkEntry( + if parent in cannot_be_linked_directly: + # File in a directory that cannot be directly linked, + # so link the file directly + optimized_groups.setdefault(venv_path, []) + optimized_groups[venv_path].append(src) + else: + # This path can be grouped. Find the highest-level directory to link. + venv_path = parent + next_parent = paths.dirname(parent) + for _ in range(len(venv_path) + 1): # Iterate enough times + if next_parent and next_parent != ".": + if not (next_parent in cannot_be_linked_directly): + venv_path = next_parent + next_parent = paths.dirname(next_parent) + else: + break + else: + break + + optimized_groups.setdefault(venv_path, []) + optimized_groups[venv_path].append(src) + + repo_name = ctx.label.repo_name + if repo_name == "": + repo_name = "_main" + runfiles_root_prefix = paths.join(repo_name, site_packages_root) + + # Finally, for each group, we create the VenvSymlinkEntry objects + for venv_path, files in optimized_groups.items(): + if venv_path in venv_symlinks: + fail(( + "duplicate entry: {venv_path}" + ).format( + venv_path = venv_path, + )) + if len(files) == 1: + link_to_file = files[0] + else: + link_to_file = None + venv_symlinks[venv_path] = VenvSymlinkEntry( kind = VenvSymlinkKind.LIB, - link_to_path = link_to_path, + link_to_path = paths.join(runfiles_root_prefix, venv_path), + link_to_file = link_to_file, package = package, version = version_str, - venv_path = dirname, - files = depset([ - f - for f in all_files - if runfiles_root_path(ctx, f.short_path).startswith(link_to_path + "/") - ]), + venv_path = venv_path, + files = depset(files), ) - venv_symlinks.append(entry) - return venv_symlinks + return venv_symlinks.values() def _is_linker_loaded_library(filename): """Tells if a filename is one that `dlopen()` or the runtime linker handles. From a8b0d98c064f69de3be02ce92a4724bca616f48e Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 8 Dec 2025 19:33:20 -0800 Subject: [PATCH 2/5] add tests --- python/private/venv_runfiles.bzl | 62 +++--- .../app_files_building_tests.bzl | 207 ++++++++++++++++-- 2 files changed, 216 insertions(+), 53 deletions(-) diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl index 2adca2275a..a580245fa0 100644 --- a/python/private/venv_runfiles.bzl +++ b/python/private/venv_runfiles.bzl @@ -1,6 +1,7 @@ """Code for constructing venvs.""" load("@bazel_skylib//lib:paths.bzl", "paths") +load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED") # buildifier: disable=bzl-visibility load( ":common.bzl", "is_file", @@ -57,7 +58,6 @@ def create_venv_app_files(ctx, deps, venv_dir_map): if is_file(link_to): symlink_from = "{}/{}".format(ctx.label.package, bin_venv_path) runfiles_symlinks[symlink_from] = link_to - else: venv_link = ctx.actions.declare_symlink(bin_venv_path) venv_link_rf_path = runfiles_root_path(ctx, venv_link.short_path) @@ -76,14 +76,16 @@ def create_venv_app_files(ctx, deps, venv_dir_map): ) # Visible for testing -def build_link_map(ctx, entries): +def build_link_map(ctx, entries, return_conflicts = False): """Compute the mapping of venv paths to their backing objects. - Args: ctx: {type}`ctx` current ctx. entries: {type}`list[VenvSymlinkEntry]` the entries that describe the venv-relative + return_conflicts: {type}`bool`. Only present for testing. If True, + also return a list of the groups that had overlapping paths and had + to be resolved and merged. Returns: {type}`dict[str, dict[str, str|File]]` Mappings of venv paths to their @@ -113,6 +115,7 @@ def build_link_map(ctx, entries): # final paths to keep, grouped by kind keep_link_map = {} # dict[str kind, dict[path, str|File]] + conflicts = [] if return_conflicts else None for kind, entries in entries_by_kind.items(): # dict[str kind-relative path, str|File link_to] keep_kind_link_map = {} @@ -128,12 +131,17 @@ def build_link_map(ctx, entries): else: keep_kind_link_map[entry.venv_path] = entry.link_to_path else: + if return_conflicts: + conflicts.append(group) + # Merge a group of overlapping prefixes _merge_venv_path_group(ctx, group, keep_kind_link_map) keep_link_map[kind] = keep_kind_link_map - - return keep_link_map + if return_conflicts: + return keep_link_map, conflicts + else: + return keep_link_map def _group_venv_path_entries(entries): """Group entries by VenvSymlinkEntry.venv_path overlap. @@ -236,17 +244,6 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): all_files = sorted(files, key = lambda f: f.short_path) - # We want to minimize the number of files symlinked. Ideally only - # the top-level directories under site-packages are symlinked. - # Unfortunately, shared libraries complicate matters: if a - # shared library's directory is linked, then the dynamic linker - # computes the wrong search path. To fix, we have to directly link - # shared libraries. This then means that all the parent directories of - # the shared library can't be linked directly. - # So what we do is identify the shared libraries, mark all their - # parent directories as "cannot be directly linked", then compute - # the files that can be linked with that constraint. - # venv paths that cannot be directly linked. Dict acting as set. cannot_be_linked_directly = {} @@ -257,7 +254,14 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): # List of (File, str venv_path) tuples files_left_to_link = [] - repo_runfiles_dirname = None + # We want to minimize the number of files symlinked. Ideally, only the + # top-level directories are symlinked. Unfortunately, shared libraries + # complicate matters: if a shared library's directory is linked, then the + # dynamic linker computes the wrong search path. + # + # To fix, we have to directly link shared libraries. This then means that + # all the parent directories of the shared library can't be linked + # directly. for src in all_files: if src.owner.repo_name != ctx.label.repo_name: # Files in other repos complicate symlink optimization, so @@ -280,11 +284,13 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): ) parent = paths.dirname(venv_path) for _ in range(len(venv_path) + 1): # Iterate enough times to traverse up - if parent and parent != ".": - cannot_be_linked_directly[parent] = True - parent = paths.dirname(parent) - else: + if not parent: break + if cannot_be_linked_directly.get(parent, False): + # Already seen + break + cannot_be_linked_directly[parent] = True + parent = paths.dirname(parent) else: files_left_to_link.append((src, venv_path)) @@ -298,7 +304,7 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): for src, venv_path in files_left_to_link: parent = paths.dirname(venv_path) - if not parent or parent == ".": + if not parent: # File in root, must be linked directly optimized_groups.setdefault(venv_path, []) optimized_groups[venv_path].append(src) @@ -314,8 +320,8 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): venv_path = parent next_parent = paths.dirname(parent) for _ in range(len(venv_path) + 1): # Iterate enough times - if next_parent and next_parent != ".": - if not (next_parent in cannot_be_linked_directly): + if next_parent: + if next_parent not in cannot_be_linked_directly: venv_path = next_parent next_parent = paths.dirname(next_parent) else: @@ -328,7 +334,7 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): repo_name = ctx.label.repo_name if repo_name == "": - repo_name = "_main" + repo_name = "_main" if BZLMOD_ENABLED else "__main__" runfiles_root_prefix = paths.join(repo_name, site_packages_root) # Finally, for each group, we create the VenvSymlinkEntry objects @@ -339,14 +345,10 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): ).format( venv_path = venv_path, )) - if len(files) == 1: - link_to_file = files[0] - else: - link_to_file = None venv_symlinks[venv_path] = VenvSymlinkEntry( kind = VenvSymlinkKind.LIB, link_to_path = paths.join(runfiles_root_prefix, venv_path), - link_to_file = link_to_file, + link_to_file = None, package = package, version = version_str, venv_path = venv_path, diff --git a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl index db2f21c7e7..b7118e33fe 100644 --- a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl +++ b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl @@ -26,9 +26,15 @@ empty_files = rule( _tests = [] +# NOTE: In bzlmod, the workspace name is always "_main". +# Under workspace, the workspace name is the name configured in WORKSPACE, +# or "__main__" if was unspecified. +# NOTE: ctx.workspace_name is always the root workspace, not the workspace +# of the target being processed. def _ctx(workspace_name = "_main"): return struct( workspace_name = workspace_name, + label = Label("//:some_target"), ) def _file(short_path): @@ -36,6 +42,23 @@ def _file(short_path): short_path = short_path, ) +def _venv_symlink(venv_path, *, link_to_path = None, files = []): + return struct( + link_to_path = link_to_path, + venv_path = venv_path, + files = files, + ) + +def _venv_symlinks_from_entries(entries): + result = [] + for symlink_entry in entries: + result.append(struct( + venv_path = symlink_entry.venv_path, + link_to_path = symlink_entry.link_to_path, + files = [f.short_path for f in symlink_entry.files.to_list()], + )) + return sorted(result, key = lambda e: (e.link_to_path, e.venv_path)) + def _entry(venv_path, link_to_path, files = [], **kwargs): kwargs.setdefault("kind", VenvSymlinkKind.LIB) kwargs.setdefault("package", None) @@ -89,7 +112,7 @@ def _test_conflict_merging_impl(env, _): _entry("duplicate", "+dupe_b/site-packages/duplicate", ["d.py"]), ] - actual = build_link_map(_ctx(), entries) + actual, conflicts = build_link_map(_ctx(), entries, return_conflicts = True) expected_libs = { "a-1.0.dist-info": "+pypi_a/site-packages/a-1.0.dist-info", "a/a.txt": _file("../+pypi_a/site-packages/a/a.txt"), @@ -101,6 +124,146 @@ def _test_conflict_merging_impl(env, _): env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs) env.expect.that_dict(actual).keys().contains_exactly([VenvSymlinkKind.LIB]) + env.expect.that_int(len(conflicts)).is_greater_than(0) + +def _test_optimized_grouping_complex(name): + empty_files( + name = name + "_files", + paths = [ + "site-packages/pkg1/a.txt", + "site-packages/pkg1/b/b_mod.so", + "site-packages/pkg1/c/c1.txt", + "site-packages/pkg1/c/c2.txt", + "site-packages/pkg1/d/d1.txt", + "site-packages/pkg1/dd/dd1.txt", + "site-packages/pkg1/q1/q1.txt", + "site-packages/pkg1/q1/q2a/libq.so", + "site-packages/pkg1/q1/q2a/q2.txt", + "site-packages/pkg1/q1/q2a/q3/q3a.txt", + "site-packages/pkg1/q1/q2a/q3/q3b.txt", + "site-packages/pkg1/q1/q2b/q2b.txt", + ], + ) + analysis_test( + name = name, + impl = _test_optimized_grouping_complex_impl, + target = name + "_files", + ) + +_tests.append(_test_optimized_grouping_complex) + +def _test_optimized_grouping_complex_impl(env, target): + test_ctx = _ctx(workspace_name = env.ctx.workspace_name) + entries = get_venv_symlinks( + test_ctx, + target.files.to_list(), + package = "pkg1", + version_str = "1.0", + site_packages_root = env.ctx.label.package + "/site-packages", + ) + actual = _venv_symlinks_from_entries(entries) + + rr = "{}/{}/site-packages/".format(test_ctx.workspace_name, env.ctx.label.package) + expected = [ + _venv_symlink( + "pkg1/a.txt", + link_to_path = rr + "pkg1/a.txt", + files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/a.txt", + ], + ), + _venv_symlink( + "pkg1/b", + link_to_path = rr + "pkg1/b", + files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/b/b_mod.so", + ], + ), + _venv_symlink("pkg1/c", link_to_path = rr + "pkg1/c", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/c/c1.txt", + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/c/c2.txt", + ]), + _venv_symlink("pkg1/d", link_to_path = rr + "pkg1/d", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/d/d1.txt", + ]), + _venv_symlink("pkg1/dd", link_to_path = rr + "pkg1/dd", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/dd/dd1.txt", + ]), + _venv_symlink("pkg1/q1/q1.txt", link_to_path = rr + "pkg1/q1/q1.txt", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/q1/q1.txt", + ]), + _venv_symlink("pkg1/q1/q2a/libq.so", link_to_path = rr + "pkg1/q1/q2a/libq.so", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/q1/q2a/libq.so", + ]), + _venv_symlink("pkg1/q1/q2a/q2.txt", link_to_path = rr + "pkg1/q1/q2a/q2.txt", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/q1/q2a/q2.txt", + ]), + _venv_symlink("pkg1/q1/q2a/q3", link_to_path = rr + "pkg1/q1/q2a/q3", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/q1/q2a/q3/q3a.txt", + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/q1/q2a/q3/q3b.txt", + ]), + _venv_symlink("pkg1/q1/q2b", link_to_path = rr + "pkg1/q1/q2b", files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg1/q1/q2b/q2b.txt", + ]), + ] + expected = sorted(expected, key = lambda e: (e.link_to_path, e.venv_path)) + env.expect.that_collection( + actual, + ).contains_exactly(expected) + _, conflicts = build_link_map(test_ctx, entries, return_conflicts = True) + + # The point of the optimization is to avoid having to merge conflicts. + env.expect.that_collection(conflicts).contains_exactly([]) + +def _test_optimized_grouping_single_toplevel(name): + empty_files( + name = name + "_files", + paths = [ + "site-packages/pkg2/a.txt", + "site-packages/pkg2/b_mod.so", + ], + ) + analysis_test( + name = name, + impl = _test_optimized_grouping_single_toplevel_impl, + target = name + "_files", + ) + +_tests.append(_test_optimized_grouping_single_toplevel) + +def _test_optimized_grouping_single_toplevel_impl(env, target): + test_ctx = _ctx(workspace_name = env.ctx.workspace_name) + entries = get_venv_symlinks( + test_ctx, + target.files.to_list(), + package = "pkg2", + version_str = "1.0", + site_packages_root = env.ctx.label.package + "/site-packages", + ) + actual = _venv_symlinks_from_entries(entries) + + rr = "{}/{}/site-packages/".format(test_ctx.workspace_name, env.ctx.label.package) + expected = [ + _venv_symlink( + "pkg2", + link_to_path = rr + "pkg2", + files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg2/a.txt", + "tests/venv_site_packages_libs/app_files_building/site-packages/pkg2/b_mod.so", + ], + ), + ] + expected = sorted(expected, key = lambda e: (e.link_to_path, e.venv_path)) + + env.expect.that_collection( + actual, + ).contains_exactly(expected) + + _, conflicts = build_link_map(test_ctx, entries, return_conflicts = True) + + # The point of the optimization is to avoid having to merge conflicts. + env.expect.that_collection(conflicts).contains_exactly([]) + def _test_package_version_filtering(name): analysis_test( name = name, @@ -219,8 +382,7 @@ def _test_multiple_venv_symlink_kinds_impl(env, _): "libfile", "+pypi_lib/site-packages/libfile", ["lib.txt"], - kind = - VenvSymlinkKind.LIB, + kind = VenvSymlinkKind.LIB, ), _entry( "binfile", @@ -294,34 +456,33 @@ def _test_shared_library_symlinking_impl(env, target): site_packages_root = env.ctx.label.package + "/site-packages", ) - actual = [e for e in actual_entries if e.venv_path == "foo.libs/libx.so"] - if not actual: - fail("Did not find VenvSymlinkEntry with venv_path equal to foo.libs/libx.so. " + - "Found: {}".format(actual_entries)) - elif len(actual) > 1: - fail("Found multiple entries with venv_path=foo.libs/libx.so. " + - "Found: {}".format(actual_entries)) - actual = actual[0] + actual = _venv_symlinks_from_entries(actual_entries) - actual_files = actual.files.to_list() - expected_lib_dso = [f for f in srcs if f.basename == "libx.so"] - env.expect.that_collection(actual_files).contains_exactly(expected_lib_dso) + env.expect.that_collection(actual).contains_at_least([ + _venv_symlink( + "bar/libs/liby.so", + link_to_path = "_main/tests/venv_site_packages_libs/app_files_building/site-packages/bar/libs/liby.so", + files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/bar/libs/liby.so", + ], + ), + _venv_symlink( + "foo.libs/libx.so", + link_to_path = "_main/tests/venv_site_packages_libs/app_files_building/site-packages/foo.libs/libx.so", + files = [ + "tests/venv_site_packages_libs/app_files_building/site-packages/foo.libs/libx.so", + ], + ), + ]) - entries = actual_entries - actual = build_link_map(_ctx(), entries) + actual = build_link_map(_ctx(), actual_entries) # The important condition is that each lib*.so file is linked directly. expected_libs = { "bar/libs/liby.so": srcs[0], - "bar/x.py": srcs[1], - "bar/y.so": srcs[2], - "foo": "_main/tests/venv_site_packages_libs/app_files_building/site-packages/foo", "foo.libs/libx.so": srcs[3], - "root.pth": srcs[-3], - "root.py": srcs[-2], - "root.so": srcs[-1], } - env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs) + env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_at_least(expected_libs) def app_files_building_test_suite(name): test_suite( From 204a69913209b251c6069700e0b2621788ea6086 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 9 Dec 2025 21:26:32 -0800 Subject: [PATCH 3/5] make work for workspace --- python/private/venv_runfiles.bzl | 40 ++++++++++--------- .../app_files_building_tests.bzl | 3 +- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl index a580245fa0..d8c34b8a5a 100644 --- a/python/private/venv_runfiles.bzl +++ b/python/private/venv_runfiles.bzl @@ -263,19 +263,19 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): # all the parent directories of the shared library can't be linked # directly. for src in all_files: - if src.owner.repo_name != ctx.label.repo_name: - # Files in other repos complicate symlink optimization, so - # skip them for now. + rf_root_path = runfiles_root_path(ctx, src.short_path) + repo, _, repo_rel_path = rf_root_path.partition("/") + head, found_sp_root, venv_path = repo_rel_path.partition(site_packages_root) + if head or not found_sp_root: + # If head is set, then the path didn't start with site_packages_root + # if found_sp_root is empty, then it means it wasn't found at all. continue - repo_rel_path = _repo_relative_short_path(src.short_path) - if not repo_rel_path.startswith(site_packages_root): - continue - venv_path = repo_rel_path.removeprefix(site_packages_root) + filename = paths.basename(venv_path) if _is_linker_loaded_library(filename): venv_symlinks[venv_path] = VenvSymlinkEntry( kind = VenvSymlinkKind.LIB, - link_to_path = runfiles_root_path(ctx, src.short_path), + link_to_path = rf_root_path, link_to_file = src, package = package, version = version_str, @@ -332,11 +332,6 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): optimized_groups.setdefault(venv_path, []) optimized_groups[venv_path].append(src) - repo_name = ctx.label.repo_name - if repo_name == "": - repo_name = "_main" if BZLMOD_ENABLED else "__main__" - runfiles_root_prefix = paths.join(repo_name, site_packages_root) - # Finally, for each group, we create the VenvSymlinkEntry objects for venv_path, files in optimized_groups.items(): if venv_path in venv_symlinks: @@ -345,9 +340,15 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): ).format( venv_path = venv_path, )) + link_to_path = ( + _get_label_runfiles_repo(ctx, files[0].owner) + + "/" + + site_packages_root + + venv_path + ) venv_symlinks[venv_path] = VenvSymlinkEntry( kind = VenvSymlinkKind.LIB, - link_to_path = paths.join(runfiles_root_prefix, venv_path), + link_to_path = link_to_path, link_to_file = None, package = package, version = version_str, @@ -375,9 +376,10 @@ def _is_linker_loaded_library(filename): return True return False -def _repo_relative_short_path(short_path): - # Convert `../+pypi+foo/some/file.py` to `some/file.py` - if short_path.startswith("../"): - return short_path[3:].partition("/")[2] +def _get_label_runfiles_repo(ctx, label): + repo = label.repo_name + if repo: + return repo else: - return short_path + # For files, empty repo means the main repo + return ctx.workspace_name diff --git a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl index b7118e33fe..fc0b5d0bf3 100644 --- a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl +++ b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl @@ -30,11 +30,10 @@ _tests = [] # Under workspace, the workspace name is the name configured in WORKSPACE, # or "__main__" if was unspecified. # NOTE: ctx.workspace_name is always the root workspace, not the workspace -# of the target being processed. +# of the target being processed (ctx.label). def _ctx(workspace_name = "_main"): return struct( workspace_name = workspace_name, - label = Label("//:some_target"), ) def _file(short_path): From 29acc630f7a53589564fb597b631cbfc8c851c72 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 9 Dec 2025 22:35:38 -0800 Subject: [PATCH 4/5] format+buildifier --- python/private/venv_runfiles.bzl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl index d8c34b8a5a..5908c65b29 100644 --- a/python/private/venv_runfiles.bzl +++ b/python/private/venv_runfiles.bzl @@ -1,7 +1,6 @@ """Code for constructing venvs.""" load("@bazel_skylib//lib:paths.bzl", "paths") -load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED") # buildifier: disable=bzl-visibility load( ":common.bzl", "is_file", @@ -264,7 +263,7 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): # directly. for src in all_files: rf_root_path = runfiles_root_path(ctx, src.short_path) - repo, _, repo_rel_path = rf_root_path.partition("/") + _, _, repo_rel_path = rf_root_path.partition("/") head, found_sp_root, venv_path = repo_rel_path.partition(site_packages_root) if head or not found_sp_root: # If head is set, then the path didn't start with site_packages_root From 55aec54f4bbacc12ba30ff8b077d84fd8b1109d2 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Wed, 10 Dec 2025 08:21:26 -0800 Subject: [PATCH 5/5] remove defunct guard check --- python/private/venv_runfiles.bzl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl index 5908c65b29..43fcab6192 100644 --- a/python/private/venv_runfiles.bzl +++ b/python/private/venv_runfiles.bzl @@ -333,12 +333,6 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): # Finally, for each group, we create the VenvSymlinkEntry objects for venv_path, files in optimized_groups.items(): - if venv_path in venv_symlinks: - fail(( - "duplicate entry: {venv_path}" - ).format( - venv_path = venv_path, - )) link_to_path = ( _get_label_runfiles_repo(ctx, files[0].owner) + "/" +