From 6c0651a8b2a3de480e92dfc3d5dea80c5e47af18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Fri, 18 Jul 2025 17:42:55 +0200 Subject: [PATCH 1/2] tar.py: Fix hardlink extraction with latest Python Recent Python security fixes (included in 3.13.4 but also backported to older branches) require link targets to pass the specified filter function as well. This can result in hardlinked files not being extracted when a `base-dir` is set. This commit separates the filtering from the extraction to avoid this issue. Pass `filter="tar"` as Python 3.14+ will default to the too restrictive `filter="data"`. https://github.com/python/cpython/pull/135037 Fixes #2029. --- src/buildstream/plugins/sources/tar.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/buildstream/plugins/sources/tar.py b/src/buildstream/plugins/sources/tar.py index f1d939119..040b5c48d 100644 --- a/src/buildstream/plugins/sources/tar.py +++ b/src/buildstream/plugins/sources/tar.py @@ -145,14 +145,14 @@ def stage(self, directory): base_dir = base_dir + os.sep filter_function = functools.partial(self._extract_filter, base_dir) + filtered_members = [] + for member in tar.getmembers(): + member = filter_function(member, directory) + if member is not None: + filtered_members.append(member) if sys.version_info >= (3, 12): - tar.extractall(path=directory, filter=filter_function) + tar.extractall(path=directory, members=filtered_members, filter="tar") else: - filtered_members = [] - for member in tar.getmembers(): - member = filter_function(member, directory) - if member is not None: - filtered_members.append(member) tar.extractall(path=directory, members=filtered_members) except (tarfile.TarError, OSError) as e: From 6d93bd3d422c7403369171372fdb08b3939538b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 21 Jul 2025 08:44:01 +0200 Subject: [PATCH 2/2] tests/sources/tar.py: Add test for symlinks --- tests/sources/tar.py | 45 
+++++++++++++++++++++++++++ tests/sources/tar/symlinks/target.bst | 5 +++ 2 files changed, 50 insertions(+) create mode 100644 tests/sources/tar/symlinks/target.bst diff --git a/tests/sources/tar.py b/tests/sources/tar.py index 391912cac..f169800ff 100644 --- a/tests/sources/tar.py +++ b/tests/sources/tar.py @@ -561,3 +561,48 @@ def ensure_link(member): result.assert_success() result = cli.run(project=project, args=["source", "fetch", "malicious_target.bst"]) result.assert_main_error(ErrorDomain.STREAM, None) + + +@pytest.mark.datafiles(os.path.join(DATA_DIR, "symlinks")) +def test_symlinks(cli, tmpdir, datafiles): + project = str(datafiles) + generate_project(project, config={"aliases": {"tmpdir": "file:///" + str(tmpdir)}}) + checkoutdir = os.path.join(str(tmpdir), "checkout") + + absolute_target = "/tmp/foo" + relative_target = "foo/../bar" + + # Create a tarball with an absolute symlink + src_tar = os.path.join(str(tmpdir), "contents.tar.gz") + old_dir = os.getcwd() + os.chdir(str(tmpdir)) + os.mkdir("contents") + os.symlink(absolute_target, "contents/absolute-symlink") + os.symlink(relative_target, "contents/relative-symlink") + with tarfile.open(src_tar, "w:gz") as tar: + tar.add("contents") + os.chdir(old_dir) + + # Make sure our tarfile is actually created with the desired attributes set + with tarfile.open(src_tar, "r:gz") as tar: + assert any( + member.issym() and member.path == "contents/absolute-symlink" and member.linkname == absolute_target + for member in tar.getmembers() + ) + assert any( + member.issym() and member.path == "contents/relative-symlink" and member.linkname == relative_target + for member in tar.getmembers() + ) + + # Assert that we will allow and not mangle symlinks with relative and absolute target paths + result = cli.run(project=project, args=["source", "track", "target.bst"]) + result.assert_success() + result = cli.run(project=project, args=["source", "fetch", "target.bst"]) + result.assert_success() + result = 
cli.run(project=project, args=["build", "target.bst"]) + result.assert_success() + result = cli.run(project=project, args=["artifact", "checkout", "target.bst", "--directory", checkoutdir]) + result.assert_success() + + assert os.readlink(checkoutdir + "/absolute-symlink") == absolute_target + assert os.readlink(checkoutdir + "/relative-symlink") == relative_target diff --git a/tests/sources/tar/symlinks/target.bst b/tests/sources/tar/symlinks/target.bst new file mode 100644 index 000000000..b9debe961 --- /dev/null +++ b/tests/sources/tar/symlinks/target.bst @@ -0,0 +1,5 @@ +kind: import +description: The kind of this element is irrelevant. +sources: +- kind: tar + url: tmpdir:/contents.tar.gz