From 75aafc62f07be43cdaed01b81019992b1c576b33 Mon Sep 17 00:00:00 2001 From: Abderrahim Kitouni Date: Tue, 8 Jul 2025 08:21:04 +0100 Subject: [PATCH 1/4] _sandboxremote.py: stop trying to pull partial artifacts There used to be a time where buildstream could partially download artifacts, which could mean that not all artifact blobs are available locally. This is no longer the case: support for partial artifacts was dropped in favour of the cache storage-service and is only used for buildtrees, which aren't used in this case. Downloading missing blobs from the cache storage-service is handled in `CASCache._send_blobs()` called a few lines below. --- src/buildstream/sandbox/_sandboxremote.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/buildstream/sandbox/_sandboxremote.py b/src/buildstream/sandbox/_sandboxremote.py index 8abaa2b53..1b72f8718 100644 --- a/src/buildstream/sandbox/_sandboxremote.py +++ b/src/buildstream/sandbox/_sandboxremote.py @@ -24,7 +24,7 @@ from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc from .._protos.build.buildgrid import local_cas_pb2 from .._protos.google.rpc import code_pb2 -from .._exceptions import BstError, SandboxError +from .._exceptions import SandboxError from .._protos.google.longrunning import operations_pb2, operations_pb2_grpc from .._cas import CASRemote @@ -247,9 +247,7 @@ def _execute_action(self, action, flags): stdout, stderr = self._get_output() context = self._get_context() - project = self._get_project() cascache = context.get_cascache() - artifactcache = context.artifactcache action_digest = cascache.add_object(buffer=action.SerializeToString()) @@ -269,15 +267,6 @@ def _execute_action(self, action, flags): except grpc.RpcError as e: raise SandboxError("Failed to determine missing blobs: {}".format(e)) from e - # Check if any blobs are also missing locally (partial artifact) - # and pull them from the artifact cache. 
- try: - local_missing_blobs = cascache.missing_blobs(missing_blobs) - if local_missing_blobs: - artifactcache.fetch_missing_blobs(project, local_missing_blobs) - except (grpc.RpcError, BstError) as e: - raise SandboxError("Failed to pull missing blobs from artifact cache: {}".format(e)) from e - # Add command and action messages to blob list to push missing_blobs.append(action.command_digest) missing_blobs.append(action_digest) From e5059e2124a2561a56c9d342177ddba1350c3e18 Mon Sep 17 00:00:00 2001 From: Abderrahim Kitouni Date: Wed, 9 Jul 2025 10:11:45 +0100 Subject: [PATCH 2/4] sandbox.py: add method to create a sub-sandbox This allows an element to use a secondary sandbox for manipulating artifacts that doesn't affect the build --- src/buildstream/sandbox/sandbox.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/buildstream/sandbox/sandbox.py b/src/buildstream/sandbox/sandbox.py index 505cc5c1a..f87075b73 100644 --- a/src/buildstream/sandbox/sandbox.py +++ b/src/buildstream/sandbox/sandbox.py @@ -85,13 +85,18 @@ def __init__(self, context: "Context", project: "Project", **kwargs): self.__env = None # type: Optional[Dict[str, str]] self.__mount_sources = {} # type: Dict[str, str] self.__allow_run = True + self.__subsandboxes = [] # Plugin element full name for logging plugin = kwargs.get("plugin", None) if plugin: self.__element_name = plugin._get_full_name() else: - self.__element_name = None + parent = kwargs.get("parent", None) + if parent: + self.__element_name = parent._get_element_name() + else: + self.__element_name = None # Configuration from kwargs common to all subclasses self.__config = kwargs["config"] @@ -264,6 +269,24 @@ def batch( batch.execute() + def create_subsandbox(self, **kwargs): + """Create an empty sandbox + + This allows an element to use a secondary sandbox for manipulating artifacts + that does not affect the build sandbox + """ + + sub = Sandbox( + self.__context, + self.__project, + 
parent=self, + stdout=self.__stdout, + stderr=self.__stderr, + config=self.__config, + ) + self.__subsandboxes.append(sub) + return sub + ##################################################### # Abstract Methods for Sandbox implementations # ##################################################### @@ -565,6 +588,9 @@ def _get_element_name(self): def _disable_run(self): self.__allow_run = False + def _get_subsandboxes(self): + return self.__subsandboxes + # SandboxFlags() # From d38563d79a73d44271458fb55011bcff55da136f Mon Sep 17 00:00:00 2001 From: Abderrahim Kitouni Date: Tue, 17 Jun 2025 17:02:10 +0100 Subject: [PATCH 3/4] buildelement: Add the digest-environment config property This allows setting an environment variable inside the sandbox to the CAS digest of one or more dependencies. Co-authored by: Adrien Plazas --- src/buildstream/buildelement.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/buildstream/buildelement.py b/src/buildstream/buildelement.py index 40355f5cf..5f069919c 100644 --- a/src/buildstream/buildelement.py +++ b/src/buildstream/buildelement.py @@ -219,6 +219,7 @@ def configure(self, node): def configure_dependencies(self, dependencies): self.__layout = {} # pylint: disable=attribute-defined-outside-init + self.__digest_environment = {} # pylint: disable=attribute-defined-outside-init # FIXME: Currently this forcefully validates configurations # for all BuildElement subclasses so they are unable to @@ -227,9 +228,18 @@ def configure_dependencies(self, dependencies): for dep in dependencies: # Determine the location to stage each element, default is "/" location = "/" + if dep.config: - dep.config.validate_keys(["location"]) - location = dep.config.get_str("location") + dep.config.validate_keys(["digest-environment", "location"]) + + location = dep.config.get_str("location", "/") + + digest_var_name = dep.config.get_str("digest-environment", None) + + if digest_var_name is not None: + element_list 
= self.__digest_environment.setdefault(digest_var_name, []) + element_list.append(dep.element) + try: element_list = self.__layout[location] except KeyError: @@ -285,10 +295,17 @@ def configure_sandbox(self, sandbox): command_dir = build_root sandbox.set_work_directory(command_dir) + def stage(self, sandbox): # Setup environment - sandbox.set_environment(self.get_environment()) + env = self.get_environment() - def stage(self, sandbox): + for digest_variable, element_list in self.__digest_environment.items(): + dummy_sandbox = sandbox.create_subsandbox() + self.stage_dependency_artifacts(dummy_sandbox, element_list) + digest = dummy_sandbox.get_virtual_directory()._get_digest() + env[digest_variable] = "{}/{}".format(digest.hash, digest.size_bytes) + + sandbox.set_environment(env) # First stage it all # From 614dc3747ef8bfff1d6c706342eab59058cb5522 Mon Sep 17 00:00:00 2001 From: Abderrahim Kitouni Date: Wed, 9 Jul 2025 10:16:41 +0100 Subject: [PATCH 4/4] _sandboxremote.py: Upload blobs for subsandbox roots The subsandboxes can be used to extract a CAS digest that could be used for nested remote execution, and thus need to be available in the remote execution CAS. 
--- src/buildstream/sandbox/_sandboxremote.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/buildstream/sandbox/_sandboxremote.py b/src/buildstream/sandbox/_sandboxremote.py index 1b72f8718..9deb87943 100644 --- a/src/buildstream/sandbox/_sandboxremote.py +++ b/src/buildstream/sandbox/_sandboxremote.py @@ -261,9 +261,17 @@ def _execute_action(self, action, flags): "Uploading input root", element_name=self._get_element_name() ): # Determine blobs missing on remote + root_digests = [action.input_root_digest] + + # Add virtual directories for subsandboxes + for subsandbox in self._get_subsandboxes(): + vdir = subsandbox.get_virtual_directory() + root_digests.append(vdir._get_digest()) + + missing_blobs = [] try: - input_root_digest = action.input_root_digest - missing_blobs = list(cascache.missing_blobs_for_directory(input_root_digest, remote=casremote)) + for root_digest in root_digests: + missing_blobs.extend(cascache.missing_blobs_for_directory(root_digest, remote=casremote)) except grpc.RpcError as e: raise SandboxError("Failed to determine missing blobs: {}".format(e)) from e