From c832f0a746fd9feaf705a0b0c15c242b9c42e626 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 27 Sep 2024 16:19:39 -0400 Subject: [PATCH 1/5] Added new function get_task_cost and modified get_tasks_by_name to take None for the name (meaning get all tasks for a project) --- src/cwl_platform/arvados_platform.py | 47 +++++++++++++++++------ src/cwl_platform/base_platform.py | 6 ++- src/cwl_platform/sevenbridges_platform.py | 7 +++- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/cwl_platform/arvados_platform.py b/src/cwl_platform/arvados_platform.py index 937fb009..cde5fc3e 100644 --- a/src/cwl_platform/arvados_platform.py +++ b/src/cwl_platform/arvados_platform.py @@ -91,9 +91,6 @@ def _load_cwl_output(self, task: ArvadosTask): cwl_output_collection = arvados.collection.Collection(task.container_request['output_uuid'], api_client=self.api, keep_client=self.keep_client) - if cwl_output_collection.items() is None: - return None - cwl_output = None with cwl_output_collection.open('cwl.output.json') as cwl_output_file: cwl_output = json.load(cwl_output_file) @@ -392,30 +389,56 @@ def get_task_output_filename(self, task: ArvadosTask, output_name): output_file = cwl_output[output_name]['basename'] return output_file - def get_tasks_by_name(self, project, task_name): # -> list(ArvadosTask): + def get_tasks_by_name(self, project, task_name=None): # -> list(ArvadosTask): ''' Get all processes (jobs) in a project with a specified name :param project: The project to search - :param process_name: The name of the process to search for + :param task_name: The name of the process to search for (value None means return all) :return: List of container request uuids and associated containers ''' # We must add priority>0 filter so we do not capture Cancelled jobs as Queued jobs. # According to Curii, 'Cancelled' on the UI = 'Queued' with priority=0, we are not interested in Cancelled # jobs here anyway, we will submit the job again + cont_reqs = [] + conts = {} tasks = [] + setup_filters=[ + ['owner_uuid', '=', project['uuid']], ['priority', '>', 0] + ] + if task_name is not None: + setup_filters.append(["name", '=', task_name]) for container_request in arvados.util.keyset_list_all( self.api.container_requests().list, - filters=[ - ["name", '=', task_name], - ['owner_uuid', '=', project['uuid']], ['priority', '>', 0] - ] + filters=setup_filters, + select=["uuid", "owner_uuid", "container_uuid", "name", "cumulative_cost", "properties", "modified_by_user_uuid", "created_at"] ): - # Get the container - container = self.api.containers().get(uuid=container_request['container_uuid']).execute() - tasks.append(ArvadosTask(container_request, container)) + cont_reqs.append(container_request) + + chunk_size = 1000 + + for i in range(0, len(cont_reqs), chunk_size): + chunk = cont_reqs[i:i + chunk_size] + for container in arvados.util.keyset_list_all( + self.api.container().list, + filters=[ + ["uuid", "in", [c["container_uuid"] for c in chunk if c["container_uuid"]]], + ], + select=["uuid", "started_at", "finished_at", "cost"] + ): + conts[container["uuid"]] = container + + for cr in cont_reqs: + cid = cr["container_uuid"] + if cid in conts: + c = conts[cid] + tasks.append(ArvadosTask(cr,c)) + return tasks + def get_task_cost(self, task): + return(task.container["cost"]) + def get_project(self): ''' Determine what project we are running in ''' try: diff --git a/src/cwl_platform/base_platform.py b/src/cwl_platform/base_platform.py index bca4e157..dddc9c00 100644 --- a/src/cwl_platform/base_platform.py +++ b/src/cwl_platform/base_platform.py @@ -87,9 +87,13 @@ def get_task_output_filename(self, task, output_name): ''' Retrieve the output field of the task and return filename''' @abstractmethod - def get_tasks_by_name(self, project, task_name): + def get_tasks_by_name(self, project, task_name=None): ''' Get a tasks by its name ''' + @abstractmethod + def get_task_cost(self, task): + ''' Get a task's cost ''' + @abstractmethod def get_project(self): ''' Determine what project we are running in ''' diff --git a/src/cwl_platform/sevenbridges_platform.py b/src/cwl_platform/sevenbridges_platform.py index 0d5f83ca..3853dbf1 100644 --- a/src/cwl_platform/sevenbridges_platform.py +++ b/src/cwl_platform/sevenbridges_platform.py @@ -412,14 +412,17 @@ def get_task_output_filename(self, task: sevenbridges.Task, output_name): return outputfile.name raise ValueError(f"Output {output_name} does not exist for task {task.name}.") - def get_tasks_by_name(self, project, task_name): # -> list(sevenbridges.Task): + def get_tasks_by_name(self, project, task_name=None): # -> list(sevenbridges.Task): ''' Get a process by its name ''' tasks = [] for task in self.api.tasks.query(project=project).all(): - if task.name == task_name: + if task_name is None or task.name == task_name: tasks.append(task) return tasks + def get_task_cost(self, task): + return(task.price.amount) + def get_project(self): ''' Determine what project we are running in ''' task_id = os.environ.get('TASK_ID') From 1d876334e8681b77b6b6587e11259fa7669d77db Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 27 Sep 2024 17:13:19 -0400 Subject: [PATCH 2/5] Changed self.api.container to self.api.containers --- src/cwl_platform/arvados_platform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cwl_platform/arvados_platform.py b/src/cwl_platform/arvados_platform.py index cde5fc3e..9d4b7e44 100644 --- a/src/cwl_platform/arvados_platform.py +++ b/src/cwl_platform/arvados_platform.py @@ -420,7 +420,7 @@ def get_tasks_by_name(self, project, task_name=None): # -> list(ArvadosTask): for i in range(0, len(cont_reqs), chunk_size): chunk = cont_reqs[i:i + chunk_size] for container in arvados.util.keyset_list_all( - self.api.container().list, + self.api.containers().list, filters=[ ["uuid", "in", [c["container_uuid"] for c in chunk if c["container_uuid"]]], ], From 9cd225fb1877c85c535ba75a88708fec7782fa88 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 4 Oct 2024 13:04:55 -0400 Subject: [PATCH 3/5] Bug fix for get_task_cost to not fail if no cost associated with a task --- src/cwl_platform/sevenbridges_platform.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cwl_platform/sevenbridges_platform.py b/src/cwl_platform/sevenbridges_platform.py index 3853dbf1..63156d70 100644 --- a/src/cwl_platform/sevenbridges_platform.py +++ b/src/cwl_platform/sevenbridges_platform.py @@ -421,7 +421,13 @@ def get_tasks_by_name(self, project, task_name=None): # -> list(sevenbridges.Tas return tasks def get_task_cost(self, task): - return(task.price.amount) + task_cost = 0.0 + try: + task_cost = task.price.amount + except Exception as e: + print("Error getting cost for task: " + str(task)) + + return(task_cost) def get_project(self): ''' Determine what project we are running in ''' From 3d5e9852bb4fdf93f6c22d3bc1b0ec503c2df502 Mon Sep 17 00:00:00 2001 From: smitha26 Date: Wed, 30 Oct 2024 17:17:16 -0400 Subject: [PATCH 4/5] Added new function get_project_cost (also in arvados_platform.py some comments Peter Amstutz gave me) --- src/cwl_platform/arvados_platform.py | 21 +++++++++++++++++++++ src/cwl_platform/base_platform.py | 4 ++++ src/cwl_platform/sevenbridges_platform.py | 12 ++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/cwl_platform/arvados_platform.py b/src/cwl_platform/arvados_platform.py index 9d4b7e44..f10db189 100644 --- a/src/cwl_platform/arvados_platform.py +++ b/src/cwl_platform/arvados_platform.py @@ -408,6 +408,12 @@ def get_tasks_by_name(self, project, task_name=None): # -> list(ArvadosTask): ] if task_name is not None: setup_filters.append(["name", '=', task_name]) + #Potentially easier way suggested by Peter: + #setup_filters.append(["name", 'ilike', '%launcher%']) --> should return single process/container, and take its cumulative_cost, should match adding up all container costs + #even better: + #setup_filters.append(['requesting_container_uuid', '=',None]) --> only the launcher won't have another container request for it, so this is better way to find launcher, then just take its comulative_cost prop + #If only want to count completed launchers: + #setup_filters_append(["state","=","Final"]) (but then also need to check its corresponding container and should be ["state","=","Complete"] and also check exit_code) for container_request in arvados.util.keyset_list_all( self.api.container_requests().list, filters=setup_filters, @@ -439,6 +445,21 @@ def get_tasks_by_name(self, project, task_name=None): # -> list(ArvadosTask): def get_task_cost(self, task): return(task.container["cost"]) + def get_project_cost(self, project): + setup_filters=[ + ['owner_uuid', '=', project['uuid']], ['priority', '>', 0], ['requesting_container_uuid', '=', None] + ] + + projCost = 0.0 + for container_request in arvados.util.keyset_list_all( + self.api.container_requests().list, + filters=setup_filters, + select=["uuid", "owner_uuid", "container_uuid", "name", "cumulative_cost", "properties", "modified_by_user_uuid", "created_at"] + ): + projCost += container_request["cumulative_cost"] + + return(projCost) + def get_project(self): ''' Determine what project we are running in ''' try: diff --git a/src/cwl_platform/base_platform.py b/src/cwl_platform/base_platform.py index dddc9c00..803b6628 100644 --- a/src/cwl_platform/base_platform.py +++ b/src/cwl_platform/base_platform.py @@ -94,6 +94,10 @@ def get_tasks_by_name(self, project, task_name=None): def get_task_cost(self, task): ''' Get a task's cost ''' + @abstractmethod + def get_project_cost(self, project): + ''' Get a project's cost ''' + @abstractmethod def get_project(self): ''' Determine what project we are running in ''' diff --git a/src/cwl_platform/sevenbridges_platform.py b/src/cwl_platform/sevenbridges_platform.py index 63156d70..6d7f3be0 100644 --- a/src/cwl_platform/sevenbridges_platform.py +++ b/src/cwl_platform/sevenbridges_platform.py @@ -429,6 +429,18 @@ def get_task_cost(self, task): return(task_cost) + def get_project_cost(self, project): + # 1. Get a list of tasks from the project + tasks = self.get_tasks_by_name(project) + + # 2. Iterate over tasks and sum up total cost + total_cost = 0.0 + for task in tasks: + total_cost += self.get_task_cost(task) + rounded_total_cost = round(total_cost,2) + + return(rounded_total_cost) + def get_project(self): ''' Determine what project we are running in ''' task_id = os.environ.get('TASK_ID') From 15405f403e725df6c8877275f977f9b9e8e661c3 Mon Sep 17 00:00:00 2001 From: smitha26 Date: Mon, 24 Feb 2025 13:24:35 -0500 Subject: [PATCH 5/5] Some updates based on suggestions by Peter Amstutz I believe --- src/cwl_platform/arvados_platform.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cwl_platform/arvados_platform.py b/src/cwl_platform/arvados_platform.py index f10db189..31c03299 100644 --- a/src/cwl_platform/arvados_platform.py +++ b/src/cwl_platform/arvados_platform.py @@ -434,11 +434,14 @@ def get_tasks_by_name(self, project, task_name=None): # -> list(ArvadosTask): ): conts[container["uuid"]] = container + seen_cids = set() for cr in cont_reqs: cid = cr["container_uuid"] if cid in conts: c = conts[cid] - tasks.append(ArvadosTask(cr,c)) + if cid not in seen_cids: + seen_cids.add(cid) + tasks.append(ArvadosTask(cr,c)) return tasks @@ -447,7 +450,7 @@ def get_task_cost(self, task): def get_project_cost(self, project): setup_filters=[ - ['owner_uuid', '=', project['uuid']], ['priority', '>', 0], ['requesting_container_uuid', '=', None] + ['owner_uuid', '=', project['uuid']], ['requesting_container_uuid', '=', None] ] projCost = 0.0