diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1619b09 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python.analysis.extraPaths": [ + "./banyan-python", + ] +} \ No newline at end of file diff --git a/banyan-polars/banyan_polars/__init__.py b/banyan-polars/banyan_polars/__init__.py new file mode 100644 index 0000000..7e9429c --- /dev/null +++ b/banyan-polars/banyan_polars/__init__.py @@ -0,0 +1,3 @@ +__version__ = "0.1.0" + +from .api.io import read_csv diff --git a/banyan/tests/__init__.py b/banyan-polars/banyan_polars/api/__init__.py similarity index 100% rename from banyan/tests/__init__.py rename to banyan-polars/banyan_polars/api/__init__.py diff --git a/banyan-polars/banyan_polars/api/df.py b/banyan-polars/banyan_polars/api/df.py new file mode 100644 index 0000000..98b5106 --- /dev/null +++ b/banyan-polars/banyan_polars/api/df.py @@ -0,0 +1,75 @@ +import banyan as bn +import polars as pl +from typing_extensions import Self + +from ..communication.lazy_aggregation import LazyAggregation +from .utils_constants import AGGREGATION_FUNCTIONS + +# class GroupBy: +# def __init__(self, fut: bn.Future): +# self.future = fut + +# def __future__(self) -> bn.Future: +# return self.future + +# def agg(self, cols): +# return bn.record_task( +# "res", +# LazyAggregation, +# [self, pl.internals.dataframe.groupby.GroupBy.agg, ], +# ["Blocked", "Consolidated", "Grouped"], +# ) + + +def is_aggregation(expr) -> bool: + if isinstance(expr, list): + return all(is_aggregation(e) for e in expr) + if isinstance(expr, str): + return False + expr_str = str(expr) + return any(s in expr_str for s in AGGREGATION_FUNCTIONS) + + +class DataFrame: + def __init__(self, fut: bn.Future): + self.future = fut + + def __future__(self) -> bn.Future: + return self.future + + # def filter(self, expr) -> Self: + # return DataFrame( + # bn.record_task( + # "res", + # pl.DataFrame.filter, + # [self, expr], + # ["Blocked", "Consolidated", 
"Grouped"], + # ) + # ) + + def select(self, expr) -> Self: + if is_aggregation(expr): + raise ValueError( + f"select received expression {str(expr)} that has an aggregation function not currently supported" + ) + return DataFrame( + bn.record_task( + "res", + pl.DataFrame.select, + [self, expr], + ["Blocked", "Consolidated", "Grouped"], + ) + ) + + # def groupby(self, cols) -> GroupBy: + # keys = [col for col in bn.utils.to_list(cols)] + # # TODO: Convert keys to strings if they are columns/expressions + # keys_grouping_pts = [bn.pt("Grouped", key=key for key in keys] + # return GroupBy( + # bn.record_task( + # "res", + # pl.DataFrame.groupby, + # [self, cols], + # ["Blocked", "Consolidated", *keys_grouping_pts], + # ) + # ) diff --git a/banyan-polars/banyan_polars/api/io.py b/banyan-polars/banyan_polars/api/io.py new file mode 100644 index 0000000..6ed57df --- /dev/null +++ b/banyan-polars/banyan_polars/api/io.py @@ -0,0 +1,16 @@ +import banyan as bn + +from ..communication.location_spec import LocationSpec +from . 
import df + + +def read_csv(p): + # res = LocationSpec(p) where p: Blocked | Consolidated | Grouped + return df.DataFrame( + bn.record_task( + "res", + LocationSpec, + [p, "csv"], + ["Blocked", "Consolidated", "Grouped"], + ) + ) diff --git a/banyan-polars/banyan_polars/api/utils_constants.py b/banyan-polars/banyan_polars/api/utils_constants.py new file mode 100644 index 0000000..cae0abc --- /dev/null +++ b/banyan-polars/banyan_polars/api/utils_constants.py @@ -0,0 +1,86 @@ +AGGREGATION_FUNCTIONS = [ + "any", + "all", + "agg_groups", + "count", + "len", + "slice", + "append", + "rechunk", + "cumsum", + "cumprod", + "cummin", + "cummax", + "cumcount", + "dot", + "mode", + "sort", + "top_k", + "arg_sort", + "arg_max", + "arg_min", + "search_sorted", + "sort_by", + "take", + "shift", + "shift_and_fill", + "forward_fill", + "backward_fill", + "reverse", + "std", + "var", + "max", + "min", + "nan_max", + "nan_min", + "mean", + "median", + "product", + "n_unique", + "arg_unique", + "unique", + "first", + "last", + "over", + "is_unique", + "is_first", + "is_duplicated", + "quantile", + "flatten", + "explode", + "take_every", + "head", + "tail", + "limit", + "interpolate", + "rolling_min", + "rolling_max", + "rolling_mean", + "rolling_sum", + "rolling_std", + "rolling_var", + "rolling_median", + "rolling_quantile", + "rolling_apply", + "rolling_skew", + "argsort", + "rank", + "diff", + "pct_change", + "skew", + "curtosis", + "lower_bound", + "upper_bound", + "reshape", + "shuffle", + "sample", + "ewm_mean", + "ewm_std", + "ewm_var", + "extend_constant", + "value_counts", + "unique_counts", + "entropy", + "cumulative_eval", + "list", +] diff --git a/banyan-polars/banyan_polars/communication/__init__.py b/banyan-polars/banyan_polars/communication/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/banyan-polars/banyan_polars/communication/df.py b/banyan-polars/banyan_polars/communication/df.py new file mode 100644 index 0000000..3bcc615 --- /dev/null +++ 
b/banyan-polars/banyan_polars/communication/df.py @@ -0,0 +1 @@ +# TODO: Add `convert_partition_type` implementation here diff --git a/banyan-polars/banyan_polars/communication/lazy_aggregation.py b/banyan-polars/banyan_polars/communication/lazy_aggregation.py new file mode 100644 index 0000000..7ef6048 --- /dev/null +++ b/banyan-polars/banyan_polars/communication/lazy_aggregation.py @@ -0,0 +1,24 @@ +class LazyAggregation: +    """ +    Store information to lazily aggregate data across multiple workers when +    the future for the `LazyAggregation` is converted from None to Consolidated +    partition type. +    """ + +    def __init__( +        self, +        data, +        data_func, +        value_func, +        data_func_args=None, +        value_func_args=None, +    ): +        self.data = data +        self.data_func = data_func +        self.value_func = value_func +        self.data_func_args = [] if data_func_args is None else data_func_args +        self.value_func_args = ( +            [] if value_func_args is None else value_func_args +        ) + +    # TODO: Add `convert_partition_type` implementation here diff --git a/banyan-polars/banyan_polars/communication/location_spec.py b/banyan-polars/banyan_polars/communication/location_spec.py new file mode 100644 index 0000000..c17fdf2 --- /dev/null +++ b/banyan-polars/banyan_polars/communication/location_spec.py @@ -0,0 +1,6 @@ +class LocationSpec: +    def __init__(self, pattern, format): +        self.pattern = pattern +        self.format = format + +    # TODO: Add `convert_partition_type` implementation here diff --git a/banyan-polars/poetry.lock b/banyan-polars/poetry.lock new file mode 100644 index 0000000..8fad675 --- /dev/null +++ b/banyan-polars/poetry.lock @@ -0,0 +1,231 @@ +[[package]] +name = "banyan-python" +version = "0.2.0" +description = "Instant large-scale computing with Python APIs you already know and love" +category = "dev" +optional = false +python-versions = "^3.8" +develop = false + +[package.dependencies] +boto3 = "^1.26.37" +botocore = "^1.23.48" +cloudpickle = "^2.2.0" +pytz = "^2021.3" +requests = "^2.27.1" +toml
= "^0.10.2" +typing-extensions = "^4.4.0" + +[package.source] +type = "directory" +url = "../banyan-python" + +[[package]] +name = "boto3" +version = "1.26.42" +description = "The AWS SDK for Python" +category = "dev" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.29.42,<1.30.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.29.42" +description = "Low-level, data-driven core of boto 3." +category = "dev" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.15.3)"] + +[[package]] +name = "certifi" +version = "2022.12.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "2.1.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "dev" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "cloudpickle" +version = "2.2.0" +description = "Extended pickling support for Python objects" +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "dev" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "polars" +version = "0.15.11" +description = "Blazingly fast DataFrame library" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing_extensions = {version = ">=4.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +deltalake = ["deltalake"] +pyarrow = ["pyarrow (>=4.0.0)"] +pandas = ["pyarrow (>=4.0.0)", "pandas"] +connectorx = ["connectorx"] +xlsx2csv = ["xlsx2csv (>=0.8.0)"] +all = ["polars"] +timezone = ["backports.zoneinfo", "tzdata"] +numpy = ["numpy (>=1.16.0)"] +fsspec = ["fsspec"] +matplotlib = ["matplotlib"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2021.3" +description = "World timezone definitions, modern and historical" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." 
+category = "dev" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +category = "dev" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "typing-extensions" +version = "4.4.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "urllib3" +version = "1.26.13" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.8" +content-hash = "aacc4e2a24b5c08750b173dcb655324a06caa516be360d7749e11930fb5eb580" + +[metadata.files] +banyan-python = [] +boto3 = [] +botocore = [] +certifi = [] +charset-normalizer = [] +cloudpickle = [] +idna = [] +jmespath = [] +polars = [] +python-dateutil = [] +pytz = [] +requests = [] +s3transfer = [] +six = [] +toml = [] +typing-extensions = [] +urllib3 = [] diff --git a/banyan-polars/pyproject.toml b/banyan-polars/pyproject.toml new file mode 100644 index 0000000..1ffdc8d --- /dev/null +++ b/banyan-polars/pyproject.toml @@ -0,0 +1,20 @@ +[tool.poetry] +name = "banyan-polars" +version = "0.1.0" +description = "Instant large-scale data analytics with the friendly polars API" +authors = ["Banyan Computing "] +license = "Apache-2.0" +packages = [ + { include = "banyan_polars" } +] + +[tool.poetry.dependencies] +python = "^3.8" +polars = "^0.15.11" + +[tool.poetry.dev-dependencies] +banyan-python = { path = "../banyan-python" } + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/banyan-polars/tests/test_polars_annotation.py b/banyan-polars/tests/test_polars_annotation.py new file mode 100644 index 0000000..60626b9 --- /dev/null +++ b/banyan-polars/tests/test_polars_annotation.py @@ -0,0 +1,12 @@ +import banyan as bn +import banyan_polars as bpl +import polars as pl + + +def test_task_graph_construction(): + df = bpl.read_csv("s3://test-bucket") + df = df.select(pl.col("species") == "iris") + tg = bn.annotation.to_future(df)._task_graph + assert len(tg) == 2 + assert 
tg[0].name.startswith("LocationSpec") + assert tg[1].name.startswith("select") diff --git a/banyan/LICENSE b/banyan-python/LICENSE similarity index 100% rename from banyan/LICENSE rename to banyan-python/LICENSE diff --git a/banyan/README.md b/banyan-python/README.md similarity index 100% rename from banyan/README.md rename to banyan-python/README.md diff --git a/banyan-python/banyan/__init__.py b/banyan-python/banyan/__init__.py new file mode 100644 index 0000000..59f234f --- /dev/null +++ b/banyan-python/banyan/__init__.py @@ -0,0 +1,30 @@ +import logging +import os + +import boto3 + + +__version__ = "0.2.0" + + +# Check if AWS region is set. If not, default to us-west-2 and give a warning +if boto3.Session().region_name == None: + logging.warning( + "Defaulting to region us-west-2. If you want to use a different AWS region, " + "please set the `AWS_DEFAULT_REGION` environment variable or update the " + "default region in `~/.aws/config`, before importing `banyan`." + ) + os.environ["AWS_DEFAULT_REGION"] = "us-west-2" + + +from banyan.annotation import Future, record_task +from banyan.config import configure +from banyan.constants import * # TODO: Should this really be here? 
+from banyan.sessions import start_session +from banyan.utils_future_computation import PartitionType, pt + +__all__ = ( + "configure", + "SessionInfo", + "start_session", +) diff --git a/banyan-python/banyan/annotation.py b/banyan-python/banyan/annotation.py new file mode 100644 index 0000000..cd49e67 --- /dev/null +++ b/banyan-python/banyan/annotation.py @@ -0,0 +1,192 @@ +from typing import Any, Dict, List, Optional, Union + +from .sessions import get_session_id +from .utils import send_request_get_response, to_list +from .utils_communication import receive_to_client, send_to_client +from .utils_future_computation import ( + FutureComputation, + FutureId, + PartitionType, + TaskGraph, + _new_future_id, + is_future_id, +) + + +class Future: + """ + Represents data not yet computed and stores the steps to compute it + """ + + def __init__(self): + self._id: FutureId = _new_future_id() + self._task_graph: TaskGraph = [] + + @property + def id(self): + """ + Returns a unique ID for this future computation + """ + return self._id + + def _record_task(self, fc: FutureComputation): + self._task_graph.append(fc) + + def compute(self): + """ + Computes this future and returns its concrete value + """ + record_task(self, send_to_client, self, {self: "Consolidated"}) + send_request_get_response( + "run_computation", + { + "session_id": get_session_id(), + "task_graph": self._task_graph.to_dict(), + "future_ids": [self.id], + }, + ) + return receive_to_client() + + + def __future__(self): + return self + + +def to_future(obj) -> Optional[Future]: + if isinstance(obj, Future) or hasattr(obj, "__future__"): + return obj.__future__() + else: + return None + + +def _futures_to_future_ids( + futures: Union[Future, List[Future]] +) -> Optional[List[FutureId]]: + if isinstance(futures, Future): + return [futures.id] + elif futures is None: + return None + else: + return [ + (future.id if isinstance(future, Future) else future) + for future in futures + ] + + +def 
_to_futures_list(l: List) -> List: + return [(to_future(x) if to_future(x) is not None else x) for x in l] + + +PartitioningSpec = Union[ + str, + PartitionType, + Dict[Union[Future, FutureId], Union[PartitionType, List[PartitionType]]], +] + + +def record_task( + results: Union[Future, List[Future]], + func: Any, + args: Union[Future, List[Future]], + partitioning: Union[PartitioningSpec, List[PartitioningSpec]], + static=None, +): + """ + Records a task with the given function in the task graph for the results + + Given result futures, a function, and argument futures (think + "results = func(args)"), this will record a task in the result futures' + task graphs with the function and references to the argument futures. + + A partitioning can also be specified to indicate the partition types that + can be assigned to the result and argument futures. + + Arguments + --------- + results : Union[Union[Future, str], List[Union[Future, str]]] + The futures for the results of the function. If a string is provided, + a new future is automatically created for the result and returned. + func : Any + The function that gets run when this recorded task is finally executed + args : Union[Union[Future, str], List[Union[Future, str]], Any] + The futures or concrete values to be passed into the function + partitioning : PartitioningSpec + The assignment of partition types to each result or argument future. + This can be either a partition type or partition type name (in which + case the partition type is applied to all futures) or a dictionary + mapping from future (or future ID) to a partition type or list of + partition types. + + Returns + ------- + A future or list of futures depending on whether there are one or more + result futures. 
+ + Examples + -------- + >>> bn.record_task( + "res", + pl.DataFrame.filter, + [self, expr], + ["Blocked", "Consolidated", "Grouped"], + ) + """ + args = _to_futures_list(to_list(args)) + results = to_list(results) + arg_ids = list(filter(is_future_id, _futures_to_future_ids(args))) + result_ids = _futures_to_future_ids(results) + + # Generate new futures if results or partitioning keys are string variable names + new_futures: Dict[str, Future] = {} + for i in range(len(results)): + if isinstance(results[i], str): + new_future = Future() + new_futures[results[i]] = new_future + results[i] = new_future + result_ids[i] = new_future.id + partitioning = to_list(partitioning) + for partitioning_map in partitioning: + if isinstance(partitioning_map, dict): + for k in list(partitioning_map.keys()): + if k in new_futures: + partitioning_map[new_futures[k]] = partitioning_map[k] + partitioning_map.pop(k, None) + + # Construct a `FutureComputation` for the new task` + fc = FutureComputation( + func, + _futures_to_future_ids(args), + result_ids, + [ + { + (f.id if isinstance(f, Future) else f): ( + pt if isinstance(pt, PartitionType) else PartitionType(pt) + ) + for f, pt in p.items() + } + if isinstance(p, dict) + else { + f: p if isinstance(p, PartitionType) else PartitionType(p) + for f in arg_ids + result_ids + } + for p in partitioning + ], + static=_futures_to_future_ids(static), + name=func.__name__, + ) + + # Record the task for each result + for result in results: + # Record each task required to construct the arguments + for arg in args: + if isinstance(arg, Future): + for task in arg._task_graph: + result._record_task(task) + + # Record the new task + result._record_task(fc) + + # Return result futures + if len(results) == 1: + return results[0] + return results diff --git a/banyan/banyan/config.py b/banyan-python/banyan/config.py similarity index 85% rename from banyan/banyan/config.py rename to banyan-python/banyan/config.py index 648e6f6..38e356f 100644 --- 
a/banyan/banyan/config.py +++ b/banyan-python/banyan/config.py @@ -3,8 +3,12 @@ loading configurations. """ -from banyan.imports import * from copy import deepcopy +import os +from typing import Optional + +import toml + banyan_config = None # Global variable representing configuration @@ -43,7 +47,9 @@ def write_config(banyanconfig_path: Optional[str] = None): os.path.expanduser("~"), ".banyan/banyanconfig.toml" ) - os.makedirs(os.path.join(os.path.expanduser("~"), ".banyan/"), exist_ok=True) + os.makedirs( + os.path.join(os.path.expanduser("~"), ".banyan/"), exist_ok=True + ) with open(banyanconfig_path, "w") as f: toml.dump(banyan_config, f) @@ -53,7 +59,6 @@ def write_config(banyanconfig_path: Optional[str] = None): def configure( user_id: Optional[str] = None, api_key: Optional[str] = None, - ec2_key_pair_name: Optional[str] = None, banyanconfig_path: Optional[str] = None, ): """Sets configuration. @@ -63,7 +68,6 @@ def configure( Arguments: - user_id:Optional[str], defaults to None - api_key:Optional[str], defaults to None - - ec2_key_pair_name:Optional[str], defaults to None - banyanconfig_path:Optional[str], defaults to None file to save configurations to if None (recommended), will use "$HOME/.banyan/banyanconfig.toml" @@ -82,10 +86,6 @@ def configure( if api_key is None and not api_key_env is None: api_key = api_key_env - ec2_env = os.getenv("BANYAN_EC2_KEY_PAIR_NAME") - if ec2_key_pair_name is None and not ec2_env == None: - ec2_key_pair_name = ec2_env - # Check banyan_config file banyan_config_has_info = not (banyan_config is None or banyan_config == {}) if ( @@ -102,13 +102,6 @@ def configure( and "api_key" in banyan_config["banyan"] ): api_key = banyan_config["banyan"]["api_key"] - if ( - ec2_key_pair_name is None - and banyan_config_has_info - and "aws" in banyan_config - and "ec2_key_pair_name" in banyan_config["aws"] - ): - ec2_key_pair_name = banyan_config["aws"]["ec2_key_pair_name"] # Ensure a configuration has been created or can be created. 
Otherwise, # return nothing @@ -119,7 +112,6 @@ def configure( "banyan": {"user_id": user_id, "api_key": api_key}, "aws": {}, } - is_modified = True else: raise Exception( "Your user ID and API key must be specified using either keyword arguments, environment variables, or banyanconfig.toml" diff --git a/banyan/banyan/constants.py b/banyan-python/banyan/constants.py similarity index 100% rename from banyan/banyan/constants.py rename to banyan-python/banyan/constants.py diff --git a/banyan-python/banyan/session_info.py b/banyan-python/banyan/session_info.py new file mode 100644 index 0000000..ac020b8 --- /dev/null +++ b/banyan-python/banyan/session_info.py @@ -0,0 +1,11 @@ +from dataclasses import dataclass + + +@dataclass +class SessionInfo: + """Information about a particular session.""" + session_name: str + session_id: str + num_workers: int + scatter_queue_url: str + gather_queue_url: str diff --git a/banyan-python/banyan/sessions.py b/banyan-python/banyan/sessions.py new file mode 100644 index 0000000..0384836 --- /dev/null +++ b/banyan-python/banyan/sessions.py @@ -0,0 +1,212 @@ +import io +import json +import os +import shutil +import time +from typing import Dict +import zipfile + +import boto3 +from botocore.exceptions import ClientError + +from banyan.config import configure +from banyan.session_info import SessionInfo +from banyan.utils import convert_iso_time, send_request_get_response + + +SessionId = str +SUPPORTED_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"] + +_iam_client = boto3.client("iam") +_lambda_client = boto3.client("lambda") +_s3_client = boto3.client("s3") + + +curr_session_info: Dict[SessionId, SessionInfo] = {} + + +def _get_executor_code_from_s3(): + return _s3_client.get_object( + Bucket="banyan-executor", + Key="executor.py" + ) + + +def _get_executor_lambda_zip(site_packages_dir: str): + # Construct zipfile with site_packages_dir if provided + zipfile_name = "executor_lambda_code" + if site_packages_dir: + 
shutil.make_archive(zipfile_name, "zip", root_dir=site_packages_dir, base_dir=".") +    else: +        # Create empty zipfile +        with zipfile.ZipFile(zipfile_name + ".zip", "w") as f: +            pass +    # Add executor code file +    executor_code = _get_executor_code_from_s3()["Body"].read().decode("utf-8") +    with zipfile.ZipFile(zipfile_name + ".zip", "a") as f: +        f.writestr("lambda_function.py", executor_code) +    # Upload to S3. First create a S3 bucket if not already existing +    # and then upload the zip file. +    bucket_name = "banyan-assets" +    s3_key = zipfile_name + ".zip" +    try: +        _s3_client.create_bucket( +            Bucket=bucket_name, +            CreateBucketConfiguration={"LocationConstraint": _s3_client.meta.region_name} +        ) +        _s3_client.get_waiter("bucket_exists").wait(Bucket=bucket_name) +    except ClientError as error: +        if not error.response["Error"]["Code"] in ["BucketAlreadyExists", "BucketAlreadyOwnedByYou"]: +            raise +    with open(zipfile_name + ".zip", "rb") as f: +        _s3_client.put_object( +            Bucket=bucket_name, +            Body=f, +            Key=s3_key +        ) +    return bucket_name, s3_key + + +def _update_executor_code( +    executor_lambda_function_name: str, +    lambda_last_modified: int +): +    executor_code_object = _get_executor_code_from_s3() +    executor_code_last_modified = executor_code_object["LastModified"].timestamp() +    if executor_code_last_modified > lambda_last_modified: +        # Update executor code +        bucket_name, s3_key = _get_executor_lambda_zip(None) +        _lambda_client.update_function_code( +            FunctionName=executor_lambda_function_name, +            S3Bucket=bucket_name, +            S3Key=s3_key, +        ) + + +def _create_executor_lambda_iam_role(): +    assume_role_policy = { +        "Version": "2012-10-17", +        "Statement": [ +            { +                "Effect": "Allow", +                "Principal": { +                    "Service": "lambda.amazonaws.com" +                }, +                "Action": "sts:AssumeRole" +            }, +        ] +    } +    basic_lambda_policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" +    iam_role_name = "banyan-executor-lambda-role" +    try: +        role = _iam_client.create_role( +            RoleName=iam_role_name,
AssumeRolePolicyDocument=json.dumps(assume_role_policy) + ) + _iam_client.get_waiter("role_exists").wait(RoleName=iam_role_name) + _iam_client.attach_role_policy(RoleName=iam_role_name, PolicyArn=basic_lambda_policy_arn) + # Sleep to ensure that the policy has been attached + # TODO: Instead of sleeping, adding a try/catch or a while loop to check + # if the policy has been attached would be more robust. + time.sleep(5) + except ClientError as error: + if error.response["Error"]["Code"] == "EntityAlreadyExists": + role = _iam_client.get_role(RoleName=iam_role_name) + else: + raise + return role["Role"]["Arn"] + + +def _create_executor_lambda_function( + site_packages_dir: str, version: str, environment_hash: str +): + """Creates an executor Lambda if one doesn't exist with the same hash.""" + executor_lambda_function_name = ( + f"executor_python{version.replace('.', '-')}_{environment_hash}" + ) + # Note that there may be race conditions here, since creating a lambda might + # take some time. 
+ # Check if the function exists, and if it does not, create one + try: + executor_info = _lambda_client.get_function( + FunctionName=executor_lambda_function_name + ) + last_modified = int( + convert_iso_time(executor_info["Configuration"]["LastModified"]) + ) + _update_executor_code( + executor_lambda_function_name=executor_lambda_function_name, + lambda_last_modified=last_modified, + ) + except _lambda_client.exceptions.ResourceNotFoundException: + # Zip together directory and create function + bucket_name, s3_key = _get_executor_lambda_zip(site_packages_dir) + _lambda_client.create_function( + FunctionName=executor_lambda_function_name, + Runtime=f"python{version}", + Role=_create_executor_lambda_iam_role(), + Handler="executor.lambda_handler", + Code={ + "S3Bucket": bucket_name, + "S3Key": s3_key, + }, + Timeout=900, + MemorySize=10240, + ) + + +def _compute_environment_hash(site_packages_dir: str): + """Computes last modified date of the given directory/subdirectories.""" + if not site_packages_dir: + return "0000" + last_modified = int( + max(os.path.getmtime(root) for root,_,_ in os.walk(site_packages_dir)) + ) + return last_modified + + +def start_session( + num_workers: int = 16, + python_version: str = "3.8", + session_name: str = None, + site_packages_dir: str = None, + *args, + **kwargs, +): + """Starts a new session.""" + configure(*args, **kwargs) + + if python_version not in SUPPORTED_PYTHON_VERSIONS: + raise ValueError( + f"Only the following Python versions are supported: " + f"{SUPPORTED_PYTHON_VERSIONS}" + ) + environment_hash = _compute_environment_hash(site_packages_dir=site_packages_dir) + _create_executor_lambda_function( + site_packages_dir=site_packages_dir, + version=python_version, + environment_hash=environment_hash, + ) + response = send_request_get_response( + "start-session", + { + "num_workers": num_workers, + "version": python_version, + "environment_hash": environment_hash, + "aws_region": _lambda_client.meta.region_name, + 
"session_name": session_name, + }, + ) + session = SessionInfo( + session_name=response["session_name"], + session_id=response["session_id"], + num_workers=num_workers, + scatter_queue_url=response["scatter_queue_url"], + gather_queue_url=response["gather_queue_url"], + ) + global curr_session_info + curr_session_info[response["session_id"]] = session + + +def get_session_id() -> SessionId: + return "" diff --git a/banyan-python/banyan/typing.py b/banyan-python/banyan/typing.py new file mode 100644 index 0000000..09cf5ed --- /dev/null +++ b/banyan-python/banyan/typing.py @@ -0,0 +1,9 @@ +from .annotation import PartitioningSpec +from .utils_future_computation import ( + FutureComputation, + FutureId, + Partitioning, + PartitioningMulti, + PartitionTypeId, + TaskGraph, +) diff --git a/banyan/banyan/utils.py b/banyan-python/banyan/utils.py similarity index 56% rename from banyan/banyan/utils.py rename to banyan-python/banyan/utils.py index d52a93f..b4a6f28 100644 --- a/banyan/banyan/utils.py +++ b/banyan-python/banyan/utils.py @@ -1,23 +1,18 @@ -from .constants import BANYAN_API_ENDPOINT - -import boto3 -import codecs import hashlib import inspect import json -import logging import os -import pickle import platform +from datetime import datetime +from typing import Dict, List, Optional + +import boto3 import pytz import requests import toml - -from botocore.exceptions import ClientError from .config import load_config -from datetime import datetime -from typing import Dict +from .constants import BANYAN_API_ENDPOINT s3_client = boto3.client("s3") @@ -46,17 +41,7 @@ def method_to_endpoint(method): return method.replace("_", "-") -def s3_bucket_arn_to_name(s3_bucket_arn: str): - # Get s3 bucket name from arn - s3_bucket_name = s3_bucket_arn.split(":")[-1] - if s3_bucket_name.endswith("/") or s3_bucket_name.endswith("*"): - s3_bucket_name = s3_bucket_name[:-1] - elif s3_bucket_name.endswith("/*"): - s3_bucket_name = s3_bucket_name[:-2] - return s3_bucket_name - - -def 
parse_bytes(s:str): +def parse_bytes(s: str): s = s.replace(" ", "") if not any([char.isdigit() for char in s]): s = "1" + s @@ -91,13 +76,16 @@ def send_request_get_response(method: str, content: dict): user_id = configuration["banyan"]["user_id"] api_key = configuration["banyan"]["api_key"] - url = (BANYAN_API_ENDPOINT) + method_to_endpoint(method) + url = BANYAN_API_ENDPOINT + "banyan-main" + content["method"] = method.replace("-", "_") content["debug"] = is_debug_on() headers = { "content-type": "application/json", "Username-APIKey": f"{user_id}-{api_key}", } - resp = requests.post(url=url, json=content, headers=headers) # , timeout=30) + resp = requests.post( + url=url, json=content, headers=headers + ) # , timeout=30) data = json.loads(resp.text) if resp.status_code == 403: raise Exception( @@ -107,12 +95,13 @@ def send_request_get_response(method: str, content: dict): # HTTP request timed out, for example if isinstance(data, Dict) and "message" in data: data = data["message"] - # @error data #????? return None elif resp.status_code == 500 or resp.status_code == 504: raise Exception(data) elif resp.status_code == 502: - raise Exception("Sorry there has been an error. Please contact support.") + raise Exception( + "Sorry there has been an error. Please contact support." + ) return data @@ -126,33 +115,40 @@ def get_python_version(): Returns ------- - string - Python version is returned as a string + str + The Python version returned as a string """ return platform.python_version() +def convert_iso_time(time): + """Convert time from ISO-8601 format (YYYY-MM-DDThh:mm:ss.sTZD) + to seconds.""" + return datetime.strptime( + time, "%Y-%m-%dT%H:%M:%S.%f%z" + ).timestamp() + + def parse_time(time): """Converts given time to local timezone. Parameters --------- - time : string + time : str The current time in the format "yyyy-mm-dd-HH:MM:SSzzzz" + Returns ------- - string - The DateTime is returned. 
+ str + The `DateTime` for the given time string """ time = datetime.fromisoformat(time) - # time = datetime.strptime(time[:-4], '%Y-%m-%d-%H:%M:%S') #we don't want milli-second timezone = pytz.timezone("UTC") time = timezone.localize(time) local_time = time.astimezone() return local_time -# TO DO - to test this function def get_loaded_packages(): """Returns all the packages/libraries that are currently imported by the user""" return [ @@ -163,59 +159,30 @@ def get_loaded_packages(): def get_hash(s): - """Gets a unique represetation of a string + """Gets a unique representation of a string Parameters ---------- - s : string + s : str + The string to take a hash of Returns ------- - string : - the SHA256 hash of the string + str + The SHA256 hash of the string """ hs = hashlib.sha256(s.encode("utf-8")).hexdigest() return hs -def upload_file_to_s3(filename, bucket, object_name=None): - """Uploads file to the S3 bucket - - Parameters - ---------- - filename: string - Is the local path to the file to upload - bucket: string - Is the S3 bucket to which to upload the file to - - Returns - ------- - boolean: True if file was uploaded, else False - """ - - # if S3 object_name not specified, use filename - key = os.path.basename(filename) - - # upload the file - try: - reponse = s3_client.upload_file( - filename, bucket, object_name if object_name is not None else key - ) - except ClientError as e: - logging.error(e) - return False - return True - - def load_toml(path): - # path --> "file://.banyan/banyanconfig.toml" - # path[2:], path[4:8], path[:9] if isinstance(path, list): result = {} for p in path: result.update(load_toml(p)) return result + if path.startswith("file://"): return toml.load(path[7:-1]) @@ -223,31 +190,14 @@ def load_toml(path): raise Exception("S3 path not currently supported") elif (path.startswith("http://")) or (path.startswith("https://")): - r = ( - requests.get(path) - ).content # downloads the data from the internet into a toml-fomatted string + 
(requests.get(path)).content return toml.loads(requests.get(path).text) -def to_py_value_contents(py): - # Handle functions defined in a module - # TODO: Document this special case - # if jl isa Function && !(isdefined(Base, jl) || isdefined(Core, jl) || isdefined(Main, jl)) - # if jl isa Expr && eval(jl) isa Function - # jl = Dict("is_banyan_udf" => true, "code" => jl) - # end - - # Convert Python object to string - return codecs.encode(pickle.dumps(py), "base64").decode() - - -def from_py_value_contents(py_value_contents): - # Converty string to Python object - return pickle.loads(codecs.decode(py_value_contents.encode(), "base64")) - - # # Handle functions defined in a module - # if res isa Dict && haskey(res, "is_banyan_udf") && res["is_banyan_udf"] - # eval(res["code"]) - # else - # res - # end +def to_list(l) -> Optional[List]: + if isinstance(l, List): + return l + elif l is None: + return None + else: + return [l] diff --git a/banyan-python/banyan/utils_communication.py b/banyan-python/banyan/utils_communication.py new file mode 100644 index 0000000..0aeaff0 --- /dev/null +++ b/banyan-python/banyan/utils_communication.py @@ -0,0 +1,12 @@ +from typing import Any + +# TODO: Store gather/scatter/shuffle queues +# TODO: Implement send_to, receive_from + + +def send_to_client(data: Any): + raise NotImplementedError() + + +def receive_to_client(): + raise NotImplementedError() diff --git a/banyan-python/banyan/utils_future_computation.py b/banyan-python/banyan/utils_future_computation.py new file mode 100644 index 0000000..89a70ce --- /dev/null +++ b/banyan-python/banyan/utils_future_computation.py @@ -0,0 +1,443 @@ +# NOTE: This file is copied from banyan-python but could be removed after +# banyan-python is published. 
+ +import json +import random +import string +from copy import deepcopy +from hashlib import md5 +from typing import Any, Dict, List, Optional, Set, Union + +from typing_extensions import Self + +from .utils_serialization import from_str, to_str + +"""ID of a future computation created on the client side""" +FutureId = str + + +"""ID of a partition type for use in the executor""" +PartitionTypeId = str + + +def is_future_id(id: FutureId) -> bool: + return isinstance(id, FutureId) and id.startswith("bn_fut_") + + +def _new_future_id() -> FutureId: + return "bn_fut_" + "".join( + random.choice(string.ascii_lowercase) for i in range(10) + ) + + +class PartitionType: + """ + Specifies how data can be partitioned + + Attributes + ---------- + name : str + The primary identifier of the partition type. E.g. - "Blocked", + "Grouped", or "Consolidated" + params : Dict[str, Any] + Additional information about how the data is partitioned. + + For example, + a Grouped partition type may have `params` set to + `{"key": "AgeSegment"}` to indicate that the data should be distributed + across workers such that all the data for an age segment are on the + same worker. + id : PartitionTypeId + This identifies a partition type. + + It's based solely on the name and + parameters though it could in the future be made such that it is also + preserved even after merged with other partition types. 
+ """ + + def __init__( + self, + name: str, + params: Optional[Dict[str, Any]] = None, + ) -> Self: + self._name = name + self._params = params if params is not None else {} + self._id = None + + @property + def name(self) -> str: + return self._name + + @property + def params(self) -> Dict[str, Any]: + return self._params + + @property + def id(self) -> PartitionTypeId: + if self._id is None: + self._id = md5( + json.dumps( + { + **self._params, + "bn_pt_name": self._name, + }, + sort_keys=True, + ).encode() + ).hexdigest() + return self._id + + def __eq__(self, __o: object) -> bool: + return self.id == __o.id + + def __hash__(self) -> int: + return int(self.id, 16) + + def _merge_with(self, other: Self, only_check=False) -> bool: + """ + Merges another `PartitionType` into self's parameters + + Merging requires name and parameters to match. Parameters match when + there isn't a different value for the same key. A static PT _can_ be + merged with a non-static PT. For a given `List[FutureComputation]`, + after PTs are merged, they are reassigned to each future and if the + PT was previously static for that future, the new merged PT is set to + be static for it. For example, two PTs with parameters + `{"name": "Grouped", "reverse": True}` and + `{"name": "Grouped", "key": "species"}` can + be merged to produce + `{"name": "Grouped", "key": "species", "reverse": True}`. 
+ """ + + # Quick checks + if self.name != other.name: + return False + if len(other.params) == 0 or self.params == other.params: + return True + if len(self.params) == 0: + self._params = other.params + return True + + # Check for inconsistency between the 2 PTs + if self.params != other.params: + for ak, av in self.params.items(): + if ak in other.params and av != other.params[ak]: + return False + + # Update a with parameters in b and return + if not only_check: + self.params.update(other.params) + + return True + + @property + def is_blocked(self) -> bool: + """ + Standard partition type for data that is equally split across workers + + It is not guaranteed that data with blocked partitioning is also + balanced, which is a key precondition for operations like horizontal + concatenation of tables. Also, it is not guaranteed that the order + of data is preserved which may be key for some computation. For these + scenarios, new partition types may be developed. + """ + return self.name == "Blocked" + + @property + def is_grouped(self) -> bool: + """ + Standard partition type for data that is grouped by some "key" and + distributed across workers such that each worker has distinct groups + """ + return self.name == "Grouped" + + @property + def is_consolidated(self) -> bool: + """ + Standard partition type for data that is consolidated on the main + worker + """ + return self.name == "Consolidated" + + @property + def is_none(self) -> bool: + """ + Standard partition type for data that requires some final computation + before being returned to the user + + For example, the result of some aggregation (a `PartialAggregation`) + may be assigned the None partition type so that + `convert_partition_type` (converting from None to Consolidated) + completes the aggregation. 
+ """ + return self.name == "None" + + def to_dict(self) -> Dict[str, Any]: + return { + "name": self._name, + "params": self._params, + } + + def from_dict(data: Dict[str, Any]) -> Self: + return PartitionType(data["name"], data["params"]) + + def __str__(self) -> str: + params = {k: v for k, v in self.params.items() if k != "name"} + params_str = ", ".join([f"{k}: {v}" for k, v in params.items()]) + return f"{self.name}({params_str})" + + +def _arg_to_dict_or_str(arg: Any) -> Union[str, Dict[str, Any]]: + if is_future_id(arg): + return arg + elif isinstance(arg, PartitionType): + return arg.to_dict() + else: + return to_str(arg) + + +def pt(name, **kwargs) -> PartitionType: + return PartitionType(name, kwargs) + + +def _arg_from_dict_or_str( + arg: Union[str, Dict[str, Any]], use_cloudpickle=False +) -> Any: + if is_future_id(arg): + return arg + elif isinstance(arg, dict): + return PartitionType.from_dict(arg) + elif use_cloudpickle: + return from_str(arg) + else: + return arg + + +Partitioning = Dict[FutureId, PartitionType] +PartitioningMulti = Dict[FutureId, List[PartitionType]] + + +def make_partitioning( + future_ids: List[FutureId], pt: PartitionType +) -> Partitioning: + return {future_id: deepcopy(pt) for future_id in future_ids} + + +def _partitioning_from_dict(d: Dict[str, Any]) -> PartitioningMulti: + return { + fid: [PartitionType.from_dict(pt) for pt in pts] + for fid, pts in d.items() + } + + +def _partitioning_to_dict(partitioning: PartitioningMulti) -> Dict[str, Any]: + return { + fid: [pt.to_dict() for pt in pts] for fid, pts in partitioning.items() + } + + +class FutureComputation: + """ + Info to perform some computation in the future + in parallel across multiple workers + + Attributes + ---------- + func : Any + The function to run for the computation. This may be pickled. + + This is either a function or a string for a special function like + "convert_partition_type". + args : List[Union[FutureId, Any]] + The arguments for the computation. 
They could either be IDs of futures + or constant values (that may be pickled). + results : List[FutureId] + The IDs of futures that result from this computation + partitioning_list : List[Dict[FutureId, PartitionType]] + The allowed partitioning for this computation. When this computation is + scheduled, all of the arguments and results must be partitioned + according to one of the elements of this list. + static : List[FutureId] + List of futures that have static partition types, meaning they cannot + be converted to some other partitioning. + + For example, the future for a + `pl.GroupBy` might be assigned a static blocked partitioning because + once assigned it cannot be directly converted to another partitioning. + Instead, the input to the `pl.groupby` would have to be converted and + then the future could be recomputed. This field is only meaningful for + PTs assigned to future results in the given task graph. + name : str + Optional name of the computation + + Examples + -------- + >>> FutureComputation( + pl.GroupBy.agg, + [, "AgeSegment"], + ) + """ + + def __init__( + self, + func: Any, + args: List[Union[FutureId, Any]], + results: List[FutureId], + partitioning: List[Partitioning], + static: Union[List[FutureId], None] = None, + name: Union[str, None] = None, + ) -> Self: + self.func = func + self.args = args + self.results = results + self.partitioning_list = partitioning + self.name = name if name is not None else func.func_name + self.static = static or [] + self._assert_valid() + + def _assert_valid(self): + for partitioning in self.partitioning_list: + partitioning: Partitioning + curr_future_ids = list(partitioning.keys()) + if set(curr_future_ids) != set(self.future_ids): + raise ValueError( + f"Annotation of future computation {self.name} has " + f"future IDs {', '.join(self.future_ids)} but a specified " + "partitioning only assigns partition types to " + f"{', '.join(curr_future_ids)}." 
+ ) + + @property + def future_ids(self) -> List[FutureId]: + return [e for e in self.args + self.results if is_future_id(e)] + + @property + def is_specialized(self) -> bool: + """ + Returns whether the partitioning is specialized such that there is a + single allowable partition type for each future in this computation + """ + return len(self.partitioning_list) == 1 + + @property + def specialized_partitioning(self): + assert self.is_specialized + return self.partitioning_list[0] + + def is_static(self, fut: FutureId) -> bool: + return fut in self.static + + def to_dict(self) -> Dict[str, Any]: + return { + "func": self.func + if isinstance(self.func, str) + else to_str(self.func), + "results": self.results, + "args": [_arg_to_dict_or_str(arg) for arg in self.args], + "partitioning": [ + { + future_id: pt.to_dict() + for future_id, pt in pt_assignment.items() + } + for pt_assignment in self.partitioning_list + ], + "static": self.static, + "name": self.name, + } + + def from_dict(data: Dict[str, Any], use_cloudpickle=True) -> Self: + return FutureComputation( + from_str(data["func"]) if use_cloudpickle else data["func"], + [ + _arg_from_dict_or_str(arg, use_cloudpickle) + for arg in data["args"] + ], + data["results"], + [ + { + future_id: PartitionType.from_dict(pt) + for future_id, pt in pt_assignment.items() + } + for pt_assignment in data["partitioning"] + ], + static=data.get("static", []), + name=data.get("name", None), + ) + + def __str__(self) -> str: + arg_abbrevs = [ + arg if is_future_id(arg) else str(arg)[0:16] for arg in self.args + ] + args_with_static = [ + ("static " + arg_abbrev) + if is_future_id(arg_abbrev) and arg_abbrev in self.static + else arg_abbrev + for arg_abbrev in arg_abbrevs + ] + fc_signature = ( + f"{self.name}(" + f"{', '.join(args_with_static)}) -> {', '.join(self.results)}" + ) + return fc_signature + + +TaskGraph = List[FutureComputation] + + +def _get_future_ids_to_abbrevs( + future_ids: Set[FutureId], +) -> Dict[FutureId, 
str]: + if len(future_ids) == 0: + return {} + fid_len = max([len(fid) for fid in future_ids]) + for abbrev_len in range(len("bn_fut_") + 1, fid_len + 1): + future_id_abbrevs = set([fid[0:abbrev_len] for fid in future_ids]) + if len(future_id_abbrevs) == len(future_ids): + fid_len = abbrev_len + break + future_ids_to_abbrevs = {fid: fid[0:fid_len] for fid in future_ids} + return future_ids_to_abbrevs + + +def print_task_graph(tg: TaskGraph, display=True): + if len(tg) == 0: + return tg + + # Map each future ID to an abbreviation + future_ids = set() + for fc in tg: + for fid in fc.future_ids: + future_ids.add(fid) + future_ids_to_abbrevs = _get_future_ids_to_abbrevs(future_ids) + + # Print out info + res = "" + for i, fc in enumerate(tg): + fc: FutureComputation + res += f"Task #{i + 1}: " + str(fc) + "\n" + + for j, partitioning in enumerate(fc.partitioning_list): + partitioning: Partitioning + pts_str = ", ".join( + [f"{fid}: {str(pt)}" for fid, pt in partitioning.items()] + ) + res += f"\tPartitioning #{j + 1}: " + pts_str + "\n" + for fid, abbrev in future_ids_to_abbrevs.items(): + res = res.replace(fid, abbrev) + res = res.replace("bn_fut_", "") + res = res[0:-2] # Remove last '\n' + + if display: + print(res) + else: + return res + + +def print_partitioning(partitioning: Union[Partitioning, PartitioningMulti]): + print("Partitioning:") + future_ids = set(partitioning.keys()) + future_ids_to_abbrevs = _get_future_ids_to_abbrevs(future_ids) + for fid, pts in partitioning.items(): + fid_abbrev = future_ids_to_abbrevs[fid] + print(f"\t{fid_abbrev}: {', '.join(map(str, pts))}") + if len(future_ids) == 0: + print("\tempty") diff --git a/banyan-python/banyan/utils_serialization.py b/banyan-python/banyan/utils_serialization.py new file mode 100644 index 0000000..88eb046 --- /dev/null +++ b/banyan-python/banyan/utils_serialization.py @@ -0,0 +1,11 @@ +import codecs + +import cloudpickle + + +def to_str(obj): + return codecs.encode(cloudpickle.dumps(obj), 
"base64").decode() + + +def from_str(pickled): + return cloudpickle.loads(codecs.decode(pickled.encode(), "base64")) diff --git a/banyan-python/executor_lambda_code.zip b/banyan-python/executor_lambda_code.zip new file mode 100644 index 0000000..af9fe9a Binary files /dev/null and b/banyan-python/executor_lambda_code.zip differ diff --git a/banyan-python/poetry.lock b/banyan-python/poetry.lock new file mode 100644 index 0000000..0969f3c --- /dev/null +++ b/banyan-python/poetry.lock @@ -0,0 +1,296 @@ +[[package]] +name = "attrs" +version = "22.1.0" +description = "Classes Without Boilerplate" +category = "dev" +optional = false +python-versions = ">=3.5" + +[package.extras] +dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] +docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] +tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"] + +[[package]] +name = "boto3" +version = "1.26.37" +description = "The AWS SDK for Python" +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.29.37,<1.30.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.29.37" +description = "Low-level, data-driven core of boto 3." 
+category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.15.3)"] + +[[package]] +name = "certifi" +version = "2022.9.24" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "2.1.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "cloudpickle" +version = "2.2.0" +description = "Extended pickling support for Python objects" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "exceptiongroup" +version = "1.0.4" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "iniconfig" +version = "1.1.1" +description = "iniconfig: brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "dev" +optional = false +python-versions = ">=3.6.8" + +[package.extras] +diagrams = ["railroad-diagrams", "jinja2"] + +[[package]] +name = "pytest" +version = "7.2.0" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version 
< \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2021.3" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = false +python-versions = ">= 3.7" + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "typing-extensions" 
+version = "4.4.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "urllib3" +version = "1.26.13" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.8" +content-hash = "f134085a230aa4c727d903651278bd41a5f7e04ec9396cdc5f7703e4556635df" + +[metadata.files] +attrs = [] +boto3 = [] +botocore = [] +certifi = [] +charset-normalizer = [] +cloudpickle = [] +colorama = [] +exceptiongroup = [] +idna = [] +iniconfig = [] +jmespath = [] +packaging = [] +pluggy = [] +pyparsing = [] +pytest = [] +python-dateutil = [] +pytz = [] +requests = [] +s3transfer = [] +six = [] +toml = [] +tomli = [] +typing-extensions = [] +urllib3 = [] diff --git a/banyan/pyproject.toml b/banyan-python/pyproject.toml similarity index 73% rename from banyan/pyproject.toml rename to banyan-python/pyproject.toml index fa9bd5f..fb39814 100644 --- a/banyan/pyproject.toml +++ b/banyan-python/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "banyan-python" version = "0.2.0" -description = "Massively parallel cloud computing with popular Python libraries for analytics, processing, and simulation! 
" +description = "Instant large-scale computing with Python APIs you already know and love" authors = ["Banyan Computing "] license = "Apache-2.0" readme = "README.md" @@ -29,20 +29,16 @@ packages = [ [tool.poetry.dependencies] python = "^3.8" -boto3 = "^1.18.45" +boto3 = "^1.26.37" botocore = "^1.23.48" -progressbar2 = "^4.0.0" requests = "^2.27.1" toml = "^0.10.2" pytz = "^2021.3" -progressbar = "^2.5" -tqdm = "^4.63.0" -pygit2 = "^1.9.0" -mpi4py = "^3.1.3" -plum-dispatch = "^1.6" +typing-extensions = "^4.4.0" +cloudpickle = "^2.2.0" [tool.poetry.dev-dependencies] -pytest = "^5.2" +pytest = "^7.2" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/banyan-python/tests/__init__.py b/banyan-python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/banyan-python/tests/test_annotation.py b/banyan-python/tests/test_annotation.py new file mode 100644 index 0000000..958a660 --- /dev/null +++ b/banyan-python/tests/test_annotation.py @@ -0,0 +1,120 @@ +import banyan as bn +from banyan.utils_future_computation import print_task_graph + + +def filter_df(x, func=None): + return x + + +def test_record_task(): + arg = bn.Future() + res = bn.Future() + bn.record_task( + res, + filter_df, + arg, + [ + {arg: "Blocked", res: "Blocked"}, + {arg: "Grouped", res: "Grouped"}, + { + arg: bn.PartitionType("Grouped", {"key": "species"}), + res: "Grouped", + }, + {arg: "Replicated", res: "Replicated"}, + ], + ) + + arg = res + res = bn.Future() + bn.record_task( + res, + filter_df, + arg, + [{arg: "Replicated", res: "Replicated"}], + static=res, + ) + + arg = res + res = bn.Future() + bn.record_task( + res, + filter_df, + arg, + [{arg: "Replicated", res: "Replicated"}], + static=[res], + ) + + assert len(arg._task_graph) == 2 + assert len(res._task_graph) == 3 + assert res._task_graph[0].results[0] == res._task_graph[1].args[0] + assert len(res._task_graph[0].partitioning_list) == 4 + assert len(arg._task_graph[0].partitioning_list) == 4 + pt = 
arg._task_graph[0].partitioning_list[2][arg._task_graph[0].args[0]] + assert pt.is_grouped + assert pt.params["key"] == "species" + + arg = res + res = bn.Future() + bn.record_task( + res, + filter_df, + arg, + [ + "Blocked", + "Consolidated", + { + arg: bn.PartitionType("Grouped", {"key": "species"}), + res: "Grouped", + }, + bn.PartitionType("Grouped", {"key": "species"}), + ], + ) + assert len(arg._task_graph) == 3 + assert len(res._task_graph) == 4 + assert len(res._task_graph[3].partitioning_list) == 4 + for pt in [ + res._task_graph[3].partitioning_list[2][res._task_graph[3].args[0]], + res._task_graph[3].partitioning_list[3][res._task_graph[3].args[0]], + res._task_graph[3].partitioning_list[3][res._task_graph[3].results[0]], + ]: + assert pt.is_grouped + assert pt.params["key"] == "species" + + +def test_record_task_with_constants(): + arg = bn.Future() + res = bn.Future() + bn.record_task( + [res], + filter_df, + [arg, lambda x: x * 2], + [ + {arg: "Blocked", res: "Blocked"}, + {arg: "Grouped", res: "Grouped"}, + { + arg: bn.PartitionType("Grouped", {"key": "species"}), + res: "Grouped", + }, + {arg: "Replicated", res: "Replicated"}, + ], + ) + assert len(res._task_graph[0].args) == 2 + + +def test_record_task_create_new_future(): + arg = bn.Future() + res = bn.record_task( + "res", + filter_df, + [arg, lambda x: x * 2], + [ + {arg: "Blocked", "res": "Blocked"}, + {arg: "Grouped", "res": "Grouped"}, + { + arg: bn.PartitionType("Grouped", {"key": "species"}), + "res": "Grouped", + }, + {arg: "Replicated", "res": "Replicated"}, + ], + ) + assert len(res._task_graph[0].args) == 2 diff --git a/banyan/tests/test_config.py b/banyan-python/tests/test_config.py similarity index 72% rename from banyan/tests/test_config.py rename to banyan-python/tests/test_config.py index 0e51f0c..6ba0754 100644 --- a/banyan/tests/test_config.py +++ b/banyan-python/tests/test_config.py @@ -1,4 +1,6 @@ -from banyan.config import * +import os + +import banyan as bn user_id = "user1" 
api_key = "12345" @@ -6,7 +8,7 @@ def test_args(): banyanconfig_path = "tempfile_args.toml" - config = configure( + config = bn.configure( user_id=user_id, api_key=api_key, banyanconfig_path=banyanconfig_path ) print(config) @@ -15,7 +17,7 @@ def test_args(): and config["banyan"]["api_key"] == api_key ) - config = load_config(banyanconfig_path) + config = bn.config.load_config(banyanconfig_path) assert ( config["banyan"]["user_id"] == user_id and config["banyan"]["api_key"] == api_key @@ -23,7 +25,7 @@ def test_args(): try: os.remove(banyanconfig_path) - except FileNotFoundError as e: + except FileNotFoundError: pass @@ -32,13 +34,13 @@ def test_environ(): os.environ["BANYAN_USER_ID"] = user_id os.environ["BANYAN_API_KEY"] = api_key - config = configure(banyanconfig_path=banyanconfig_path) + config = bn.configure(banyanconfig_path=banyanconfig_path) assert ( config["banyan"]["user_id"] == user_id and config["banyan"]["api_key"] == api_key ) - config = load_config(banyanconfig_path) + config = bn.config.load_config(banyanconfig_path) assert ( config["banyan"]["user_id"] == user_id and config["banyan"]["api_key"] == api_key @@ -46,23 +48,25 @@ def test_environ(): try: os.remove(banyanconfig_path) - except FileNotFoundError as e: + except FileNotFoundError: pass def test_toml(): banyanconfig_path = "tempfile_toml.toml" - configure(user_id=user_id, api_key=api_key, banyanconfig_path=banyanconfig_path) + bn.configure( + user_id=user_id, api_key=api_key, banyanconfig_path=banyanconfig_path + ) del os.environ["BANYAN_USER_ID"] del os.environ["BANYAN_API_KEY"] - config = configure(banyanconfig_path=banyanconfig_path) + config = bn.configure(banyanconfig_path=banyanconfig_path) assert ( config["banyan"]["user_id"] == user_id and config["banyan"]["api_key"] == api_key ) - config = load_config(banyanconfig_path) + config = bn.config.load_config(banyanconfig_path) assert ( config["banyan"]["user_id"] == user_id and config["banyan"]["api_key"] == api_key @@ -70,5 +74,5 @@ def 
test_toml(): try: os.remove(banyanconfig_path) - except FileNotFoundError as e: + except FileNotFoundError: pass diff --git a/banyan-python/tests/test_sessions.py b/banyan-python/tests/test_sessions.py new file mode 100644 index 0000000..1fe5c03 --- /dev/null +++ b/banyan-python/tests/test_sessions.py @@ -0,0 +1,10 @@ +from banyan import start_session + +# TODO: Check that the required environment variables are set before +# running tests: api_key, user_id, endpoint + + +# start_session was tested by calling start_session with a path +# to a site_packages directory. It was manually verified that +# a new Lambda function was created if not already existing and +# that it was invoked once. diff --git a/banyan/banyan/__init__.py b/banyan/banyan/__init__.py deleted file mode 100644 index 790bccd..0000000 --- a/banyan/banyan/__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -__version__ = "0.1.1" - -from .imports import * - -# Check if AWS region is set. If not, default to us-west-2 and give a warning -if boto3.Session().region_name == None: - logging.warning( - "Defaulting to region us-west-2. If you want to use a different AWS region, " - "please set the `AWS_DEFAULT_REGION` environment variable or update the " - "default region in `~/.aws/config`, before importing `banyan`." 
- ) - os.environ["AWS_DEFAULT_REGION"] = "us-west-2" - -from .constants import * -from .clusters import ( - create_cluster, - destroy_cluster, - delete_cluster, - update_cluster, - assert_cluster_is_ready, - Cluster, - get_clusters, - get_cluster_s3_bucket_arn, - get_cluster_s3_bucket_name, - get_cluster, - get_running_clusters, - get_cluster_status, - wait_for_cluster, - upload_to_s3, -) -from .config import load_config, write_config, configure -from .id import generate_message_id -from .queues import ( - get_scatter_queue, - get_gather_queue, - get_execution_queue, - get_next_message, - receive_next_message, - receive_from_client, - send_message, - send_to_client, -) -from .session import Session, set_session, get_session_id, get_session, get_cluster_name -from .sessions import ( - end_session, - end_all_sessions, - get_sessions, - get_running_sessions, - get_session_status, - download_session_logs, - wait_for_session, - start_session, - run_session, -) -from .utils import ( - get_aws_config_region, - send_request_get_response, - is_debug_on, - get_python_version, - parse_time, - get_loaded_packages, - get_hash, - upload_file_to_s3, - load_toml, -) diff --git a/banyan/banyan/clusters.py b/banyan/banyan/clusters.py deleted file mode 100644 index 1a1456f..0000000 --- a/banyan/banyan/clusters.py +++ /dev/null @@ -1,315 +0,0 @@ -from math import ceil -from tqdm import tqdm -import urllib - -from .config import configure -from .imports import * -from .session import get_cluster_name -from .utils import ( - send_request_get_response, - get_aws_config_region, - s3_bucket_arn_to_name, - parse_bytes -) - - -s3 = boto3.client("s3") -clusters = dict() - - -def create_cluster( - name: str = None, - instance_type: str = "m4.4xlarge", - max_num_workers: str = 2048, - initial_num_workers: int = 16, - min_num_workers: int = 0, - iam_policy_arn: str = None, - s3_bucket_arn: str = None, - s3_bucket_name: str = None, - disk_capacity = "1200 GiB", # some # of GBs or "auto" to use 
Amazon EFS - scaledown_time: int = 25, - region: str = None, - vpc_id: str = None, - subnet_id: str = None, - nowait: bool = False, - ec2_key_pair: str = None, - **kwargs, -): - """Creates a new cluster or re-creates a previously destroyed cluster. - If no vpc_id and subnet_id are provided, - the cluster is by default created in the default public VPC - and subnet in your AWS account. - """ - global clusters - - # Configure using parameters - c = configure(**kwargs) - - clusters_local = get_clusters(**kwargs) - if name is None: - name = "Cluster " + str(len(clusters_local) + 1) - if region is None: - region = get_aws_config_region() - - # Check if the configuration for this cluster name already exists - # If it does, then recreate cluster - if name in clusters_local: - if clusters_local[name].status == "terminated": - logging.info(f"Started re-creating cluster named {name}") - send_request_get_response( - "create-cluster", {"cluster_name": name, "recreate": True} - ) - if not nowait: - wait_for_cluster(name) - # Cache info - return get_cluster(name) - else: - raise Exception( - f"Cluster with name {name} already exists and its current status is {str(clusters_local[name].status)}" - ) - - # Construct arguments - if s3_bucket_name is not None: - s3_bucket_arn = f"arn:aws:s3:::{s3_bucket_name}" - elif s3_bucket_arn is not None: - s3_bucket_name = s3_bucket_arn.split(":")[-1] - - if s3_bucket_arn is None: - s3_bucket_arn = "" - elif s3_bucket_name not in s3.list_buckets(): - logging.error( - f"Bucket {s3_bucket_name} does not exist in connected AWS account" - ) - - # Construct cluster creation - cluster_config = { - "cluster_name": name, - "instance_type": instance_type, - "max_num_workers": max_num_workers, - "initial_num_workers": initial_num_workers, - "min_num_workers": min_num_workers, - "aws_region": region, - "s3_read_write_resource": s3_bucket_arn, - "scaledown_time": scaledown_time, - "recreate": False, - # We need to pass in the disk capacity in # of GiB and 
we do this by dividing the input - # by size of 1 GiB and then round up. Then the backend will determine how to adjust the - # disk capacity to an allowable increment (e.g., 1200 GiB or an increment of 2400 GiB - # for AWS FSx Lustre filesystems) - "disk_capacity": -1 if (disk_capacity == "auto") else ceil(parse_bytes(disk_capacity) / 1.073741824e7) - } - - if "ec2_key_pair_name" in c["aws"]: - cluster_config["ec2_key_pair"] = c["aws"]["ec2_key_pair_name"] - if iam_policy_arn is not None: - cluster_config["additional_policy"] = iam_policy_arn - if vpc_id is not None: - cluster_config["vpc_id"] = vpc_id - if subnet_id is not None: - cluster_config["subnet_id"] = subnet_id - if not ec2_key_pair == None: - cluster_config["ec2_key_pair"] = ec2_key_pair - - logging.info(f"Started creating cluster named {name}") - # Send request to create cluster - send_request_get_response("create-cluster", cluster_config) - - if not nowait: - wait_for_cluster(name) - - # Cache info - get_cluster(name) - - return clusters[name] - - -def destroy_cluster(name: str, **kwargs): - configure(**kwargs) - logging.info(f"Destroying cluster named {name}") - send_request_get_response("destroy-cluster", {"cluster_name": name}) - - -def delete_cluster(name: str, **kwargs): - configure(**kwargs) - logging.info(f"Deleting cluster named {name}") - send_request_get_response( - "destroy-cluster", {"cluster_name": name, "permanently_delete": True} - ) - - -def update_cluster(name: str, **kwargs): - configure(**kwargs) - logging.info(f"Updating cluster named {name}") - send_request_get_response("update-cluster", {"cluster_name": name}) - - -def assert_cluster_is_ready(name: str, **kwargs): - logging.info(f"Setting status of cluster named {name} to running") - configure(**kwargs) - send_request_get_response("set-cluster-ready", {"cluster_name": name}) - - -class Cluster: - def __init__( - self, - name: str, - status: str, - status_explanation: str, - s3_bucket_arn: str, - organization_id: str, - 
curr_cluster_instance_id: str, - num_sessions_running: int, - num_workers_running: int, - ): - self.name = name - self.status = status - self.status_explanation = status_explanation - self.s3_bucket_arn = s3_bucket_arn - self.organization_id = organization_id - self.curr_cluster_instance_id = curr_cluster_instance_id - self.num_sessions_running = num_sessions_running - self.num_workers_running = num_workers_running - - -def get_clusters(cluster_name=None, **kwargs): - logging.debug("Downloading description of clusters") - filters = {} - if cluster_name is not None: - filters["cluster_name"] = cluster_name - response = send_request_get_response("describe-clusters", {"filters": filters}) - clusters_dict = { - name: Cluster( - name, - c["status"], - c.get("status_explanation", ""), - c["s3_read_write_resource"], - c["organization_id"], - c.get("curr_cluster_instance_id", ""), - c.get("num_sessions", 0), - c.get("num_workers_in_use", 0), - ) - for (name, c) in response["clusters"].items() - } - - # Cache info - global clusters - for (name, c) in clusters_dict.items(): - clusters[name] = c - - return clusters_dict - - -def get_cluster_s3_bucket_arn(cluster_name=None, **kwargs): - configure(**kwargs) - if cluster_name is None: - cluster_name = get_cluster_name() - global clusters - # Check if cached, since this property is immutable - if cluster_name not in clusters: - get_cluster(cluster_name) - return clusters[cluster_name].s3_bucket_arn - - -def get_cluster_s3_bucket_name(cluster_name=None, **kwargs): - configure(**kwargs) - if cluster_name is None: - cluster_name = get_cluster_name() - return s3_bucket_arn_to_name(get_cluster_s3_bucket_arn(cluster_name)) - - -def get_cluster(name: str = None, **kwargs): - if name is None: - name = get_cluster_name() - return get_clusters(name, **kwargs)[name] - - -def get_running_clusters(*args, **kwargs): - return dict( - filter( - lambda entry: entry[1].status == "running", - get_clusters(*args, **kwargs).items(), - ) - ) - - -def 
get_cluster_status(name: str = None, **kwargs): - if name is None: - name = get_cluster_name() - global clusters - if name in clusters: - if clusters[name].status == "failed": - logging.error(clusters[name].status_explanation) - # If it is not failed, then retrieve status, in case it has changed - c = get_cluster(name, **kwargs) - if c.status == "failed": - raise Exception(c.status_explanation) - return c.status - - -def wait_for_cluster(name: str = None, **kwargs): - if name is None: - name = get_cluster_name() - t = 5 - i = 0 - cluster_status = get_cluster_status(name, **kwargs) - while cluster_status == "creating" or cluster_status == "updating": - if cluster_status == "creating": - pbar = tqdm(desc=f"Setting up cluster {name}") - else: - pbar = tqdm(desc=f"Updating cluster {name}") - time.sleep(t) - if t < 80: - t *= 2 - cluster_status = get_cluster_status(name, **kwargs) - pbar.update(i) - i += 1 - try: - pbar.close() - except: - pass - if cluster_status == "running": - logging.info(f"Cluster {name} is ready") - elif cluster_status == "terminated": - raise Exception(f"Cluster {name} no longer exists") - elif cluster_status not in ["creating", "updating"]: - raise Exception(f"Failed to set up cluster named {name}") - else: - raise Exception(f"Cluster {name} has unexpected status: {cluster_status}") - - -def upload_to_s3(src_path, dst_name=None, cluster_name=None, **kwargs): - if dst_name is None: - dst_name = os.path.basename(src_path) - if cluster_name is None: - cluster_name = get_cluster_name() - - configure(**kwargs) - bucket_name = get_cluster_s3_bucket_name(cluster_name) - - if src_path.startswith("http://") or src_path.startswith("https://"): - with urllib.request.urlopen(src_path) as f: - s3.upload_fileobj(f, bucket_name, dst_name) - if src_path.startswith("s3://"): - bucket_src, key_src = src_path.replace("s3://", "").split("/", 1) - copy_source = {"Bucket": bucket_src, "Key": key_src} - s3.meta_client.copy(copy_source, bucket_name, dst_name) - else: - 
if src_path.startswith("file://"): - src_path = src_path[8:] - src_path = Path(src_path) - - if src_path.is_file(): - s3.meta.client.upload_file(src_path, bucket_name, dst_name) - else: - files = [ - f - for f in os.listdir(src_path) - if os.path.isfile(os.path.join(src_path, f)) - ] - for file in files: - s3.meta.client.upload_file( - Path(os.path.join(src_path, file)), - bucket_name, - os.path.join(dst_name, file), - ) - return dst_name diff --git a/banyan/banyan/id.py b/banyan/banyan/id.py deleted file mode 100644 index 54a7b7c..0000000 --- a/banyan/banyan/id.py +++ /dev/null @@ -1,12 +0,0 @@ -ResourceId = str - -generated_message_ids = set() -num_message_ids_issued = 0 - -def generate_message_id(): - global generated_message_ids - global num_message_ids_issued - num_message_ids_issued += 1 - v = str(num_message_ids_issued) - generated_message_ids.add(v) - return v diff --git a/banyan/banyan/imports.py b/banyan/banyan/imports.py deleted file mode 100644 index 68a1e20..0000000 --- a/banyan/banyan/imports.py +++ /dev/null @@ -1,17 +0,0 @@ -# Standard Library -import logging -import os -from pathlib import Path -import time -from typing import Any, Dict, Optional, Union, List - -# from _typeshed import NoneType - -# Third Party -import boto3 -from botocore.config import Config -import requests -import toml - -# Constructed -NoneType = type(None) diff --git a/banyan/banyan/queues.py b/banyan/banyan/queues.py deleted file mode 100644 index d14d632..0000000 --- a/banyan/banyan/queues.py +++ /dev/null @@ -1,147 +0,0 @@ -import boto3 -import json - -from .id import generate_message_id, ResourceId -from .sessions import get_session, get_session_id, end_session -from .utils import from_py_value_contents, to_py_value_contents - -sqs = boto3.client("sqs") - -################# -# GET QUEUE URL # -################# - - -def get_scatter_queue(resource_id=None): - if resource_id is None: - resource_id = get_session().resource_id - return sqs.get_queue_url(QueueName="banyan_" + 
resource_id + "_scatter.fifo")[ - "QueueUrl" - ] - - -def get_gather_queue(resource_id=None): - if resource_id is None: - resource_id = get_session().resource_id - return sqs.get_queue_url(QueueName="banyan_" + resource_id + "_gather.fifo")[ - "QueueUrl" - ] - - -def get_execution_queue(resource_id=None): - if resource_id is None: - resource_id = get_session().resource_id - return sqs.get_queue_url(QueueName="banyan_" + resource_id + "_execution.fifo")[ - "QueueUrl" - ] - - -################### -# RECEIVE MESSAGE # -################### - - -def get_next_message(queue, delete=True): - message_receiving_result = sqs.receive_message( - QueueUrl=queue, MaxNumberOfMessages=1 - ) - m = ( - message_receiving_result["Messages"][0]["ReceiptHandle"] - if len(message_receiving_result["Messages"]) > 0 - else None - ) - - while m is None: - message_receiving_result = sqs.receive_message( - QueueUrl=queue, MaxNumberOfMessages=1 - ) - m = ( - message_receiving_result["Messages"][0]["ReceiptHandle"] - if len(message_receiving_result["Messages"]) > 0 - else None - ) - - if delete: - sqs.delete_message(QueueUrl=queue, ReceiptHandle=m) - return message_receiving_result["Messages"][0]["Body"] - - -def receive_next_message(queue_name): - content = get_next_message(queue_name) - - if content.startswith("JOB_READY") or content.startswith("SESSION_READY"): - response = {"kind": "SESSION_READY"} - return response - elif content.startswith("EVALUATION_END"): - response = {"kind": "EVALUATION_END"} - response["end"] = content.endswith("MESSAGE_END") - print(content[14:-1]) - return response - elif content.startswith("JOB_FAILURE") or content.startswith( - content, "SESSION_FAILURE" - ): - tail = 11 if content.endswith("MESSAGE_END") else 0 - head_len = 11 if content.startswith("JOB_FAILURE") else 15 - # This print statement is needed, so that we can print out the error message - print(content[head_len:tail]) - if content.endswith("MESSAGE_END"): - end_session( - failed=True, 
release_resources_now=content.startswith("JOB_FAILURE") - ) - raise RuntimeError("Session failed; see preceding output") - response = {"kind": "SESSION_FAILURE"} - return response - else: - return json.loads(content) - - -# Used by Banyan/src/pfs.jl, intended to be called from the executor -def receive_from_client(value_id): - # Send scatter message to client - send_message( - get_gather_queue(), - json.dumps({"kind": "SCATTER_REQUEST", "value_id": value_id}), - ) - # Receive response from client - m = json.loads(get_next_message(get_scatter_queue())) - v = from_py_value_contents(m["contents"]) - return v - - -################ -# SEND MESSAGE # -################ - - -def send_message(queue_name, message): - # queue_url = sqs.get_queue_url(queue_name) - return sqs.send_message( - QueueUrl=queue_name, # queue_url, - MessageBody=message, # TODO: Is that correct?, - MessageGroupId="1", - MessageDeduplicationId=generate_message_id(), # TODO: where does that function come from? - ) - - -def send_to_client(value_id, value): - print("QUEUE NAME: ", get_gather_queue()) - send_message( - get_gather_queue(), - json.dumps( - { - "kind": "GATHER", - "value_id": value_id, - "contents": to_py_value_contents(value), - } - ), - ) - - -########################### -# GET MESSAGES FROM QUEUE # -########################### - - -########################## -# SEND MESSAGES TO QUEUE # -########################## diff --git a/banyan/banyan/session.py b/banyan/banyan/session.py deleted file mode 100644 index b6f2a63..0000000 --- a/banyan/banyan/session.py +++ /dev/null @@ -1,112 +0,0 @@ -sessions = dict() -current_session_id = None - - -class Session: - """Stores information about one session""" - - def __init__(self, cluster_name, session_id, resource_id, nworkers, sample_rate): - self._cluster_name = cluster_name - self._session_id = session_id - self._resource_id = resource_id - self._nworkers = nworkers - self._sample_rate = sample_rate - self._locations = {} - self._pending_requests = 
[] - self._futures_on_client = {} - - @property - def cluster_name(self): - return self._cluster_name - - @property - def resource_id(self): - return self._resource_id - - @property - def nworkers(self): - return self._nworkers - - @property - def sample_rate(self): - return self._sample_rate - - # Add other getters if needed - - -def set_session(session_id: str, session=None, *args, **kwargs): - """Sets the session ID. - - Parameters - ---------- - session_id : string - Session ID to use - session : Session - If not None (default), the global sessions table is updated to include - this session. - """ - - global current_session_id - current_session_id = session_id - - global sessions - if session is not None: - sessions[current_session_id] = session - - -def get_session_id(*args, **kwargs): - """Returns the value of the global variable set to the current session ID. - - Returns - ------- - string - Current session ID - """ - - global current_session_id - if current_session_id is None: - raise Exception( - "No session started or selected using `start_session` or `with_session` or `set_session`. The current session may have been destroyed or no session started yet.", - ) - return current_session_id - - -def get_session(session_id=None, *args, **kwargs): - """Get information about the current session. - - Parameter - -------- - session_id : string - Session ID to get information for - - Returns - ------- - Session - Information about the given session ID - - Raises - ------ - Exception if the session ID is for a session that wasn't created by this - process or has failed - """ - - if session_id is None: - session_id = get_session_id() - global sessions # an empty dictionary that will get filled up with mappings from session_id ->instances of the class Session - if session_id not in sessions: - raise Exception( - f"The selected job with ID {session_id} does not have any information; if it was created by this process, it has either failed or been destroyed." 
- ) - return sessions[session_id] - - -def get_cluster_name(*args, **kwargs): - """Gets the name of the cluster that the current session is running on. - - Returns - ------- - string - Name of the cluster that the current session is running on. - """ - - return get_session().cluster_name diff --git a/banyan/banyan/sessions.py b/banyan/banyan/sessions.py deleted file mode 100644 index 362ca20..0000000 --- a/banyan/banyan/sessions.py +++ /dev/null @@ -1,614 +0,0 @@ -import boto3 -import logging -import os -from pygit2 import Repository -import time -from tqdm import tqdm -from typing import List - - -from .constants import BANYAN_PYTHON_BRANCH_NAME, BANYAN_PYTHON_PACKAGES -from .clusters import ( - get_cluster, - get_running_clusters, - get_cluster_s3_bucket_name, - wait_for_cluster, -) -from .config import configure -from .session import ( - get_session_id, - set_session, - sessions, - current_session_id, - get_session, - Session, -) -from .utils import ( - get_hash, - get_loaded_packages, - get_python_version, - load_toml, - parse_time, - send_request_get_response, - upload_file_to_s3, -) - - -def start_session( - cluster_name: str = None, - nworkers: int = 16, - release_resources_after: int = 20, - print_logs: bool = False, - store_logs_in_s3: bool = True, - store_logs_on_cluster: bool = False, - log_initialization: bool = False, - sample_rate: int = None, - session_name: str = None, - files: list = None, - code_files: list = None, - force_update_files: bool = False, - pf_dispatch_table: List[str] = None, - using_modules: list = None, - # pip_requirements_file = None, # paths to a requirements.txt file that contains packages to be installed with pip - # conda_environment_file = None, # paths to environment.yml file that contains packages to be installed with conda - project_dir: str = None, # a pyproject.toml file containing information about a poetry environment - url: str = None, - branch: str = None, - directory: str = None, - dev_paths: list = None, - 
force_sync: bool = False, - force_pull: bool = False, - force_install=False, - estimate_available_memory=True, - nowait=True, - email_when_ready=None, - for_running=False, - *args, - **kwargs, -): - """ - Starts a new session. - """ - - configure(*args, **kwargs) - - if sample_rate is None: - sample_rate = nworkers - - if files is None: - files = [] - - if code_files is None: - code_files = [] - - if using_modules is None: - using_modules = [] - - if dev_paths is None: - dev_paths = [] - - if project_dir is None: - project_dir = os.getcwd() # gets the current working directory - - poetry_pyproject_file = os.path.join(project_dir, "pyproject.toml") - poetry_lock_file = os.path.join(project_dir, "poetry.lock") - - # Construct parameters for starting session - if cluster_name is None: - # running_clusters is dictionary - running_clusters = get_running_clusters() - if len(running_clusters) == 0: - raise Exception( - "Failed to start session: you don't have any clusters created" - ) - else: - cluster_name = list(running_clusters.keys())[0] - version = get_python_version() - - c = get_cluster(cluster_name) - - session_configuration = { - "cluster_name": cluster_name, - "organization_id": c.organization_id, - "curr_cluster_instance_id": c.curr_cluster_instance_id, - "num_workers": nworkers, - "release_resources_after": release_resources_after, - "return_logs": print_logs, - "store_logs_in_s3": store_logs_in_s3, - "store_logs_on_cluster": store_logs_on_cluster, - "log_initialization": log_initialization, - "version": version, - "benchmark": os.environ.get("BANYAN_BENCHMARK", "0") == "1", - "main_modules": get_loaded_packages(), - "using_modules": using_modules, - "reuse_resources": not force_update_files, - "estimate_available_memory": estimate_available_memory, - "language": "py", - } - - if session_name is None: - session_configuration["session_name"] = session_name - - if email_when_ready is None: - session_configuration["email_when_ready"] = email_when_ready - - 
s3_bucket_name = get_cluster_s3_bucket_name(cluster_name) - environment_info = {} - # If a url is not provided, then use the local environment - if url is None: - # There are two files we need: pyproject.toml and project.lock - # Check if the pyproject.toml exists - if os.path.exists(poetry_pyproject_file): - # Check if the project.lock exists - if os.path.exists(poetry_lock_file): - # Read in the poetry.lock - with open(poetry_lock_file) as f: - poetry_lock_file_contents = f.read() - else: - # If it doesn't exist - that's fine.. - poetry_lock_file_contents = "" - # Read in the pyproject.toml - with open(poetry_pyproject_file) as f: - file_contents = f.read() - - # At this point, both files have been read in so we go ahead and - # get the hash of them concatenated - environment_hash = get_hash(poetry_lock_file_contents + file_contents) - environment_info["environment_hash"] = environment_hash - - # Upload the pyproject.toml file to S3 - object_name = environment_hash + "/pyproject.toml" - upload_file_to_s3(poetry_pyproject_file, s3_bucket_name, object_name) - environment_info["pyproject_toml"] = object_name - - if poetry_lock_file_contents != "": - object_name = environment_hash + "/poetry.lock" - upload_file_to_s3(poetry_lock_file, s3_bucket_name, object_name) - environment_info["poetry_lock"] = object_name - - else: - # It has to exist! 
- raise Exception("poetry_pyproject_file does not exist") - - else: - # Otherwise, use url and optionally a particular branch - environment_info["url"] = url - - if directory is None: - raise Exception("Directory must be provided for given URL $url") - - environment_info["directory"] = directory - - if branch is not None: - environment_info["branch"] = branch - - environment_info["dev_paths"] = dev_paths - environment_info["force_sync"] = force_sync - environment_info["force_pull"] = force_pull - environment_info["force_install"] = False # force_install - environment_info["environment_hash"] = get_hash( - url + ("" if branch is None else branch) - ) - - session_configuration["environment_info"] = environment_info - - # Upload files to S3 - for f in files: - upload_file_to_s3(f.replace("file://", ""), s3_bucket_name) - for f in code_files: - upload_file_to_s3(f.replace("file://", ""), s3_bucket_name) - - session_configuration["files"] = [os.path.basename(f) for f in files] - session_configuration["code_files"] = [os.path.basename(f) for f in code_files] - - if pf_dispatch_table is None: - # is_it_a ? 
a : b in Julia becomes a if is_it_a else b in Python (edited) - branch_to_use = ( - Repository(".").head.shorthand - if os.getenv("BANYAN_TESTING", "0") == "1" - else BANYAN_PYTHON_BRANCH_NAME - ) - pf_dispatch_table = [ - "https://raw.githubusercontent.com/banyan-team/banyan-python/" - + branch_to_use - + "/" - + dir - + "/res/pf_dispatch_table.toml" - for dir in BANYAN_PYTHON_PACKAGES - ] - - pf_dispatch_table_loaded = load_toml(pf_dispatch_table) - session_configuration["pf_dispatch_table"] = pf_dispatch_table_loaded - session_configuration["language"] = "py" - - # Start the session - response = send_request_get_response("start-session", session_configuration) - session_id = response["session_id"] - resource_id = response["resource_id"] - - # Store in global state - set_session( - session_id, - Session(cluster_name, session_id, resource_id, nworkers, sample_rate), - ) - - wait_for_cluster(cluster_name) - - if not nowait: - wait_for_session(session_id) - - return session_id - - -def end_session( - session_id=None, - failed=False, - release_resources_now=False, - release_resources_after=None, - *args, - **kwargs, -): - """Ends a session given the session_id. - - Parameters - ---------- - session_id : string - Session ID of the session that should get ended. - Defaults to the session_id returned from get_session_id() - failed : bool - Indicates whether the session being ended has failed. Defaults to False. - release_resources_now : string - Indicates whether to release underlying resources now. Defaults to False. 
- release_resources_after: int - The number of minutes after which to release underlying resources - - Returns - ------- - String - session ID of the session that was ended - """ - - configure(*args, **kwargs) - - if session_id is None: - session_id = get_session_id() - - # Ending session with ID session_ID - request_params = { - "session_id": session_id, - "failed": failed, - "release_resources_now": release_resources_now, - } - if release_resources_after is not None: - request_params["release_resources_after"] = release_resources_after - send_request_get_response("end-session", request_params) - - # Remove from global state - set_session(None) - if session_id in sessions: - del sessions[session_id] - return session_id - - -def end_all_sessions( - cluster_name, - release_resources_now=False, - release_resources_after=None, - *args, - **kwargs, -): - """End all running sessions for a given cluster. - - Parameters - ---------- - session_id : string - Session ID of the session that should get ended. - Defaults to the session_id returned from get_session_id() - failed : bool - Indicates whether the session being ended has failed. Defaults to False. - release_resources_now : string - Indicates whether to release underlying resources now. Defaults to False. - release_resources_after: int - The number of minutes after which to release underlying resources - """ - - configure(*args, **kwargs) - - sessions = get_sessions(cluster_name, status=["creating", "running"]) - for (session_id, session) in sessions.items(): - end_session(session_id, release_resources_now, release_resources_after) - - -def get_sessions(cluster_name=None, status=None, limit=-1, *args, **kwargs): - """Gets information about all the sessions for the user. Optionally can filter - by cluster name and status. - - Parameters - ---------- - cluster_name : string - Name of the cluste to filter by. Defaults to nothing - status : string - Status of session to filter by. 
Defaults to nothing - - Returns - ------- - Dictionary - Mappings from session ID to another dictionary containing information about the session - """ - - configure(*args, **kwargs) - - filters = {} - if cluster_name is not None: - filters["cluster_name"] = cluster_name - - if status is not None: - filters["status"] = status - # The function repeatedly calls the send_request_get_response function that takes - # the string 'describe_sessions' and the dictionary that contains filters. looping until - #'last_eval' does not exist in the indiv_response dictionary. - if limit > 0: - # Get the last `limit` number of sessions - indiv_response = send_request_get_response( - "describe-sessions", {"filters": filters, "limit": limit} - ) - sessions = indiv_response["sessions"] - else: - # Get all sessions - indiv_response = send_request_get_response( - "describe-sessions", {"filters": filters} - ) - curr_last_eval = indiv_response["last_eval"] - sessions = indiv_response["sessions"] - - while curr_last_eval is not None: - indiv_response = send_request_get_response( - "describe-sessions", - {"filters": filters, "this_start_key": curr_last_eval}, - ) - sessions.update(indiv_response["sessions"]) - curr_last_eval = indiv_response["last_eval"] - - # sessions is a dictionary that contains - # mappings from session_id to another dictionary containing information about the session - # {"start_time": "0126202220937124", "end_time": "", "num_workers": 2} - # both start_time and end_time are of the format "yyyy-mm-dd-HH:MM:SSzzzz" - for id, s in sessions.items(): # iterating over key and value - if sessions[id]["end_time"] == "": - sessions[id]["end_time"] = None - else: - sessions[id]["end_time"] = parse_time(sessions[id]["end_time"]) - sessions[id]["start_time"] = parse_time(sessions[id]["start_time"]) - return sessions - - -def get_running_sessions(*args, **kwargs): - """Gets info about all sessions that are currently running - - Returns - ------- - Dictionary - Mappings from session 
ID to another dictionary containing information about sessions that are running - """ - configure(*args, **kwargs) - return get_sessions(status="running") - - -def get_session_status(session_id=None, *args, **kwargs): - """Get the status of the session with the given session ID or current session - if nothing is provided. - - Parameter - --------- - session_id : string - Session ID of the session that should be got. - Defaults to the session_id returned from get_session_id() - - Returns - ------- - string - Status of the session. If the status is 'failed', the 'status_explanation' is printed - """ - - configure(*args, **kwargs) - - if session_id is None: - session_id = get_session_id() - filters = {"session_id": session_id} # filters is a dictionary - # response contains a dictionary (at the key "sessions") which maps from session IDs - # to session information where the session information is a dictionary containing - # various info such as the status (at the key "status"). - response = send_request_get_response("describe-sessions", {"filters": filters}) - session_status = response["sessions"][session_id]["status"] - if session_status == "failed": - # We don't immediately fail - we're just explaining. It's only later on - # where it's like we're actually using this session do we set the status. - # TODO: Should this be logging.error? 
- # This print statement is necessary so that we can print reason for session failure - print(response["sessions"][session_id]["status_explanation"]) - return session_status - - -def download_session_logs(session_id, cluster_name, filename=None, *args, **kwargs): - """Downloads the logs from Amazon S3 for a particular session to a local file - - Parameters - ---------- - session_id : string - Session ID of the session to get the log for that should get downloaded - clusterr_name : string - Name of the cluster the session was running on - fileneame : string and defaults to nothing - Path to the file on the local computer to which to download to - """ - - configure(*args, **kwargs) - - s3_bucket_name = get_cluster_s3_bucket_name(cluster_name) - log_file_name = f"banyan-log-for-session-{session_id}" # This creates a string with the {session_id} replaced with the value of the job_id - if filename is None: # if fileneame is not specified - filename = os.path.expanduser("~") # path to the home directory on the computer - s3 = boto3.client("s3") - s3.download_file(s3_bucket_name, log_file_name, filename) - - -def print_session_logs(session_id, cluster_name, delete_file=True): - s3 = boto3.client("s3") - s3_bucket_name = get_cluster_s3_bucket_name(cluster_name) - log_file_name = f"banyan-log-for-session-{session_id}" - try: - obj = s3.get_object(Bucket=s3_bucket_name, Key=log_file_name) - print(obj["Body"].read().decode("utf-8")) - except Exception as e: - print(f"Could not print session logs for session with ID {session_id}") - print(f"To download session logs, you can use `banyan.download_session_logs()`") - if delete_file: - s3.delete_object(Bucket=s3_bucket_name, Key=log_file_name) - - -def wait_for_session(session_id=None, *args, **kwargs): - """Implements an algorithm to repeatedly get the session status and then wait for a - period of time - - Parameters - ---------- - session_id : string - Session ID of the session that should get ended. 
- Defaults to the session_id returned from get_session_id() - - Raises - ------ - Raises Exception if session fails - """ - - configure(*args, **kwargs) - - if session_id is None: - session_id = get_session_id() - session_status = get_session_status(session_id) - t = 5 - i = 0 - if session_status == "creating": - pbar = tqdm(desc=f"Starting session with ID {session_id}") - while session_status == "creating": - time.sleep(t) - if t < 80: - t *= 2 - session_status = get_session_status(session_id) - pbar.update(i) - i += 1 - try: - pbar.close() - except: - pass - if session_status == "running": - logging.info(f"session with ID {session_id} is ready") - elif session_status == "completed": - raise Exception(f"session with ID {session_id} has already completed") - elif session_status == "failed": - raise Exception(f"session with ID {session_id} has failed") - else: - raise Exception(f"Unknown session status {session_status} is ready") - - -def run_session( - cluster_name=None, - nworkers=16, - release_resources_after=20, - print_logs=False, - store_logs_in_s3=True, - store_logs_on_cluster=False, - sample_rate=None, - session_name=None, - files=None, - code_files=None, - force_update_files=True, - pf_dispatch_table=None, - using_modules=None, - project_dir=None, - url=None, - branch=None, - directory=None, - dev_paths=None, - force_sync=False, - force_pull=False, - force_install=False, - estimate_available_memory=True, - email_when_ready=None, - *args, - **kwargs, -): - """Starts a session, runs some code files and the sessions ends after that.""" - - if sample_rate is None: - sample_rate = nworkers - - if files is None: - files = [] - - if code_files is None: - code_files = [] - - if using_modules is None: - using_modules = [] - - if dev_paths is None: - dev_paths = [] - - store_logs_in_s3_orig = store_logs_in_s3 - - try: - if print_logs: - # If logs need to be printed, ensure that we save logs in S3. 
If - # store_logs_in_s3==False, then delete logs in S3 later - store_logs_in_s3 = True - start_session( - cluster_name=cluster_name, - nworkers=nworkers, - release_resources_after=release_resources_after, - print_logs=print_logs, - store_logs_in_s3=store_logs_in_s3, - store_logs_on_cluster=store_logs_on_cluster, - sample_rate=sample_rate, - session_name=session_name, - files=files, - code_files=code_files, - force_update_files=force_update_files, - pf_dispatch_table=pf_dispatch_table, - using_modules=using_modules, - project_dir=project_dir, - url=url, - branch=branch, - directory=directory, - dev_paths=dev_paths, - force_sync=force_sync, - force_pull=force_pull, - force_install=force_install, - estimate_available_memory=estimate_available_memory, - nowait=False, # Wait untile session is ready, since code files are running - email_when_ready=email_when_ready, - for_running=True, - ) - except: - try: - session_id = get_session_id() - except: - session_id = None - if session_id is not None: - end_session(get_session_id(), failed=True) - if print_logs: - print_session_logs( - session_id, cluster_name, delete_file=(not store_logs_in_s3_orig) - ) - raise - finally: - try: - session_id = get_session_id() - except: - session_id = None - if session_id is not None: - end_session(get_session_id(), failed=False) - if print_logs: - print_session_logs( - session_id, cluster_name, delete_file=(not store_logs_in_s3_orig) - ) diff --git a/banyan/poetry.lock b/banyan/poetry.lock deleted file mode 100644 index c9523ec..0000000 --- a/banyan/poetry.lock +++ /dev/null @@ -1,585 +0,0 @@ -[[package]] -name = "atomicwrites" -version = "1.4.0" -description = "Atomic file writes." 
-category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "attrs" -version = "21.4.0" -description = "Classes Without Boilerplate" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] -docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] - -[[package]] -name = "boto3" -version = "1.20.48" -description = "The AWS SDK for Python" -category = "main" -optional = false -python-versions = ">= 3.6" - -[package.dependencies] -botocore = ">=1.23.48,<1.24.0" -jmespath = ">=0.7.1,<1.0.0" -s3transfer = ">=0.5.0,<0.6.0" - -[package.extras] -crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] - -[[package]] -name = "botocore" -version = "1.23.48" -description = "Low-level, data-driven core of boto 3." -category = "main" -optional = false -python-versions = ">= 3.6" - -[package.dependencies] -jmespath = ">=0.7.1,<1.0.0" -python-dateutil = ">=2.1,<3.0.0" -urllib3 = ">=1.25.4,<1.27" - -[package.extras] -crt = ["awscrt (==0.12.5)"] - -[[package]] -name = "cached-property" -version = "1.5.2" -description = "A decorator for caching properties in classes." -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "certifi" -version = "2021.10.8" -description = "Python package for providing Mozilla's CA Bundle." 
-category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "cffi" -version = "1.15.0" -description = "Foreign Function Interface for Python calling C code." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -pycparser = "*" - -[[package]] -name = "charset-normalizer" -version = "2.0.11" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" -optional = false -python-versions = ">=3.5.0" - -[package.extras] -unicode_backport = ["unicodedata2"] - -[[package]] -name = "colorama" -version = "0.4.4" -description = "Cross-platform colored terminal text." -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "idna" -version = "3.3" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "jmespath" -version = "0.10.0" -description = "JSON Matching Expressions" -category = "main" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "more-itertools" -version = "8.12.0" -description = "More routines for operating on iterables, beyond itertools" -category = "dev" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "mpi4py" -version = "3.1.3" -description = "Python bindings for MPI" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "packaging" -version = "21.3" -description = "Core utilities for Python packages" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" - -[[package]] -name = "pluggy" -version = "0.13.1" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, 
!=3.1.*, !=3.2.*, !=3.3.*" - -[package.extras] -dev = ["pre-commit", "tox"] - -[[package]] -name = "plum-dispatch" -version = "1.6" -description = "Multiple dispatch in Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "progressbar" -version = "2.5" -description = "Text progress bar library for Python." -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "progressbar2" -version = "4.0.0" -description = "A Python Progressbar library to provide visual (yet text based) progress to long running operations." -category = "main" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -python-utils = ">=3.0.0" - -[package.extras] -docs = ["sphinx (>=1.8.5)"] -tests = ["flake8 (>=3.7.7)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "pytest-mypy", "freezegun (>=0.3.11)", "sphinx (>=1.8.5)"] - -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "pycparser" -version = "2.21" -description = "C parser in Python" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - -[[package]] -name = "pygit2" -version = "1.9.0" -description = "Python bindings for libgit2." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -cached-property = "*" -cffi = ">=1.9.1" - -[[package]] -name = "pyparsing" -version = "3.0.7" -description = "Python parsing module" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "pytest" -version = "5.4.3" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.5" - -[package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=17.4.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -more-itertools = ">=4.0.0" -packaging = "*" -pluggy = ">=0.12,<1.0" -py = ">=1.5.0" -wcwidth = "*" - -[package.extras] -checkqa-mypy = ["mypy (==v0.761)"] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "python-utils" -version = "3.1.0" -description = "Python Utils is a module with some convenient utilities not included with the standard Python install" -category = "main" -optional = false -python-versions = ">3.6.0" - -[package.extras] -docs = ["mock", "sphinx", "python-utils"] -tests = ["flake8", "pytest", "pytest-cov", "pytest-mypy", "pytest-flake8", "pytest-asyncio", "sphinx", "types-setuptools"] - -[[package]] -name = "pytz" -version = "2021.3" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "requests" -version = "2.27.1" -description = "Python HTTP for Humans." 
-category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} -idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} -urllib3 = ">=1.21.1,<1.27" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] - -[[package]] -name = "s3transfer" -version = "0.5.1" -description = "An Amazon S3 Transfer Manager" -category = "main" -optional = false -python-versions = ">= 3.6" - -[package.dependencies] -botocore = ">=1.12.36,<2.0a.0" - -[package.extras] -crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "tqdm" -version = "4.63.0" -description = "Fast, Extensible Progress Meter" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] -notebook = ["ipywidgets (>=6)"] -telegram = ["requests"] - -[[package]] -name = "urllib3" -version = "1.26.8" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" - -[package.extras] -brotli = ["brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "wcwidth" -version = "0.2.5" -description = "Measures the displayed width of unicode strings in a terminal" -category = "dev" -optional = false -python-versions = "*" - -[metadata] -lock-version = "1.1" -python-versions = "^3.8" -content-hash = "b03483ec1610361ec9a607014b7fba206d59c0b5fde5b7cbad663ebbc524bd6f" - -[metadata.files] -atomicwrites = [ - {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, - {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, -] -attrs = [ - {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, - {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, -] -boto3 = [ - {file = "boto3-1.20.48-py3-none-any.whl", hash = "sha256:1c6301d9676cb18f2b0feddec393e52b9d5fa8147e6fe9a1665e39fd9739efc3"}, - {file = "boto3-1.20.48.tar.gz", hash = "sha256:6a8111492a571aeefbac2e4b6df5ce38bdbc16c7d8326f2a60a61c86032c49b0"}, -] -botocore = [ - {file = "botocore-1.23.48-py3-none-any.whl", hash = "sha256:768acb9a2247155b974a4184b29be321242ef8f61827f4bb958e60f00e476e90"}, - {file = "botocore-1.23.48.tar.gz", hash = "sha256:8652c11ff05d11d6cea7096aca8df7f8eb87980469860036ff47e196e4625c96"}, -] -cached-property = [ - {file = "cached-property-1.5.2.tar.gz", hash = "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130"}, - {file = "cached_property-1.5.2-py2.py3-none-any.whl", hash = 
"sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"}, -] -certifi = [ - {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, - {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, -] -cffi = [ - {file = "cffi-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962"}, - {file = "cffi-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0"}, - {file = "cffi-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14"}, - {file = "cffi-1.15.0-cp27-cp27m-win32.whl", hash = "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474"}, - {file = "cffi-1.15.0-cp27-cp27m-win_amd64.whl", hash = "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6"}, - {file = "cffi-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27"}, - {file = "cffi-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023"}, - {file = "cffi-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2"}, - {file = "cffi-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e"}, - {file = "cffi-1.15.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7"}, - {file = "cffi-1.15.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3"}, - {file = 
"cffi-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c"}, - {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962"}, - {file = "cffi-1.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382"}, - {file = "cffi-1.15.0-cp310-cp310-win32.whl", hash = "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55"}, - {file = "cffi-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0"}, - {file = "cffi-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e"}, - {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39"}, - {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc"}, - {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032"}, - {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8"}, - {file = "cffi-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605"}, - {file = "cffi-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e"}, - {file = "cffi-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc"}, - {file = 
"cffi-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636"}, - {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4"}, - {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997"}, - {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b"}, - {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2"}, - {file = "cffi-1.15.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7"}, - {file = "cffi-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66"}, - {file = "cffi-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029"}, - {file = "cffi-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880"}, - {file = "cffi-1.15.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20"}, - {file = "cffi-1.15.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024"}, - {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e"}, - {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728"}, - {file = "cffi-1.15.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6"}, - {file = "cffi-1.15.0-cp38-cp38-win32.whl", hash = "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c"}, - {file = "cffi-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443"}, - {file = "cffi-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a"}, - {file = "cffi-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37"}, - {file = "cffi-1.15.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a"}, - {file = "cffi-1.15.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e"}, - {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"}, - {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df"}, - {file = "cffi-1.15.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8"}, - {file = "cffi-1.15.0-cp39-cp39-win32.whl", hash = "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a"}, - {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"}, - {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"}, -] 
-charset-normalizer = [ - {file = "charset-normalizer-2.0.11.tar.gz", hash = "sha256:98398a9d69ee80548c762ba991a4728bfc3836768ed226b3945908d1a688371c"}, - {file = "charset_normalizer-2.0.11-py3-none-any.whl", hash = "sha256:2842d8f5e82a1f6aa437380934d5e1cd4fcf2003b06fed6940769c164a480a45"}, -] -colorama = [ - {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, - {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, -] -idna = [ - {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, - {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, -] -jmespath = [ - {file = "jmespath-0.10.0-py2.py3-none-any.whl", hash = "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"}, - {file = "jmespath-0.10.0.tar.gz", hash = "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9"}, -] -more-itertools = [ - {file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"}, - {file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"}, -] -mpi4py = [ - {file = "mpi4py-3.1.3-cp27-cp27m-win32.whl", hash = "sha256:ae07361c343539364f09b9099a3456e7849aae593c631633ea90bd164cd7e61c"}, - {file = "mpi4py-3.1.3-cp27-cp27m-win_amd64.whl", hash = "sha256:ba67935e2d80c3d50ecd38280a082dfe586a25acd8c68ccbe24d65055cf03874"}, - {file = "mpi4py-3.1.3-cp310-cp310-win32.whl", hash = "sha256:ad64e8daf48943c9f51ea12b11f6681754a2f3f3a0f2d7e2158844343193305c"}, - {file = "mpi4py-3.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:b5abeda929cdfd66abd9e0de8d03e6e43dcf53d8152750cb47a14837e519fcd8"}, - {file = "mpi4py-3.1.3-cp35-cp35m-win32.whl", hash = 
"sha256:e309d642a22808e2cf16188e3acb66aa83c513ba8702392cd82d3c876a576131"}, - {file = "mpi4py-3.1.3-cp35-cp35m-win_amd64.whl", hash = "sha256:44c24f7aec24f09e4b9d3d77082509e914ab836c2d8281091cfde8a1072ed08a"}, - {file = "mpi4py-3.1.3-cp36-cp36m-win32.whl", hash = "sha256:19aada1336bbcc9b04f7e72d6a4f3074ba67e177eada7241dc259789f70e0d56"}, - {file = "mpi4py-3.1.3-cp36-cp36m-win_amd64.whl", hash = "sha256:5a78b0948d74422e2b597f8caebd129421d22bc786301f046edf95b726a34e3e"}, - {file = "mpi4py-3.1.3-cp37-cp37m-win32.whl", hash = "sha256:a3d3731ac7dbb6a80ff138c9321e7b31bdede804a1c7881fabce5ba3e0620a58"}, - {file = "mpi4py-3.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:ff84488f701ce60e141bb55f57b4039e041163054e5d16bd348c874180df2451"}, - {file = "mpi4py-3.1.3-cp38-cp38-win32.whl", hash = "sha256:2d1accd5544f78079ae19ee912e3311692219444d8cb64360b822984f94604b9"}, - {file = "mpi4py-3.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:d1989b3dd36c3bc42906183fc8ea7789e7e797c5abd0cee75b2bcbb6f3a4b59c"}, - {file = "mpi4py-3.1.3-cp39-cp39-win32.whl", hash = "sha256:32c4ccc1fa2578bf22c01cc532aae363672a921c6efe44a4937a7990c1b12129"}, - {file = "mpi4py-3.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:57c3d41f44b6fe68c043a145a5fddbe5e0fb36af67929c6292ca41b7d9f0c6f4"}, - {file = "mpi4py-3.1.3.tar.gz", hash = "sha256:f1e9fae1079f43eafdd9f817cdb3fd30d709edc093b5d5dada57a461b2db3008"}, -] -packaging = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, -] -pluggy = [ - {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, - {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, -] -plum-dispatch = [] -progressbar = [ - {file = "progressbar-2.5.tar.gz", hash = 
"sha256:5d81cb529da2e223b53962afd6c8ca0f05c6670e40309a7219eacc36af9b6c63"}, -] -progressbar2 = [ - {file = "progressbar2-4.0.0-py2.py3-none-any.whl", hash = "sha256:2562ba3e554433f08e81fb7b786208b19de135f3ca1c5da1787d9b05558e6247"}, - {file = "progressbar2-4.0.0.tar.gz", hash = "sha256:14d3165a1781d053ffaa117daf27cc706128d2ec1d2977fdb05b6bb079888013"}, -] -py = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] -pycparser = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, -] -pygit2 = [ - {file = "pygit2-1.9.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4a78eabd0ad69887940c9b853d375303c199a79d6964a524f8e4dd5dfe930bb0"}, - {file = "pygit2-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e9317f731cbbe90b64ba30fbcb1bb019857312a2cb1b46f1dff5963f36dfc758"}, - {file = "pygit2-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2d4b89fcb9f5c2b97a58c7ae560f51beeb45a0be02f1359964a2b1f03a221dd"}, - {file = "pygit2-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2374d98b82195186765868f73c6c65fb2a69ec4f53aa9c56f139d72881c8967a"}, - {file = "pygit2-1.9.0-cp310-cp310-win32.whl", hash = "sha256:ef5660e8abcfae1425ea2bb80f1b09cdbc633d2926ae89bc961367af8eddf880"}, - {file = "pygit2-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:7ebbafcc41a0d8e7deefe12e15fe862df1f862afcf74a319c5596910c0636cda"}, - {file = "pygit2-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0aa6b6393b52dac92c88b09d3e655820641d940a1e9c53b565f698f70c6a2c2"}, - {file = 
"pygit2-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9150d58bd84e71ace4c8b74257898d2aa88574fe60145cbc59e07455c19f811a"}, - {file = "pygit2-1.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50210f7066b659a8e5cdb96b81a6a56386b0f940a5ed3d4a49218a7fa7052327"}, - {file = "pygit2-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:e3421c8617b708cc3dc7e5c8449afe0917c8327767d33e3bb6d40e6f387334df"}, - {file = "pygit2-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ad332c1be1a8cdf8803d73ad1c40ae91d6bd5366489dbaad2de08c68f1fa8eb7"}, - {file = "pygit2-1.9.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5abe0dacf32227be721f8a1f793b2c95a92bca3ef6564bd065238736e49a3d07"}, - {file = "pygit2-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b957b8f91854a5b19b7b7ec696c9310562af1585d5d592746f19d008d08c62b4"}, - {file = "pygit2-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dad5ddd816f3690cb181ccd84883b5d95f61f19a27060ed379a6ea733855777"}, - {file = "pygit2-1.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cbc6f03c0a5a9dc414fe0a95c4c77a516575ec184205179a63872957b92482a"}, - {file = "pygit2-1.9.0-cp38-cp38-win32.whl", hash = "sha256:1e12f17ec594bf9297b56dffae50a4938caf7b44aecd9523cae82587e833cc81"}, - {file = "pygit2-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:6cb5d0131f699a91cf329a3d081b1309f542b50ae8968d583de8329e8a234f5f"}, - {file = "pygit2-1.9.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8dafc6079f880b5e4b0fe1a141bcf1018110d8e4579710de01dff0221c65b848"}, - {file = "pygit2-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ff79d9b20f5a5ec5ce2a2dfc2e5775404c3932b40cc39836cdb92b1368a3025c"}, - {file = "pygit2-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cad866f71e2f0b240920d5630a70d1286e140a9ea4bfc91f3a349d73d24288a8"}, - {file = 
"pygit2-1.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fe01a6540e0ea99ddc1d9053286a1dc829dd096b97a64d2300f9db46470f7f"}, - {file = "pygit2-1.9.0-cp39-cp39-win32.whl", hash = "sha256:2f72b9cf6d55b80cbd870e86b92ada45ff68f459634cd18220c87d1076e2c354"}, - {file = "pygit2-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:1e69647473ba6cd84c5325286a856241cdfe3ebf1d6a6fd12343acbe9a8e815f"}, - {file = "pygit2-1.9.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2bf9fd1bc9ab8c409fd1575d4b10d9b219ff127c07a9bb09e2e500ab57d0b55e"}, - {file = "pygit2-1.9.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d4458cebf2c811c4dfbd1423eaecfe49a41a27de31916fb21dbf40eae11cdfb"}, - {file = "pygit2-1.9.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da8ad80a037f414a880952e5b02d99ee4052970b0ba67aeff12916c2d96cb5db"}, - {file = "pygit2-1.9.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1f1339410c70d7cf8b3cee2bee019d7d30141ffcad0f614e975decbe9867efd3"}, - {file = "pygit2-1.9.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59a24e8e58035aa4746c5d47eae37612ebde0c1d0d04f9cf0b0fac39fb6fe21d"}, - {file = "pygit2-1.9.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3ffd6ae7bc9e4d6a204f593c02b2537acdd08f973ddd7e5015277d9550cb125"}, - {file = "pygit2-1.9.0.tar.gz", hash = "sha256:c5e8588acad5e32fa0595582571059e6b90ec7c487c58b4e53c2800dcbde44c8"}, -] -pyparsing = [ - {file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"}, - {file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"}, -] -pytest = [ - {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, - {file = "pytest-5.4.3.tar.gz", hash = 
"sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, -] -python-dateutil = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] -python-utils = [ - {file = "python-utils-3.1.0.tar.gz", hash = "sha256:4dace6420c5f50d6509251fa0aee0e2a0b826bbc8a5d2a6d7e99dca80e78a7f3"}, - {file = "python_utils-3.1.0-py2.py3-none-any.whl", hash = "sha256:93d9cdc8b8580669eeb4418128b3dfb71118c332fa6fda3f204e7ad4900363f1"}, -] -pytz = [ - {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"}, - {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"}, -] -requests = [ - {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, - {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, -] -s3transfer = [ - {file = "s3transfer-0.5.1-py3-none-any.whl", hash = "sha256:25c140f5c66aa79e1ac60be50dcd45ddc59e83895f062a3aab263b870102911f"}, - {file = "s3transfer-0.5.1.tar.gz", hash = "sha256:69d264d3e760e569b78aaa0f22c97e955891cd22e32b10c51f784eeda4d9d10a"}, -] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -toml = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] -tqdm = [ - {file = 
"tqdm-4.63.0-py2.py3-none-any.whl", hash = "sha256:e643e071046f17139dea55b880dc9b33822ce21613b4a4f5ea57f202833dbc29"}, - {file = "tqdm-4.63.0.tar.gz", hash = "sha256:1d9835ede8e394bb8c9dcbffbca02d717217113adc679236873eeaac5bc0b3cd"}, -] -urllib3 = [ - {file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"}, - {file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"}, -] -wcwidth = [ - {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, - {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, -] diff --git a/banyan/res/pf_dispatch_table.toml b/banyan/res/pf_dispatch_table.toml deleted file mode 100644 index 7fe9ac4..0000000 --- a/banyan/res/pf_dispatch_table.toml +++ /dev/null @@ -1,3 +0,0 @@ -splits = { } -merges = { } -casts = { } \ No newline at end of file diff --git a/banyan/tests/mpi_script_fail.py b/banyan/tests/mpi_script_fail.py deleted file mode 100644 index 919c852..0000000 --- a/banyan/tests/mpi_script_fail.py +++ /dev/null @@ -1,9 +0,0 @@ -from mpi4py import MPI - - -comm = MPI.COMM_WORLD -size = comm.Get_size() -rank = comm.Get_rank() - -# Raise exception on all workers except for main -raise Exception(f"Error on worker {rank}") \ No newline at end of file diff --git a/banyan/tests/mpi_script_success.py b/banyan/tests/mpi_script_success.py deleted file mode 100644 index f0879a9..0000000 --- a/banyan/tests/mpi_script_success.py +++ /dev/null @@ -1,23 +0,0 @@ -from mpi4py import MPI - - -class Foo: - def __init__(self, x): - self.x = x - - def get_value(self): - return (self.x + 1) ** 2 - -comm = MPI.COMM_WORLD -size = comm.Get_size() -rank = comm.Get_rank() - -data = Foo(rank).get_value() -data = comm.gather(data, root=0) -if rank == 0: - for i in range(size): - assert data[i] == (i+1)**2 
-else: - assert data is None - -print("Finished executing script") \ No newline at end of file diff --git a/banyan/tests/test_clusters.py b/banyan/tests/test_clusters.py deleted file mode 100644 index 2d5419e..0000000 --- a/banyan/tests/test_clusters.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest -import random -import string - -from banyan.clusters import ( - create_cluster, - get_clusters, - get_running_clusters, - delete_cluster, -) - - -def test_create_cluster_with_invalid_name(): - with pytest.raises(Exception) as excinfo: - bad_cluster_name = "name with spaces" - cluster_object = create_cluster( - name=bad_cluster_name, - instance_type="t3.xlarge", - ) - assert "can only contain" in str(excinfo.value) - - -def test_create_delete_cluster(): - - cluster_name = "c" + "".join( - random.choices(string.ascii_lowercase + string.digits, k=5) - ) - print(cluster_name) - - cluster_object = create_cluster( - name=cluster_name, - instance_type="t3.xlarge", - ) - - assert cluster_name in get_clusters() - - assert cluster_name in get_running_clusters() - - delete_cluster(cluster_name) - - assert cluster_name not in get_clusters() - - assert not cluster_name not in get_running_clusters() diff --git a/banyan/tests/test_sessions.py b/banyan/tests/test_sessions.py deleted file mode 100644 index bf2d19a..0000000 --- a/banyan/tests/test_sessions.py +++ /dev/null @@ -1,347 +0,0 @@ -import boto3 -from contextlib import nullcontext as does_not_raise - -import os -from pygit2 import Repository -import pytest -import time - -from banyan.constants import BANYAN_PYTHON_BRANCH_NAME -from banyan.clusters import get_cluster_s3_bucket_name -from banyan.sessions import ( - get_session, - get_sessions, - get_session_status, - start_session, - get_running_sessions, - end_session, - get_running_sessions, - run_session, -) - -TEST_BRANCH = ( - Repository(".").head.shorthand - if os.getenv("BANYAN_TESTING", "0") == "1" - else BANYAN_PYTHON_BRANCH_NAME -) - - -@pytest.mark.parametrize( - 
"status", ["all", "creating", "running", "failed", "completed", "invalid_status"] -) -def test_get_sessions_with_status(status): - """Test getting sessions for a cluster.""" - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - if status == "all": - sessions = get_sessions(cluster_name) - else: - filtered_sessions = get_sessions(cluster_name, status=status) - assert all([s["status"] == status for (s_id, s) in filtered_sessions.items()]) - - -def test_start_get_end_sessions(): - # Start a session - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - - session_id = start_session( - cluster_name=cluster_name, - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - force_sync=(os.getenv("BANYAN_FORCE_SYNC") == "1"), - nowait=False, - ) - running_sessions = get_running_sessions(cluster_name) - all_sessions = get_sessions(cluster_name) - end_session(session_id, release_resources_now=True) - running_sessions_after = get_running_sessions(cluster_name) - all_sessions_after = get_sessions(cluster_name) - - # Before end_session - assert all([s["status"] == "running" for (s_id, s) in running_sessions.items()]) - assert session_id in running_sessions - assert session_id in all_sessions - # After end_session - assert (session_id in all_sessions_after) and ( - all_sessions_after[session_id]["status"] == "completed" - ) - assert session_id not in running_sessions_after - - -# @testset "Start a session with dev paths" begin -# session_id = start_session( -# cluster_name = ENV["BANYAN_CLUSTER_NAME"], -# nworkers = 2, -# url = "https://github.com/banyan-team/banyan-julia.git", -# branch = get(ENV, "BANYAN_JULIA_BRANCH", Banyan.get_branch_name()), -# directory = "banyan-julia/Banyan/test", -# dev_paths = [ -# "banyan-julia/Banyan", -# ], -# force_pull = true, -# force_sync = true, -# force_install = true, -# store_logs_on_cluster=get(ENV, "BANYAN_STORE_LOGS_ON_CLUSTER", "0") == "1", -# ) -# session_status = 
get_session_status(session_id) -# end_session(session_id, release_resources_now=true) -# @test session_status == "running" -# end - - -@pytest.mark.parametrize("nowait", [True, False]) -def test_start_sessions_with_nowait(nowait): - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - - session_id = start_session( - cluster_name=cluster_name, - nworkers=2, - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - force_sync=os.getenv("BANYAN_FORCE_SYNC") == "1", - nowait=nowait, - ) - - session_status = get_session_status(session_id) - if not nowait: - assert session_status == "running" - else: - assert session_status == "creating" - while session_status == "creating": - time.sleep(20) - session_status = get_session_status(session_id) - assert session_status == "running" - - end_session(session_id, release_resources_now=True) - - -@pytest.mark.parametrize("estimate_available_memory", [True, False]) -def test_start_sessions_with_estimate_available_memory(estimate_available_memory): - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - - session_id = start_session( - cluster_name=cluster_name, - nworkers=2, - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - force_sync=os.getenv("BANYAN_FORCE_SYNC") == "1", - estimate_available_memory=estimate_available_memory, - ) - - end_session(session_id, release_resources_now=True) - - -@pytest.mark.parametrize("store_logs_in_s3", [True, False]) -def test_start_sessions_store_logs_in_s3(store_logs_in_s3): - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - - session_id = start_session( - cluster_name=cluster_name, - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - 
store_logs_in_s3=store_logs_in_s3, - force_sync=os.getenv("BANYAN_FORCE_SYNC") == "1", - ) - end_session(session_id, release_resources_now=True) - time.sleep(60) - - log_file = f"banyan-log-for-session-{session_id}" - s3 = boto3.resource("s3") - bucket = s3.Bucket(get_cluster_s3_bucket_name(cluster_name)) - objs = list(bucket.objects.filter(Prefix=log_file)) - assert store_logs_in_s3 == (len(objs) > 0) - - -def test_start_end_multiple_sessions(): - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - delay_time = 5 - - # Start a session and end it - session_id_1 = start_session( - cluster_name=cluster_name, - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - force_sync=True, - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - release_resources_after=delay_time, - nowait=False, - ) - resource_id_1 = get_session().resource_id - session_status = get_session_status(session_id_1) - assert session_status == "running" - - end_session(session_id_1) - time.sleep(60) # To ensure session gets ended - session_status = get_session_status(session_id_1) - assert session_status == "completed" - - # Start another session with same nworkers and verify the job ID matches - session_id_2 = start_session( - cluster_name=os.environ["BANYAN_CLUSTER_NAME"], - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - release_resources_after=delay_time, - nowait=False, - ) - resource_id_2 = get_session().resource_id - session_status = get_session_status(session_id_2) - assert session_status == "running" - assert resource_id_2 == resource_id_1 # it should have reused resource - - end_session(session_id_2) - time.sleep(60) - session_status = get_session_status(session_id_2) - assert session_status == "completed" - - # Start another 
session with different nworkers and verify the job ID - # is different - session_id_3 = start_session( - cluster_name=os.environ["BANYAN_CLUSTER_NAME"], - nworkers=4, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - release_resources_after=delay_time, - nowait=False, - ) - resource_id_3 = get_session().resource_id - session_status = get_session_status(session_id_3) - assert session_status == "running" - assert resource_id_3 != resource_id_1 - - end_session(session_id_3) - time.sleep(60) - session_status = get_session_status(session_id_3) - assert session_status == "completed" - - # Sleep for the delay_time and check that the underlying resources are destroyed - # by creating a new session and ensuring that it uses different resources - time.sleep(delay_time * 60) - session_id_4 = start_session( - cluster_name=os.environ["BANYAN_CLUSTER_NAME"], - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - release_resources_after=delay_time, - nowait=True, - ) - resource_id_4 = get_session().resource_id - assert resource_id_4 != resource_id_1 - - end_session(session_id_4, release_resources_now=True) - - -def test_start_session_with_invalid_branch_name(): - cluster_name = os.environ["BANYAN_CLUSTER_NAME"] - with pytest.raises(Exception): - session_id = start_session( - cluster_name=cluster_name, - nworkers=2, - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - url="https://github.com/banyan-team/banyan-python", - branch="nonexistant-branch", - directory="banyan-python/banyan", - force_sync=os.getenv("BANYAN_FORCE_SYNC") == "1", - nowait=False, - ) - try: - end_session(session_id, release_resources_now=True) - except: - pass - - -def 
test_run_session_with_mpi_script(): - cluster_name = os.getenv("BANYAN_CLUSTER_NAME") - with does_not_raise(): - run_session( - cluster_name=cluster_name, - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - code_files=["tests/mpi_script_success.py"], - force_sync=os.getenv("BANYAN_FORCE_SYNC") == "1", - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - ) - - -def test_run_session_with_mpi_script_with_error(): - cluster_name = os.getenv("BANYAN_CLUSTER_NAME") - with pytest.raises(Exception): - run_session( - cluster_name=cluster_name, - nworkers=2, - url="https://github.com/banyan-team/banyan-python", - branch=TEST_BRANCH, - directory="banyan-python/banyan", - code_files=["tests/mpi_script_fail.py"], - force_sync=os.getenv("BANYAN_FORCE_SYNC") == "1", - store_logs_on_cluster=os.environ.get("BANYAN_STORE_LOGS_ON_CLUSTER", "0") - == "1", - ) - - -# @testset "Reusing session that fails" begin -# Pkg.activate("./") -# cluster_name = ENV["BANYAN_CLUSTER_NAME"] - -# # Start a session -# session_id_1 = start_session( -# cluster_name = ENV["BANYAN_CLUSTER_NAME"], -# nworkers = 2, -# store_logs_on_cluster=get(ENV, "BANYAN_STORE_LOGS_ON_CLUSTER", "0") == "1", -# force_sync=true -# ) -# resource_id_1 = get_session().resource_id -# session_status_1 = get_session_status(session_id_1) - -# # Trigger a failure in the session that will end the session -# try -# @test_throws begin -# offloaded(distributed=true) do -# error("Oops sorry this is an error") -# end -# end ErrorException -# catch -# end -# session_status_1_after_failure = get_session_status(session_id_1) - -# # Start a new session (it should reuse the resources of the failed session) and then end it -# session_id_2 = start_session( -# cluster_name = ENV["BANYAN_CLUSTER_NAME"], -# nworkers = 2, -# store_logs_on_cluster=get(ENV, "BANYAN_STORE_LOGS_ON_CLUSTER", "0") == "1", -# nowait=true -# ) -# resource_id_2 = 
get_session().resource_id -# session_status_2 = get_session_status(session_id_2) -# end_session(session_id_2, release_resources_now=true) - -# # Assert -# @test session_status_1 == "running" -# @test session_status_1_after_failure == "failed" -# @test resource_id_2 == resource_id_1 -# end diff --git a/ci/style.sh b/ci/style.sh new file mode 100755 index 0000000..d1cf210 --- /dev/null +++ b/ci/style.sh @@ -0,0 +1,4 @@ +MAIN_DIR="." +isort -rc $MAIN_DIR +autoflake -r --in-place --remove-unused-variables $MAIN_DIR +black $MAIN_DIR --line-length 80