diff --git a/helpers/henv.py b/helpers/henv.py index c43fed15d..950882867 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -436,118 +436,89 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: # ############################################################################# -# Copied from helpers.hgit to avoid circular dependencies. - - -@functools.lru_cache() -def _is_inside_submodule(git_dir: str = ".") -> bool: - """ - Return whether a dir is inside a Git submodule or a Git supermodule. - - We determine this checking if the current Git repo is included - inside another Git repo. - """ - cmd = [] - # - Find the git root of the current directory - # - Check if the dir one level up is a valid Git repo - # Go to the dir. - cmd.append(f"cd {git_dir}") - # > cd im/ - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd.append('cd "$(git rev-parse --show-toplevel)/.."') - # > git rev-parse --is-inside-work-tree - # true - cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") - cmd_as_str = " && ".join(cmd) - rc = hsystem.system(cmd_as_str, abort_on_error=False) - ret: bool = rc == 0 - return ret - - -@functools.lru_cache() -def _get_client_root(super_module: bool) -> str: - """ - Return the full path of the root of the Git client. - - E.g., `/Users/saggese/src/.../amp`. - - :param super_module: if True use the root of the Git super_module, - if we are in a submodule. Otherwise use the Git sub_module root - """ - if super_module and _is_inside_submodule(): - # https://stackoverflow.com/questions/957928 - # > cd /Users/saggese/src/.../amp - # > git rev-parse --show-superproject-working-tree - # /Users/saggese/src/... - cmd = "git rev-parse --show-superproject-working-tree" - else: - # > git rev-parse --show-toplevel - # /Users/saggese/src/.../amp - cmd = "git rev-parse --show-toplevel" - # TODO(gp): Use system_to_one_line(). 
- _, out = hsystem.system_to_string(cmd) - out = out.rstrip("\n") - hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") - client_root: str = os.path.realpath(out) - return client_root - - -# End copy. - - -def get_repo_config_file(super_module: bool = True) -> str: - """ - Return the absolute path to `repo_config.py` that should be used. - - The `repo_config.py` is determined based on an overriding env var or - based on the root of the Git path. - """ - env_var = "CSFY_REPO_CONFIG_PATH" - file_name = get_env_var(env_var, abort_on_missing=False) - if file_name: - _LOG.warning("Using value '%s' for %s from env var", file_name, env_var) - else: - # TODO(gp): We should actually ask Git where the super-module is. - client_root = _get_client_root(super_module) - file_name = os.path.join(client_root, "repo_config.py") - file_name = os.path.abspath(file_name) - return file_name - - -def _get_repo_config_code(super_module: bool = True) -> str: - """ - Return the text of the code stored in `repo_config.py`. - """ - file_name = get_repo_config_file(super_module) - hdbg.dassert_file_exists(file_name) - code: str = hio.from_file(file_name) - return code - - -def execute_repo_config_code(code_to_execute: str) -> Any: - """ - Execute code in `repo_config.py` by dynamically finding the correct one. - - E.g., - ``` - henv.execute_repo_config_code("has_dind_support()") - ``` - """ - # Read the info from the current repo. - code = _get_repo_config_code() - # TODO(gp): make the linter happy creating this symbol that comes from the - # `exec()`. - try: - exec(code, globals()) # pylint: disable=exec-used - ret = eval(code_to_execute) - except NameError as e: - _LOG.error( - "While executing '%s' caught error:\n%s\nTrying to continue", - code_to_execute, - e, - ) - ret = None - _ = e - # raise e - return ret +# # Copied from helpers.hgit to avoid circular dependencies. 
+# +# +# @functools.lru_cache() +# def _is_inside_submodule(git_dir: str = ".") -> bool: +# """ +# Return whether a dir is inside a Git submodule or a Git supermodule. +# +# We determine this checking if the current Git repo is included +# inside another Git repo. +# """ +# cmd = [] +# # - Find the git root of the current directory +# # - Check if the dir one level up is a valid Git repo +# # Go to the dir. +# cmd.append(f"cd {git_dir}") +# # > cd im/ +# # > git rev-parse --show-toplevel +# # /Users/saggese/src/.../amp +# cmd.append('cd "$(git rev-parse --show-toplevel)/.."') +# # > git rev-parse --is-inside-work-tree +# # true +# cmd.append("(git rev-parse --is-inside-work-tree | grep -q true)") +# cmd_as_str = " && ".join(cmd) +# rc = hsystem.system(cmd_as_str, abort_on_error=False) +# ret: bool = rc == 0 +# return ret +# +# +# @functools.lru_cache() +# def _get_client_root(super_module: bool) -> str: +# """ +# Return the full path of the root of the Git client. +# +# E.g., `/Users/saggese/src/.../amp`. +# +# :param super_module: if True use the root of the Git super_module, +# if we are in a submodule. Otherwise use the Git sub_module root +# """ +# if super_module and _is_inside_submodule(): +# # https://stackoverflow.com/questions/957928 +# # > cd /Users/saggese/src/.../amp +# # > git rev-parse --show-superproject-working-tree +# # /Users/saggese/src/... +# cmd = "git rev-parse --show-superproject-working-tree" +# else: +# # > git rev-parse --show-toplevel +# # /Users/saggese/src/.../amp +# cmd = "git rev-parse --show-toplevel" +# # TODO(gp): Use system_to_one_line(). +# _, out = hsystem.system_to_string(cmd) +# out = out.rstrip("\n") +# hdbg.dassert_eq(len(out.split("\n")), 1, msg=f"Invalid out='{out}'") +# client_root: str = os.path.realpath(out) +# return client_root +# +# +# # End copy. + + +# def execute_repo_config_code(code_to_execute: str) -> Any: +# """ +# Execute code in `repo_config.py` by dynamically finding the correct one. 
+# +# E.g., +# ``` +# henv.execute_repo_config_code("has_dind_support()") +# ``` +# """ +# # Read the info from the current repo. +# code = _get_repo_config_code() +# # TODO(gp): make the linter happy creating this symbol that comes from the +# # `exec()`. +# try: +# exec(code, globals()) # pylint: disable=exec-used +# ret = eval(code_to_execute) +# except NameError as e: +# _LOG.error( +# "While executing '%s' caught error:\n%s\nTrying to continue", +# code_to_execute, +# e, +# ) +# ret = None +# _ = e +# # raise e +# return ret diff --git a/helpers/hgit.py b/helpers/hgit.py index 2ee2217a6..153861735 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -171,6 +171,7 @@ def get_client_root(super_module: bool) -> str: # TODO(gp): Replace `get_client_root` with this. +# TODO(gp): -> get_client_root2() or get_outermost_supermodule_root() def find_git_root(path: str = ".") -> str: """ Find recursively the dir of the outermost super module. @@ -292,9 +293,9 @@ def get_project_dirname(only_index: bool = False) -> str: Return the name of the project name (e.g., `/Users/saggese/src/amp1` -> `amp1`). - NOTE: this works properly only outside Docker, e.g., when calling from `invoke`. - Inside Docker the result might be incorrect since the Git client is mapped on - `/app`. + NOTE: this works properly only outside Docker, e.g., when calling from + `invoke`. Inside Docker the result might be incorrect since the Git client + is mapped on `/app`. :param only_index: return only the index of the client if possible, e.g., E.g., for `/Users/saggese/src/amp1` it returns the string `1` @@ -354,8 +355,8 @@ def is_helpers() -> bool: """ Return whether we are inside `helpers` repo. - Either as super module, or a sub module depending on a current - working directory. + Either as super module, or a sub module depending on a current working + directory. 
""" return _is_repo("helpers") @@ -803,6 +804,7 @@ def get_all_repo_names( return sorted(list(repo_map.keys())) +# TODO(gp): This should be injected from repo_config.py def get_task_prefix_from_repo_short_name(short_name: str) -> str: """ Return the task prefix for a repo (e.g., "amp" -> "AmpTask"). @@ -888,6 +890,7 @@ def get_path_from_git_root( return ret +# TODO(gp): Just do a find @functools.lru_cache() def get_amp_abs_path() -> str: """ @@ -935,6 +938,7 @@ def get_repo_dirs() -> List[str]: return dir_names +# TODO(gp): It should go in hdocker? def find_docker_file( file_name: str, *, @@ -947,15 +951,15 @@ def find_docker_file( Convert a file or dir that was generated inside Docker to a file in the current Git client. - This operation is best effort since it might not be able to find the + This operation is best-effort since it might not be able to find the corresponding file in the current repo. E.g., - - A file like '/app/amp/core/dataflow_model/utils.py', in a Docker container with - Git root in '/app' becomes 'amp/core/dataflow_model/utils.py' - - For a file like '/app/amp/core/dataflow_model/utils.py' outside Docker, we look - for the file 'dataflow_model/utils.py' in the current client and then normalize - with respect to the + - A file like '/app/amp/core/dataflow_model/utils.py', in a Docker container + with Git root in '/app' becomes 'amp/core/dataflow_model/utils.py' + - For a file like '/app/amp/core/dataflow_model/utils.py' outside Docker, we + look for the file 'dataflow_model/utils.py' in the current client and + then normalize with respect to the :param dir_depth: same meaning as in `find_file_with_dir()` :param mode: same as `system_interaction.select_result_file_from_list()` diff --git a/helpers/hserver.py b/helpers/hserver.py index b288c30ed..fd5713fd8 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -8,7 +8,10 @@ import logging import os -from typing import List, Optional +import functools +import os +import subprocess +from typing 
import Dict, List, Optional # This module should depend only on: # - Python standard modules @@ -217,7 +220,7 @@ def setup_to_str() -> str: def _dassert_setup_consistency() -> None: """ - Check that one and only one set up config should be true. + Check that one and only one server config is true. """ is_cmamp_prod_ = is_cmamp_prod() is_dev4_ = is_dev4() @@ -238,9 +241,8 @@ def _dassert_setup_consistency() -> None: ) if sum_ != 1: msg = "One and only one set-up config should be true:\n" + setup_to_str() - # TODO(gp): Unclear if this is a difference between Kaizenflow and cmamp. - _LOG.warning(msg) - # raise ValueError(msg) + #_LOG.warning(msg) + raise ValueError(msg) # If the env var is not defined then we want to check. The only reason to skip @@ -255,6 +257,435 @@ def _dassert_setup_consistency() -> None: _LOG.warning("Skipping repo check in %s", __file__) +# ############################################################################# +# Docker +# ############################################################################# + +# There are functions that: +# 1) Check if a certain feature is supported on the host +# - E.g., `is_..._supported`, `is_docker_in_docker_supported`, +# `is_docker_sibling_container_supported` +# - These functions run tests on the host to check if a certain feature are +# possible +# - These functions must not be dependent on the repo +# 2) Return whether a certain feature should be used for a certain machine +# - E.g., `use_...`, `use_docker_sibling_containers`, `use_docker_main_network`, +# - These functions use the type of hose (i.e., `is_...`) to decide what to do +# - These functions should not be dependent on the repo, unless exceptional +# cases + +# Note that a different approach could have been to use the first type of functions +# to decide directly what to do +# Instead we use a more conservative approach +# - Use the 2nd type of functions to decide what to do +# - Unit tests to check that certain features we expected are indeed 
available +# on each machine (i.e., 1st and 2nd type of functions should agree) + +# ############################################################################# +# is_..._supported +# ############################################################################# + +# Our code runs on: +# - MacOS +# - Supports Docker privileged mode +# - The same user and group is used inside the container +# - Root can also be used +# - Linux (dev server, GitHub CI) +# - Supports Docker privileged mode +# - The same user and group is used inside the container +# - Linux (spm-dev4) +# - Doesn't support Docker privileged mode +# - A different user and group is used inside the container + +@functools.lru_cache() +def is_docker_supported() -> bool: + """ + Return whether Docker is installed and accessible. + """ + # Check if Docker is installed. + try: + subprocess.run(["docker", "--version"], check=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except FileNotFoundError: + _LOG.error("Docker is not installed") + return False + except subprocess.CalledProcessError as e: + _LOG.error("Docker installation is found but inaccessible: %s", str(e)) + return False + _LOG.debug("Docker is installed") + # Check if Docker socket is accessible. + docker_socket = "/var/run/docker.sock" + if not os.path.exists(docker_socket): + _LOG.error("Docker socket %s is not accessible", docker_socket) + return False + if not os.access(docker_socket, os.R_OK | os.W_OK): + _LOG.error("Docker socket %s exists but lacks necessary read/write " + "permissions", docker_socket) + return False + _LOG.debug("Docker socket is accessible") + # Test running a Docker container. 
+    try:
+        subprocess.run(
+            ["docker", "run", "--rm", "hello-world"],
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.CalledProcessError as e:
+        _LOG.error("Docker is installed, but running containers failed: %s",
+                   str(e))
+        return False
+    return True
+
+
+@functools.lru_cache()
+def is_docker_sibling_container_supported() -> bool:
+    """
+    Return whether Docker supports running sibling containers.
+    """
+    # Test running a Docker container
+    try:
+        subprocess.run(
+            "docker run --rm -v /var/run/docker.sock:/var/run/docker.sock hello-world".split(),
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.CalledProcessError as e:
+        _LOG.error("Running sibling containers failed: %s", str(e))
+        return False
+    return True
+
+
+# TODO(gp): -> is_docker_in_docker_supported
+@functools.lru_cache()
+def has_dind_support() -> bool:
+    """
+    Return whether Docker supports privileged mode to run containers.
+
+    This is needed to use Docker-in-Docker (aka "dind").
+    """
+    # Test running a Docker container in privileged mode.
+    try:
+        subprocess.run(
+            "docker run --rm --privileged hello-world".split(),
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.CalledProcessError as e:
+        _LOG.debug("Running privileged containers failed: %s", str(e))
+        return False
+    # Test running a Docker dind container.
+    try:
+        subprocess.run(
+            "docker run --privileged docker:dind docker --version".split(),
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.CalledProcessError as e:
+        _LOG.debug("Running docker:dind failed: %s", str(e))
+        return False
+    return True
+
+    # _print("is_inside_docker()=%s" % is_inside_docker())
+    # if not is_inside_docker():
+    #     # Outside Docker there is no privileged mode.
+ # _print("-> ret = False") + # return False + # # TODO(gp): Not sure this is really needed since we do this check + # # after enable_privileged_mode controls if we have dind or not. + # if _is_mac_version_with_sibling_containers(): + # return False + # # TODO(gp): This part is not multi-process friendly. When multiple + # # processes try to run this code they interfere. A solution is to run `ip + # # link` in the entrypoint and create a `has_docker_privileged_mode` file + # # which contains the value. + # # We rely on the approach from https://stackoverflow.com/questions/32144575 + # # to check if there is support for privileged mode. + # # Sometimes there is some state left, so we need to clean it up. + # cmd = "ip link delete dummy0 >/dev/null 2>&1" + # # TODO(gp): use `has_docker_sudo`. + # if is_mac() or is_dev_ck(): + # cmd = f"sudo {cmd}" + # rc = os.system(cmd) + # _print("cmd=%s -> rc=%s" % (cmd, rc)) # # + # cmd = "ip link add dummy0 type dummy >/dev/null 2>&1" + # if is_mac() or is_dev_ck(): + # cmd = f"sudo {cmd}" + # rc = os.system(cmd) + # _print("cmd=%s -> rc=%s" % (cmd, rc)) + # has_dind = rc == 0 + # # Clean up, after the fact. + # cmd = "ip link delete dummy0 >/dev/null 2>&1" + # if is_mac() or is_dev_ck(): + # cmd = f"sudo {cmd}" + # rc = os.system(cmd) + # _print("cmd=%s -> rc=%s" % (cmd, rc)) + # # dind is supported on both Mac and GH Actions. + # check_repo = os.environ.get("AM_REPO_CONFIG_CHECK", "True") != "False" + # #TODO(Juraj): HelpersTask16. + # #if check_repo: + # # if is_inside_ci(): + # # # Docker-in-docker is needed for GH actions. For all other builds is optional. 
+ # # assert has_dind, ( + # # f"Expected privileged mode: has_dind={has_dind}\n" + # # + setup_to_str() + # # ) + # # else: + # # only_warning = True + # # _raise_invalid_host(only_warning) + # # return False + # #else: + # # am_repo_config = os.environ.get("AM_REPO_CONFIG_CHECK", "True") + # # print( + # # _WARNING + # # + ": Skip checking since AM_REPO_CONFIG_CHECK=" + # # + f"'{am_repo_config}'" + # # ) + # return has_dind + + +# ############################################################################# +# use_... +# ############################################################################# + + +def _raise_invalid_host(only_warning: bool) -> None: + host_os_name = os.uname()[0] + am_host_os_name = os.environ.get("AM_HOST_OS_NAME", None) + msg = (f"Don't recognize host: host_os_name={host_os_name}, " + f"am_host_os_name={am_host_os_name}") + if only_warning: + _LOG.warning(msg) + else: + raise ValueError(msg) + + +# TODO(gp): -> use_docker_in_docker_support +def enable_privileged_mode(repo_name: str) -> bool: + """ + Return whether a host supports privileged mode for its containers. + """ + # TODO(gp): Remove this dependency from a repo. + if repo_name in ("//dev_tools",): + ret = False + else: + # Keep this in alphabetical order. + if is_cmamp_prod(): + ret = False + elif is_dev_ck(): + ret = True + elif is_inside_ci(): + ret = True + elif is_mac(version="Catalina"): + # Docker for macOS Catalina supports dind. + ret = True + elif is_mac(version="Monterey") or is_mac(version="Ventura"): + # Docker for macOS Monterey doesn't seem to support dind. + ret = False + else: + ret = False + only_warning = True + _raise_invalid_host(only_warning) + return ret + + +# TODO(gp): -> use_docker_sudo_in_commands +def has_docker_sudo() -> bool: + """ + Return whether Docker commands should be run with `sudo` or not. + """ + # Keep this in alphabetical order. 
+ if is_cmamp_prod(): + ret = False + elif is_dev_ck(): + ret = True + elif is_inside_ci(): + ret = False + elif is_mac(): + # macOS runs Docker with sudo by default. + # TODO(gp): This is not true. + ret = True + else: + ret = False + only_warning = True + _raise_invalid_host(only_warning) + return ret + + +def _is_mac_version_with_sibling_containers() -> bool: + return is_mac(version="Monterey") or is_mac(version="Ventura") + + +# TODO(gp): -> use_docker_sibling_container_support +def use_docker_sibling_containers() -> bool: + """ + Return whether to use Docker sibling containers. + + Using sibling containers requires that all Docker containers in the + same network so that they can communicate with each other. + """ + val = is_dev4() or _is_mac_version_with_sibling_containers() + return val + + +# TODO(gp): -> use_docker_main_network +def use_main_network() -> bool: + # TODO(gp): Replace this. + return use_docker_sibling_containers() + + +# TODO(gp): -> get_docker_shared_data_dir_map +def get_shared_data_dirs() -> Optional[Dict[str, str]]: + """ + Get path of dir storing data shared between different users on the host and + Docker. + + E.g., one can mount a central dir `/data/shared`, shared by multiple + users, on a dir `/shared_data` in Docker. + """ + # TODO(gp): Keep this in alphabetical order. + if is_dev4(): + shared_data_dirs = { + "/local/home/share/cache": "/cache", + "/local/home/share/data": "/data", + } + elif is_dev_ck(): + shared_data_dirs = { + "/data/shared": "/shared_data", + "/data/shared2": "/shared_data2", + } + elif is_mac() or is_inside_ci() or is_cmamp_prod(): + shared_data_dirs = None + else: + shared_data_dirs = None + only_warning = True + _raise_invalid_host(only_warning) + return shared_data_dirs + + +def use_docker_network_mode_host() -> bool: + # TODO(gp): Not sure this is needed any more, since we typically run in + # bridge mode. 
+ ret = is_mac() or is_dev_ck() + ret = False + if ret: + assert use_docker_sibling_containers() + return ret + + +def use_docker_db_container_name_to_connect() -> bool: + """ + Connect to containers running DBs just using the container name, instead of + using port and localhost / hostname. + """ + if _is_mac_version_with_sibling_containers(): + # New Macs don't seem to see containers unless we connect with them + # directly with their name. + ret = True + else: + ret = False + if ret: + # This implies that we are using Docker sibling containers. + assert use_docker_sibling_containers() + return ret + + +# TODO(gp): This seems redundant with use_docker_sudo_in_commands +def run_docker_as_root() -> bool: + """ + Return whether Docker should be run with root user. + + I.e., adding `--user $(id -u):$(id -g)` to docker compose or not. + """ + # Keep this in alphabetical order. + if is_cmamp_prod(): + ret = False + elif is_dev4() or is_ig_prod(): + # //lime runs on a system with Docker remap which assumes we don't + # specify user credentials. + ret = True + elif is_dev_ck(): + # On dev1 / dev2 we run as users specifying the user / group id as + # outside. + ret = False + elif is_inside_ci(): + # When running as user in GH action we get an error: + # ``` + # /home/.config/gh/config.yml: permission denied + # ``` + # see https://github.com/alphamatic/amp/issues/1864 + # So we run as root in GH actions. + ret = True + elif is_mac(): + ret = False + else: + ret = False + only_warning = True + _raise_invalid_host(only_warning) + return ret + + +def get_docker_user() -> str: + """ + Return the user that runs Docker, if any. + """ + if is_dev4(): + val = "spm-sasm" + else: + val = "" + return val + + +def get_docker_shared_group() -> str: + """ + Return the group of the user running Docker, if any. 
+ """ + if is_dev4(): + val = "sasm-fileshare" + else: + val = "" + return val + + +# TODO(gp): -> repo_config.yaml +def skip_submodules_test(repo_name: str) -> bool: + """ + Return whether the tests in the submodules should be skipped. + + E.g. while running `i run_fast_tests`. + """ + # TODO(gp): Why do we want to skip running tests? + # TODO(gp): Remove this dependency from a repo. + if repo_name in ("//dev_tools",): + # Skip running `amp` tests from `dev_tools`. + return True + return False + + +# TODO(gp): Remove this comment. +# # This function can't be in `helpers.hserver` since it creates circular import +# # and `helpers.hserver` should not depend on anything. +def is_CK_S3_available(repo_name: str) -> bool: + val = True + if is_inside_ci(): + # TODO(gp): Remove this dependency from a repo. + if repo_name in ("//amp", "//dev_tools"): + # No CK bucket. + val = False + # TODO(gp): We might want to enable CK tests also on lemonade. + if repo_name in ("//lemonade",): + # No CK bucket. + val = False + elif is_dev4(): + # CK bucket is not available on dev4. + val = False + _LOG.debug("val=%s", val) + return val + + # ############################################################################# # S3 buckets. # ############################################################################# @@ -326,3 +757,53 @@ def config_func_to_str() -> str: # Package. ret: str = "# hserver.config\n" + indent("\n".join(ret)) return ret + + +def config_func_to_str() -> str: + """ + Print the value of all the config functions. 
+ """ + ret: List[str] = [] + # + function_names = [ + "enable_privileged_mode", + "get_docker_base_image_name", + "get_docker_user", + "get_docker_shared_group", + # "get_extra_amp_repo_sym_name", + "get_host_name", + # "get_html_dir_to_url_mapping", + "get_invalid_words", + "get_name", + "get_repo_map", + "get_shared_data_dirs", + "has_dind_support", + "has_docker_sudo", + "is_CK_S3_available", + "run_docker_as_root", + "skip_submodules_test", + "use_docker_sibling_containers", + "use_docker_network_mode_host", + "use_docker_db_container_name_to_connect", + ] + for func_name in sorted(function_names): + try: + _LOG.debug("func_name=%s", func_name) + func_value = eval(f"{func_name}()") + except NameError as e: + func_value = "*undef*" + _ = e + # raise e + msg = f"{func_name}='{func_value}'" + ret.append(msg) + # _print(msg) + # Package. + ret: str = "# repo_config.config\n" + indent("\n".join(ret)) + # Add the signature from hserver. + ret += "\n" + indent(hserver.config_func_to_str()) + return ret + + +if False: + print(config_func_to_str()) + # assert 0 diff --git a/helpers/lib_tasks_docker.py b/helpers/lib_tasks_docker.py index 086784cec..5fbe9f5df 100644 --- a/helpers/lib_tasks_docker.py +++ b/helpers/lib_tasks_docker.py @@ -756,7 +756,7 @@ def _get_docker_compose_files( :return: list of the Docker compose paths """ docker_compose_files = [] - # Get the repo short name (e.g., amp). + # Get the repo short name (e.g., `amp`). 
dir_name = hgit.get_repo_full_name_from_dirname(".", include_host_name=False) repo_short_name = hgit.get_repo_name(dir_name, in_mode="full_name") _LOG.debug("repo_short_name=%s", repo_short_name) diff --git a/helpers/repo_config_utils.py b/helpers/repo_config_utils.py index 8cb87cfea..b90a599ab 100644 --- a/helpers/repo_config_utils.py +++ b/helpers/repo_config_utils.py @@ -4,80 +4,276 @@ import helpers.repo_config_utils as hrecouti """ +import functools import logging import os -from typing import Any - -import helpers.hdbg as hdbg -import helpers.henv as henv -import helpers.hprint as hprint +import yaml +from typing import Any, Dict, List, Optional, Union _LOG = logging.getLogger(__name__) -def assert_setup( - self_: Any, exp_enable_privileged_mode: bool, exp_has_dind_support: bool -) -> None: - signature = henv.env_to_str(add_system_signature=False) - _LOG.debug("env_to_str=%s", signature) - # - act_enable_privileged_mode = henv.execute_repo_config_code( - "enable_privileged_mode()" - ) - self_.assertEqual(act_enable_privileged_mode, exp_enable_privileged_mode) - # - act_has_dind_support = henv.execute_repo_config_code("has_dind_support()") - self_.assertEqual(act_has_dind_support, exp_has_dind_support) +# ###### + +def _get_env_var( + env_name: str, + as_bool: bool = False, + default_value: Any = None, + abort_on_missing: bool = True, +) -> Union[str, bool]: + """ + Get an environment variable by name. -def _dassert_setup_consistency() -> None: + :param env_name: name of the env var + :param as_bool: convert the value into a Boolean + :param default_value: the default value to use in case it's not + defined + :param abort_on_missing: if the env var is not defined aborts, + otherwise use the default value + :return: value of env var """ - Check that one and only one set up config should be true. 
+ if env_name not in os.environ: + if abort_on_missing: + assert 0, f"Can't find env var '{env_name}' in '{str(os.environ)}'" + else: + return default_value + value = os.environ[env_name] + if as_bool: + # Convert the value into a boolean. + if value in ("0", "", "None", "False"): + value = False + else: + value = True + return value + + +def _find_git_root(path: str = ".") -> str: """ - # Use the settings from the `repo_config` corresponding to this container. - enable_privileged_mode = henv.execute_repo_config_code( - "enable_privileged_mode()" - ) - use_docker_sibling_containers = henv.execute_repo_config_code( - "use_docker_sibling_containers()" - ) - use_docker_network_mode_host = henv.execute_repo_config_code( - "use_docker_network_mode_host()" - ) - use_main_network = henv.execute_repo_config_code("use_main_network()") - _LOG.debug( - hprint.to_str( - "enable_privileged_mode use_docker_sibling_containers " - "use_docker_network_mode_host use_main_network" - ) - ) - # It's not possible to have dind and sibling containers together. - hdbg.dassert( - not (use_docker_sibling_containers and enable_privileged_mode), - "use_docker_sibling_containers=%s enable_privileged_mode=%s", - use_docker_sibling_containers, - enable_privileged_mode, - ) - # To run sibling containers they need to be in the same main network. - if use_docker_sibling_containers: - hdbg.dassert(use_main_network, "use_main_network=%s", use_main_network) - # It's not possible to have both host and main network (which implies - # bridge mode). - hdbg.dassert( - not (use_docker_network_mode_host and use_main_network), - "use_docker_network_mode_host=%s use_main_network=%s", - use_docker_network_mode_host, - use_main_network, - ) - - -# If the env var is not defined then we want to check. The only reason to skip -# it's if the env var is defined and equal to False. 
-check_repo = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") != "False" -_is_called = False -if check_repo: - if not _is_called: - _dassert_setup_consistency() - _is_called = True -else: - _LOG.warning(f"Skipping repo check in {__file__}") + Find the dir of the outermost Git super module. + + This function looks for `.git` dirs in the path and its parents until it + finds one. This is a different approach than asking Git directly. + Pros: + - it doesn't require to call `git` through a system call. + Cons: + - it relies on `git` internal structure, which might change in the future. + """ + path = os.path.abspath(path) + while not os.path.isdir(os.path.join(path, ".git")): + git_dir_file = os.path.join(path, ".git") + if os.path.isfile(git_dir_file): + with open(git_dir_file, "r") as f: + for line in f: + if line.startswith("gitdir:"): + git_dir = line.split(":", 1)[1].strip() + return os.path.abspath( + os.path.join(path, git_dir, "..", "..") + ) + parent = os.path.dirname(path) + assert parent != path, f"Can't find the Git root starting from {path}" + path = parent + return path + + +# End copy + + +class RepoConfig: + + def __init__(self, data: Dict) -> None: + """ + Set the data to be used by the module. + """ + self._data = data + + def set_repo_config_data(self, data: Dict) -> None: + self._data = data + + @classmethod + def from_file(cls, file_name: Optional[str] = None) -> "RepoConfig": + """ + Return the text of the code stored in `repo_config.py`. + """ + if file_name is None: + file_name = RepoConfig._get_repo_config_file() + assert os.path.exists(file_name), f"File '{file_name}' doesn't exist" + _LOG.debug("Reading file_name='%s'", file_name) + try: + with open(file_name, "r") as file: + # Use `safe_load()` to avoid executing arbitrary code. 
+                data = yaml.safe_load(file)
+                assert isinstance(data, dict), (
+                    f"data=\n{data}\nis not a dict but {type(data)}")
+        except Exception as e:
+            raise RuntimeError(f"Error reading YAML file {file_name}: {e}") from e
+        return cls(data)
+
+    # TODO(gp): -> get_repo_name
+    def get_name(self) -> str:
+        value = self._data["repo_info"]["repo_name"]
+        return f"//{value}"
+
+    def get_github_repo_account(self) -> str:
+        value = self._data["repo_info"]["github_repo_account"]
+        return value
+
+    def get_repo_map(self) -> Dict[str, str]:
+        """
+        Return a mapping of short repo name -> long repo name.
+        """
+        repo_name = self.get_name()
+        github_repo_account = self.get_github_repo_account()
+        repo_map = {repo_name: f"{github_repo_account}/{repo_name}"}
+        return repo_map
+
+    # def get_extra_amp_repo_sym_name() -> str:
+    #     return f"{_GITHUB_REPO_ACCOUNT}/{_REPO_NAME}"
+
+    # TODO(gp): -> get_github_host_name
+    def get_host_name(self) -> str:
+        value = self._data["repo_info"]["github_host_name"]
+        return value
+
+    def get_invalid_words(self) -> List[str]:
+        return []
+
+    def get_docker_base_image_name(self) -> str:
+        """
+        Return a base name for docker image.
+        """
+        value = self._data["docker_info"]["docker_image_name"]
+        return value
+
+    def get_unit_test_bucket_path(self) -> str:
+        """
+        Return the path to the unit test bucket.
+        """
+        value = self._data["s3_bucket_info"]["unit_test_bucket_name"]
+        return value
+
+    def get_html_bucket_path(self) -> str:
+        """
+        Return the path to the bucket where published HTMLs are stored.
+        """
+        value = self._data["s3_bucket_info"]["html_bucket_name"]
+        return value
+
+    def get_html_ip(self) -> str:
+        """
+        Return the IP of the bucket where published HTMLs are stored.
+        """
+        value = self._data["s3_bucket_info"]["html_bucket_name"]
+        return value
+
+    def get_html_dir_to_url_mapping(self) -> Dict[str, str]:
+        """
+        Return a mapping between directories mapped on URLs.
+
+        This is used when we have web servers serving files from specific
+        directories.
+        """
+        dir_to_url = {self.get_html_bucket_path(): self.get_html_ip()}
+        return dir_to_url
+
+    @staticmethod
+    def _get_repo_config_file() -> str:
+        """
+        Return the absolute path to `repo_config.yaml` that should be used.
+
+        The `repo_config.yaml` is determined based on an overriding env var or
+        based on the root of the Git path.
+        """
+        env_var = "AM_REPO_CONFIG_PATH"
+        file_name = _get_env_var(env_var, abort_on_missing=False)
+        if file_name:
+            _LOG.warning("Using value '%s' for %s from env var", file_name, env_var)
+        else:
+            client_root = _find_git_root()
+            file_name = os.path.join(client_root, "repo_config.yaml")
+            _LOG.debug("Reading file_name='%s'", file_name)
+        file_name = os.path.abspath(file_name)
+        return file_name
+
+
+_repo_config = None
+
+
+def get_repo_config() -> RepoConfig:
+    """
+    Return the repo config object.
+    """
+    global _repo_config
+    if _repo_config is None:
+        _repo_config = RepoConfig.from_file()
+    return _repo_config
+
+# # #############################################################################
+#
+#
+# def assert_setup(
+#     self_: Any, exp_enable_privileged_mode: bool, exp_has_dind_support: bool
+# ) -> None:
+#     signature = henv.env_to_str(add_system_signature=False)
+#     _LOG.debug("env_to_str=%s", signature)
+#     #
+#     act_enable_privileged_mode = henv.execute_repo_config_code(
+#         "enable_privileged_mode()"
+#     )
+#     self_.assertEqual(act_enable_privileged_mode, exp_enable_privileged_mode)
+#     #
+#     act_has_dind_support = henv.execute_repo_config_code("has_dind_support()")
+#     self_.assertEqual(act_has_dind_support, exp_has_dind_support)
+#
+#
+# def _dassert_setup_consistency() -> None:
+#     """
+#     Check that one and only one set up config should be true.
+#     """
+#     # Use the settings from the `repo_config` corresponding to this container.
+# enable_privileged_mode = henv.execute_repo_config_code( +# "enable_privileged_mode()" +# ) +# use_docker_sibling_containers = henv.execute_repo_config_code( +# "use_docker_sibling_containers()" +# ) +# use_docker_network_mode_host = henv.execute_repo_config_code( +# "use_docker_network_mode_host()" +# ) +# use_main_network = henv.execute_repo_config_code("use_main_network()") +# _LOG.debug( +# hprint.to_str( +# "enable_privileged_mode use_docker_sibling_containers " +# "use_docker_network_mode_host use_main_network" +# ) +# ) +# # It's not possible to have dind and sibling containers together. +# hdbg.dassert( +# not (use_docker_sibling_containers and enable_privileged_mode), +# "use_docker_sibling_containers=%s enable_privileged_mode=%s", +# use_docker_sibling_containers, +# enable_privileged_mode, +# ) +# # To run sibling containers they need to be in the same main network. +# if use_docker_sibling_containers: +# hdbg.dassert(use_main_network, "use_main_network=%s", use_main_network) +# # It's not possible to have both host and main network (which implies +# # bridge mode). +# hdbg.dassert( +# not (use_docker_network_mode_host and use_main_network), +# "use_docker_network_mode_host=%s use_main_network=%s", +# use_docker_network_mode_host, +# use_main_network, +# ) +# +# +# # If the env var is not defined then we want to check. The only reason to skip +# # it's if the env var is defined and equal to False. 
+# check_repo = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") != "False"
+# _is_called = False
+# if check_repo:
+#     if not _is_called:
+#         _dassert_setup_consistency()
+#         _is_called = True
+# else:
+#     _LOG.warning(f"Skipping repo check in {__file__}")
diff --git a/helpers/test/test_repo_config_utils.py b/helpers/test/test_repo_config_utils.py
index a4739ee18..7050cbfc1 100644
--- a/helpers/test/test_repo_config_utils.py
+++ b/helpers/test/test_repo_config_utils.py
@@ -1,11 +1,45 @@
 import logging
+import os
 
+import helpers.hio as hio
+import helpers.hprint as hprint
 import helpers.hunit_test as hunitest
 import helpers.repo_config_utils as hrecouti
 
 _LOG = logging.getLogger(__name__)
 
 
-class Test_repo_config_utils1(hunitest.TestCase):
-    def test_consistency1(self) -> None:
-        hrecouti._dassert_setup_consistency()
+# class Test_repo_config_utils1(hunitest.TestCase):
+#     def test_consistency1(self) -> None:
+#         hrecouti._dassert_setup_consistency()
+
+
+class Test_repo_config1(hunitest.TestCase):
+    def create_test_file(self) -> str:
+        yaml_txt = """
+        repo_info:
+          repo_name: helpers
+          github_repo_account: kaizen-ai
+          github_host_name: github.com
+          invalid_words:
+
+        docker_info:
+          docker_image_name: helpers
+
+        s3_bucket_info:
+          unit_test_bucket_name: s3://cryptokaizen-unit-test
+          html_bucket_name: s3://cryptokaizen-html
+          html_ip: http://172.30.2.44
+        """
+        yaml_txt = hprint.dedent(yaml_txt)
+        file_name = os.path.join(self.get_scratch_space(),
+            "yaml.txt")
+        hio.to_file(file_name, yaml_txt)
+        return file_name
+
+    def test1(self) -> None:
+        file_name = self.create_test_file()
+        repo_config = hrecouti.RepoConfig.from_file(file_name)
+        act = repo_config.get_name()
+        exp = "//helpers"
+        self.assert_equal(act, exp)
diff --git a/repo_config.py b/repo_config.py
index b26a72ba6..4a248b7e0 100644
--- a/repo_config.py
+++ b/repo_config.py
@@ -1,495 +1,137 @@
-"""
-Contain info specific of this repo.
-"""
-
-# TODO(gp): Centralize all the common functions under hserver.py.
- -import functools -import logging -import os -from typing import Dict, List, Optional - -import helpers.hserver as hserver - -_LOG = logging.getLogger(__name__) - - -_WARNING = "\033[33mWARNING\033[0m" - - -def _print(msg: str) -> None: - # _LOG.info(msg) - if False: - print(msg) - - -# We can't use `__file__` since this file is imported with an exec. -# _print("Importing //cmamp/repo_config.py") - - -# ############################################################################# -# Repo info. -# ############################################################################# - - -# To customize: xyz -# _REPO_NAME = "xyz" -_REPO_NAME = "helpers" - -# To customize: xyz -_GITHUB_REPO_ACCOUNT = "causify-ai" - -# To customize: xyz -# _DOCKER_IMAGE_NAME = "xyz" -_DOCKER_IMAGE_NAME = "helpers" - - -def get_name() -> str: - return f"//{_REPO_NAME}" - - -def get_repo_map() -> Dict[str, str]: - """ - Return a mapping of short repo name -> long repo name. - """ - repo_map: Dict[str, str] = { - _REPO_NAME: f"{_GITHUB_REPO_ACCOUNT}/{_REPO_NAME}" - } - return repo_map - - -def get_extra_amp_repo_sym_name() -> str: - return f"{_GITHUB_REPO_ACCOUNT}/{_REPO_NAME}" - - -# TODO(gp): -> get_gihub_host_name -def get_host_name() -> str: - return "github.com" - - -def get_invalid_words() -> List[str]: - return [] - - -def get_docker_base_image_name() -> str: - """ - Return a base name for docker image. 
- """ - return _DOCKER_IMAGE_NAME - - -# ############################################################################# - - -# //cmamp runs on: -# - MacOS -# - Supports Docker privileged mode -# - The same user and group is used inside the container -# - Root can also be used -# - Linux (dev server, GitHub CI) -# - Supports Docker privileged mode -# - The same user and group is used inside the container -# - Linux (spm-dev4) -# - Doesn't support Docker privileged mode -# - A different user and group is used inside the container - - -def _raise_invalid_host(only_warning: bool) -> None: - host_os_name = os.uname()[0] - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - msg = f"Don't recognize host: host_os_name={host_os_name}, csfy_host_os_name={csfy_host_os_name}" - # TODO(Grisha): unclear if it is a difference between `cmamp` and `sorrentum`. - if only_warning: - _LOG.warning(msg) - else: - raise ValueError(msg) - - -def enable_privileged_mode() -> bool: - """ - Return whether an host supports privileged mode for its containers. - """ - ret = False - # Keep this in alphabetical order. - if hserver.is_cmamp_prod(): - ret = False - elif hserver.is_dev4() or hserver.is_ig_prod(): - ret = False - elif hserver.is_dev_ck(): - ret = True - elif hserver.is_inside_ci(): - ret = True - elif hserver.is_mac(version="Catalina"): - # Docker for macOS Catalina supports dind. - ret = True - elif hserver.is_mac(version="Monterey") or hserver.is_mac(version="Ventura"): - # Docker for macOS Monterey doesn't seem to support dind. - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -def has_docker_sudo() -> bool: - """ - Return whether commands should be run with sudo or not. - """ - ret = False - # Keep this in alphabetical order. 
- if hserver.is_cmamp_prod(): - ret = False - elif hserver.is_dev4() or hserver.is_ig_prod(): - ret = False - elif hserver.is_dev_ck(): - ret = True - elif hserver.is_inside_ci(): - ret = False - elif hserver.is_mac(): - # macOS runs Docker with sudo by default. - ret = True - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -def _is_mac_version_with_sibling_containers() -> bool: - return hserver.is_mac(version="Monterey") or hserver.is_mac(version="Ventura") - - -# TODO(gp): -> has_docker_privileged_mode -@functools.lru_cache() -def has_dind_support() -> bool: - """ - Return whether the current container supports privileged mode. - - This is need to use Docker-in-Docker. - """ - _print("is_inside_docker()=%s" % hserver.is_inside_docker()) - if not hserver.is_inside_docker(): - # Outside Docker there is no privileged mode. - _print("-> ret = False") - return False - # TODO(gp): Not sure this is really needed since we do this check - # after enable_privileged_mode controls if we have dind or not. - if _is_mac_version_with_sibling_containers(): - return False - # TODO(gp): This part is not multi-process friendly. When multiple - # processes try to run this code they interfere. A solution is to run `ip - # link` in the entrypoint and create a `has_docker_privileged_mode` file - # which contains the value. - # We rely on the approach from https://stackoverflow.com/questions/32144575 - # to check if there is support for privileged mode. - # Sometimes there is some state left, so we need to clean it up. - cmd = "ip link delete dummy0 >/dev/null 2>&1" - # TODO(gp): use `has_docker_sudo`. 
- if hserver.is_mac() or hserver.is_dev_ck(): - cmd = f"sudo {cmd}" - rc = os.system(cmd) - _print("cmd=%s -> rc=%s" % (cmd, rc)) - # - cmd = "ip link add dummy0 type dummy >/dev/null 2>&1" - if hserver.is_mac() or hserver.is_dev_ck(): - cmd = f"sudo {cmd}" - rc = os.system(cmd) - _print("cmd=%s -> rc=%s" % (cmd, rc)) - has_dind = rc == 0 - # Clean up, after the fact. - cmd = "ip link delete dummy0 >/dev/null 2>&1" - if hserver.is_mac() or hserver.is_dev_ck(): - cmd = f"sudo {cmd}" - rc = os.system(cmd) - _print("cmd=%s -> rc=%s" % (cmd, rc)) - # dind is supported on both Mac and GH Actions. - check_repo = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") != "False" - # TODO(Juraj): HelpersTask16. - # if check_repo: - # if hserver.is_inside_ci(): - # # Docker-in-docker is needed for GH actions. For all other builds is optional. - # assert has_dind, ( - # f"Expected privileged mode: has_dind={has_dind}\n" - # + hserver.setup_to_str() - # ) - # else: - # only_warning = True - # _raise_invalid_host(only_warning) - # return False - # else: - # csfy_repo_config = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") - # print( - # _WARNING - # + ": Skip checking since CSFY_REPO_CONFIG_CHECK=" - # + f"'{csfy_repo_config}'" - # ) - return has_dind - - -def use_docker_sibling_containers() -> bool: - """ - Return whether to use Docker sibling containers. - - Using sibling containers requires that all Docker containers in the - same network so that they can communicate with each other. - """ - val = hserver.is_dev4() or _is_mac_version_with_sibling_containers() - return val - - -def use_main_network() -> bool: - # TODO(gp): Replace this. - return use_docker_sibling_containers() - - -def get_shared_data_dirs() -> Optional[Dict[str, str]]: - """ - Get path of dir storing data shared between different users on the host and - Docker. - - E.g., one can mount a central dir `/data/shared`, shared by multiple - users, on a dir `/shared_data` in Docker. 
- """ - # TODO(gp): Keep this in alphabetical order. - shared_data_dirs: Optional[Dict[str, str]] = None - if hserver.is_dev4(): - shared_data_dirs = { - "/local/home/share/cache": "/cache", - "/local/home/share/data": "/data", - } - elif hserver.is_dev_ck(): - shared_data_dirs = { - "/data/shared": "/shared_data", - "/data/shared2": "/shared_data2", - } - elif hserver.is_mac() or hserver.is_inside_ci() or hserver.is_cmamp_prod(): - shared_data_dirs = None - else: - shared_data_dirs = None - only_warning = True - _raise_invalid_host(only_warning) - return shared_data_dirs - - -def use_docker_network_mode_host() -> bool: - # TODO(gp): Not sure this is needed any more, since we typically run in bridge - # mode. - ret = hserver.is_mac() or hserver.is_dev_ck() - ret = False - if ret: - assert use_docker_sibling_containers() - return ret - - -def use_docker_db_container_name_to_connect() -> bool: - """ - Connect to containers running DBs just using the container name, instead of - using port and localhost / hostname. - """ - if _is_mac_version_with_sibling_containers(): - # New Macs don't seem to see containers unless we connect with them - # directly with their name. - ret = True - else: - ret = False - if ret: - # This implies that we are using Docker sibling containers. - assert use_docker_sibling_containers() - return ret - - -def run_docker_as_root() -> bool: - """ - Return whether Docker should be run with root user. - - I.e., adding `--user $(id -u):$(id -g)` to docker compose or not. - """ - ret = None - # Keep this in alphabetical order. - if hserver.is_cmamp_prod(): - ret = False - elif hserver.is_dev4() or hserver.is_ig_prod(): - # //lime runs on a system with Docker remap which assumes we don't - # specify user credentials. - ret = True - elif hserver.is_dev_ck(): - # On dev1 / dev2 we run as users specifying the user / group id as - # outside. 
- ret = False - elif hserver.is_inside_ci(): - # When running as user in GH action we get an error: - # ``` - # /home/.config/gh/config.yml: permission denied - # ``` - # see https://github.com/alphamatic/amp/issues/1864 - # So we run as root in GH actions. - ret = True - elif hserver.is_mac(): - ret = False - else: - ret = False - only_warning = True - _raise_invalid_host(only_warning) - return ret - - -def get_docker_user() -> str: - """ - Return the user that runs Docker, if any. - """ - if hserver.is_dev4(): - val = "spm-sasm" - else: - val = "" - return val - - -def get_unit_test_bucket_path() -> str: - """ - Return the path to the unit test bucket. - """ - - assert 0, f"Not supported by '{_REPO_NAME}'" - unit_test_bucket = "cryptokaizen-unit-test" - # We do not use `os.path.join` since it converts `s3://` to `s3:/`. - unit_test_bucket_path = "s3://" + unit_test_bucket - return unit_test_bucket_path - - -def get_html_bucket_path() -> str: - """ - Return the path to the bucket where published HTMLs are stored. - """ - assert 0, f"Not supported by '{_REPO_NAME}'" - html_bucket = "cryptokaizen-html" - # We do not use `os.path.join` since it converts `s3://` to `s3:/`. - html_bucket_path = "s3://" + html_bucket - return html_bucket_path - - -def get_html_dir_to_url_mapping() -> Dict[str, str]: - """ - Return a mapping between directories mapped on URLs. - - This is used when we have web servers serving files from specific - directories. - """ - assert 0, f"Not supported by '{_REPO_NAME}'" - dir_to_url = {"s3://cryptokaizen-html": "http://172.30.2.44"} - return dir_to_url - - -def get_docker_shared_group() -> str: - """ - Return the group of the user running Docker, if any. - """ - if hserver.is_dev4(): - val = "sasm-fileshare" - else: - val = "" - return val - - -def skip_submodules_test() -> bool: - """ - Return whether the tests in the submodules should be skipped. - - E.g. while running `i run_fast_tests`. 
- """ - return False - - -# ############################################################################# - -# Copied from hprint to avoid import cycles. - - -# TODO(gp): It should use *. -def indent(txt: str, num_spaces: int = 2) -> str: - """ - Add `num_spaces` spaces before each line of the passed string. - """ - spaces = " " * num_spaces - txt_out = [] - for curr_line in txt.split("\n"): - if curr_line.lstrip().rstrip() == "": - # Do not prepend any space to a line with only white characters. - txt_out.append("") - continue - txt_out.append(spaces + curr_line) - res = "\n".join(txt_out) - return res - - -# End copy. - - -# This function can't be in `helpers.hserver` since it creates circular import -# and `helpers.hserver` should not depend on anything. -def is_CK_S3_available() -> bool: - val = True - if hserver.is_inside_ci(): - repo_name = get_name() - if repo_name in ("//amp",): - # No CK bucket. - val = False - # TODO(gp): We might want to enable CK tests also on lemonade. - if repo_name in ("//lemonade",): - # No CK bucket. - val = False - elif hserver.is_dev4(): - # CK bucket is not available on dev4. - val = False - _LOG.debug("val=%s", val) - return val - - -def config_func_to_str() -> str: - """ - Print the value of all the config functions. 
- """ - ret: List[str] = [] - # - function_names = [ - "enable_privileged_mode", - "get_docker_base_image_name", - "get_docker_user", - "get_docker_shared_group", - # "get_extra_amp_repo_sym_name", - "get_host_name", - # "get_html_dir_to_url_mapping", - "get_invalid_words", - "get_name", - "get_repo_map", - "get_shared_data_dirs", - "has_dind_support", - "has_docker_sudo", - "is_CK_S3_available", - "run_docker_as_root", - "skip_submodules_test", - "use_docker_sibling_containers", - "use_docker_network_mode_host", - "use_docker_db_container_name_to_connect", - ] - for func_name in sorted(function_names): - try: - _LOG.debug("func_name=%s", func_name) - func_value = eval(f"{func_name}()") - except NameError as e: - func_value = "*undef*" - _ = e - # raise e - msg = f"{func_name}='{func_value}'" - ret.append(msg) - # _print(msg) - # Package. - ret: str = "# repo_config.config\n" + indent("\n".join(ret)) - # Add the signature from hserver. - ret += "\n" + indent(hserver.config_func_to_str()) - return ret - - -if False: - print(config_func_to_str()) - # assert 0 +# """ +# Contain info specific of this repo. +# """ +# +# import logging +# from typing import Dict, List +# +# import helpers.hserver as hserver +# +# _LOG = logging.getLogger(__name__) +# +# +# _WARNING = "\033[33mWARNING\033[0m" +# +# +# def _print(msg: str) -> None: +# # _LOG.info(msg) +# if False: +# print(msg) +# +# +# # We can't use `__file__` since this file is imported with an exec. +# # _print("Importing //cmamp/repo_config.py") +# +# +# # ############################################################################# +# # Repo info. 
+# # ############################################################################# +# +# +# # To customize: xyz +# #_REPO_NAME = "xyz" +# _REPO_NAME = "helpers" +# +# # To customize: xyz +# _GITHUB_REPO_ACCOUNT = "kaizen-ai" +# +# # To customize: xyz +# #_DOCKER_IMAGE_NAME = "xyz" +# _DOCKER_IMAGE_NAME = "helpers" +# +# def get_name() -> str: +# return f"//{_REPO_NAME}" +# +# +# def get_repo_map() -> Dict[str, str]: +# """ +# Return a mapping of short repo name -> long repo name. +# """ +# repo_map: Dict[str, str] = {_REPO_NAME: f"{_GITHUB_REPO_ACCOUNT}/{_REPO_NAME}"} +# return repo_map +# +# +# def get_extra_amp_repo_sym_name() -> str: +# return f"{_GITHUB_REPO_ACCOUNT}/{_REPO_NAME}" +# +# +# # TODO(gp): -> get_github_host_name +# def get_host_name() -> str: +# return "github.com" +# +# +# def get_invalid_words() -> List[str]: +# return [] +# +# +# def get_docker_base_image_name() -> str: +# """ +# Return a base name for docker image. +# """ +# return _DOCKER_IMAGE_NAME +# +# +# # TODO(gp): Convert in variables. +# +# def get_unit_test_bucket_path() -> str: +# """ +# Return the path to the unit test bucket. +# """ +# +# assert 0, f"Not supported by '{_REPO_NAME}'" +# unit_test_bucket = "cryptokaizen-unit-test" +# # We do not use `os.path.join` since it converts `s3://` to `s3:/`. +# unit_test_bucket_path = "s3://" + unit_test_bucket +# return unit_test_bucket_path +# +# +# def get_html_bucket_path() -> str: +# """ +# Return the path to the bucket where published HTMLs are stored. +# """ +# assert 0, f"Not supported by '{_REPO_NAME}'" +# html_bucket = "cryptokaizen-html" +# # We do not use `os.path.join` since it converts `s3://` to `s3:/`. +# html_bucket_path = "s3://" + html_bucket +# return html_bucket_path +# +# +# def get_html_dir_to_url_mapping() -> Dict[str, str]: +# """ +# Return a mapping between directories mapped on URLs. +# +# This is used when we have web servers serving files from specific +# directories. 
+#     """
+#     assert 0, f"Not supported by '{_REPO_NAME}'"
+#     dir_to_url = {"s3://cryptokaizen-html": "http://172.30.2.44"}
+#     return dir_to_url
+#
+#
+#
+#
+# # #############################################################################
+#
+# # Copied from hprint to avoid import cycles.
+#
+#
+# # TODO(gp): It should use *.
+# def indent(txt: str, num_spaces: int = 2) -> str:
+#     """
+#     Add `num_spaces` spaces before each line of the passed string.
+#     """
+#     spaces = " " * num_spaces
+#     txt_out = []
+#     for curr_line in txt.split("\n"):
+#         if curr_line.lstrip().rstrip() == "":
+#             # Do not prepend any space to a line with only white characters.
+#             txt_out.append("")
+#             continue
+#         txt_out.append(spaces + curr_line)
+#     res = "\n".join(txt_out)
+#     return res
+#
+#
+# # End copy.
+#
+#
diff --git a/repo_config.yaml b/repo_config.yaml
new file mode 100644
index 000000000..230610248
--- /dev/null
+++ b/repo_config.yaml
@@ -0,0 +1,15 @@
+repo_info:
+  repo_name: helpers
+  github_repo_account: kaizen-ai
+  github_host_name: github.com
+  invalid_words:
+  issue_prefix: HlprTask
+  # AmpTask, DevToolsTask
+
+docker_info:
+  docker_image_name: helpers
+
+s3_bucket_info:
+  unit_test_bucket_name: s3://cryptokaizen-unit-test
+  html_bucket_name: s3://cryptokaizen-html
+  html_ip: http://172.30.2.44
\ No newline at end of file
diff --git a/tasks.py b/tasks.py
index 4272688bc..8dc182928 100644
--- a/tasks.py
+++ b/tasks.py
@@ -2,7 +2,7 @@
 import os
 from typing import Any
 
-import repo_config as rconf
+import helpers.repo_config_utils as hrecouti
 
 # Expose the pytest targets.
 # Extract with:
@@ -149,7 +149,7 @@
 # TODO(gp): Move it to lib_tasks.
ECR_BASE_PATH = os.environ["CSFY_ECR_BASE_PATH"] -DOCKER_BASE_IMAGE_NAME = rconf.get_docker_base_image_name() +repo_config = hrecouti.get_repo_config() def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: @@ -171,13 +171,11 @@ def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: default_params = { - # TODO(Nikola): Remove prefix after everything is cleaned. - # Currently there are a lot dependencies on prefix. "CSFY_ECR_BASE_PATH": ECR_BASE_PATH, # When testing a change to the build system in a branch you can use a different # image, e.g., `XYZ_tmp` to not interfere with the prod system. # "BASE_IMAGE": "amp_tmp", - "BASE_IMAGE": DOCKER_BASE_IMAGE_NAME, + "BASE_IMAGE": repo_config.get_docker_base_image_name(), "QA_TEST_FUNCTION": _run_qa_tests, }