From 88329655e7c1e5105c95f7dffcac4c13e195a6ee Mon Sep 17 00:00:00 2001 From: saggese Date: Thu, 10 Apr 2025 16:04:20 +0000 Subject: [PATCH 001/193] Update --- helpers/hdocker.py | 2 +- helpers/repo_config_utils.py | 17 +++++++++++++++-- repo_config.yaml | 7 +++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 66abcffcd..4c3e34b82 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1880,7 +1880,7 @@ def run_dockerized_graphviz( ) # Convert files to Docker paths. is_caller_host = not hserver.is_inside_docker() - use_sibling_container_for_callee = True + use_sibling_container_for_callee = False caller_mount_path, callee_mount_path, mount = get_docker_mount_info( is_caller_host, use_sibling_container_for_callee ) diff --git a/helpers/repo_config_utils.py b/helpers/repo_config_utils.py index 316040d1d..c26673992 100644 --- a/helpers/repo_config_utils.py +++ b/helpers/repo_config_utils.py @@ -37,6 +37,9 @@ def indent(txt: str, num_spaces: int = 2) -> str: # End copy. +# ############################################################################# + + def _find_config_file(file_name: str) -> str: """ Find recursively the dir of config file. @@ -155,14 +158,14 @@ def config_func_to_str(self) -> str: # TODO(gp): -> get_repo_name def get_name(self) -> str: """ - Return the name of the repo, e.g., `//amp`. + Return the name of the repo, e.g., in `//amp`. """ value = self._data["repo_info"]["repo_name"] return f"//{value}" def get_github_repo_account(self) -> str: """ - Return the account name of the repo on GitHub, e.g., `github.com`. + Return the account name of the repo on GitHub, e.g., `causify-ai`, `gpsaggese`. 
""" value = self._data["repo_info"]["github_repo_account"] return value @@ -254,6 +257,16 @@ def get_docker_base_image_name(self) -> str: value = self._data["docker_info"]["docker_image_name"] return value + def get_use_sibling_container(self) -> bool: + """ + Return whether to use a sibling container or a chilren docker-in-docker approach. + + This is used in unit tests to test dockerized executables. + """ + value = self._data["docker_info"]["use_sibling_container"]) + assert value in ["True", "False"], f"Invalid boolen value: {value}" + return value == "True" + # s3_bucket_info def get_unit_test_bucket_path(self) -> str: diff --git a/repo_config.yaml b/repo_config.yaml index e7b885b52..45db03077 100644 --- a/repo_config.yaml +++ b/repo_config.yaml @@ -1,12 +1,19 @@ repo_info: + # Name of the repo, like in `//helpers`, `//amp`. repo_name: helpers + # Account name of the repo on GitHub, e.g., `causify-ai`, `gpsaggese`. github_repo_account: causify-ai + # Host name of the repo on GitHub, e.g., `github.com`. github_host_name: github.com + # List of words that are not allowed in the repo for security reasons. invalid_words: + # Prefix of the issue in the repo, e.g., `HelpersTask`, `CmampTask`. issue_prefix: HelpersTask docker_info: + # Base name of the docker image, e.g., `helpers`. 
docker_image_name: helpers + use_sibling_container_in_unit_tests: True s3_bucket_info: unit_test_bucket_name: s3://cryptokaizen-unit-test From 0f1bd8456e563ad411ac6e8cf3b9c350bc281b22 Mon Sep 17 00:00:00 2001 From: saggese Date: Thu, 10 Apr 2025 22:29:43 +0000 Subject: [PATCH 002/193] Update --- .../dockerize/dockerized_template.py | 94 ++--- helpers/henv.py | 343 ++++++++++++------ helpers/hprint.py | 4 + helpers/hserver.py | 2 +- helpers/hversion.py | 22 +- helpers/repo_config_utils.py | 5 +- helpers/test/test_hversion.py | 2 +- linters/dockerized_pydeps.py | 111 ++++++ 8 files changed, 386 insertions(+), 197 deletions(-) create mode 100755 linters/dockerized_pydeps.py diff --git a/dev_scripts_helpers/dockerize/dockerized_template.py b/dev_scripts_helpers/dockerize/dockerized_template.py index f7b660cb3..def07483b 100755 --- a/dev_scripts_helpers/dockerize/dockerized_template.py +++ b/dev_scripts_helpers/dockerize/dockerized_template.py @@ -1,28 +1,10 @@ #!/usr/bin/env python + """ -Dockerized DOCX-to-Markdown Converter Template. +Dockerized template. -This script converts a DOCX file to Markdown using a Dockerized pandoc environment. +This script is a template for creating a Dockerized script. It is intended as a template to explain the process. - -Usage Instructions: - - 1. In your working directory, ensure you have your DOCX file ready (e.g., my_document.docx). - Then run the script with the appropriate arguments. For example: - dev_scripts_helpers/dockerize/dockerized_template/dockerized_template.py \ - --docx_file my_document.docx \ - --md_file my_document.md - - 2. The script will: - - Convert the input DOCX file to a Markdown file. - - Extract any embedded media (e.g., images) into a folder derived from the Markdown file name - (e.g., "my_document.md" -> "my_document_figs"). - - Execute pandoc within a Docker container for a consistent conversion environment. 
- -Notes: - - Docker-specific options (such as forcing a rebuild or using sudo) are supported via the helper functions. - - The pandoc command is constructed to always start with the token "pandoc" to ensure proper parsing by the helper routines. - - Any line marked with "FILL THIS LIKE:" is a placeholder that you can customize to suit your needs. """ import argparse @@ -35,32 +17,19 @@ def _parse() -> argparse.ArgumentParser: - """ - Parse command-line arguments for the conversion script. - - The script expects: - --docx_file: Path to the input DOCX file. - --md_file: Path to the output Markdown file. - """ # Create an ArgumentParser instance with the provided docstring. parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) - # FILL THIS LIKE: Required argument for the DOCX file (input). - parser.add_argument( - "--docx_file", - required=True, - type=str, - help="Path to the DOCX file to convert.", - ) - # FILL THIS LIKE: Required argument for the Markdown file (output). - parser.add_argument( - "--md_file", - required=True, - type=str, - help="Path to the output Markdown file.", - ) - # Add Docker-specific arguments (e.g., --dockerized_force_rebuild, --dockerized_use_sudo). + # FILL THIS. + # parser.add_argument( + # "--docx_file", + # required=True, + # type=str, + # help="Path to the DOCX file to convert.", + # ) + # Add Docker-specific arguments (e.g., --dockerized_force_rebuild, + # --dockerized_use_sudo). hparser.add_dockerized_script_arg(parser) hparser.add_verbosity_arg(parser) return parser @@ -71,37 +40,16 @@ def _main(parser: argparse.ArgumentParser) -> None: hdbg.init_logger( verbosity=args.log_level, use_exec_path=True, force_white=False ) - # FILL THIS LIKE: Define folder for extracted media (e.g., images) by replacing ".md" with "_figs". 
- md_file_figs = args.md_file.replace(".md", "_figs") - _LOG.info("Converting '%s' to Markdown '%s'...", args.docx_file, args.md_file) - # FILL THIS LIKE: Build the pandoc command. IMPORTANT: The command string must start with 'pandoc'. - pandoc_cmd = ( - # FILL THIS LIKE: 'pandoc' executable token. - "pandoc " - + - # FILL THIS LIKE: Input DOCX file. - f"{args.docx_file} " - + - # FILL THIS LIKE: Flag to extract embedded media to the specified folder. - f"--extract-media {md_file_figs} " - + - # FILL THIS LIKE: Conversion: input format DOCX, output format strict Markdown. - "-f docx -t markdown_strict " - + - # FILL THIS LIKE: Specify output Markdown file with '-o'. - f"-o {args.md_file}" + # FILL THIS. + cmd = ( + ) + _LOG.debug("Command: %s", cmd) + hdocker.run_dockerized_pandoc( + pandoc_cmd, + container_type="pandoc_only", + force_rebuild=args.dockerized_force_rebuild, + use_sudo=args.dockerized_use_sudo, ) - _LOG.debug("Pandoc command: %s", pandoc_cmd) - # FILL THIS LIKE: Run pandoc within a Docker container using our helper function. - # This function will handle: - # - Converting host file paths to Docker container paths. - # - Executing the command in a reproducible Docker environment. - # hdocker.run_dockerized_pandoc( - # pandoc_cmd, - # container_type="pandoc_only", - # force_rebuild=args.dockerized_force_rebuild, - # use_sudo=args.dockerized_use_sudo, - # ) _LOG.info("Finished converting '%s' to '%s'.", args.docx_file, args.md_file) diff --git a/helpers/henv.py b/helpers/henv.py index 25af1ea1a..663c4be3f 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -6,8 +6,10 @@ import logging import os +import re from typing import Any, Dict, List, Tuple, Union +import helpers.hdbg as hdbg import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem @@ -28,6 +30,7 @@ _WARNING = "\033[33mWARNING\033[0m" +# TODO(gp): Is this the right place for this function? 
def has_module(module: str) -> bool: """ Return whether a Python module can be imported or not. @@ -37,13 +40,14 @@ def has_module(module: str) -> bool: # architecture yet, see CmTask4886 for details. return False code = f""" -try: - import {module} - has_module_ = True -except ImportError as e: - _LOG.warning("%s: %s", _WARNING, str(e)) - has_module_ = False -""" + try: + import {module} + has_module_ = True + except ImportError as e: + _LOG.warning("%s: %s", _WARNING, str(e)) + has_module_ = False + """ + code = hprint.dedent(code) # To make the linter happy. has_module_ = True locals_: Dict[str, Any] = {} @@ -56,6 +60,41 @@ def has_module(module: str) -> bool: return has_module_ +# ############################################################################# +# Utility functions. +# ############################################################################# + + +# All printing functions should: +# - Return a string and not a list of strings +# - Add a newline at the end of the string (i.e., the string should end with +# `\n`) + + +def _dassert_one_trailing_newline(txt: str) -> None: + num_newlines = len(re.search(r'\n*$', txt).group()) + hdbg.dassert_eq(num_newlines, 0, "num_newlines='%s' txt='%s'", num_newlines, txt) + + +def _to_info(tag: str, txt: Union[str, List[str]]) -> str: + hdbg.dassert_isinstance(tag, str) + hdbg.dassert_isinstance(txt, (str, list)) + txt_tmp = "" + txt_tmp += "# " + tag + "\n" + # Indent the text. + if not isinstance(txt, str): + for t in txt: + hdbg.dassert_isinstance(t, str) + txt = "\n".join(txt) + txt_tmp += hprint.indent(txt) + # Ensure that there is a single trailing newline. + txt_tmp = txt_tmp.rstrip("\n") + # txt_tmp += "\n" + # _dassert_one_trailing_newline(txt_tmp) + _LOG.debug("'%s'", txt_tmp) + return txt_tmp + + # ############################################################################# # Print the env vars. 
# ############################################################################# @@ -63,6 +102,7 @@ def has_module(module: str) -> bool: def get_env_var( env_name: str, + *, as_bool: bool = False, default_value: Any = None, abort_on_missing: bool = True, @@ -80,7 +120,7 @@ def get_env_var( """ if env_name not in os.environ: if abort_on_missing: - assert 0, f"Can't find env var '{env_name}' in '{str(os.environ)}'" + hdbg.dassert_in(env_name, os.environ, f"Can't find env var '{env_name}' in '{str(os.environ)}'") else: return default_value value = os.environ[env_name] @@ -135,9 +175,8 @@ def get_env_vars() -> List[str]: "CSFY_ECR_BASE_PATH", ] # No duplicates. - assert len(set(env_var_names)) == len( - env_var_names - ), f"There are duplicates: {str(env_var_names)}" + # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. + hdbg.dassert_eq(len(set(env_var_names)),len(env_var_names), f"There are duplicates", str(env_var_names)) # Sort. env_var_names = sorted(env_var_names) return env_var_names @@ -154,11 +193,11 @@ def get_secret_env_vars() -> List[str]: "GH_ACTION_ACCESS_TOKEN", ] # No duplicates. - assert len(set(secret_env_var_names)) == len( - secret_env_var_names - ), f"There are duplicates: {str(secret_env_var_names)}" + # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. + hdbg.dassert_eq(len(set(secret_env_var_names)), len(secret_env_var_names), f"There are duplicates", str(secret_env_var_names)) # Secret env vars are a subset of the env vars. env_vars = get_env_vars() + # TODO(gp): GFI. Use `hdbg.dassert_issubset()` instead. if not set(secret_env_var_names).issubset(set(env_vars)): diff = set(secret_env_var_names).difference(set(env_vars)) cmd = f"Secret vars in `{str(diff)} are not in '{str(env_vars)}'" @@ -174,9 +213,8 @@ def check_env_vars() -> None: """ env_vars = get_env_vars() for env_var in env_vars: - assert ( - env_var in os.environ - ), f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''" + # TODO(gp): GFI. 
Use %s instead of str(). + hdbg.dassert_in(env_var, os.environ, f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''") def env_vars_to_string() -> str: @@ -208,29 +246,6 @@ def env_vars_to_string() -> str: return msg -def env_to_str(add_system_signature: bool = True) -> str: - msg = "" - # - msg += "# Repo config:\n" - repo_config_str = hrecouti.get_repo_config().config_func_to_str() - msg += hprint.indent(repo_config_str) - msg += "\n" - msg += "# Server config:\n" - server_config_str = hserver.config_func_to_str() - msg += hprint.indent(server_config_str) - msg += "\n" - # System signature. - if add_system_signature: - msg += "# System signature:\n" - msg += hprint.indent(get_system_signature()[0]) - msg += "\n" - # Check which env vars are defined. - msg += "# Env vars:\n" - msg += hprint.indent(env_vars_to_string()) - msg += "\n" - return msg - - # ############################################################################# # Print the library versions. # ############################################################################# @@ -249,20 +264,6 @@ def _get_library_version(lib_name: str) -> str: return version -def _append( - txt: List[str], to_add: List[str], num_spaces: int = 2 -) -> Tuple[List[str], List[str]]: - txt.extend( - [ - " " * num_spaces + line - for txt_tmp in to_add - for line in txt_tmp.split("\n") - ] - ) - to_add: List[str] = [] - return txt, to_add - - # Copied from helpers.hgit to avoid circular dependencies. @@ -294,7 +295,7 @@ def _git_log(num_commits: int = 5, my_commits: bool = False) -> str: # End copy. -def _get_git_signature(git_commit_type: str = "all") -> List[str]: +def _get_git_signature(git_commit_type: str = "all") -> str: """ Get information about current branch and latest commits. 
""" @@ -321,60 +322,53 @@ def _get_git_signature(git_commit_type: str = "all") -> List[str]: pass else: raise ValueError(f"Invalid value='{git_commit_type}'") + # + txt_tmp = "\n".join(txt_tmp) + "\n" + hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) return txt_tmp -def _get_submodule_signature( - partial_signature: List[str], git_commit_type: str = "all" -) -> List[str]: - """ - Add git signature for all submodules. +# def _get_submodule_signature( +# partial_signature: List[str], *, git_commit_type: str = "all" +# ) -> str: +# """ +# Add git signature for all submodules. + +# :paramp partial_signature: the signature to append to +# :git_commit_type: the type of git commit to include in the +# signature +# :return: system signature enhanced by git submodule info +# """ +# # TODO(Juraj): Think of a better generalisation rather listing all the options. +# submodule_options = ["amp", "amp/helpers_root", "helpers_root"] +# signature = partial_signature +# prev_cwd = os.getcwd() +# for submodule in submodule_options: +# if os.path.exists(submodule): +# try: +# # Temporarily descend into submodule. +# os.chdir(submodule) +# signature.append(f"# Git {submodule}") +# git_amp_sig = _get_git_signature(git_commit_type) +# signature = _append(signature, git_amp_sig) +# # In case there is a runtime error we want to end up in a consistent +# # state (the original path). +# finally: +# os.chdir(prev_cwd) +# hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) +# return signature - :paramp partial_signature: the signature to append to - :git_commit_type: the type of git commit to include in the - signature - :return: system signature enhanced by git submodule info - """ - # TODO(Juraj): Think of a better generalisation rather listing all the options. 
- submodule_options = ["amp", "amp/helpers_root", "helpers_root"] - signature = partial_signature - prev_cwd = os.getcwd() - for submodule in submodule_options: - if os.path.exists(submodule): - try: - # Temporarily descend into submodule. - os.chdir(submodule) - signature.append(f"# Git {submodule}") - git_amp_sig = _get_git_signature(git_commit_type) - signature, _ = _append(signature, git_amp_sig) - # In case there is a runtime error we want to end up in a consistent state - # (the original path). - finally: - os.chdir(prev_cwd) - return signature +# ############################################################################# +# Print the system info. +# ############################################################################# -def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: - # TODO(gp): This should return a string that we append to the rest. - container_dir_name = "." - hversio.check_version(container_dir_name) - # - txt: List[str] = [] - # Add git signature. - txt.append("# Git") - txt_tmp: List[str] = [] - try: - txt_tmp += _get_git_signature(git_commit_type) - # If there are any submodules, fetch their git signature. - txt_tmp = _get_submodule_signature(txt_tmp, git_commit_type) - except RuntimeError as e: - _LOG.error(str(e)) - txt, txt_tmp = _append(txt, txt_tmp) - # Add processor info. - txt.append("# Machine info") - txt_tmp: List[str] = [] +def _get_platform_info() -> str: + """ + Get platform information as a list of strings. 
+ """ import platform - + txt_tmp: List[str] = [] uname = platform.uname() txt_tmp.append(f"system={uname.system}") txt_tmp.append(f"node name={uname.node}") @@ -382,29 +376,56 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: txt_tmp.append(f"version={uname.version}") txt_tmp.append(f"machine={uname.machine}") txt_tmp.append(f"processor={uname.processor}") + # + txt = _to_info("Platform info", txt_tmp) + return txt + + +def _get_psutil_info() -> str: + """ + Get system resource information using psutil. + """ try: import psutil - has_psutil = True except ModuleNotFoundError as e: - print(e) + _LOG.warning("psutil is not installed: %s", str(e)) has_psutil = False + + txt_tmp = [] if has_psutil: txt_tmp.append(f"cpu count={psutil.cpu_count()}") txt_tmp.append(f"cpu freq={str(psutil.cpu_freq())}") # TODO(gp): Report in MB or GB. txt_tmp.append(f"memory={str(psutil.virtual_memory())}") txt_tmp.append(f"disk usage={str(psutil.disk_usage('/'))}") - txt, txt_tmp = _append(txt, txt_tmp) - # Add package info. - txt.append("# Packages") + txt = _to_info("Psutils info", txt_tmp) + return txt + + +# ############################################################################# +# Print the package info. +# ############################################################################# + + +def _get_package_info() -> Tuple[List[str], int]: + """Get package version information. 
+ + Returns: + Tuple containing: + - List of strings with package info + - Number of failed imports + """ + import platform + + txt_tmp = [] packages = [] packages.append(("python", platform.python_version())) # import sys # print(sys.version) libs = [ "cvxopt", - "cvxpy", + "cvxpy", "gluonnlp", "gluonts", "joblib", @@ -431,7 +452,109 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: failed_imports += 1 packages.append((lib, version)) txt_tmp.extend([f"{l}: {v}" for (l, v) in packages]) - txt, txt_tmp = _append(txt, txt_tmp) # - txt = "\n".join(txt) + txt = _to_info("Packages", txt_tmp) return txt, failed_imports + + +# ############################################################################# +# Get container info. +# ############################################################################# + + +def _get_container_version() -> str: + txt_tmp: List[str] = [] + # + container_version = str(hversio.get_container_version()) + txt_tmp.append(f"container_version='{container_version}'") + # + container_dir_name = "." + changelog_version = str(hversio.get_changelog_version(container_dir_name)) + txt_tmp.append(f"changelog_version='{changelog_version}'") + # + txt_tmp = _to_info("Container version", txt_tmp) + return txt_tmp + + +# ############################################################################# +# Get the system signature. +# ############################################################################# + + +def _get_git_info(git_commit_type: str) -> str: + txt_tmp: List[str] = [] + try: + txt_tmp.append(_get_git_signature(git_commit_type)) + # If there are any submodules, fetch their git signature. + # txt_tmp.append(_get_submodule_signature(txt_tmp, git_commit_type)) + except RuntimeError as e: + _LOG.error(str(e)) + txt = _to_info("Git info", txt_tmp) + return txt + + +def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: + """ + Return a string with the system signature. 
+ + :param git_commit_type: the type of git commit to include in the + signature + :return: the system signature and the number of failed imports + """ + txt: List[str] = [] + # Add container version. + txt_tmp = _get_container_version() + _dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add git signature. + txt_tmp = _get_git_info(git_commit_type) + _dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add platform info. + txt_tmp = _get_platform_info() + _dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add psutil info. + txt_tmp = _get_psutil_info() + _dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # Add package info. + txt_tmp, failed_imports = _get_package_info() + _dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) + # + txt = _to_info("System signature", txt) + return txt, failed_imports + + +# ############################################################################# +# Package all the information into a string. +# ############################################################################# + + +def env_to_str( + repo_config: bool = True, + server_config: bool = True, + system_signature: bool = True, + env_vars: bool = True) -> str: + """ + Package all the information into a string. 
+ """ + # + msg = "" + # + if repo_config: + repo_config_str = hrecouti.get_repo_config().config_func_to_str() + msg += _to_info("Repo config", repo_config_str) + "\n" + # + if server_config: + server_config_str = hserver.config_func_to_str() + msg += _to_info("Server config", server_config_str) + "\n" + # + if system_signature: + msg += get_system_signature()[0] + "\n" + # + if env_vars: + env_vars_str = env_vars_to_string() + msg += _to_info("Env vars", env_vars_str) + "\n" + return msg diff --git a/helpers/hprint.py b/helpers/hprint.py index 92faa8771..cea430922 100644 --- a/helpers/hprint.py +++ b/helpers/hprint.py @@ -197,6 +197,9 @@ def indent(txt: Optional[str], *, num_spaces: int = 2) -> str: """ if txt is None: return "" + hdbg.dassert_isinstance(txt, str) + hdbg.dassert_isinstance(num_spaces, int) + hdbg.dassert_lte(0, num_spaces) spaces = " " * num_spaces txt_out = [] for curr_line in txt.split("\n"): @@ -339,6 +342,7 @@ def remove_empty_lines(txt: str) -> str: """ Remove empty lines from a multi-line string. """ + hdbg.dassert_isinstance(txt, str) arr = txt.split("\n") arr = remove_empty_lines_from_string_list(arr) txt = "\n".join(arr) diff --git a/helpers/hserver.py b/helpers/hserver.py index 337695068..f4d4cb789 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -658,5 +658,5 @@ def config_func_to_str() -> str: ret.append(msg) # _print(msg) # Package. - ret: str = "# hserver.config\n" + indent("\n".join(ret)) + ret = "\n".join(ret) return ret diff --git a/helpers/hversion.py b/helpers/hversion.py index 91f408012..74f31c539 100644 --- a/helpers/hversion.py +++ b/helpers/hversion.py @@ -51,7 +51,7 @@ def check_version(container_dir_name: str) -> None: return # Get code version. code_version = get_changelog_version(container_dir_name) - container_version = _get_container_version() + container_version = get_container_version() # Check version, if possible. if container_version is None: # No need to check. 
@@ -142,7 +142,7 @@ def get_changelog_version(container_dir_name: str) -> Optional[str]: return version -def _get_container_version() -> Optional[str]: +def get_container_version() -> Optional[str]: """ Return the container version. @@ -193,14 +193,16 @@ def _check_version(code_version: str, container_version: str) -> bool: is_ok = container_version == code_version if not is_ok: msg = f""" ------------------------------------------------------------------------------ -This code is not in sync with the container: -code_version='{code_version}' != container_version='{container_version}' ------------------------------------------------------------------------------ -You need to: -- merge origin/master into your branch with `invoke git_merge_master` -- pull the latest container with `invoke docker_pull` -""" + ----------------------------------------------------------------------------- + This code is not in sync with the container: + code_version='{code_version}' != container_version='{container_version}' + ----------------------------------------------------------------------------- + You need to: + - merge origin/master into your branch with `invoke git_merge_master` + - pull the latest container with `invoke docker_pull` + """ + # Can't use `hprint.indent()` to avoid a dependency on `hprint`. + msg = "\n".join(line.lstrip() for line in msg.split("\n")) msg = msg.rstrip().lstrip() msg = f"\033[31m{msg}\033[0m" print(msg) diff --git a/helpers/repo_config_utils.py b/helpers/repo_config_utils.py index c26673992..2d92b4007 100644 --- a/helpers/repo_config_utils.py +++ b/helpers/repo_config_utils.py @@ -151,7 +151,8 @@ def config_func_to_str(self) -> str: ret.append( f"get_docker_base_image_name='{self.get_docker_base_image_name()}'" ) - return "# repo_config.config\n" + indent("\n".join(ret)) + txt = "\n".join(ret) + return txt # repo_info @@ -263,7 +264,7 @@ def get_use_sibling_container(self) -> bool: This is used in unit tests to test dockerized executables. 
""" - value = self._data["docker_info"]["use_sibling_container"]) + value = self._data["docker_info"]["use_sibling_container"] assert value in ["True", "False"], f"Invalid boolen value: {value}" return value == "True" diff --git a/helpers/test/test_hversion.py b/helpers/test/test_hversion.py index 319b7935c..27c13beb5 100644 --- a/helpers/test/test_hversion.py +++ b/helpers/test/test_hversion.py @@ -16,7 +16,7 @@ def test_get_changelog_version1(self) -> None: _LOG.debug("code_version=%s", code_version) def test_get_container_version1(self) -> None: - container_version = hversio._get_container_version() + container_version = hversio.get_container_version() _LOG.debug("container_version=%s", container_version) def test_check_version1(self) -> None: diff --git a/linters/dockerized_pydeps.py b/linters/dockerized_pydeps.py new file mode 100755 index 000000000..f35e64087 --- /dev/null +++ b/linters/dockerized_pydeps.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python + +""" +Run pydeps as a dockerized executable. +""" + +import argparse +import logging + +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hparser as hparser +import helpers.hsystem as hsystem +import helpers.hserver as hserver +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + hparser.add_dockerized_script_arg(parser) + hparser.add_verbosity_arg(parser) + return parser + + +def _run_dockerized_pydeps( + in_file_path: str, + *, + force_rebuild: bool = False, + use_sudo: bool = False, +) -> None: + """ + Run `graphviz` in a Docker container. 
+ + :param in_file_path: path to the code of the image to render + :param out_file_path: path to the image to be created + :param force_rebuild: whether to force rebuild the Docker container + :param use_sudo: whether to use sudo for Docker commands + """ + _LOG.debug(hprint.func_signature_to_str()) + # Get the container image. + container_image = "tmp.pydeps" + dockerfile = rf""" + FROM alpine:latest + + RUN pip install pydeps + """ + container_image = hdocker.build_container_image( + container_image, dockerfile, force_rebuild, use_sudo + ) + # Convert files to Docker paths. + is_caller_host = not hserver.is_inside_docker() + use_sibling_container_for_callee = False + caller_mount_path, callee_mount_path, mount = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + in_file_path = hdocker.convert_caller_to_callee_docker_path( + in_file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + out_file_path = hdocker.convert_caller_to_callee_docker_path( + out_file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=False, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + cmd = [ + "pydeps" + f"{in_file_path}" + ] + cmd = " ".join(cmd) + executable = hdocker.get_docker_executable(use_sudo) + docker_cmd = ( + f"{executable} run --rm --user $(id -u):$(id -g)" + f" --workdir {callee_mount_path} --mount {mount}" + f" {container_image}" + f" {cmd}" + ) + hsystem.system(docker_cmd) + + +def _main(parser: argparse.ArgumentParser) -> None: + # Parse everything that can be parsed and returns the rest. 
+ args, cmd_opts = parser.parse_known_args() + hdbg.init_logger( + verbosity=args.log_level, use_exec_path=True, force_white=False + ) + hdocker.run_dockerized_graphviz( + args.input, + force_rebuild=args.dockerized_force_rebuild, + use_sudo=args.dockerized_use_sudo, + ) + _LOG.info("Output written to '%s'", args.output) + + +if __name__ == "__main__": + _main(_parse()) From aabdade1fe32850c1a36f1ce90471b9228004e21 Mon Sep 17 00:00:00 2001 From: saggese Date: Fri, 11 Apr 2025 16:41:22 +0000 Subject: [PATCH 003/193] Update --- helpers/henv.py | 9 +++++++++ helpers/hserver.py | 33 +++++++++++++++++++++++++++++++++ helpers/lib_tasks_print.py | 14 ++++++++++++-- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/helpers/henv.py b/helpers/henv.py index 663c4be3f..e00bea52b 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -493,6 +493,15 @@ def _get_git_info(git_commit_type: str) -> str: return txt +def _get_docker_info() -> str: + txt_tmp: List[str] = [] + has_docker = hserver.has_docker() + txt_tmp.append(f"docker installed={has_docker}") + rc, docker_version = hsystem.system_to_string(r"docker version --format '{{.Server.Version}}'") + txt_tmp.append(f"docker_version='{docker_version}'") + return txt_tmp + + def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: """ Return a string with the system signature. diff --git a/helpers/hserver.py b/helpers/hserver.py index f4d4cb789..e69f8dc0c 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -9,6 +9,7 @@ import functools import logging import os +import subprocess from typing import Dict, List, Optional import helpers.repo_config_utils as hrecouti @@ -29,6 +30,16 @@ def _print(msg: str) -> None: print(msg) +def _system_to_string(cmd: str) -> Tuple[int, str]: + result = subprocess.run( + cmd, stdout=subprocess.PIPE, + # Redirect stderr to stdout. 
+ stderr=subprocess.STDOUT, text=True) + rc = result.returncode + output = result.stdout + return rc, output + + # ############################################################################# # Detect server. # ############################################################################# @@ -286,6 +297,28 @@ def _dassert_setup_consistency() -> None: # ############################################################################# +def has_docker() -> bool: + """ + Return whether we have Docker installed. + """ + return shutil.which("docker") is not None + + +def docker_needs_sudo() -> bool: + """ + Return whether Docker commands need to be run with sudo. + """ + # groups | grep docker + return not has_docker() + + +def has_docker_privileged_mode() -> bool: + cmd = "docker run hello-world" + rc = os.system(cmd) + _print("cmd=%s -> rc=%s" % (cmd, rc)) + has_dind = rc == 0 + + # TODO(gp): -> has_docker_privileged_mode @functools.lru_cache() def has_dind_support() -> bool: diff --git a/helpers/lib_tasks_print.py b/helpers/lib_tasks_print.py index 1d9587f16..b46e5cfe0 100644 --- a/helpers/lib_tasks_print.py +++ b/helpers/lib_tasks_print.py @@ -77,12 +77,22 @@ def print_tasks(ctx, as_code=False): # type: ignore @task -def print_env(ctx): # type: ignore +def print_env(ctx, + repo_config=True, + server_config=True, + system_signature=True, + env_vars=True +): # type: ignore """ Print the repo configuration. 
""" _ = ctx - print(henv.env_to_str()) + print(henv.env_to_str( + repo_config=repo_config, + server_config=server_config, + system_signature=system_signature, + env_vars=env_vars + )) # TODO(gp): From ad7702e99336d5f792937151d4971664174265d1 Mon Sep 17 00:00:00 2001 From: saggese Date: Fri, 11 Apr 2025 21:39:26 +0000 Subject: [PATCH 004/193] Update --- helpers/henv.py | 10 +++++----- helpers/hserver.py | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/helpers/henv.py b/helpers/henv.py index e00bea52b..83cc54834 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -379,7 +379,7 @@ def _get_platform_info() -> str: # txt = _to_info("Platform info", txt_tmp) return txt - + def _get_psutil_info() -> str: """ @@ -391,7 +391,7 @@ def _get_psutil_info() -> str: except ModuleNotFoundError as e: _LOG.warning("psutil is not installed: %s", str(e)) has_psutil = False - + txt_tmp = [] if has_psutil: txt_tmp.append(f"cpu count={psutil.cpu_count()}") @@ -410,7 +410,7 @@ def _get_psutil_info() -> str: def _get_package_info() -> Tuple[List[str], int]: """Get package version information. - + Returns: Tuple containing: - List of strings with package info @@ -425,7 +425,7 @@ def _get_package_info() -> Tuple[List[str], int]: # print(sys.version) libs = [ "cvxopt", - "cvxpy", + "cvxpy", "gluonnlp", "gluonts", "joblib", @@ -461,7 +461,7 @@ def _get_package_info() -> Tuple[List[str], int]: # Get container info. 
# ############################################################################# - + def _get_container_version() -> str: txt_tmp: List[str] = [] # diff --git a/helpers/hserver.py b/helpers/hserver.py index e69f8dc0c..37634fab2 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -9,8 +9,9 @@ import functools import logging import os +import shutil import subprocess -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import helpers.repo_config_utils as hrecouti @@ -30,11 +31,18 @@ def _print(msg: str) -> None: print(msg) +# We can't use `hsystem` to avoid import cycles. def _system_to_string(cmd: str) -> Tuple[int, str]: + """ + Run a command and return the output and the return code. + + :param cmd: command to run + :return: tuple of (return code, output) + """ result = subprocess.run( - cmd, stdout=subprocess.PIPE, - # Redirect stderr to stdout. - stderr=subprocess.STDOUT, text=True) + cmd, stdout=subprocess.PIPE, + # Redirect stderr to stdout. + stderr=subprocess.STDOUT, text=True) rc = result.returncode output = result.stdout return rc, output From f3a8ec585ae34fa595770dd5096c8cc4023e0a34 Mon Sep 17 00:00:00 2001 From: saggese Date: Fri, 11 Apr 2025 22:21:16 +0000 Subject: [PATCH 005/193] Improve --- helpers/henv.py | 127 ++++++++++++++++++++++++++++----------------- helpers/hgit.py | 9 +--- helpers/hserver.py | 95 +++++++++++++++++++++++++-------- 3 files changed, 154 insertions(+), 77 deletions(-) diff --git a/helpers/henv.py b/helpers/henv.py index 83cc54834..5ec82260d 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -96,7 +96,7 @@ def _to_info(tag: str, txt: Union[str, List[str]]) -> str: # ############################################################################# -# Print the env vars. +# Get env vars info. 
# ############################################################################# @@ -222,7 +222,7 @@ def env_vars_to_string() -> str: Return a string with the signature of all the expected env vars (including the secret ones). """ - msg = [] + txt: List[str] = [] # Get the expected env vars and the secret ones. env_vars = get_env_vars() secret_env_vars = get_secret_env_vars() @@ -231,39 +231,26 @@ def env_vars_to_string() -> str: is_defined = env_name in os.environ is_empty = is_defined and os.environ[env_name] == "" if not is_defined: - msg.append(f"{env_name}=undef") + txt.append(f"{env_name}=undef") else: if env_name in secret_env_vars: # Secret env var: print if it's empty or not. if is_empty: - msg.append(f"{env_name}=empty") + txt.append(f"{env_name}=empty") else: - msg.append(f"{env_name}=***") + txt.append(f"{env_name}=***") else: # Not a secret var: print the value. - msg.append(f"{env_name}='{os.environ[env_name]}'") - msg = "\n".join(msg) - return msg + txt.append(f"{env_name}='{os.environ[env_name]}'") + txt = "\n".join(txt) + return txt # ############################################################################# -# Print the library versions. +# Get Git info. # ############################################################################# -def _get_library_version(lib_name: str) -> str: - try: - cmd = f"import {lib_name}" - # pylint: disable=exec-used - exec(cmd) - except ImportError: - version = "?" - else: - cmd = f"{lib_name}.__version__" - version = eval(cmd) - return version - - # Copied from helpers.hgit to avoid circular dependencies. @@ -299,33 +286,33 @@ def _get_git_signature(git_commit_type: str = "all") -> str: """ Get information about current branch and latest commits. """ - txt_tmp: List[str] = [] + txt: List[str] = [] # Get the branch name. 
cmd = "git branch --show-current" _, branch_name = hsystem.system_to_one_line(cmd) - txt_tmp.append(f"branch_name='{branch_name}'") + txt.append(f"branch_name='{branch_name}'") # Get the short Git hash of the current branch. cmd = "git rev-parse --short HEAD" _, hash_ = hsystem.system_to_one_line(cmd) - txt_tmp.append(f"hash='{hash_}'") + txt.append(f"hash='{hash_}'") # Add info about the latest commits. num_commits = 3 if git_commit_type == "all": - txt_tmp.append("# Last commits:") + txt.append("# Last commits:") log_txt = _git_log(num_commits=num_commits, my_commits=False) - txt_tmp.append(hprint.indent(log_txt)) + txt.append(hprint.indent(log_txt)) elif git_commit_type == "mine": - txt_tmp.append("# Your last commits:") + txt.append("# Your last commits:") log_txt = _git_log(num_commits=num_commits, my_commits=True) - txt_tmp.append(hprint.indent(log_txt)) + txt.append(hprint.indent(log_txt)) elif git_commit_type == "none": pass else: raise ValueError(f"Invalid value='{git_commit_type}'") # - txt_tmp = "\n".join(txt_tmp) + "\n" - hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) - return txt_tmp + txt = "\n".join(txt) + "\n" + hdbg.dassert(txt.endswith("\n"), f"txt_tmp='%s'", txt) + return txt # def _get_submodule_signature( @@ -358,8 +345,9 @@ def _get_git_signature(git_commit_type: str = "all") -> str: # hdbg.dassert(txt_tmp.endswith("\n"), f"txt_tmp='%s'", txt_tmp) # return signature + # ############################################################################# -# Print the system info. +# Get system info. # ############################################################################# @@ -399,15 +387,31 @@ def _get_psutil_info() -> str: # TODO(gp): Report in MB or GB. 
txt_tmp.append(f"memory={str(psutil.virtual_memory())}") txt_tmp.append(f"disk usage={str(psutil.disk_usage('/'))}") - txt = _to_info("Psutils info", txt_tmp) + else: + txt_tmp.append("psutil is not installed") + # + txt = _to_info("psutils info", txt_tmp) return txt # ############################################################################# -# Print the package info. +# Get package info. # ############################################################################# +def _get_library_version(lib_name: str) -> str: + try: + cmd = f"import {lib_name}" + # pylint: disable=exec-used + exec(cmd) + except ImportError: + version = "?" + else: + cmd = f"{lib_name}.__version__" + version = eval(cmd) + return version + + def _get_package_info() -> Tuple[List[str], int]: """Get package version information. @@ -458,8 +462,6 @@ def _get_package_info() -> Tuple[List[str], int]: # ############################################################################# -# Get container info. -# ############################################################################# def _get_container_version() -> str: @@ -472,13 +474,8 @@ def _get_container_version() -> str: changelog_version = str(hversio.get_changelog_version(container_dir_name)) txt_tmp.append(f"changelog_version='{changelog_version}'") # - txt_tmp = _to_info("Container version", txt_tmp) - return txt_tmp - - -# ############################################################################# -# Get the system signature. -# ############################################################################# + txt = _to_info("Container version", txt_tmp) + return txt def _get_git_info(git_commit_type: str) -> str: @@ -488,18 +485,46 @@ def _get_git_info(git_commit_type: str) -> str: # If there are any submodules, fetch their git signature. 
# txt_tmp.append(_get_submodule_signature(txt_tmp, git_commit_type)) except RuntimeError as e: - _LOG.error(str(e)) + _LOG.warning(str(e)) + txt_tmp.append("No git info") + # txt = _to_info("Git info", txt_tmp) return txt def _get_docker_info() -> str: txt_tmp: List[str] = [] + # has_docker = hserver.has_docker() - txt_tmp.append(f"docker installed={has_docker}") - rc, docker_version = hsystem.system_to_string(r"docker version --format '{{.Server.Version}}'") + txt_tmp.append(f"has_docker={has_docker}") + # + cmd = r"docker version --format '{{.Server.Version}}'" + _, docker_version = hsystem.system_to_string(cmd) txt_tmp.append(f"docker_version='{docker_version}'") - return txt_tmp + # + docker_needs_sudo = hserver.docker_needs_sudo() + txt_tmp.append(f"docker_needs_sudo={docker_needs_sudo}") + # + has_privileged_mode = hserver.has_docker_privileged_mode() + txt_tmp.append(f"has_privileged_mode={has_privileged_mode}") + # + is_inside_docker = hserver.is_inside_docker() + txt_tmp.append(f"is_inside_docker={is_inside_docker}") + # + if is_inside_docker: + has_sibling_containers_support = hserver.has_sibling_containers_support() + txt_tmp.append(f"has_sibling_containers_support={has_sibling_containers_support}") + # + has_docker_dind_support = hserver.has_docker_dind_support() + txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support}") + # + txt = _to_info("Docker info", txt_tmp) + return txt + + +# ############################################################################# +# Get system signature. +# ############################################################################# def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: @@ -515,7 +540,7 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: txt_tmp = _get_container_version() _dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) - # Add git signature. + # Add Git signature. 
txt_tmp = _get_git_info(git_commit_type) _dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) @@ -527,6 +552,10 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: txt_tmp = _get_psutil_info() _dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) + # Add Docker info. + txt_tmp = _get_docker_info() + _dassert_one_trailing_newline(txt_tmp) + txt.append(txt_tmp) # Add package info. txt_tmp, failed_imports = _get_package_info() _dassert_one_trailing_newline(txt_tmp) diff --git a/helpers/hgit.py b/helpers/hgit.py index 25af9c8d8..c2c64eba2 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -411,13 +411,8 @@ def is_amp_present(*, dir_name: str = ".") -> bool: # Using these functions is the last resort to skip / change the tests depending -# on the repo. We should control the tests through what functionalities they have, -# e.g., -# ``` -# hserver.has_dind_support(), -# ``` -# -# rather than their name. +# on the repo. We should control the tests through what functionalities they +# have, rather than the name of the repo. def is_cmamp() -> bool: diff --git a/helpers/hserver.py b/helpers/hserver.py index 575c9ae23..6d0d25942 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -364,10 +364,11 @@ def _dassert_setup_consistency() -> None: # ############################################################################# -# Docker +# Detect Docker functionalities. # ############################################################################# +@functools.lru_cache() def has_docker() -> bool: """ Return whether we have Docker installed. @@ -375,19 +376,72 @@ def has_docker() -> bool: return shutil.which("docker") is not None +@functools.lru_cache() def docker_needs_sudo() -> bool: """ Return whether Docker commands need to be run with sudo. 
""" - # groups | grep docker - return not has_docker() + if not has_docker(): + return False + # Another way to check is to see if your user is in the docker group: + # > groups | grep docker + rc = os.system("docker run hello-world 2>&1 >/dev/null") + if rc == 0: + return False + # + rc = os.system("sudo docker run hello-world 2>&1 >/dev/null") + if rc == 0: + return True + assert False, "Failed to run docker" +@functools.lru_cache() def has_docker_privileged_mode() -> bool: - cmd = "docker run hello-world" + """ + Return whether the current container supports privileged mode. + + Docker privileged mode gives containers nearly all the same capabilities as + the host system's kernel. + Privileged mode allows to: + - run Docker-in-Docker + - mount filesystems + """ + cmd = "docker run --privileged hello-world 2>&1 >/dev/null" rc = os.system(cmd) _print("cmd=%s -> rc=%s" % (cmd, rc)) - has_dind = rc == 0 + has_privileged_mode = rc == 0 + return has_privileged_mode + + +def has_sibling_containers_support() -> bool: + # We need to be inside a container to run sibling containers. + if not is_inside_docker(): + return False + # We assume that if the socket exists then we can run sibling containers. + if os.path.exists("/var/run/docker.sock"): + return True + return False + + +def has_docker_dind_support() -> bool: + """ + Return whether the current container supports Docker-in-Docker. + """ + # We need to be inside a container to run docker-in-docker. + if not is_inside_docker(): + return False + # We assume that if we have privileged mode then we can run docker-in-docker. + return has_docker_privileged_mode() + + +# ############################################################################# +# Detect Docker functionalities, based on the set-up. +# ############################################################################# + + +# TODO(gp): These approach is sub-optimal. We deduce what we can do based on the +# name of the set-up. 
We should base our decisions on the actual capabilities of +# the system. # TODO(gp): -> has_docker_privileged_mode @@ -396,7 +450,7 @@ def has_dind_support() -> bool: """ Return whether the current container supports privileged mode. - This is need to use Docker-in-Docker. + This is needed to use Docker-in-Docker. """ _print("is_inside_docker()=%s" % is_inside_docker()) if not is_inside_docker(): @@ -625,6 +679,7 @@ def run_docker_as_root() -> bool: return ret +# TODO(gp): Probably obsolete def get_docker_user() -> str: """ Return the user that runs Docker, if any. @@ -636,6 +691,7 @@ def get_docker_user() -> str: return val +# TODO(gp): Probably obsolete def get_docker_shared_group() -> str: """ Return the group of the user running Docker, if any. @@ -663,9 +719,18 @@ def skip_submodules_test() -> bool: return False -# TODO(gp): Remove this comment. -# # This function can't be in `helpers.hserver` since it creates circular import -# # and `helpers.hserver` should not depend on anything. +# ############################################################################# +# S3 buckets. +# ############################################################################# + + +def is_AM_S3_available() -> bool: + # AM bucket is always available. + val = True + _LOG.debug("val=%s", val) + return val + + def is_CK_S3_available() -> bool: val = True if is_inside_ci(): @@ -685,18 +750,6 @@ def is_CK_S3_available() -> bool: return val -# ############################################################################# -# S3 buckets. -# ############################################################################# - - -def is_AM_S3_available() -> bool: - # AM bucket is always available. 
- val = True - _LOG.debug("val=%s", val) - return val - - def get_host_user_name() -> Optional[str]: return os.environ.get("CSFY_HOST_USER_NAME", None) From 118902f01519560172ac88fe45774d5f4fbd5ef1 Mon Sep 17 00:00:00 2001 From: saggese Date: Fri, 11 Apr 2025 23:32:24 +0000 Subject: [PATCH 006/193] Improve --- helpers/hserver.py | 10 ++-- helpers/test/test_henv.py | 5 +- helpers/test/test_hserver.py | 106 ++++++++++++++++++++++++++++++++++- 3 files changed, 108 insertions(+), 13 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 6d0d25942..b88843f44 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -107,12 +107,10 @@ def is_inside_unit_test() -> bool: def is_dev_csfy() -> bool: - # TODO(gp): Update to use dev1 values. - # sysname='Darwin' - # nodename='gpmac.lan' - # release='19.6.0' - # version='Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; - # root:xnu-6153.141.2~1/RELEASE_X86_64' + # sysname='Linux' + # nodename='dev1' + # release='5.15.0-1081-aws', + # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', # machine='x86_64' host_name = os.uname()[1] host_names = ("dev1", "dev2", "dev3") diff --git a/helpers/test/test_henv.py b/helpers/test/test_henv.py index 9b428f973..aca2d1407 100644 --- a/helpers/test/test_henv.py +++ b/helpers/test/test_henv.py @@ -27,7 +27,4 @@ def test_has_not_module1(self) -> None: """ Check that the function returns false for the non-existing package. 
""" - self.assertFalse(henv.has_module("no_such_module")) - - -# ############################################################################# + self.assertFalse(henv.has_module("no_such_module")) \ No newline at end of file diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 6b019b957..acf486013 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -1,5 +1,7 @@ import logging +import pytest + import helpers.hserver as hserver import helpers.hunit_test as hunitest @@ -11,6 +13,10 @@ # ############################################################################# +@pytest.mark.skipif( + not hserver.is_inside_docker(), + reason="This test should be run inside a Docker container", +) class Test_hserver1(hunitest.TestCase): def test_is_inside_ci1(self) -> None: @@ -48,6 +54,100 @@ def test_config_func_to_str1(self) -> None: val = hserver.config_func_to_str() _LOG.info("val=\n%s", val) - # TODO(gp): Add test mocking the environment variables in _get_setup_signature. - # We should have one class for each set up (e.g., outside Mac, outside Linux, - # inside Docker, inside CI, etc.) 
\ No newline at end of file + +# ############################################################################# +# Test_hserver2 +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_docker(), + reason="This test should be run outside a Docker container", +) +class Test_hserver2(hunitest.TestCase): + + def test_consistency1(self) -> None: + hserver._dassert_setup_consistency() + + def test_get_setup_signature1(self) -> None: + val = hserver._get_setup_signature() + _LOG.info("val=\n%s", val) + + def test_get_setup_settings1(self) -> None: + setups = hserver._get_setup_settings() + val = hserver._setup_to_str(setups) + _LOG.info("val=\n%s", val) + + def test_config_func_to_str1(self) -> None: + val = hserver.config_func_to_str() + _LOG.info("val=\n%s", val) + + +# ############################################################################# +# Test_hserver_dev_csfy1 +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_docker() or not hserver.is_dev_csfy(), + reason="This test should be run on one of Causify dev machines", +) +class Test_hserver3(hunitest.TestCase): + + def test_consistency1(self) -> None: + hserver._dassert_setup_consistency() + + def test_get_setup_signature1(self) -> None: + act = hserver._get_setup_signature() + exp = "" + self.assert_equal(act, dev) + + def test_get_setup_settings1(self) -> None: + setups = hserver._get_setup_settings() + act = hserver._setup_to_str(setups) + exp = "" + self.assert_equal(act, exp) + + def test_config_func_to_str1(self) -> None: + act = hserver.config_func_to_str() + exp = "" + self.assert_equal(act, exp) + + +# ############################################################################# +# Test_hserver_gp_mac1 +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_docker() or not 
hserver.is_dev_csfy(), + reason="This test should be run on one of Causify dev machines", +) +class Test_hserver_gp_mac1(hunitest.TestCase): + + def test_consistency1(self) -> None: + hserver._dassert_setup_consistency() + + def test_get_setup_signature1(self) -> None: + act = hserver._get_setup_signature() + exp = "" + self.assert_equal(act, dev) + + def test_get_setup_settings1(self) -> None: + setups = hserver._get_setup_settings() + act = hserver._setup_to_str(setups) + exp = "" + self.assert_equal(act, exp) + + def test_config_func_to_str1(self) -> None: + act = hserver.config_func_to_str() + exp = "" + self.assert_equal(act, exp) + + +# ############################################################################# + + +# TODO(gp): Add test mocking the environment variables in _get_setup_signature. +# We should have one class for each set up (e.g., outside Mac, outside Linux, +# inside Docker, inside CI, etc.) \ No newline at end of file From ba6edc2c20c6d8607ad7a3b3218036ce0bb23836 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Fri, 11 Apr 2025 19:34:40 -0400 Subject: [PATCH 007/193] Improve --- helpers/hdocker.py | 101 +++++++++++++++++++++++++++++++++++++++++++++ helpers/hserver.py | 32 +++++++------- 2 files changed, 119 insertions(+), 14 deletions(-) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index c31dc87a3..675ce4b79 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1454,6 +1454,107 @@ def dockerized_tikz_to_bitmap( ) +# ############################################################################# +# Dockerized llm_transform. +# ############################################################################# + + +def run_dockerized_llm_transform( + in_file_path: str, + cmd_opts: List[str], + out_file_path: str, + *, + return_cmd: bool = False, + force_rebuild: bool = False, + use_sudo: bool = False, +) -> Optional[str]: + """ + Run dockerized_llm_transform.py in a Docker container with all its dependencies. 
+ """ + _LOG.debug(hprint.func_signature_to_str()) + # + hdbg.dassert_in("OPENAI_API_KEY", os.environ) + hdbg.dassert_isinstance(cmd_opts, list) + # Build the container, if needed. + container_image = "tmp.llm_transform" + dockerfile = r""" + FROM python:3.12-alpine + + # Install Bash. + #RUN apk add --no-cache bash + + # Set Bash as the default shell. + #SHELL ["/bin/bash", "-c"] + + # Install pip packages. + RUN pip install --no-cache-dir pyyaml openai + """ + container_image = build_container_image( + container_image, dockerfile, force_rebuild, use_sudo + ) + # Convert files to Docker paths. + is_caller_host = not hserver.is_inside_docker() + use_sibling_container_for_callee = True + caller_mount_path, callee_mount_path, mount = get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + in_file_path = convert_caller_to_callee_docker_path( + in_file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + out_file_path = convert_caller_to_callee_docker_path( + out_file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=False, + is_input=False, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + helpers_root = hgit.find_helpers_root() + helpers_root = convert_caller_to_callee_docker_path( + helpers_root, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=False, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + git_root = hgit.find_git_root() + script = hsystem.find_file_in_repo("dockerized_llm_transform.py", root_dir=git_root) + script = convert_caller_to_callee_docker_path( + script, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + 
use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + cmd_opts_as_str = " ".join(cmd_opts) + executable = get_docker_executable(use_sudo) + docker_cmd = ( + f"{executable} run --rm --user $(id -u):$(id -g)" + f" -e OPENAI_API_KEY -e PYTHONPATH={helpers_root}" + f" --workdir {callee_mount_path} --mount {mount}" + f" {container_image}" + f" {script} -i {in_file_path} -o {out_file_path} {cmd_opts_as_str}" + ) + if return_cmd: + ret = docker_cmd + else: + # TODO(gp): Note that `suppress_output=False` seems to hang the call. + hsystem.system(docker_cmd, suppress_output=False) + ret = None + return ret + + # ############################################################################# diff --git a/helpers/hserver.py b/helpers/hserver.py index b88843f44..802f8f373 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -14,6 +14,7 @@ from typing import Dict, List, Optional, Tuple import helpers.repo_config_utils as hrecouti +import helpers.hprint as hprint # This module should depend only on: # - Python standard modules @@ -95,16 +96,6 @@ def is_inside_unit_test() -> bool: # - we are inside CI or not # We should grep all the use cases in the codebase and use the right function. -# TODO(gp): The confusion is that we want to determine on which "setup" we are -# running. We do this both inside container and outside container. -# -# Sometimes we want to know if: -# - the processor is x86_64 or arm64 -# - the host is Mac or Linux -# - we are running on a Causify machine or on an external machine -# - we are inside CI or not -# We should grep all the use cases in the codebase and use the right function. 
- def is_dev_csfy() -> bool: # sysname='Linux' @@ -197,6 +188,19 @@ def is_mac(*, version: Optional[str] = None) -> bool: return is_mac_ +# The valid set ups are: +# - Running on dev1, dev2, dev3 server +# - Container +# - Host +# - External Mac (GP, Paul, interns, contributors) +# - Container +# - Host +# - External Linux (interns, contributors) +# - Container +# - Host +# - prod container on Linux + + def is_external_linux() -> bool: """ Detect whether we are running on a non-server/non-CI Linux machine. @@ -335,10 +339,10 @@ def _dassert_setup_consistency() -> None: # One and only one set-up should be true. sum_ = sum([value for _, value in setups]) if sum_ != 1: - msg = "One and only one set-up config should be true:\n" + _setup_to_str( - setups - ) - msg += "_get_setup_signature() returns:\n" + _get_setup_signature() + msg = "One and only one set-up config should be true:\n" + msg += _setup_to_str(setups) + "\n" + msg += "_get_setup_signature() returns:\n" + msg += hprint.indent(_get_setup_signature()) raise ValueError(msg) From f52feb84f080dab29d1939858d6f0a3e1b460258 Mon Sep 17 00:00:00 2001 From: saggese Date: Sat, 12 Apr 2025 01:30:33 +0000 Subject: [PATCH 008/193] Improve --- helpers/hserver.py | 457 +++++++++++++++++++++++------------ helpers/test/test_hserver.py | 190 ++++++++------- 2 files changed, 411 insertions(+), 236 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 802f8f373..06299a761 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -14,7 +14,6 @@ from typing import Dict, List, Optional, Tuple import helpers.repo_config_utils as hrecouti -import helpers.hprint as hprint # This module should depend only on: # - Python standard modules @@ -49,6 +48,162 @@ def _system_to_string(cmd: str) -> Tuple[int, str]: return rc, output +# ############################################################################# +# Host +# ############################################################################# + + +# We can't rely 
only on the name / version of the host to infer where we are +# running, since inside Docker the name of the host is like `01a7e34a82a5`. Of +# course, there is no way to know anything about the host for security reason, +# so we pass this value from the external environment to the container, through +# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_VERSION`). + + +# Sometimes we want to know if: +# - The processor is x86_64 or arm64 +# - The host is Mac or Linux +# - We are running on a Causify machine or on an external machine +# - We are inside CI or not +# TODO(gp): Grep all the use cases in the codebase and use the right function. + + +def get_host_user_name() -> Optional[str]: + """ + Return the name of the user running the host. + """ + return os.environ.get("CSFY_HOST_USER_NAME", None) + + +def get_dev_csfy_host_names() -> List[str]: + """ + Return the names of the Causify dev servers. + """ + host_names = ("dev1", "dev2", "dev3") + return host_names + + +def _get_host_name() -> str: + """ + Return the name of the host (not the machine) on which we are running. + + If we are inside a Docker container, we use the name of the host passed + through the `CSFY_HOST_NAME` env var. + """ + if is_inside_docker(): + host_name = os.environ["CSFY_HOST_NAME"] + else: + # sysname='Linux' + # nodename='dev1' + # release='5.15.0-1081-aws' + # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' + # machine='x86_64' + host_name = os.uname()[1] + _LOG.debug("host_name=%s", host_name) + return host_name + + +def _get_host_os_name() -> str: + """ + Return the name of the OS on which we are running (e.g., "Linux", "Darwin"). + + If we are inside a Docker container, we use the name of the OS passed + through the `CSFY_HOST_OS_NAME` env var. 
+ """ + if is_inside_docker(): + host_os_name = os.environ["CSFY_HOST_OS_NAME"] + else: + # sysname='Linux' + # nodename='dev1' + # release='5.15.0-1081-aws' + # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' + # machine='x86_64' + host_os_name = os.uname()[0] + _LOG.debug("host_os_name=%s", host_os_name) + return host_os_name + + +def _get_host_os_version() -> str: + """ + Return the version of the OS on which we are running. + + If we are inside a Docker container, we use the version of the OS passed + through the `CSFY_HOST_OS_VERSION` env var. + """ + if is_inside_docker(): + host_os_version = os.environ["CSFY_HOST_OS_VERSION"] + else: + # sysname='Linux' + # nodename='dev1' + # release='5.15.0-1081-aws' + # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' + # machine='x86_64' + host_os_version = os.uname()[2] + _LOG.debug("host_os_version=%s", host_os_version) + return host_os_version + + +def is_host_dev_csfy() -> bool: + """ + Return whether we are running on a Causify dev server. + """ + host_name = _get_host_name() + ret = host_name in get_dev_csfy_host_names() + return ret + + +_MAC_OS_VERSION_MAPPING = { + "Catalina": "19.", + "Monterey": "21.", + "Ventura": "22.", + "Sequoia": "24.", +} + + +def is_host_mac() -> bool: + """ + Return whether we are running on macOS. + """ + host_os_name = _get_host_os_name() + # + is_mac_ = host_os_name == "Darwin" + return is_mac_ + + +def get_host_mac_version() -> str: + """ + Get the macOS version (e.g., "Catalina", "Monterey", "Ventura"). + """ + host_os_version = _get_host_os_version() + for version, tag in _MAC_OS_VERSION_MAPPING.items(): + if tag in host_os_version: + return version + raise ValueError(f"Invalid host_os_version='{host_os_version}'") + + +def is_host_mac_version(version: str) -> bool: + """ + Return whether we are running on a Mac with a specific version (e.g., + "Catalina", "Monterey", "Ventura"). 
+ """ + assert version in _MAC_OS_VERSION_MAPPING, f"Invalid version='{version}'" + host_mac_version = get_host_mac_version() + ret = version.lower() == host_mac_version.lower() + return ret + + +def is_host_gp_mac() -> bool: + """ + Return whether we are running on a Mac owned by GP. + + This is used to check if we can use a specific feature before releasing + it to all the users. + """ + host_name = _get_host_name() + ret = host_name.startswith("gpmac.") + return ret + + # ############################################################################# # Detect server. # ############################################################################# @@ -65,38 +220,25 @@ def is_inside_ci() -> bool: return ret +# TODO(gp): -> is_inside_docker_container() def is_inside_docker() -> bool: """ Return whether we are inside a container or not. """ # From https://stackoverflow.com/questions/23513045 - return os.path.exists("/.dockerenv") + ret = os.path.exists("/.dockerenv") + return ret def is_inside_unit_test() -> bool: """ Return whether we are running code insider the regressions. """ - return "PYTEST_CURRENT_TEST" in os.environ - - -# We can't rely only on the name of the host to infer where we are running, -# since inside Docker the name of the host is like `01a7e34a82a5`. Of course, -# there is no way to know anything about the host for security reason, so we -# pass this value from the external environment to the container, through env -# vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`). - -# TODO(gp): The confusion is that we want to determine on which "setup" we are -# running. We do this both inside container and outside container. -# -# Sometimes we want to know if: -# - the processor is x86_64 or arm64 -# - the host is Mac or Linux -# - we are running on a Causify machine or on an external machine -# - we are inside CI or not -# We should grep all the use cases in the codebase and use the right function. 
+ ret = "PYTEST_CURRENT_TEST" in os.environ + return ret +# TODO(gp): Remove! def is_dev_csfy() -> bool: # sysname='Linux' # nodename='dev1' @@ -129,98 +271,6 @@ def is_dev4() -> bool: return is_dev4_ -def is_mac(*, version: Optional[str] = None) -> bool: - """ - Return whether we are running on macOS and, optionally, on a specific - version. - - :param version: check whether we are running on a certain macOS version (e.g., - `Catalina`, `Monterey`) - """ - _LOG.debug("version=%s", version) - host_os_name = os.uname()[0] - _LOG.debug("os.uname()=%s", str(os.uname())) - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - _LOG.debug( - "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name - ) - is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" - if version is None: - # The user didn't request a specific version, so we return whether we - # are running on a Mac or not. - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - else: - # The user specified a version: if we are not running on a Mac then we - # return False, since we don't even have to check the macOS version. - if not is_mac_: - _LOG.debug("is_mac_=%s", is_mac_) - return False - # Check the macOS version we are running. - if version == "Catalina": - # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: - # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 - macos_tag = "19.6" - elif version == "Monterey": - # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: - # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 - macos_tag = "21." - elif version == "Ventura": - macos_tag = "22." - elif version == "Sequoia": - # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0: - # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64 - macos_tag = "24." 
- else: - raise ValueError(f"Invalid version='{version}'") - _LOG.debug("macos_tag=%s", macos_tag) - host_os_version = os.uname()[2] - # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; - # root:xnu-6153.141.2~1/RELEASE_X86_64' - csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "") - _LOG.debug( - "host_os_version=%s csfy_host_os_version=%s", - host_os_version, - csfy_host_os_version, - ) - is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - - -# The valid set ups are: -# - Running on dev1, dev2, dev3 server -# - Container -# - Host -# - External Mac (GP, Paul, interns, contributors) -# - Container -# - Host -# - External Linux (interns, contributors) -# - Container -# - Host -# - prod container on Linux - - -def is_external_linux() -> bool: - """ - Detect whether we are running on a non-server/non-CI Linux machine. - - :return: whether an external Linux system is running - """ - # CI and dev servers are not considered external Linux systems. - if is_dev_csfy() or is_inside_ci(): - is_external_linux_ = False - # If we are inside a Docker container, we need to check the host OS. - elif is_inside_docker(): - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - is_external_linux_ = csfy_host_os_name == "Linux" - # If we are not inside a Docker container, we can check the host OS directly. - else: - host_os_name = os.uname()[0] - is_external_linux_ = host_os_name == "Linux" - return is_external_linux_ - - def is_prod_csfy() -> bool: """ Detect whether we are running in a Causify production container. @@ -248,32 +298,80 @@ def is_ig_prod() -> bool: def is_inside_ecs_container() -> bool: """ Detect whether we are running in an ECS container. 
- - When deploying jobs via ECS the container obtains credentials based - on passed task role specified in the ECS task-definition, refer to: - https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html. """ + # When deploying jobs via ECS the container obtains credentials based + # on passed task role specified in the ECS task-definition, refer to: + # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html ret = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ return ret -def is_external_dev() -> bool: +# ############################################################################# + + +def is_external_linux() -> bool: """ - Detect whether we are running in an external system. + Detect whether we are running on a non-server/non-CI Linux machine. + + This is true when we run on the machine of an intern, or a non-CSFY + contributor. + """ + if is_dev_csfy() or is_inside_ci(): + # CI and dev servers are not considered external Linux systems. + ret = False + elif is_inside_docker(): + # If we are inside a Docker container, we need to check the host OS. + csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) + ret = csfy_host_os_name == "Linux" + else: + # If we are not inside a Docker container, we can check the host OS + # directly. + host_os_name = os.uname()[0] + ret = host_os_name == "Linux" + return ret + + +# TODO(gp): When is this used? +def is_csfy_or_external_container() -> bool: + """ + Detect whether we are running on a container in a CSFY or external system. + + This is true for dockerized executables. + """ + res = False + if is_inside_ci(): + # CI servers are not considered external or CSFY systems. + res = False + elif not is_inside_docker(): + # Outside Docker there is no container. + res = False + else: + res = is_inside_docker() + _LOG.debug(" -> is_csfy_or_external_container=%s", res) + return res + - This includes macOS and non-server/non-CI Linux machines. 
+def is_external_dev() -> bool: """ - is_external_dev_ = is_mac() or is_external_linux() + Detect whether we are running on an system outside of Causify system + (e.g., a contributor's laptop, an intern's laptop, a non-CSFY machine). + """ + is_external_dev_ = is_host_mac() or is_external_linux() return is_external_dev_ # ############################################################################# +# Set up consistency. +# ############################################################################# +# TODO(gp): Update this. def _get_setup_signature() -> str: """ Dump all the variables that are used to make a decision about the values of the functions in `_get_setup_settings()`. + + This function is used to mock the state of the system for testing purposes. """ cmds = [] # is_prod_csfy() @@ -300,17 +398,62 @@ def _get_setup_signature() -> str: return result +# The valid set ups are: +# - Running on a Causify server (e.g., `dev1`, `dev2`, `dev3`) +# - Container +# - Host +# - External Mac (GP, Paul, interns, contributors) +# - Container +# - Host +# - External Linux (interns, contributors) +# - Container +# - Host +# - Prod container on Linux + + +def is_docker_container_on_csfy_server() -> bool: + """ + Return whether we are running on a Docker container on a Causify server. + """ + ret = is_inside_docker() and is_host_dev_csfy() + return ret + + +def is_docker_container_on_mac_host() -> bool: + """ + Return whether we are running on a Docker container on a Mac host. + """ + ret = is_inside_docker() and is_host_mac() + return ret + + +def is_docker_container_on_external_linux() -> bool: + """ + Return whether we are running on a Docker container on an external Linux. + """ + ret = is_inside_docker() and is_external_linux() + return ret + + def _get_setup_settings() -> List[Tuple[str, bool]]: + """ + Return a list of tuples with the name and value of the current server setup. + """ # Store name-value pairs as tuples. 
setups = [ - ("is_prod_csfy", is_prod_csfy()), + ("is_docker_container_on_csfy_server", is_docker_container_on_csfy_server()), + ("is_host_dev_csfy", is_host_dev_csfy()), + # + ("is_docker_container_on_mac_host", is_docker_container_on_mac_host()), + ("is_host_mac", is_host_mac()), + # + ("is_docker_container_on_external_linux", is_docker_container_on_external_linux()), + ("is_external_linux", is_external_linux()), + # ("is_dev4", is_dev4()), - ("is_dev_csfy", is_dev_csfy()), ("is_ig_prod", is_ig_prod()), ("is_inside_ci", is_inside_ci()), - ("is_mac", is_mac()), - ("is_external_linux", is_external_linux()), - # ("is_csfy_or_external_container", is_csfy_or_external_container()), + ("is_prod_csfy", is_prod_csfy()), ] return setups @@ -319,8 +462,7 @@ def _setup_to_str(setups: List[Tuple[str, bool]]) -> str: """ Return a string representation of the current server setup configuration. - :return: string with each setting on a new line, aligned with - padding + :return: string with each setting on a new line, aligned with padding """ # Find maximum length of setting names. max_len = max(len(name) for name, _ in setups) + 1 @@ -333,7 +475,10 @@ def _setup_to_str(setups: List[Tuple[str, bool]]) -> str: def _dassert_setup_consistency() -> None: """ - Check that one and only one server config is true. + Check that one and only one setup configuration is true. + + This is used to ensure that the setup configuration is one of the expected + ones and uniquely defined. """ setups = _get_setup_settings() # One and only one set-up should be true. 
@@ -342,7 +487,7 @@ def _dassert_setup_consistency() -> None: msg = "One and only one set-up config should be true:\n" msg += _setup_to_str(setups) + "\n" msg += "_get_setup_signature() returns:\n" - msg += hprint.indent(_get_setup_signature()) + msg += _indent(_get_setup_signature()) raise ValueError(msg) @@ -752,10 +897,6 @@ def is_CK_S3_available() -> bool: return val -def get_host_user_name() -> Optional[str]: - return os.environ.get("CSFY_HOST_USER_NAME", None) - - # ############################################################################# # Functions. # ############################################################################# @@ -764,7 +905,7 @@ def get_host_user_name() -> Optional[str]: # Copied from hprint to avoid import cycles. -def indent(txt: str, *, num_spaces: int = 2) -> str: +def _indent(txt: str, *, num_spaces: int = 2) -> str: """ Add `num_spaces` spaces before each line of the passed string. """ @@ -791,27 +932,33 @@ def config_func_to_str() -> str: # Get the functions with: # grep "def " helpers/hserver.py | sort | awk '{ print $2 }' | perl -i -ne 'print "$1\n" if /^([^\(]+)/' function_names = [ - "get_shared_data_dirs()", - "enable_privileged_mode()", - "get_docker_shared_group()", - "get_docker_user()", - "is_AM_S3_available()", - "has_dind_support()", - "has_docker_sudo()", - "is_CK_S3_available()", - "run_docker_as_root()", - "skip_submodules_test()", - "use_docker_db_container_name_to_connect()", - "use_docker_network_mode_host()", - "use_docker_sibling_containers()", - "is_dev4()", - "is_dev_csfy()", - "is_inside_ci()", - "is_inside_docker()", - "is_mac(version='Catalina')", - "is_mac(version='Monterey')", - "is_mac(version='Ventura')", - "is_mac(version='Sequoia')", + "enable_privileged_mode", + "get_docker_shared_group", + "get_docker_user", + "get_host_user_name", + "get_shared_data_dirs", + "has_dind_support", + "has_docker_sudo", + "is_AM_S3_available", + "is_CK_S3_available", + "is_csfy_or_external_container", + "is_dev4", + 
"is_dev_csfy", + "is_external_linux", + "is_host_mac", + "is_ig_prod", + "is_inside_ci", + "is_inside_docker", + "is_inside_ecs_container", + "is_inside_unit_test", + "is_mac", + "is_prod_csfy", + "run_docker_as_root", + "skip_submodules_test", + "use_docker_db_container_name_to_connect", + "use_docker_network_mode_host", + "use_docker_sibling_containers", + "use_main_network", ] for func_name in sorted(function_names): try: diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index acf486013..e5882154a 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -8,35 +8,19 @@ _LOG = logging.getLogger(__name__) -# ############################################################################# -# Test_hserver1 -# ############################################################################# - - -@pytest.mark.skipif( - not hserver.is_inside_docker(), - reason="This test should be run inside a Docker container", -) -class Test_hserver1(hunitest.TestCase): +class _Test_hserver1(hunitest.TestCase): def test_is_inside_ci1(self) -> None: - is_inside_ci_ = hserver.is_inside_ci() - if is_inside_ci_: - # Inside CI we expect to run inside Docker. - self.assertTrue(hserver.is_inside_docker()) - - def test_is_inside_docker1(self) -> None: - # We always run tests inside Docker. - self.assertTrue(hserver.is_inside_docker()) + val = hserver.is_inside_ci() + _LOG.info("val=\n%s", val) + if self.exp_is_inside_ci is not None: + self.assert_equal(val, self.exp_is_inside_ci) def test_is_dev_csfy1(self) -> None: - _ = hserver.is_dev_csfy() - - def test_is_prod_csfy1(self) -> None: - is_prod_csfy = hserver.is_prod_csfy() - if is_prod_csfy: - # Prod runs inside Docker. 
- self.assertTrue(hserver.is_inside_docker()) + val = hserver.is_dev_csfy() + _LOG.info("val=\n%s", val) + if self.exp_is_dev_csfy is not None: + self.assert_equal(val, self.exp_is_dev_csfy) def test_consistency1(self) -> None: hserver._dassert_setup_consistency() @@ -44,43 +28,84 @@ def test_consistency1(self) -> None: def test_get_setup_signature1(self) -> None: val = hserver._get_setup_signature() _LOG.info("val=\n%s", val) + if self.exp_get_setup_signature is not None: + self.assert_equal(val, self.exp_get_setup_signature) def test_get_setup_settings1(self) -> None: setups = hserver._get_setup_settings() val = hserver._setup_to_str(setups) _LOG.info("val=\n%s", val) + if self.exp_get_setup_settings is not None: + self.assert_equal(val, self.exp_get_setup_settings) def test_config_func_to_str1(self) -> None: val = hserver.config_func_to_str() _LOG.info("val=\n%s", val) + if self.exp_config_func_to_str is not None: + self.assert_equal(val, self.exp_config_func_to_str) # ############################################################################# -# Test_hserver2 +# Test_hserver1 +# ############################################################################# + + +class Test_hserver1(_Test_hserver1): + """ + Smoke test without checking anything. + """ + + def setUp(self) -> None: + super().setUp() + self.exp_is_inside_ci = None + self.exp_is_dev_csfy = None + self.exp_get_setup_signature = None + self.exp_get_setup_settings = None + self.exp_config_func_to_str = None + + +# ############################################################################# +# Test_hserver_inside_ci # ############################################################################# @pytest.mark.skipif( - hserver.is_inside_docker(), - reason="This test should be run outside a Docker container", + not hserver.is_inside_ci(), ) -class Test_hserver2(hunitest.TestCase): +class Test_hserver_inside_ci1(_Test_hserver1): + """ + Run tests inside CI. 
+ """ - def test_consistency1(self) -> None: - hserver._dassert_setup_consistency() + def setUp(self) -> None: + super().setUp() + self.exp_is_inside_ci = True + self.exp_is_dev_csfy = True + self.exp_get_setup_signature = "" + self.exp_get_setup_settings = "" + self.exp_config_func_to_str = "" - def test_get_setup_signature1(self) -> None: - val = hserver._get_setup_signature() - _LOG.info("val=\n%s", val) - def test_get_setup_settings1(self) -> None: - setups = hserver._get_setup_settings() - val = hserver._setup_to_str(setups) - _LOG.info("val=\n%s", val) +# ############################################################################# +# Test_hserver_docker_container_on_csfy_server1 +# ############################################################################# - def test_config_func_to_str1(self) -> None: - val = hserver.config_func_to_str() - _LOG.info("val=\n%s", val) + +@pytest.mark.skipif( + not hserver.is_docker_container_on_csfy_server(), +) +class Test_hserver_inside_docker_container_on_csfy_server1(_Test_hserver1): + """ + Run tests inside Docker container on a Causify dev server. + """ + + def setUp(self) -> None: + super().setUp() + self.exp_is_inside_ci = True + self.exp_is_dev_csfy = True + self.exp_get_setup_signature = "" + self.exp_get_setup_settings = "" + self.exp_config_func_to_str = "" # ############################################################################# @@ -89,30 +114,42 @@ def test_config_func_to_str1(self) -> None: @pytest.mark.skipif( - hserver.is_inside_docker() or not hserver.is_dev_csfy(), - reason="This test should be run on one of Causify dev machines", + not (not hserver.is_inside_docker() and hserver.is_host_dev_csfy()), ) -class Test_hserver3(hunitest.TestCase): +class Test_hserver_outside_docker_container_on_csfy_server1(hunitest.TestCase): + """ + Run tests outside Docker container on a Causify dev server. 
+ """ - def test_consistency1(self) -> None: - hserver._dassert_setup_consistency() + def setUp(self) -> None: + super().setUp() + self.exp_is_inside_ci = False + self.exp_is_dev_csfy = True + self.exp_get_setup_signature = "" + self.exp_get_setup_settings = "" + self.exp_config_func_to_str = "" - def test_get_setup_signature1(self) -> None: - act = hserver._get_setup_signature() - exp = "" - self.assert_equal(act, dev) - def test_get_setup_settings1(self) -> None: - setups = hserver._get_setup_settings() - act = hserver._setup_to_str(setups) - exp = "" - self.assert_equal(act, exp) +# ############################################################################# +# Test_hserver_docker_container_on_mac_host1 +# ############################################################################# - def test_config_func_to_str1(self) -> None: - act = hserver.config_func_to_str() - exp = "" - self.assert_equal(act, exp) +@pytest.mark.skipif( + not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), +) +class Test_hserver_inside_docker_container_on_mac_host1(hunitest.TestCase): + """ + Run tests inside Docker container on a GP's Mac. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_is_inside_ci = True + self.exp_is_dev_csfy = True + self.exp_get_setup_signature = "" + self.exp_get_setup_settings = "" + self.exp_config_func_to_str = "" # ############################################################################# # Test_hserver_gp_mac1 @@ -120,29 +157,20 @@ def test_config_func_to_str1(self) -> None: @pytest.mark.skipif( - hserver.is_inside_docker() or not hserver.is_dev_csfy(), - reason="This test should be run on one of Causify dev machines", + not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), ) -class Test_hserver_gp_mac1(hunitest.TestCase): - - def test_consistency1(self) -> None: - hserver._dassert_setup_consistency() - - def test_get_setup_signature1(self) -> None: - act = hserver._get_setup_signature() - exp = "" - self.assert_equal(act, dev) - - def test_get_setup_settings1(self) -> None: - setups = hserver._get_setup_settings() - act = hserver._setup_to_str(setups) - exp = "" - self.assert_equal(act, exp) - - def test_config_func_to_str1(self) -> None: - act = hserver.config_func_to_str() - exp = "" - self.assert_equal(act, exp) +class Test_hserver_outside_docker_container_on_gp_mac1(hunitest.TestCase): + """ + Run tests outside Docker container on a GP's Mac. 
+ """ + + def setUp(self) -> None: + super().setUp() + self.exp_is_inside_ci = True + self.exp_is_dev_csfy = True + self.exp_get_setup_signature = "" + self.exp_get_setup_settings = "" + self.exp_config_func_to_str = "" # ############################################################################# From 33eaabca789c3690029a8a706e78ab4e96c03d3e Mon Sep 17 00:00:00 2001 From: saggese Date: Sat, 12 Apr 2025 01:55:34 +0000 Subject: [PATCH 009/193] Improve --- .../encrypt_models/encrypt_model.py | 2 +- .../system_tools/save_screenshot.py | 2 +- dev_scripts_helpers/thin_client/build.py | 2 +- helpers/hdbg.py | 2 +- helpers/henv.py | 2 +- helpers/hserver.py | 4 +- helpers/hsystem.py | 2 +- helpers/hunit_test_utils.py | 5 +- helpers/lib_tasks_lint.py | 2 +- helpers/lib_tasks_utils.py | 2 +- helpers/test/test_hserver.py | 124 ++++++++++-------- 11 files changed, 84 insertions(+), 65 deletions(-) diff --git a/dev_scripts_helpers/encrypt_models/encrypt_model.py b/dev_scripts_helpers/encrypt_models/encrypt_model.py index 9ed431001..8f075925e 100755 --- a/dev_scripts_helpers/encrypt_models/encrypt_model.py +++ b/dev_scripts_helpers/encrypt_models/encrypt_model.py @@ -73,7 +73,7 @@ def _encrypt_input_dir( # Create temporary Dockerfile. 
temp_dockerfile_path = "./tmp.encrypt_model.Dockerfile" with open(temp_dockerfile_path, "w") as temp_dockerfile: - if hserver.is_mac(): + if hserver.is_host_mac(): temp_dockerfile.write( f""" FROM python:{exp_python_version} diff --git a/dev_scripts_helpers/system_tools/save_screenshot.py b/dev_scripts_helpers/system_tools/save_screenshot.py index 96c36251f..68b7e4b4e 100755 --- a/dev_scripts_helpers/system_tools/save_screenshot.py +++ b/dev_scripts_helpers/system_tools/save_screenshot.py @@ -60,7 +60,7 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = "![](%s)" % filename _LOG.info("%s", txt) # A tree - if hserver.is_mac(): + if hserver.is_host_mac(): _LOG.warning("Copied to clipboard") cmd = f"echo '{txt}' | pbcopy" hsystem.system(cmd) diff --git a/dev_scripts_helpers/thin_client/build.py b/dev_scripts_helpers/thin_client/build.py index 2a4573c74..1fa3c2c36 100755 --- a/dev_scripts_helpers/thin_client/build.py +++ b/dev_scripts_helpers/thin_client/build.py @@ -84,7 +84,7 @@ def _main(parser: argparse.ArgumentParser) -> None: _system(f"{activate_cmd} && pip3 install -r {tmp_requirements_path}") # Show the package list. _system("pip3 list") - if hserver.is_mac(): + if hserver.is_host_mac(): # Darwin specific updates. _system("brew update") _, brew_ver = hsystem.system_to_string("brew --version") diff --git a/helpers/hdbg.py b/helpers/hdbg.py index b83a29a2c..15d10e2e4 100644 --- a/helpers/hdbg.py +++ b/helpers/hdbg.py @@ -1010,7 +1010,7 @@ def init_logger( root_logger.addHandler(file_handler) file_handler.setFormatter(formatter) # - _LOG.info("Saving log to file '%s'", {log_filename}) + _LOG.info("Saving log to file '%s'", log_filename) # _LOG.debug("Effective logging level=%s", _LOG.getEffectiveLevel()) # Shut up chatty modules. 
diff --git a/helpers/henv.py b/helpers/henv.py index 5ec82260d..6eaf51fc1 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -35,7 +35,7 @@ def has_module(module: str) -> bool: """ Return whether a Python module can be imported or not. """ - if module == "gluonts" and hserver.is_mac(): + if module == "gluonts" and hserver.is_host_mac(): # Gluonts and mxnet modules are not properly supported on the ARM # architecture yet, see CmTask4886 for details. return False diff --git a/helpers/hserver.py b/helpers/hserver.py index 06299a761..096900448 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -166,8 +166,8 @@ def is_host_mac() -> bool: """ host_os_name = _get_host_os_name() # - is_mac_ = host_os_name == "Darwin" - return is_mac_ + ret = host_os_name == "Darwin" + return ret def get_host_mac_version() -> str: diff --git a/helpers/hsystem.py b/helpers/hsystem.py index eab5ba4f1..b0b3c6b0f 100644 --- a/helpers/hsystem.py +++ b/helpers/hsystem.py @@ -687,7 +687,7 @@ def to_pbcopy(txt: str, pbcopy: bool) -> None: if not txt: print("Nothing to copy") return - if hserver.is_mac(): + if hserver.is_host_mac(): # -n = no new line cmd = f"echo -n '{txt}' | pbcopy" system(cmd) diff --git a/helpers/hunit_test_utils.py b/helpers/hunit_test_utils.py index 755eee30d..adb65e302 100644 --- a/helpers/hunit_test_utils.py +++ b/helpers/hunit_test_utils.py @@ -448,6 +448,7 @@ def execute_only_in_target_repo(target_name: str) -> None: pytest.skip(f"Only run on {target_name} and not {repo_short_name}") +# TODO(gp): Remove and use pytest.skipif(). 
def execute_only_on_ci() -> None: is_inside_ci_ = hserver.is_inside_ci() if not is_inside_ci_: @@ -467,8 +468,8 @@ def execute_only_on_dev_csfy() -> None: def execute_only_on_mac(*, version: Optional[str] = None) -> None: - is_mac_ = hserver.is_mac(version=version) - if not is_mac_: + is_host_mac_ = hserver.is_host_mac(version=version) + if not is_host_mac_: pytest.skip(f"Only run on Mac with version={version}") diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index f18713ee6..af1f2b0b5 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -261,7 +261,7 @@ def lint( # type: ignore else: _LOG.info("All Linter actions selected") # Compose the command line. - if hserver.is_mac(): + if hserver.is_host_mac(): find_cmd = "$(find . -path '*linters/base.py')" else: find_cmd = "$(find -wholename '*linters/base.py')" diff --git a/helpers/lib_tasks_utils.py b/helpers/lib_tasks_utils.py index 22802d9af..1f8dc8b51 100644 --- a/helpers/lib_tasks_utils.py +++ b/helpers/lib_tasks_utils.py @@ -250,7 +250,7 @@ def _to_pbcopy(txt: str, pbcopy: bool) -> None: if not txt: print("Nothing to copy") return - if hserver.is_mac(): + if hserver.is_host_mac(): # -n = no new line cmd = f"echo -n '{txt}' | pbcopy" hsystem.system(cmd) diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index e5882154a..1c0bf29de 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -2,35 +2,24 @@ import pytest +import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hunit_test as hunitest _LOG = logging.getLogger(__name__) -class _Test_hserver1(hunitest.TestCase): +class GP_TestCase1(hunitest.TestCase): - def test_is_inside_ci1(self) -> None: - val = hserver.is_inside_ci() - _LOG.info("val=\n%s", val) - if self.exp_is_inside_ci is not None: - self.assert_equal(val, self.exp_is_inside_ci) - - def test_is_dev_csfy1(self) -> None: - val = hserver.is_dev_csfy() - _LOG.info("val=\n%s", val) - if 
self.exp_is_dev_csfy is not None: - self.assert_equal(val, self.exp_is_dev_csfy) + # def test_config_func_to_str1(self) -> None: + # val = hserver.config_func_to_str() + # _LOG.info("val=\n%s", val) + # if self.exp_config_func_to_str is not None: + # self.assert_equal(val, self.exp_config_func_to_str) def test_consistency1(self) -> None: hserver._dassert_setup_consistency() - def test_get_setup_signature1(self) -> None: - val = hserver._get_setup_signature() - _LOG.info("val=\n%s", val) - if self.exp_get_setup_signature is not None: - self.assert_equal(val, self.exp_get_setup_signature) - def test_get_setup_settings1(self) -> None: setups = hserver._get_setup_settings() val = hserver._setup_to_str(setups) @@ -38,30 +27,41 @@ def test_get_setup_settings1(self) -> None: if self.exp_get_setup_settings is not None: self.assert_equal(val, self.exp_get_setup_settings) - def test_config_func_to_str1(self) -> None: - val = hserver.config_func_to_str() + # def test_get_setup_signature1(self) -> None: + # val = hserver._get_setup_signature() + # _LOG.info("val=\n%s", val) + # if self.exp_get_setup_signature is not None: + # self.assert_equal(val, self.exp_get_setup_signature) + + def test_is_dev_csfy1(self) -> None: + val = hserver.is_dev_csfy() _LOG.info("val=\n%s", val) - if self.exp_config_func_to_str is not None: - self.assert_equal(val, self.exp_config_func_to_str) + if self.exp_is_dev_csfy is not None: + self.assertEqual(val, self.exp_is_dev_csfy) + def test_is_inside_ci1(self) -> None: + val = hserver.is_inside_ci() + _LOG.info("val=\n%s", val) + if self.exp_is_inside_ci is not None: + self.assertEqual(val, self.exp_is_inside_ci) # ############################################################################# # Test_hserver1 # ############################################################################# -class Test_hserver1(_Test_hserver1): +class Test_hserver1(GP_TestCase1): """ Smoke test without checking anything. 
""" def setUp(self) -> None: super().setUp() - self.exp_is_inside_ci = None - self.exp_is_dev_csfy = None - self.exp_get_setup_signature = None - self.exp_get_setup_settings = None self.exp_config_func_to_str = None + self.exp_get_setup_settings = None + self.exp_get_setup_signature = None + self.exp_is_dev_csfy = None + self.exp_is_inside_ci = None # ############################################################################# @@ -71,19 +71,20 @@ def setUp(self) -> None: @pytest.mark.skipif( not hserver.is_inside_ci(), + reason="Config not matching", ) -class Test_hserver_inside_ci1(_Test_hserver1): +class Test_hserver_inside_ci1(GP_TestCase1): """ Run tests inside CI. """ def setUp(self) -> None: super().setUp() - self.exp_is_inside_ci = True - self.exp_is_dev_csfy = True - self.exp_get_setup_signature = "" - self.exp_get_setup_settings = "" - self.exp_config_func_to_str = "" + self.exp_config_func_to_str = None + self.exp_get_setup_settings = None + self.exp_get_setup_signature = None + self.exp_is_dev_csfy = None + self.exp_is_inside_ci = None # ############################################################################# @@ -93,19 +94,20 @@ def setUp(self) -> None: @pytest.mark.skipif( not hserver.is_docker_container_on_csfy_server(), + reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_csfy_server1(_Test_hserver1): +class Test_hserver_inside_docker_container_on_csfy_server1(GP_TestCase1): """ Run tests inside Docker container on a Causify dev server. 
""" def setUp(self) -> None: super().setUp() - self.exp_is_inside_ci = True - self.exp_is_dev_csfy = True - self.exp_get_setup_signature = "" - self.exp_get_setup_settings = "" self.exp_config_func_to_str = "" + self.exp_get_setup_settings = "" + self.exp_get_setup_signature = "" + self.exp_is_dev_csfy = True + self.exp_is_inside_ci = True # ############################################################################# @@ -115,19 +117,32 @@ def setUp(self) -> None: @pytest.mark.skipif( not (not hserver.is_inside_docker() and hserver.is_host_dev_csfy()), + reason="Config not matching", ) -class Test_hserver_outside_docker_container_on_csfy_server1(hunitest.TestCase): +class Test_hserver_outside_docker_container_on_csfy_server1(GP_TestCase1): """ Run tests outside Docker container on a Causify dev server. """ def setUp(self) -> None: super().setUp() - self.exp_is_inside_ci = False - self.exp_is_dev_csfy = True - self.exp_get_setup_signature = "" - self.exp_get_setup_settings = "" self.exp_config_func_to_str = "" + self.exp_get_setup_settings = hprint.dedent( + r""" + is_docker_container_on_csfy_server False + is_host_dev_csfy True + is_docker_container_on_mac_host False + is_host_mac False + is_docker_container_on_external_linux False + is_external_linux False + is_dev4 False + is_ig_prod False + is_inside_ci False + is_prod_csfy False + """) + self.exp_get_setup_signature = "" + self.exp_is_dev_csfy = True + self.exp_is_inside_ci = False # ############################################################################# @@ -137,19 +152,21 @@ def setUp(self) -> None: @pytest.mark.skipif( not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), + reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_mac_host1(hunitest.TestCase): +class Test_hserver_inside_docker_container_on_mac_host1(GP_TestCase1): """ Run tests inside Docker container on a GP's Mac. 
""" def setUp(self) -> None: super().setUp() - self.exp_is_inside_ci = True - self.exp_is_dev_csfy = True - self.exp_get_setup_signature = "" - self.exp_get_setup_settings = "" self.exp_config_func_to_str = "" + self.exp_get_setup_settings = "" + self.exp_get_setup_signature = "" + self.exp_is_dev_csfy = True + self.exp_is_inside_ci = True + # ############################################################################# # Test_hserver_gp_mac1 @@ -158,19 +175,20 @@ def setUp(self) -> None: @pytest.mark.skipif( not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), + reason="Config not matching", ) -class Test_hserver_outside_docker_container_on_gp_mac1(hunitest.TestCase): +class Test_hserver_outside_docker_container_on_gp_mac1(GP_TestCase1): """ Run tests outside Docker container on a GP's Mac. """ def setUp(self) -> None: super().setUp() - self.exp_is_inside_ci = True - self.exp_is_dev_csfy = True - self.exp_get_setup_signature = "" - self.exp_get_setup_settings = "" self.exp_config_func_to_str = "" + self.exp_get_setup_settings = "" + self.exp_get_setup_signature = "" + self.exp_is_dev_csfy = True + self.exp_is_inside_ci = True # ############################################################################# From d63303a9c1a91b2b1191e2046eeb8e69bb4d639f Mon Sep 17 00:00:00 2001 From: saggese Date: Sat, 12 Apr 2025 02:19:35 +0000 Subject: [PATCH 010/193] Improve --- helpers/hserver.py | 76 ++++++++++++++++++++++++++++-------- helpers/test/test_hserver.py | 70 ++++++++++++++++++--------------- 2 files changed, 98 insertions(+), 48 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 096900448..6d45390ec 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -143,7 +143,7 @@ def _get_host_os_version() -> str: return host_os_version -def is_host_dev_csfy() -> bool: +def is_host_csfy_server() -> bool: """ Return whether we are running on a Causify dev server. 
""" @@ -411,15 +411,23 @@ def _get_setup_signature() -> str: # - Prod container on Linux -def is_docker_container_on_csfy_server() -> bool: +def is_inside_docker_container_on_csfy_server() -> bool: """ Return whether we are running on a Docker container on a Causify server. """ - ret = is_inside_docker() and is_host_dev_csfy() + ret = is_inside_docker() and is_host_csfy_server() return ret -def is_docker_container_on_mac_host() -> bool: +def is_outside_docker_container_on_csfy_server() -> bool: + """ + Return whether we are running on a Docker container on a Causify server. + """ + ret = not is_inside_docker() and is_host_csfy_server() + return ret + + +def is_inside_docker_container_on_host_mac() -> bool: """ Return whether we are running on a Docker container on a Mac host. """ @@ -427,7 +435,15 @@ def is_docker_container_on_mac_host() -> bool: return ret -def is_docker_container_on_external_linux() -> bool: +def is_outside_docker_container_on_host_mac() -> bool: + """ + Return whether we are running on a Docker container on a Mac host. + """ + ret = not is_inside_docker() and is_host_mac() + return ret + + +def is_inside_docker_container_on_external_linux() -> bool: """ Return whether we are running on a Docker container on an external Linux. """ @@ -435,26 +451,37 @@ def is_docker_container_on_external_linux() -> bool: return ret +def is_outside_docker_container_on_external_linux() -> bool: + """ + Return whether we are running on a Docker container on an external Linux. + """ + ret = not is_inside_docker() and is_external_linux() + return ret + + def _get_setup_settings() -> List[Tuple[str, bool]]: """ Return a list of tuples with the name and value of the current server setup. """ - # Store name-value pairs as tuples. 
- setups = [ - ("is_docker_container_on_csfy_server", is_docker_container_on_csfy_server()), - ("is_host_dev_csfy", is_host_dev_csfy()), + func_names = [ + "is_inside_docker_container_on_csfy_server", + "is_outside_docker_container_on_csfy_server", # - ("is_docker_container_on_mac_host", is_docker_container_on_mac_host()), - ("is_host_mac", is_host_mac()), + "is_inside_docker_container_on_host_mac", + "is_outside_docker_container_on_host_mac", # - ("is_docker_container_on_external_linux", is_docker_container_on_external_linux()), - ("is_external_linux", is_external_linux()), + "is_inside_docker_container_on_external_linux", + "is_outside_docker_container_on_external_linux", # - ("is_dev4", is_dev4()), - ("is_ig_prod", is_ig_prod()), - ("is_inside_ci", is_inside_ci()), - ("is_prod_csfy", is_prod_csfy()), + "is_dev4", + "is_ig_prod", + "is_prod_csfy", ] + # Store function name / value pairs as tuples. + setups = [] + for func_name in func_names: + val = eval(f"{func_name}()") + setups.append((func_name, val)) return setups @@ -480,6 +507,21 @@ def _dassert_setup_consistency() -> None: This is used to ensure that the setup configuration is one of the expected ones and uniquely defined. """ + def _indent(txt: str, *, num_spaces: int = 2) -> str: + """ + Add `num_spaces` spaces before each line of the passed string. + """ + spaces = " " * num_spaces + txt_out = [] + for curr_line in txt.split("\n"): + if curr_line.lstrip().rstrip() == "": + # Do not prepend any space to a line with only white characters. + txt_out.append("") + continue + txt_out.append(spaces + curr_line) + res = "\n".join(txt_out) + return res + setups = _get_setup_settings() # One and only one set-up should be true. 
sum_ = sum([value for _, value in setups]) diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 1c0bf29de..2044ee339 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -9,7 +9,7 @@ _LOG = logging.getLogger(__name__) -class GP_TestCase1(hunitest.TestCase): +class _TestCase1: # def test_config_func_to_str1(self) -> None: # val = hserver.config_func_to_str() @@ -20,6 +20,12 @@ class GP_TestCase1(hunitest.TestCase): def test_consistency1(self) -> None: hserver._dassert_setup_consistency() + def test_is_dev_csfy1(self) -> None: + val = hserver.is_dev_csfy() + _LOG.info("val=\n%s", val) + if self.exp_is_dev_csfy is not None: + self.assertEqual(val, self.exp_is_dev_csfy) + def test_get_setup_settings1(self) -> None: setups = hserver._get_setup_settings() val = hserver._setup_to_str(setups) @@ -33,12 +39,6 @@ def test_get_setup_settings1(self) -> None: # if self.exp_get_setup_signature is not None: # self.assert_equal(val, self.exp_get_setup_signature) - def test_is_dev_csfy1(self) -> None: - val = hserver.is_dev_csfy() - _LOG.info("val=\n%s", val) - if self.exp_is_dev_csfy is not None: - self.assertEqual(val, self.exp_is_dev_csfy) - def test_is_inside_ci1(self) -> None: val = hserver.is_inside_ci() _LOG.info("val=\n%s", val) @@ -50,7 +50,7 @@ def test_is_inside_ci1(self) -> None: # ############################################################################# -class Test_hserver1(GP_TestCase1): +class Test_hserver1(_TestCase1, hunitest.TestCase): """ Smoke test without checking anything. """ @@ -73,7 +73,7 @@ def setUp(self) -> None: not hserver.is_inside_ci(), reason="Config not matching", ) -class Test_hserver_inside_ci1(GP_TestCase1): +class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): """ Run tests inside CI. 
""" @@ -93,10 +93,10 @@ def setUp(self) -> None: @pytest.mark.skipif( - not hserver.is_docker_container_on_csfy_server(), + not hserver.is_inside_docker_container_on_csfy_server(), reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_csfy_server1(GP_TestCase1): +class Test_hserver_inside_docker_container_on_csfy_server1(_TestCase1, hunitest.TestCase): """ Run tests inside Docker container on a Causify dev server. """ @@ -104,22 +104,32 @@ class Test_hserver_inside_docker_container_on_csfy_server1(GP_TestCase1): def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" - self.exp_get_setup_settings = "" + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server True + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + """) self.exp_get_setup_signature = "" self.exp_is_dev_csfy = True self.exp_is_inside_ci = True # ############################################################################# -# Test_hserver_dev_csfy1 +# Test_hserver_outside_docker_container_on_csfy_server1 # ############################################################################# @pytest.mark.skipif( - not (not hserver.is_inside_docker() and hserver.is_host_dev_csfy()), + not hserver.is_outside_docker_container_on_csfy_server(), reason="Config not matching", ) -class Test_hserver_outside_docker_container_on_csfy_server1(GP_TestCase1): +class Test_hserver_outside_docker_container_on_csfy_server1(_TestCase1, hunitest.TestCase): """ Run tests outside Docker container on a Causify dev server. 
""" @@ -127,18 +137,16 @@ class Test_hserver_outside_docker_container_on_csfy_server1(GP_TestCase1): def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" - self.exp_get_setup_settings = hprint.dedent( - r""" - is_docker_container_on_csfy_server False - is_host_dev_csfy True - is_docker_container_on_mac_host False - is_host_mac False - is_docker_container_on_external_linux False - is_external_linux False - is_dev4 False - is_ig_prod False - is_inside_ci False - is_prod_csfy False + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server True + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False """) self.exp_get_setup_signature = "" self.exp_is_dev_csfy = True @@ -146,7 +154,7 @@ def setUp(self) -> None: # ############################################################################# -# Test_hserver_docker_container_on_mac_host1 +# Test_hserver_inside_docker_container_on_mac_host1 # ############################################################################# @@ -154,7 +162,7 @@ def setUp(self) -> None: not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_mac_host1(GP_TestCase1): +class Test_hserver_inside_docker_container_on_mac_host1(_TestCase1, hunitest.TestCase): """ Run tests inside Docker container on a GP's Mac. 
""" @@ -169,7 +177,7 @@ def setUp(self) -> None: # ############################################################################# -# Test_hserver_gp_mac1 +# Test_hserver_outside_docker_container_on_gp_mac1 # ############################################################################# @@ -177,7 +185,7 @@ def setUp(self) -> None: not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), reason="Config not matching", ) -class Test_hserver_outside_docker_container_on_gp_mac1(GP_TestCase1): +class Test_hserver_outside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestCase): """ Run tests outside Docker container on a GP's Mac. """ From 5142661da468a25f11068cd274884c5ae52bee0d Mon Sep 17 00:00:00 2001 From: saggese Date: Sat, 12 Apr 2025 02:39:18 +0000 Subject: [PATCH 011/193] Improve --- helpers/hserver.py | 65 +++++++++++------------------------- helpers/test/test_hserver.py | 24 ++++++------- 2 files changed, 31 insertions(+), 58 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 6d45390ec..a2a888f82 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -238,19 +238,19 @@ def is_inside_unit_test() -> bool: return ret -# TODO(gp): Remove! -def is_dev_csfy() -> bool: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws', - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', - # machine='x86_64' - host_name = os.uname()[1] - host_names = ("dev1", "dev2", "dev3") - csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") - _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) - is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names - return is_dev_csfy_ +# # TODO(gp): Remove! 
+# def is_dev_csfy() -> bool: +# # sysname='Linux' +# # nodename='dev1' +# # release='5.15.0-1081-aws', +# # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', +# # machine='x86_64' +# host_name = os.uname()[1] +# host_names = ("dev1", "dev2", "dev3") +# csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") +# _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) +# is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names +# return is_dev_csfy_ # TODO(gp): This is obsolete and should be removed. @@ -316,48 +316,23 @@ def is_external_linux() -> bool: This is true when we run on the machine of an intern, or a non-CSFY contributor. """ - if is_dev_csfy() or is_inside_ci(): - # CI and dev servers are not considered external Linux systems. + if is_host_csfy_server() or is_inside_ci(): + # Dev servers and CI are not external Linux systems. ret = False - elif is_inside_docker(): - # If we are inside a Docker container, we need to check the host OS. - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - ret = csfy_host_os_name == "Linux" else: - # If we are not inside a Docker container, we can check the host OS - # directly. - host_os_name = os.uname()[0] + # We need to check the host OS directly. + host_os_name = _get_host_os_name() ret = host_os_name == "Linux" return ret -# TODO(gp): When is this used? -def is_csfy_or_external_container() -> bool: - """ - Detect whether we are running on a container in a CSFY or external system. - - This is true for dockerized executables. - """ - res = False - if is_inside_ci(): - # CI servers are not considered external or CSFY systems. - res = False - elif not is_inside_docker(): - # Outside Docker there is no container. 
- res = False - else: - res = is_inside_docker() - _LOG.debug(" -> is_csfy_or_external_container=%s", res) - return res - - def is_external_dev() -> bool: """ Detect whether we are running on an system outside of Causify system (e.g., a contributor's laptop, an intern's laptop, a non-CSFY machine). """ - is_external_dev_ = is_host_mac() or is_external_linux() - return is_external_dev_ + ret = is_host_mac() or is_external_linux() + return ret # ############################################################################# @@ -387,7 +362,6 @@ def _get_setup_signature() -> str: cmds.append("os.uname()[2]") # is_external_linux() cmds.append('os.environ.get("CSFY_HOST_OS_NAME", "undef")') - # is_csfy_or_external_container() # Build an array of strings with the results of executing the commands. results = [] for cmd in cmds: @@ -983,7 +957,6 @@ def config_func_to_str() -> str: "has_docker_sudo", "is_AM_S3_available", "is_CK_S3_available", - "is_csfy_or_external_container", "is_dev4", "is_dev_csfy", "is_external_linux", diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 2044ee339..79f6168fc 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -20,11 +20,11 @@ class _TestCase1: def test_consistency1(self) -> None: hserver._dassert_setup_consistency() - def test_is_dev_csfy1(self) -> None: - val = hserver.is_dev_csfy() + def test_is_host_csfy_server1(self) -> None: + val = hserver.is_host_csfy_server() _LOG.info("val=\n%s", val) - if self.exp_is_dev_csfy is not None: - self.assertEqual(val, self.exp_is_dev_csfy) + if self.exp_is_host_csfy_server is not None: + self.assertEqual(val, self.exp_is_host_csfy_server) def test_get_setup_settings1(self) -> None: setups = hserver._get_setup_settings() @@ -60,7 +60,7 @@ def setUp(self) -> None: self.exp_config_func_to_str = None self.exp_get_setup_settings = None self.exp_get_setup_signature = None - self.exp_is_dev_csfy = None + self.exp_is_host_csfy_server = None 
self.exp_is_inside_ci = None @@ -83,8 +83,8 @@ def setUp(self) -> None: self.exp_config_func_to_str = None self.exp_get_setup_settings = None self.exp_get_setup_signature = None - self.exp_is_dev_csfy = None - self.exp_is_inside_ci = None + self.exp_is_host_csfy_server = False + self.exp_is_inside_ci = True # ############################################################################# @@ -116,8 +116,8 @@ def setUp(self) -> None: is_prod_csfy False """) self.exp_get_setup_signature = "" - self.exp_is_dev_csfy = True - self.exp_is_inside_ci = True + self.exp_is_host_csfy_server = True + self.exp_is_inside_ci = False # ############################################################################# @@ -149,7 +149,7 @@ def setUp(self) -> None: is_prod_csfy False """) self.exp_get_setup_signature = "" - self.exp_is_dev_csfy = True + self.exp_is_host_csfy_server = True self.exp_is_inside_ci = False @@ -172,7 +172,7 @@ def setUp(self) -> None: self.exp_config_func_to_str = "" self.exp_get_setup_settings = "" self.exp_get_setup_signature = "" - self.exp_is_dev_csfy = True + self.exp_is_host_csfy_server = True self.exp_is_inside_ci = True @@ -195,7 +195,7 @@ def setUp(self) -> None: self.exp_config_func_to_str = "" self.exp_get_setup_settings = "" self.exp_get_setup_signature = "" - self.exp_is_dev_csfy = True + self.exp_is_host_csfy_server = True self.exp_is_inside_ci = True From f259ba79f2e1cc18b1fc8c42da08f66d7059daac Mon Sep 17 00:00:00 2001 From: saggese Date: Sat, 12 Apr 2025 02:46:35 +0000 Subject: [PATCH 012/193] Improve --- helpers/hdocker.py | 101 --------------------------------------------- helpers/hserver.py | 86 ++++++++++++++++++++++++++++++++------ 2 files changed, 73 insertions(+), 114 deletions(-) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 675ce4b79..c31dc87a3 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1454,107 +1454,6 @@ def dockerized_tikz_to_bitmap( ) -# 
############################################################################# -# Dockerized llm_transform. -# ############################################################################# - - -def run_dockerized_llm_transform( - in_file_path: str, - cmd_opts: List[str], - out_file_path: str, - *, - return_cmd: bool = False, - force_rebuild: bool = False, - use_sudo: bool = False, -) -> Optional[str]: - """ - Run dockerized_llm_transform.py in a Docker container with all its dependencies. - """ - _LOG.debug(hprint.func_signature_to_str()) - # - hdbg.dassert_in("OPENAI_API_KEY", os.environ) - hdbg.dassert_isinstance(cmd_opts, list) - # Build the container, if needed. - container_image = "tmp.llm_transform" - dockerfile = r""" - FROM python:3.12-alpine - - # Install Bash. - #RUN apk add --no-cache bash - - # Set Bash as the default shell. - #SHELL ["/bin/bash", "-c"] - - # Install pip packages. - RUN pip install --no-cache-dir pyyaml openai - """ - container_image = build_container_image( - container_image, dockerfile, force_rebuild, use_sudo - ) - # Convert files to Docker paths. 
- is_caller_host = not hserver.is_inside_docker() - use_sibling_container_for_callee = True - caller_mount_path, callee_mount_path, mount = get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - in_file_path = convert_caller_to_callee_docker_path( - in_file_path, - caller_mount_path, - callee_mount_path, - check_if_exists=True, - is_input=True, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - out_file_path = convert_caller_to_callee_docker_path( - out_file_path, - caller_mount_path, - callee_mount_path, - check_if_exists=False, - is_input=False, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - helpers_root = hgit.find_helpers_root() - helpers_root = convert_caller_to_callee_docker_path( - helpers_root, - caller_mount_path, - callee_mount_path, - check_if_exists=True, - is_input=False, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - git_root = hgit.find_git_root() - script = hsystem.find_file_in_repo("dockerized_llm_transform.py", root_dir=git_root) - script = convert_caller_to_callee_docker_path( - script, - caller_mount_path, - callee_mount_path, - check_if_exists=True, - is_input=True, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - cmd_opts_as_str = " ".join(cmd_opts) - executable = get_docker_executable(use_sudo) - docker_cmd = ( - f"{executable} run --rm --user $(id -u):$(id -g)" - f" -e OPENAI_API_KEY -e PYTHONPATH={helpers_root}" - f" --workdir {callee_mount_path} --mount {mount}" - f" {container_image}" - f" {script} -i {in_file_path} -o {out_file_path} {cmd_opts_as_str}" - ) - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
- hsystem.system(docker_cmd, suppress_output=False) - ret = None - return ret - - # ############################################################################# diff --git a/helpers/hserver.py b/helpers/hserver.py index a2a888f82..39a7ac6a2 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -238,19 +238,19 @@ def is_inside_unit_test() -> bool: return ret -# # TODO(gp): Remove! -# def is_dev_csfy() -> bool: -# # sysname='Linux' -# # nodename='dev1' -# # release='5.15.0-1081-aws', -# # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', -# # machine='x86_64' -# host_name = os.uname()[1] -# host_names = ("dev1", "dev2", "dev3") -# csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") -# _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) -# is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names -# return is_dev_csfy_ +# TODO(gp): Remove! +def is_dev_csfy() -> bool: + # sysname='Linux' + # nodename='dev1' + # release='5.15.0-1081-aws', + # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025', + # machine='x86_64' + host_name = os.uname()[1] + host_names = ("dev1", "dev2", "dev3") + csfy_host_name = os.environ.get("CSFY_HOST_NAME", "") + _LOG.debug("host_name=%s csfy_host_name=%s", host_name, csfy_host_name) + is_dev_csfy_ = host_name in host_names or csfy_host_name in host_names + return is_dev_csfy_ # TODO(gp): This is obsolete and should be removed. @@ -271,6 +271,66 @@ def is_dev4() -> bool: return is_dev4_ +# TODO(gp): Remove. +def is_mac(*, version: Optional[str] = None) -> bool: + """ + Return whether we are running on macOS and, optionally, on a specific + version. 
+ + :param version: check whether we are running on a certain macOS version (e.g., + `Catalina`, `Monterey`) + """ + _LOG.debug("version=%s", version) + host_os_name = os.uname()[0] + _LOG.debug("os.uname()=%s", str(os.uname())) + csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) + _LOG.debug( + "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name + ) + is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" + if version is None: + # The user didn't request a specific version, so we return whether we + # are running on a Mac or not. + _LOG.debug("is_mac_=%s", is_mac_) + return is_mac_ + else: + # The user specified a version: if we are not running on a Mac then we + # return False, since we don't even have to check the macOS version. + if not is_mac_: + _LOG.debug("is_mac_=%s", is_mac_) + return False + # Check the macOS version we are running. + if version == "Catalina": + # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: + # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 + macos_tag = "19.6" + elif version == "Monterey": + # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: + # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 + macos_tag = "21." + elif version == "Ventura": + macos_tag = "22." + elif version == "Sequoia": + # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0: + # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64 + macos_tag = "24." 
+ else: + raise ValueError(f"Invalid version='{version}'") + _LOG.debug("macos_tag=%s", macos_tag) + host_os_version = os.uname()[2] + # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; + # root:xnu-6153.141.2~1/RELEASE_X86_64' + csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "") + _LOG.debug( + "host_os_version=%s csfy_host_os_version=%s", + host_os_version, + csfy_host_os_version, + ) + is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version + _LOG.debug("is_mac_=%s", is_mac_) + return is_mac_ + + def is_prod_csfy() -> bool: """ Detect whether we are running in a Causify production container. From ca2fb1e17033231a0d487722e51ad84e662f1e36 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Fri, 11 Apr 2025 23:08:00 -0400 Subject: [PATCH 013/193] Improve --- helpers/henv.py | 105 ++++++----------------------------- helpers/hprint.py | 27 +++++++++ helpers/hserver.py | 42 +++++++++++++- helpers/hversion.py | 18 +++++- helpers/test/test_hserver.py | 23 ++++++-- 5 files changed, 118 insertions(+), 97 deletions(-) diff --git a/helpers/henv.py b/helpers/henv.py index 6eaf51fc1..029f6a25f 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -60,41 +60,12 @@ def has_module(module: str) -> bool: return has_module_ -# ############################################################################# -# Utility functions. 
-# ############################################################################# - - # All printing functions should: # - Return a string and not a list of strings # - Add a newline at the end of the string (i.e., the string should end with # `\n`) -def _dassert_one_trailing_newline(txt: str) -> None: - num_newlines = len(re.search(r'\n*$', txt).group()) - hdbg.dassert_eq(num_newlines, 0, "num_newlines='%s' txt='%s'", num_newlines, txt) - - -def _to_info(tag: str, txt: Union[str, List[str]]) -> str: - hdbg.dassert_isinstance(tag, str) - hdbg.dassert_isinstance(txt, (str, list)) - txt_tmp = "" - txt_tmp += "# " + tag + "\n" - # Indent the text. - if not isinstance(txt, str): - for t in txt: - hdbg.dassert_isinstance(t, str) - txt = "\n".join(txt) - txt_tmp += hprint.indent(txt) - # Ensure that there is a single trailing newline. - txt_tmp = txt_tmp.rstrip("\n") - # txt_tmp += "\n" - # _dassert_one_trailing_newline(txt_tmp) - _LOG.debug("'%s'", txt_tmp) - return txt_tmp - - # ############################################################################# # Get env vars info. 
# ############################################################################# @@ -365,7 +336,7 @@ def _get_platform_info() -> str: txt_tmp.append(f"machine={uname.machine}") txt_tmp.append(f"processor={uname.processor}") # - txt = _to_info("Platform info", txt_tmp) + txt = hprint.to_info("Platform info", txt_tmp) return txt @@ -390,7 +361,7 @@ def _get_psutil_info() -> str: else: txt_tmp.append("psutil is not installed") # - txt = _to_info("psutils info", txt_tmp) + txt = hprint.to_info("psutils info", txt_tmp) return txt @@ -457,27 +428,13 @@ def _get_package_info() -> Tuple[List[str], int]: packages.append((lib, version)) txt_tmp.extend([f"{l}: {v}" for (l, v) in packages]) # - txt = _to_info("Packages", txt_tmp) + txt = hprint.to_info("Packages", txt_tmp) return txt, failed_imports # ############################################################################# -def _get_container_version() -> str: - txt_tmp: List[str] = [] - # - container_version = str(hversio.get_container_version()) - txt_tmp.append(f"container_version='{container_version}'") - # - container_dir_name = "." 
- changelog_version = str(hversio.get_changelog_version(container_dir_name)) - txt_tmp.append(f"changelog_version='{changelog_version}'") - # - txt = _to_info("Container version", txt_tmp) - return txt - - def _get_git_info(git_commit_type: str) -> str: txt_tmp: List[str] = [] try: @@ -488,37 +445,7 @@ def _get_git_info(git_commit_type: str) -> str: _LOG.warning(str(e)) txt_tmp.append("No git info") # - txt = _to_info("Git info", txt_tmp) - return txt - - -def _get_docker_info() -> str: - txt_tmp: List[str] = [] - # - has_docker = hserver.has_docker() - txt_tmp.append(f"has_docker={has_docker}") - # - cmd = r"docker version --format '{{.Server.Version}}'" - _, docker_version = hsystem.system_to_string(cmd) - txt_tmp.append(f"docker_version='{docker_version}'") - # - docker_needs_sudo = hserver.docker_needs_sudo() - txt_tmp.append(f"docker_needs_sudo={docker_needs_sudo}") - # - has_privileged_mode = hserver.has_docker_privileged_mode() - txt_tmp.append(f"has_privileged_mode={has_privileged_mode}") - # - is_inside_docker = hserver.is_inside_docker() - txt_tmp.append(f"is_inside_docker={is_inside_docker}") - # - if is_inside_docker: - has_sibling_containers_support = hserver.has_sibling_containers_support() - txt_tmp.append(f"has_sibling_containers_support={has_sibling_containers_support}") - # - has_docker_dind_support = hserver.has_docker_dind_support() - txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support}") - # - txt = _to_info("Docker info", txt_tmp) + txt = hprint.to_info("Git info", txt_tmp) return txt @@ -537,31 +464,31 @@ def get_system_signature(git_commit_type: str = "all") -> Tuple[str, int]: """ txt: List[str] = [] # Add container version. - txt_tmp = _get_container_version() - _dassert_one_trailing_newline(txt_tmp) + txt_tmp = hversio.get_container_version_info() + hprint.dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) # Add Git signature. 
txt_tmp = _get_git_info(git_commit_type) - _dassert_one_trailing_newline(txt_tmp) + hprint.dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) # Add platform info. txt_tmp = _get_platform_info() - _dassert_one_trailing_newline(txt_tmp) + hprint.dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) # Add psutil info. txt_tmp = _get_psutil_info() - _dassert_one_trailing_newline(txt_tmp) + hprint.dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) # Add Docker info. - txt_tmp = _get_docker_info() - _dassert_one_trailing_newline(txt_tmp) + txt_tmp = hserver.get_docker_info() + hprint.dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) # Add package info. txt_tmp, failed_imports = _get_package_info() - _dassert_one_trailing_newline(txt_tmp) + hprint.dassert_one_trailing_newline(txt_tmp) txt.append(txt_tmp) # - txt = _to_info("System signature", txt) + txt = hprint.to_info("System signature", txt) return txt, failed_imports @@ -583,16 +510,16 @@ def env_to_str( # if repo_config: repo_config_str = hrecouti.get_repo_config().config_func_to_str() - msg += _to_info("Repo config", repo_config_str) + "\n" + msg += hprint.to_info("Repo config", repo_config_str) + "\n" # if server_config: server_config_str = hserver.config_func_to_str() - msg += _to_info("Server config", server_config_str) + "\n" + msg += hprint.to_info("Server config", server_config_str) + "\n" # if system_signature: msg += get_system_signature()[0] + "\n" # if env_vars: env_vars_str = env_vars_to_string() - msg += _to_info("Env vars", env_vars_str) + "\n" + msg += hprint.to_info("Env vars", env_vars_str) + "\n" return msg diff --git a/helpers/hprint.py b/helpers/hprint.py index cea430922..175d5ff52 100644 --- a/helpers/hprint.py +++ b/helpers/hprint.py @@ -972,6 +972,30 @@ def filter_text(regex: str, txt: str) -> str: return txt +def dassert_one_trailing_newline(txt: str) -> None: + num_newlines = len(re.search(r'\n*$', txt).group()) + hdbg.dassert_eq(num_newlines, 0, "num_newlines='%s' 
txt='%s'", num_newlines, txt) + + +def to_info(tag: str, txt: Union[str, List[str]]) -> str: + hdbg.dassert_isinstance(tag, str) + hdbg.dassert_isinstance(txt, (str, list)) + txt_tmp = "" + txt_tmp += "# " + tag + "\n" + # Indent the text. + if not isinstance(txt, str): + for t in txt: + hdbg.dassert_isinstance(t, str) + txt = "\n".join(txt) + txt_tmp += indent(txt) + # Ensure that there is a single trailing newline. + txt_tmp = txt_tmp.rstrip("\n") + # txt_tmp += "\n" + # _dassert_one_trailing_newline(txt_tmp) + _LOG.debug("'%s'", txt_tmp) + return txt_tmp + + # ############################################################################# # Notebook output # ############################################################################# @@ -1033,3 +1057,6 @@ def config_notebook(sns_set: bool = True) -> None: # Force the linter to keep this import. _ = hwarnin + + +# \ No newline at end of file diff --git a/helpers/hserver.py b/helpers/hserver.py index 39a7ac6a2..a96ab3aa8 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -14,6 +14,7 @@ from typing import Dict, List, Optional, Tuple import helpers.repo_config_utils as hrecouti +import helpers.hprint as hprint # This module should depend only on: # - Python standard modules @@ -40,11 +41,15 @@ def _system_to_string(cmd: str) -> Tuple[int, str]: :return: tuple of (return code, output) """ result = subprocess.run( - cmd, stdout=subprocess.PIPE, + cmd, + stdout=subprocess.PIPE, # Redirect stderr to stdout. 
- stderr=subprocess.STDOUT, text=True) + stderr=subprocess.STDOUT, + shell=True, + text=True) rc = result.returncode output = result.stdout + output = output.strip() return rc, output @@ -657,6 +662,37 @@ def has_docker_dind_support() -> bool: return has_docker_privileged_mode() +def get_docker_info() -> str: + txt_tmp: List[str] = [] + # + has_docker_ = has_docker() + txt_tmp.append(f"has_docker={has_docker_}") + # + cmd = r"docker version --format '{{.Server.Version}}'" + _, docker_version = _system_to_string(cmd) + txt_tmp.append(f"docker_version='{docker_version}'") + # + docker_needs_sudo_ = docker_needs_sudo() + txt_tmp.append(f"docker_needs_sudo={docker_needs_sudo_}") + # + has_privileged_mode_ = has_docker_privileged_mode() + txt_tmp.append(f"has_privileged_mode={has_privileged_mode_}") + # + is_inside_docker_ = is_inside_docker() + txt_tmp.append(f"is_inside_docker={is_inside_docker_}") + # + if is_inside_docker_: + has_sibling_containers_support_ = has_sibling_containers_support() + txt_tmp.append(f"has_sibling_containers_support={has_sibling_containers_support_}") + # + has_docker_dind_support_ = has_docker_dind_support() + txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support_}") + # + txt = hprint.to_info("Docker info", txt_tmp) + return txt + + + # ############################################################################# # Detect Docker functionalities, based on the set-up. 
# ############################################################################# @@ -1038,7 +1074,7 @@ def config_func_to_str() -> str: for func_name in sorted(function_names): try: _LOG.debug("func_name=%s", func_name) - func_value = eval(func_name) + func_value = eval(f"{func_name}()") except NameError: func_value = "*undef*" msg = f"{func_name}='{func_value}'" diff --git a/helpers/hversion.py b/helpers/hversion.py index 74f31c539..e774894ef 100644 --- a/helpers/hversion.py +++ b/helpers/hversion.py @@ -16,10 +16,11 @@ import logging import os import re -from typing import Optional, cast +from typing import Optional, cast, List import helpers.hdbg as hdbg import helpers.hio as hio +import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem @@ -208,3 +209,18 @@ def _check_version(code_version: str, container_version: str) -> bool: print(msg) # raise RuntimeError(msg) return is_ok + + +def get_container_version_info() -> str: + txt_tmp: List[str] = [] + # + container_version = str(get_container_version()) + txt_tmp.append(f"container_version='{container_version}'") + # + container_dir_name = "." 
+ changelog_version = str(get_changelog_version(container_dir_name)) + txt_tmp.append(f"changelog_version='{changelog_version}'") + # + txt = hprint.to_info("Container version", txt_tmp) + return txt + diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 79f6168fc..77121ec45 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -26,6 +26,12 @@ def test_is_host_csfy_server1(self) -> None: if self.exp_is_host_csfy_server is not None: self.assertEqual(val, self.exp_is_host_csfy_server) + def test_is_host_max(self) -> None: + val = hserver.is_host_mac() + _LOG.info("val=\n%s", val) + if self.exp_is_host_mac is not None: + self.assertEqual(val, self.exp_is_host_mac) + def test_get_setup_settings1(self) -> None: setups = hserver._get_setup_settings() val = hserver._setup_to_str(setups) @@ -154,15 +160,15 @@ def setUp(self) -> None: # ############################################################################# -# Test_hserver_inside_docker_container_on_mac_host1 +# Test_hserver_inside_docker_container_on_gp_mac1 # ############################################################################# @pytest.mark.skipif( - not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), + not (hserver.is_inside_docker() and hserver.is_host_gp_mac()), reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_mac_host1(_TestCase1, hunitest.TestCase): +class Test_hserver_inside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestCase): """ Run tests inside Docker container on a GP's Mac. 
""" @@ -171,7 +177,16 @@ def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" self.exp_get_setup_settings = "" - self.exp_get_setup_signature = "" + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac True + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False""") self.exp_is_host_csfy_server = True self.exp_is_inside_ci = True From 735a0e472fe4c0d665c8f1994d0a1ea2dd9ea5f8 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Fri, 11 Apr 2025 23:20:12 -0400 Subject: [PATCH 014/193] Improve --- helpers/hserver.py | 16 +++++----- helpers/test/test_hserver.py | 60 ++++++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index a96ab3aa8..ff0e8bc24 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -415,18 +415,18 @@ def _get_setup_signature() -> str: """ cmds = [] # is_prod_csfy() - cmds.append('os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", "undef")') + cmds.append('os.environ.get("CK_IN_PROD_CMAMP_CONTAINER", "*undef*")') # is_dev4() # is_dev_csfy() # is_ig_prod() - cmds.append('os.environ.get("CSFY_HOST_NAME", "undef")') + cmds.append('os.environ.get("CSFY_HOST_NAME", "*undef*")') # is_inside_ci() - cmds.append('os.environ.get("CSFY_CI", "undef")') + cmds.append('os.environ.get("CSFY_CI", "*undef*")') # is_mac() cmds.append("os.uname()[0]") cmds.append("os.uname()[2]") # is_external_linux() - cmds.append('os.environ.get("CSFY_HOST_OS_NAME", "undef")') + cmds.append('os.environ.get("CSFY_HOST_OS_NAME", "*undef*")') # Build an array of strings with the results of executing the commands. 
results = [] for cmd in cmds: @@ -683,10 +683,12 @@ def get_docker_info() -> str: # if is_inside_docker_: has_sibling_containers_support_ = has_sibling_containers_support() - txt_tmp.append(f"has_sibling_containers_support={has_sibling_containers_support_}") - # has_docker_dind_support_ = has_docker_dind_support() - txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support_}") + else: + has_sibling_containers_support_ = "*undef*" + has_docker_dind_support_ = "*undef*" + txt_tmp.append(f"has_sibling_containers_support={has_sibling_containers_support_}") + txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support_}") # txt = hprint.to_info("Docker info", txt_tmp) return txt diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 77121ec45..d041d1eee 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -26,12 +26,18 @@ def test_is_host_csfy_server1(self) -> None: if self.exp_is_host_csfy_server is not None: self.assertEqual(val, self.exp_is_host_csfy_server) - def test_is_host_max(self) -> None: + def test_is_host_mac1(self) -> None: val = hserver.is_host_mac() _LOG.info("val=\n%s", val) if self.exp_is_host_mac is not None: self.assertEqual(val, self.exp_is_host_mac) + def test_get_docker_info1(self) -> None: + val = hserver.get_docker_info() + _LOG.info("val=\n%s", val) + if self.exp_get_docker_info is not None: + self.assert_equal(val, self.exp_get_docker_info) + def test_get_setup_settings1(self) -> None: setups = hserver._get_setup_settings() val = hserver._setup_to_str(setups) @@ -51,6 +57,7 @@ def test_is_inside_ci1(self) -> None: if self.exp_is_inside_ci is not None: self.assertEqual(val, self.exp_is_inside_ci) + # ############################################################################# # Test_hserver1 # ############################################################################# @@ -64,9 +71,11 @@ class Test_hserver1(_TestCase1, hunitest.TestCase): def setUp(self) -> None: super().setUp() 
self.exp_config_func_to_str = None + self.exp_get_docker_info = None self.exp_get_setup_settings = None self.exp_get_setup_signature = None self.exp_is_host_csfy_server = None + self.exp_is_host_mac = None self.exp_is_inside_ci = None @@ -87,6 +96,7 @@ class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = None + self.exp_get_docker_info = None self.exp_get_setup_settings = None self.exp_get_setup_signature = None self.exp_is_host_csfy_server = False @@ -110,6 +120,15 @@ class Test_hserver_inside_docker_container_on_csfy_server1(_TestCase1, hunitest. def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_version='28.0.4' + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_sibling_containers_support=True + has_docker_dind_support=True""") self.exp_get_setup_settings = hprint.dedent(r""" is_inside_docker_container_on_csfy_server True is_outside_docker_container_on_csfy_server False @@ -123,6 +142,7 @@ def setUp(self) -> None: """) self.exp_get_setup_signature = "" self.exp_is_host_csfy_server = True + self.exp_is_host_mac = False self.exp_is_inside_ci = False @@ -143,6 +163,15 @@ class Test_hserver_outside_docker_container_on_csfy_server1(_TestCase1, hunitest def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_version='28.0.4' + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_sibling_containers_support=True + has_docker_dind_support=True""") self.exp_get_setup_settings = hprint.dedent(r""" is_inside_docker_container_on_csfy_server False is_outside_docker_container_on_csfy_server True @@ -176,7 +205,15 @@ class Test_hserver_inside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestC def setUp(self) -> None: 
super().setUp() self.exp_config_func_to_str = "" - self.exp_get_setup_settings = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_version='28.0.4' + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_sibling_containers_support=True + has_docker_dind_support=True""") self.exp_get_setup_settings = hprint.dedent(r""" is_inside_docker_container_on_csfy_server False is_outside_docker_container_on_csfy_server False @@ -187,8 +224,9 @@ def setUp(self) -> None: is_dev4 False is_ig_prod False is_prod_csfy False""") - self.exp_is_host_csfy_server = True - self.exp_is_inside_ci = True + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = True + self.exp_is_inside_ci = False # ############################################################################# @@ -208,10 +246,20 @@ class Test_hserver_outside_docker_container_on_gp_mac1(_TestCase1, hunitest.Test def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_version='28.0.4' + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_sibling_containers_support=True + has_docker_dind_support=True""") self.exp_get_setup_settings = "" self.exp_get_setup_signature = "" - self.exp_is_host_csfy_server = True - self.exp_is_inside_ci = True + self.exp_is_host_csfy_server = False + self.exp_is_host_mac = True + self.exp_is_inside_ci = False # ############################################################################# From a13e27a3fdbaf7617492654ac61aa582dabea9cf Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sat, 12 Apr 2025 10:52:00 -0400 Subject: [PATCH 015/193] Improve --- helpers/hprint.py | 2 +- helpers/hunit_test.py | 2 +- helpers/test/test_hserver.py | 47 ++++++++++++++++++++++++++++-------- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/helpers/hprint.py b/helpers/hprint.py index 
175d5ff52..61fb1fcdd 100644 --- a/helpers/hprint.py +++ b/helpers/hprint.py @@ -945,7 +945,7 @@ def to_pretty_str(obj: Any) -> str: return res -# TODO(gp): -> remove_lines? +# TODO(gp): GSI -> rename remove_lines()? def filter_text(regex: str, txt: str) -> str: """ Remove lines in `txt` that match the regex `regex`. diff --git a/helpers/hunit_test.py b/helpers/hunit_test.py index 55b245026..8e2259d54 100644 --- a/helpers/hunit_test.py +++ b/helpers/hunit_test.py @@ -336,7 +336,7 @@ def _remove_dir_name(file_name: str) -> str: return txt -# TODO(gp): @all Use the copy in helpers/print.py. +# TODO(gp): GSI. Use the copy in helpers/hprint.py def filter_text(regex: str, txt: str) -> str: """ Remove lines in `txt` that match the regex `regex`. diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index d041d1eee..2ef1f5e9a 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -35,6 +35,8 @@ def test_is_host_mac1(self) -> None: def test_get_docker_info1(self) -> None: val = hserver.get_docker_info() _LOG.info("val=\n%s", val) + # Remove the docker version since it is not stable. 
+ val = hprint.filter_text("docker_version=", val) if self.exp_get_docker_info is not None: self.assert_equal(val, self.exp_get_docker_info) @@ -96,8 +98,26 @@ class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = None - self.exp_get_docker_info = None - self.exp_get_setup_settings = None + self.exp_get_docker_info = hprint.dedent(r""" + # Docker info + has_docker=True + docker_version='28.0.4' + docker_needs_sudo=False + has_privileged_mode=True + is_inside_docker=True + has_sibling_containers_support=True + has_docker_dind_support=True""") + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server True + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac False + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False + """) self.exp_get_setup_signature = None self.exp_is_host_csfy_server = False self.exp_is_inside_ci = True @@ -199,7 +219,7 @@ def setUp(self) -> None: ) class Test_hserver_inside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestCase): """ - Run tests inside Docker container on a GP's Mac. + Run tests inside Docker container on GP's Mac. """ def setUp(self) -> None: @@ -208,7 +228,6 @@ def setUp(self) -> None: self.exp_get_docker_info = hprint.dedent(r""" # Docker info has_docker=True - docker_version='28.0.4' docker_needs_sudo=False has_privileged_mode=True is_inside_docker=True @@ -240,7 +259,7 @@ def setUp(self) -> None: ) class Test_hserver_outside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestCase): """ - Run tests outside Docker container on a GP's Mac. + Run tests outside Docker container on GP's Mac. 
""" def setUp(self) -> None: @@ -249,13 +268,21 @@ def setUp(self) -> None: self.exp_get_docker_info = hprint.dedent(r""" # Docker info has_docker=True - docker_version='28.0.4' docker_needs_sudo=False has_privileged_mode=True - is_inside_docker=True - has_sibling_containers_support=True - has_docker_dind_support=True""") - self.exp_get_setup_settings = "" + is_inside_docker=False + has_sibling_containers_support=*undef* + has_docker_dind_support=*undef*""") + self.exp_get_setup_settings = hprint.dedent(r""" + is_inside_docker_container_on_csfy_server False + is_outside_docker_container_on_csfy_server False + is_inside_docker_container_on_host_mac False + is_outside_docker_container_on_host_mac True + is_inside_docker_container_on_external_linux False + is_outside_docker_container_on_external_linux False + is_dev4 False + is_ig_prod False + is_prod_csfy False""") self.exp_get_setup_signature = "" self.exp_is_host_csfy_server = False self.exp_is_host_mac = True From 16b072e63109a0bf08878758248b88f30c90c68e Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 13 Apr 2025 15:00:41 -0400 Subject: [PATCH 016/193] Improve --- .../thin_client/thin_client_utils.sh | 18 + ....replace_common_files_with_script_links.md | 49 ++- helpers/create_links.py | 390 ++++++++++-------- helpers/stage_linked_file.py | 97 ++--- helpers/test/test_create_link.py | 8 - 5 files changed, 316 insertions(+), 246 deletions(-) diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index e27759abf..df3cbd127 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -180,6 +180,23 @@ activate_venv() { } +set_csfy_env_vars() { + echo "# set_csfy_env_vars()" + # + export CSFY_HOST_NAME=$(hostname) + echo "CSFY_HOST_NAME=$CSFY_HOST_NAME" + # + export CSFY_HOST_OS_NAME=$(uname) + echo "CSFY_HOST_OS_NAME=$CSFY_HOST_OS_NAME" + # + export CSFY_HOST_USER_NAME=$(whoami) + 
echo "CSFY_HOST_USER_NAME=$CSFY_HOST_USER_NAME" + # + export CSFY_HOST_OS_VERSION=$(uname -r) + echo "CSFY_HOST_OS_VERSION=$CSFY_HOST_OS_VERSION" +} + + set_path() { echo "# set_path()" local dev_script_dir=$1 @@ -254,6 +271,7 @@ is_dev_csfy() { fi } + configure_specific_project() { echo "# configure_specific_project()" # AWS profiles which are propagated to Docker. diff --git a/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md b/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md index d50147a15..100be9db2 100644 --- a/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md +++ b/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md @@ -28,9 +28,9 @@ ## Why Do We Need This Approach? -- In our codebases, it is common to have duplicate files or files - that are identical between two directories. Maintaining these files manually - can lead to inefficiencies and errors: +- In our codebases, it is common to have files that are identical between two + directories. 
Maintaining these files manually can lead to inefficiencies and + errors: - Synchronization: If changes are made in one location, they may not reflect in the other, leading to inconsistencies - Accidental Modifications: Directly modifying files that should remain @@ -38,13 +38,32 @@ - With our approach: - We avoid file duplication by creating links that point to the original files - - Links in the destination directory remain read-only, reducing the risk of - accidental changes + - Links in the destination directory are marked as read-only, reducing the + risk of accidental changes - If modifications are needed, the "staging process" ensures you can work safely on copies without altering the original source files - After the code has been developed, one can then convert copies of files, back to links +## Nomenclature + +- Links are often confusing since it's not clear what is linked to and what is + linked from, e.g., + - `ln -s foo bar` creates a symbolic link named `foo` that points to `bar` + ```bash + foo -> bar + ``` + - This convention seems the opposite of `cp foo bar` where a new file called + `bar` is created with the content of `foo` + +- Also referring to "source" and "destination" is confusing since it is unclear + if "destination" is the "destination" of the link (i.e., the head of the arrow) + or the "destination" of the operation of copy (the tail of the arrow) + +- In the rest of this document we will refer to the file being created as + "destination" + - E.g., `ln -s new_file old_file` + ## Workflow and Commands - Below is the step-by-step workflow for using these scripts @@ -55,7 +74,7 @@ links to the corresponding files in `src_dir` Command: - ``` + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -76,9 +95,13 @@ - If you want to edit the files in `dst_dir` (which are currently symbolic links), use `stage_linked_file.py` to stage them. 
Staging replaces the symbolic links with writable copies of the original files +- At this point, you can just modify the files in `dst_dir` to achieve the + desired goal, without worries of altering the source files + - Often you don't know which files need to be changed and how to change files + so all the files are staged for modification - Command: - ``` + ```bash > stage_linked_file.py --dst_dir /path/to/dst ``` @@ -99,7 +122,7 @@ by running `create_links.py` again with the `--replace_links` flag - Command: - ``` + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -115,19 +138,19 @@ ### Workflow Summary -- Set up `symbolic links`: - ``` +1. Set up symbolic links: + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` -- Stage `symbolic links` for modification: +2. Stage symbolic links for modification: ``` > stage_linked_file.py --dst_dir /path/to/dst ``` -- Modify files as required +3. Modify files as required -- After modifications, restore the `symbolic links`: +4. After modifications, restore the symbolic links: ``` > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` diff --git a/helpers/create_links.py b/helpers/create_links.py index 481b0f152..9694d3a9d 100644 --- a/helpers/create_links.py +++ b/helpers/create_links.py @@ -1,4 +1,7 @@ +#!/usr/bin/env python + """ + Usage Example: - Using absolute links @@ -20,10 +23,6 @@ > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links --use_relative_paths - Other steps remain same. - -Import as: - -import helpers.create_links as hcrelink """ import argparse @@ -40,118 +39,57 @@ _LOG = logging.getLogger(__name__) -# ############################################################################# - - -def _main(parser: argparse.ArgumentParser) -> None: - """ - Entry point for the script to manage symbolic links between directories. 
- Depending on the command-line arguments, this script either: - - - Replaces matching files in `dst_dir` with symbolic links to `src_dir`. - - Stages all symbolic links in `dst_dir` for modification by replacing them - with writable file copies. - - Usage: - - `--replace_links`: Replace files with symbolic links - - `--stage_links`: Replace symbolic links with writable file copies - :return: None - """ - args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) - if args.replace_links: - common_files = _find_common_files(args.src_dir, args.dst_dir) - _replace_with_links( - common_files, use_relative_paths=args.use_relative_paths - ) - _LOG.info("Replaced %d files with symbolic links.", len(common_files)) - elif args.stage_links: - symlinks = _find_symlinks(args.dst_dir) - if not symlinks: - _LOG.info("No symbolic links found to stage.") - _stage_links(symlinks) - _LOG.info("Staged %d symbolic links for modification.", len(symlinks)) - else: - _LOG.error("You must specify either --replace_links or --stage_links.") - - -def _parse() -> argparse.ArgumentParser: - """ - Parse command-line arguments. - - :return: Argument parser object. 
- """ - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument("--src_dir", required=True, help="Source directory.") - parser.add_argument("--dst_dir", required=True, help="Destination directory.") - parser.add_argument( - "--replace_links", - action="store_true", - help="Replace files with symbolic links.", - ) - parser.add_argument( - "--stage_links", - action="store_true", - help="Replace symbolic links with writable copies.", - ) - parser.add_argument( - "--use_relative_paths", - action="store_true", - help="Use relative paths for symbolic links instead of absolute paths.", - ) - hparser.add_verbosity_arg(parser) - return parser +# ############################################################################# def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: """ Find common files in dst_dir and change to links. - If a destination dir is not found, the functions makes a dest dir and copies all files from - source to destination after users approval. All matching files are identified based on their - name and content. The matches are returned as the file paths from both directories. + If a destination dir is not found, the functions makes a dest dir and copies + all files from source to destination after users approval. All matching + files are identified based on their name and content. The matches are + returned as the file paths from both directories. :param src_dir: The source directory containing the original files :param dst_dir: The destination directory to compare files against :return: paths of matching files from `src_dir` and `dst_dir` """ - # Ensure the destination directory exists; create it if it doesn't. - if not os.path.exists(dst_dir): - user_input = input( - "Destination directory %s does not exist. Would you like to create copy all files from source? 
(y/n): " - ) - if user_input.lower() == "y": - hio.create_dir( - dir_name=dst_dir, - incremental=True, - abort_if_exists=True, - ask_to_delete=False, - backup_dir_if_exists=False, - ) - _LOG.info("Created destination directory: %s", dst_dir) - for root, _, files in os.walk(src_dir): - for file in files: - src_file = os.path.join(root, file) - dst_file = os.path.join( - dst_dir, os.path.relpath(src_file, src_dir) - ) - dst_file_dir = os.path.dirname(dst_file) - # Ensure the destination file directory exists. - if not os.path.exists(dst_file_dir): - os.makedirs(dst_file_dir) - _LOG.info("Created subdirectory: %s", dst_file_dir) - # Copy the file from source to destination. - shutil.copy2(src_file, dst_file) - _LOG.info("Copied file: %s -> %s", src_file, dst_file) - else: - _LOG.error( - "Destination directory %s not created. Exiting function.", - dst_dir, - ) - return [] + # # Ensure the destination directory exists; create it if it doesn't. + # if not os.path.exists(dst_dir): + # user_input = input( + # "Destination directory %s does not exist. Would you like to create copy all files from source? (y/n): " + # ) + # if user_input.lower() == "y": + # hio.create_dir( + # dir_name=dst_dir, + # incremental=True, + # abort_if_exists=True, + # ask_to_delete=False, + # backup_dir_if_exists=False, + # ) + # _LOG.info("Created destination directory: %s", dst_dir) + # for root, _, files in os.walk(src_dir): + # for file in files: + # src_file = os.path.join(root, file) + # dst_file = os.path.join( + # dst_dir, os.path.relpath(src_file, src_dir) + # ) + # dst_file_dir = os.path.dirname(dst_file) + # # Ensure the destination file directory exists. + # if not os.path.exists(dst_file_dir): + # os.makedirs(dst_file_dir) + # _LOG.info("Created subdirectory: %s", dst_file_dir) + # # Copy the file from source to destination. 
+ # shutil.copy2(src_file, dst_file) + # _LOG.info("Copied file: %s -> %s", src_file, dst_file) + # else: + # _LOG.error( + # "Destination directory %s not created. Exiting function.", + # dst_dir, + # ) + # return [] # After copying files, continue with comparing files. common_files = [] for root, _, files in os.walk(src_dir): @@ -159,77 +97,97 @@ def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: src_file = os.path.join(root, file) dst_file = os.path.join(dst_dir, os.path.relpath(src_file, src_dir)) # Check if the file exists in the destination folder. - # Certain files do not need to be copied, so we skip them. if not os.path.exists(dst_file): - _LOG.warning( - "Warning: %s is missing in the destination directory.", + _LOG.debug( + "File %s is missing in the destination directory", dst_file, ) continue - # Compare file contents after copying. + # Check if the file is a symbolic link. + if os.path.islink(dst_file): + _LOG.debug( + "File %s is a symbolic link", + dst_file, + ) + continue + # Compare file contents. if filecmp.cmp(src_file, dst_file, shallow=False): - _LOG.info( - "Files are the same and will be replaced: %s -> %s", + _LOG.debug( + "Files src_file=%s, dst_file=%s are the same", src_file, dst_file, ) common_files.append((src_file, dst_file)) else: - _LOG.warning( - "Warning: %s and %s have different content.", - dst_file, + _LOG.debug( + "Files src_file=%s, dst_file=%s are not the same", src_file, + dst_file, ) return common_files +def _create_single_link(src_file: str, dst_file: str, use_relative_paths: bool, abort_on_first_error: bool) -> None: + """ + Create a single symbolic link from dst_file to src_file. 
+ + :param src_file: Source file path + :param dst_file: Destination file path where symlink will be created + :param use_relative_paths: If True, create relative symlinks; if False, use absolute paths + :param abort_on_first_error: If True, abort on the first error; if False, continue processing + """ + hdbg.dassert_file_exists(src_file) + hdbg.dassert_file_exists(dst_file) + # Remove the destination file. + os.remove(dst_file) + try: + if use_relative_paths: + link_target = os.path.relpath(src_file, os.path.dirname(dst_file)) + else: + link_target = os.path.abspath(src_file) + os.symlink(link_target, dst_file) + # Remove write permissions from the file to prevent accidental + # modifications. + current_permissions = os.stat(dst_file).st_mode + new_permissions = ( + current_permissions + & ~stat.S_IWUSR + & ~stat.S_IWGRP + & ~stat.S_IWOTH + ) + os.chmod(dst_file, new_permissions) + _LOG.debug("Created symlink: %s -> %s", dst_file, link_target) + except Exception as e: + msg = "Failed to create symlink %s -> %s with error %s" % (dst_file, link_target, str(e)) + if abort_on_first_error: + raise RuntimeError(msg) + else: + _LOG.warning(msg) + + def _replace_with_links( common_files: List[Tuple[str, str]], use_relative_paths: bool, *, abort_on_first_error: bool = False, + dry_run: bool = False, ) -> None: """ Replace matching files in the destination directory with symbolic links. :param common_files: Matching file paths from `src_dir` and `dst_dir` - :param use_relative_paths: If True, create relative symlinks; if False, use absolute paths. - :param abort_on_first_error: If True, abort on the first error; if False, continue processing + :param use_relative_paths: If True, create relative symlinks; if False, use + absolute paths. + :param abort_on_first_error: If True, abort on the first error; if False, + continue processing + :param dry_run: If True, print what will be done without actually doing it. 
""" for src_file, dst_file in common_files: - try: - hdbg.dassert_file_exists(src_file) - except FileNotFoundError as e: - _LOG.error("Error: %s", str(e)) - if abort_on_first_error: - _LOG.error("Aborting: Source file %s doesn't exist.", src_file) - continue - if os.path.exists(dst_file): - os.remove(dst_file) - try: - if use_relative_paths: - link_target = os.path.relpath(src_file, os.path.dirname(dst_file)) - else: - link_target = os.path.abspath(src_file) - os.symlink(link_target, dst_file) - # Remove write permissions from the file to prevent accidental - # modifications. - current_permissions = os.stat(dst_file).st_mode - new_permissions = ( - current_permissions - & ~stat.S_IWUSR - & ~stat.S_IWGRP - & ~stat.S_IWOTH - ) - os.chmod(dst_file, new_permissions) - _LOG.info("Created symlink: %s -> %s", dst_file, link_target) - except Exception as e: - _LOG.error("Error creating symlink for %s: %s", dst_file, e) - if abort_on_first_error: - _LOG.warning( - "Aborting: Failed to create symlink for %s.", dst_file - ) - continue + + _create_single_link(src_file, dst_file, use_relative_paths, abort_on_first_error) + + +# ############################################################################# def _find_symlinks(dst_dir: str) -> List[str]: @@ -237,8 +195,10 @@ def _find_symlinks(dst_dir: str) -> List[str]: Find all symbolic links in the destination directory. 
:param dst_dir: Directory to search for symbolic links - :return: List of paths to symbolic links + :return: List of absolute paths to symbolic links """ + dst_dir = os.path.abspath(dst_dir) + hdbg.dassert_dir_exists(dst_dir) symlinks = [] for root, _, files in os.walk(dst_dir): for file in files: @@ -246,38 +206,122 @@ def _find_symlinks(dst_dir: str) -> List[str]: if os.path.islink(file_path): symlinks.append(file_path) return symlinks + +def _stage_single_link(link: str, target_file: str, abort_on_first_error: bool, dry_run: bool) -> None: + """ + Replace a single symlink with a writable copy of the linked file. + + :param link: The symlink to replace. + :param target_file: The file to copy to the symlink location. + :param abort_on_first_error: If True, abort on the first error; if False, + continue processing + :param dry_run: If True, print what will be done without actually doing it. + """ + # Resolve the original file the symlink points to. + target_file = os.readlink(link) + if not os.path.exists(target_file): + msg = "Target file does not exist for link %s -> %s" % (link, target_file) + if abort_on_first_error: + raise RuntimeError(msg) + else: + _LOG.warning(msg) + return + try: + os.remove(link) + # Copy file to the symlink location. + shutil.copy2(target_file, link) + # Make the file writable to allow for modifications. + current_permissions = os.stat(link).st_mode + new_permissions = ( + current_permissions | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + ) + os.chmod(link, new_permissions) + _LOG.debug("Staged: %s -> %s", link, target_file) + except Exception as e: + msg = "Error staging link %s: %s" % (link, str(e)) + if abort_on_first_error: + raise RuntimeError(msg) + else: + _LOG.warning(msg) -def _stage_links(symlinks: List[str]) -> None: + +def _stage_links(symlinks: List[str], abort_on_first_error: bool, dry_run: bool) -> None: """ Replace symbolic links with writable copies of the linked files. 
:param symlinks: List of symbolic links to replace.
    """
    for link in symlinks:
-        # Resolve the original file the symlink points to.
-        target_file = os.readlink(link)
-        if not os.path.exists(target_file):
-            _LOG.warning(
-                "Warning: Target file does not exist for link %s -> %s",
-                link,
-                target_file,
-            )
-            continue
-        # Replace the symlink with a writable copy of the target file.
-        try:
-            os.remove(link)
-            # Copy file to the symlink location.
-            shutil.copy2(target_file, link)
-            # Make the file writable to allow for modifications.
-            current_permissions = os.stat(link).st_mode
-            new_permissions = (
-                current_permissions | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH
-            )
-            os.chmod(link, new_permissions)
-            _LOG.info("Staged: %s -> %s", link, target_file)
-        except Exception as e:
-            _LOG.error("Error staging link %s: %s", link, e)
+        _stage_single_link(link, os.readlink(link), abort_on_first_error, dry_run)
+
+
+
+# #############################################################################
+
+
+def _parse() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("--src_dir", required=True, help="Source directory.")
+    parser.add_argument("--dst_dir", required=True, help="Destination directory.")
+    parser.add_argument(
+        "--replace_links",
+        action="store_true",
+        help="Replace equal files with symbolic links.",
+    )
+    parser.add_argument(
+        "--stage_links",
+        action="store_true",
+        help="Replace symbolic links with writable copies.",
+    )
+    parser.add_argument(
+        "--compare_files",
+        action="store_true",
+        help="Compare files in the directories.",
+    )
+    parser.add_argument(
+        "--use_relative_paths",
+        action="store_true",
+        help="Use relative paths for symbolic links instead of absolute paths.",
+    )
+    parser.add_argument(
+        "--dry_run",
+        action="store_true",
+        help="Print what will be done without actually doing it.",
+    )
+    hparser.add_verbosity_arg(parser)
+    return parser
+
+
+def 
_main(parser: argparse.ArgumentParser) -> None:
+    args = parser.parse_args()
+    hdbg.init_logger(verbosity=args.log_level, use_exec_path=True)
+    hdbg.dassert_dir_exists(args.src_dir)
+    hdbg.dassert_dir_exists(args.dst_dir)
+    #
+    hdbg.dassert_eq(sum([args.replace_links, args.stage_links, args.compare_files]), 1, "You must specify exactly one of --replace_links, --stage_links, or --compare_files.")
+    if args.compare_files:
+        # Compare files.
+        common_files = _find_common_files(args.src_dir, args.dst_dir)
+        _LOG.info("Found %d common files.", len(common_files))
+    elif args.replace_links:
+        # Replace with links.
+        common_files = _find_common_files(args.src_dir, args.dst_dir)
+        hdbg.dassert_ne(len(common_files), 0, "No files found to replace.")
+        _replace_with_links(
+            common_files, use_relative_paths=args.use_relative_paths
+        )
+        _LOG.info("Replaced %d files with symbolic links.", len(common_files))
+    elif args.stage_links:
+        # Stage links for modification.
+        symlinks = _find_symlinks(args.dst_dir)
+        hdbg.dassert_ne(len(symlinks), 0, "No symbolic links found to stage.")
+        _stage_links(symlinks, abort_on_first_error=False, dry_run=args.dry_run)
+        _LOG.info("Staged %d symbolic links for modification.", len(symlinks))
+    else:
+        raise RuntimeError("Internal error")


 if __name__ == "__main__":
diff --git a/helpers/stage_linked_file.py b/helpers/stage_linked_file.py
index 43d83e881..b0ab2cb98 100644
--- a/helpers/stage_linked_file.py
+++ b/helpers/stage_linked_file.py
@@ -1,7 +1,8 @@
-"""
-Import as:
+#!/usr/bin/env python

-import helpers.stage_linked_file as hstlifil
+"""
+Usage
+  - python3 stage_linked_file.py --dst_dir /path/to/dst
 """

 import argparse
@@ -14,46 +15,46 @@

 logging.basicConfig(level=logging.INFO)


-def find_symlinks(dst_dir: str) -> List[str]:
-    """
-    Find all symbolic links in the destination directory.
-
-    :param dst_dir: Directory to search for symbolic links.
-    :return: List of paths to symbolic links.
- """ - symlinks = [] - for root, _, files in os.walk(dst_dir): - for file in files: - file_path = os.path.join(root, file) - if os.path.islink(file_path): - symlinks.append(file_path) - return symlinks - - -def stage_links(symlinks: List[str]) -> None: - """ - Replace symbolic links with writable copies of the linked files. - - :param symlinks: List of symbolic links to replace. - """ - for link in symlinks: - # Resolve the original file the symlink points to. - target_file = os.readlink(link) - if not os.path.exists(target_file): - _LOG.warning( - f"Warning: Target file does not exist for link {link} -> {target_file}" - ) - continue - # Replace the symlink with a writable copy of the target file. - try: - os.remove(link) - # Copy file to the symlink location. - shutil.copy2(target_file, link) - # Make the file writable. - os.chmod(link, 0o644) - _LOG.info(f"Staged: {link} -> {target_file}") - except Exception as e: - _LOG.error(f"Error staging link {link}: {e}") +# def find_symlinks(dst_dir: str) -> List[str]: +# """ +# Find all symbolic links in the destination directory. + +# :param dst_dir: Directory to search for symbolic links. +# :return: List of paths to symbolic links. +# """ +# symlinks = [] +# for root, _, files in os.walk(dst_dir): +# for file in files: +# file_path = os.path.join(root, file) +# if os.path.islink(file_path): +# symlinks.append(file_path) +# return symlinks + + +# def stage_links(symlinks: List[str]) -> None: +# """ +# Replace symbolic links with writable copies of the linked files. + +# :param symlinks: List of symbolic links to replace. +# """ +# for link in symlinks: +# # Resolve the original file the symlink points to. +# target_file = os.readlink(link) +# if not os.path.exists(target_file): +# _LOG.warning( +# f"Warning: Target file does not exist for link {link} -> {target_file}" +# ) +# continue +# # Replace the symlink with a writable copy of the target file. +# try: +# os.remove(link) +# # Copy file to the symlink location. 
+# shutil.copy2(target_file, link) +# # Make the file writable. +# os.chmod(link, 0o644) +# _LOG.info(f"Staged: {link} -> {target_file}") +# except Exception as e: +# _LOG.error(f"Error staging link {link}: {e}") def main(): @@ -62,7 +63,6 @@ def main(): ) parser.add_argument("--dst_dir", required=True, help="Destination directory.") args = parser.parse_args() - symlinks = find_symlinks(args.dst_dir) if not symlinks: _LOG.info("No symbolic links found to stage.") @@ -72,11 +72,4 @@ def main(): if __name__ == "__main__": - main() - -""" -Usage - - - python3 stage_linked_file.py --dst_dir /path/to/dst - -""" + main() \ No newline at end of file diff --git a/helpers/test/test_create_link.py b/helpers/test/test_create_link.py index b21730cbf..4c33c0755 100644 --- a/helpers/test/test_create_link.py +++ b/helpers/test/test_create_link.py @@ -25,8 +25,6 @@ def test__find_common_files(self) -> None: Create two directories, each containing identical files, and checks that the `_find_common_files` function identifies these files. - - :return: None """ base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) src_dir: pathlib.Path = base_dir / "test_src_dir" @@ -50,8 +48,6 @@ def test__replace_with_links_absolute(self) -> None: Create identical files in two directories and replace the files in the destination directory with absolute symbolic links pointing to the source files. - - :return: None """ base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) src_dir: pathlib.Path = base_dir / "test_src_dir" @@ -75,8 +71,6 @@ def test__replace_with_links_relative(self) -> None: Create identical files in two directories and replace the files in the destination directory with relative symbolic links pointing to the source files. 
- - :return: None """ base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) src_dir: pathlib.Path = base_dir / "test_src_dir" @@ -103,8 +97,6 @@ def test__stage_links(self) -> None: Create symbolic links in a directory and then stage them by replacing each link with a copy of the original file it points to. - - :return: None """ base_dir: pathlib.Path = pathlib.Path(self.get_scratch_space()) src_dir: pathlib.Path = base_dir / "test_src_dir" From 6b8dc4410f5e0b9249ca7d00731b91cafb78b762 Mon Sep 17 00:00:00 2001 From: saggese Date: Sun, 13 Apr 2025 22:03:38 +0000 Subject: [PATCH 017/193] Update --- dev_scripts_helpers/llms/llm_prompts.py | 243 +++++++++++++++--- dev_scripts_helpers/llms/llm_transform.py | 24 +- .../thin_client/requirements.txt | 1 + dev_scripts_helpers/thin_client/setenv.sh | 9 +- helpers/henv.py | 8 +- 5 files changed, 222 insertions(+), 63 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 3d79abbea..b578cb609 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1,4 +1,5 @@ import ast +import functools import logging import os import re @@ -12,6 +13,68 @@ _LOG = logging.getLogger(__name__) +@functools.lru_cache(maxsize=1) +def get_prompt_tags() -> List[str]: + """ + Return the list of functions in this file that can be called as a prompt. + """ + # Read current file. + curr_path = os.path.abspath(__file__) + file_content = hio.from_file(curr_path) + # + matched_functions = [] + # Parse the file content into an AST. 
+ tree = ast.parse(file_content) + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + # Check function arguments and return type that match the signature: + # ``` + # def xyz() -> Tuple[str, Set[str]]: + # ``` + args = [arg.arg for arg in node.args.args] + has_no_args = len(args) == 0 + if not hasattr(node, "returns") or node.returns is None: + return_type_str = "" + else: + return_type_str = ast.unparse(node.returns) + _LOG.debug(hprint.to_str("node.name args return_type_str")) + if has_no_args and return_type_str == "_PROMPT_OUT": + _LOG.debug(" -> matched") + matched_functions.append(node.name) + matched_functions = sorted(matched_functions) + return matched_functions + +# Store the prompts that need a certain post-transforms to be applied outside +# the container. +OUTSIDE_CONTAINER_POST_TRANSFORMS = {} + +if not OUTSIDE_CONTAINER_POST_TRANSFORMS: + OUTSIDE_CONTAINER_POST_TRANSFORMS = { + # These are all the prompts with post_transforms with + # `convert_to_vim_cfile`. + "convert_file_names": + ["code_review", + "code_review_and_find_missing_docstrings", + "code_propose_refactoring", + ], + "prettier_on_str": + ["md_rewrite", + "md_summarize_short", + "slide_improve", + "slide_colorize", + ] + } + valid_prompts = get_prompt_tags() + for _, prompts in OUTSIDE_CONTAINER_POST_TRANSFORMS.items(): + for prompt in prompts: + hdbg.dassert_in(prompt, valid_prompts) + + +def get_outside_container_post_transforms(transform_name: str) -> Set[str]: + hdbg.dassert_in(transform_name, OUTSIDE_CONTAINER_POST_TRANSFORMS.keys()) + return OUTSIDE_CONTAINER_POST_TRANSFORMS[transform_name] + + # ############################################################################# # Prompts. # ############################################################################# @@ -26,11 +89,15 @@ def code_comment() -> _PROMPT_OUT: + """ + Add comments to Python code. + """ system = _CONTEXT system += r""" - Every 10 lines of code add comment explaining the code. 
+ Every a chunk of 4 or 5 lines of code add comment explaining the code. Comments should go before the logical chunk of code they describe. - Comments should be in imperative form, a full English phrase, and end with a period. + Comments should be in imperative form, a full English phrase, and end with a + period. """ # You are a proficient Python coder and write English very well. # Given the Python code passed below, improve or add comments to the code. @@ -44,11 +111,16 @@ def code_comment() -> _PROMPT_OUT: def code_docstring() -> _PROMPT_OUT: + """ + Add a REST docstring to Python code. + """ system = _CONTEXT system += r""" Add a docstring to the function passed. - The first comment should be in imperative mode and fit in a single line of less than 80 characters. - To describe the parameters use the REST style, which requires each parameter to be prepended with :param + - The first comment should be in imperative mode and fit in a single line of + less than 80 characters. + - To describe the parameters use the REST style, which requires each + parameter to be prepended with :param """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -115,19 +187,36 @@ def code_review() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_review_and_fix() -> _PROMPT_OUT: +# TODO(gp): This is kind of expensive and we should use a linter stage. +def code_review_and_find_missing_docstrings() -> _PROMPT_OUT: + """ + Find missing docstrings in Python code. + """ system = _CONTEXT system += r""" - You will review the code and make sure it is correct and readable. + You will review the code and find missing docstrings. - You will print the code with the proposed improvements, minimizing the - number of changes to the code that are not strictly needed. 
+ Do not print any comment, only print the line number in the following style: + : """ pre_transforms = {"add_line_numbers"} post_transforms = {"convert_to_vim_cfile"} return system, pre_transforms, post_transforms +# def code_review_and_fix() -> _PROMPT_OUT: +# system = _CONTEXT +# system += r""" +# You will review the code and make sure it is correct and readable. + +# You will print the code with the proposed improvements, minimizing the +# number of changes to the code that are not strictly needed. +# """ +# pre_transforms = {"add_line_numbers"} +# post_transforms = {"convert_to_vim_cfile"} +# return system, pre_transforms, post_transforms + + def code_propose_refactoring() -> _PROMPT_OUT: system = _CONTEXT system += r""" @@ -173,6 +262,106 @@ def code_apply_linter_issues() -> _PROMPT_OUT: return system, pre_transforms, post_transforms +def code_fix_string() -> _PROMPT_OUT: + """ + Fix the log statements to use % formatting. + """ + system = _CONTEXT + system += r""" + Use % formatting instead of f-strings (formatted string literals). + Do not print any comment, just the converted code. + + For instance, convert: + _LOG.info(f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}'") + to + _LOG.info("env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) + + For instance, convert: + hdbg.dassert_in(env_var, os.environ, f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''") + to + hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) + """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_use_f_strings() -> _PROMPT_OUT: + """ + Use f-strings, like `f"Hello, {name}. You are {age} years old."`. + """ + system = _CONTEXT + system += r""" + Use f-strings (formatted string literals) instead of % formatting and format + strings. 
Do not print any comment, just the converted code. + + For instance, convert: + "Hello, %s. You are %d years old." % (name, age) + to + f"Hello, {name}. You are {age} years old." + """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_use_perc_strings() -> _PROMPT_OUT: + """ + Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. + """ + system = _CONTEXT + system += r""" + Use % formatting instead of f-strings (formatted string literals). + Do not print any comment, just the converted code. + + For instance, convert: + f"Hello, {name}. You are {age} years old." + to + "Hello, %s. You are %d years old." % (name, age) + """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_apply_csfy_style1() -> _PROMPT_OUT: + """ + Apply the csfy style to the code. + """ + system = _CONTEXT + file_name = "template_code.py" + file_content = hio.from_file(file_name) + system += fr""" + Apply the style described below to the Python code without changing the + behavior of the code. + Do not print any comment, just the converted code. + + ``` + {file_content} + ``` + """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_apply_csfy_style2() -> _PROMPT_OUT: + """ + Apply the csfy style to the code. 
+ """ + system = _CONTEXT + system += r""" + Apply the following style to the code: + - Convert docstrings into REST docstrings + - Always use imperative in comments + - Remove empty spaces in functions + - Add type hints, when missing + - Use * before mandatory parameters + - Make local functions private + - Convert .format() to f-string unless it’s a _LOG + """ + + # ############################################################################# @@ -274,7 +463,7 @@ def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: if line.strip() == "": continue # ``` - # 57: The docstring should use more detailed type annotations for clarity, e.g., `List[str]`, `int`, etc. + # 57: The docstring should use more detailed type annotations for ... # ``` regex = re.compile( r""" @@ -290,7 +479,7 @@ def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: description = match.group(2) else: # ``` - # 98-104: Simplify the hash computation logic with a helper function to avoid redundant steps. + # 98-104: Simplify the hash computation logic with a helper ... # ``` regex = re.compile( r""" @@ -317,6 +506,9 @@ def _convert_to_vim_cfile(txt: str, in_file_name: str, out_file_name: str) -> st """ Convert the text passed to a vim cfile. + This is used to convert the results of the LLM into something that vim can + use to open the files and jump to the correct lines. + in_file_name: path to the file to convert to a vim cfile (e.g., `/app/helpers_root/tmp.llm_transform.in.txt`) """ @@ -399,34 +591,3 @@ def run_prompt( if txt_out is not None: hdbg.dassert_isinstance(txt_out, str) return txt_out - - -def get_prompt_tags() -> List[str]: - """ - Return the list of functions in this file that can be called as a prompt. - """ - # Read current file. - curr_path = os.path.abspath(__file__) - file_content = hio.from_file(curr_path) - # - matched_functions = [] - # Parse the file content into an AST. 
- tree = ast.parse(file_content) - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef): - # Check function arguments and return type that match the signature: - # ``` - # def xyz() -> Tuple[str, Set[str]]: - # ``` - args = [arg.arg for arg in node.args.args] - has_no_args = len(args) == 0 - if not hasattr(node, "returns") or node.returns is None: - return_type_str = "" - else: - return_type_str = ast.unparse(node.returns) - _LOG.debug(hprint.to_str("node.name args return_type_str")) - if has_no_args and return_type_str == "_PROMPT_OUT": - _LOG.debug(" -> matched") - matched_functions.append(node.name) - matched_functions = sorted(matched_functions) - return matched_functions diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index cd3be2b72..4927ba424 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -67,6 +67,7 @@ def _run_dockerized_llm_transform( return_cmd: bool = False, force_rebuild: bool = False, use_sudo: bool = False, + suppress_output: bool = False, ) -> Optional[str]: """ Run dockerized_llm_transform.py in a Docker container with all its @@ -160,7 +161,7 @@ def _run_dockerized_llm_transform( ret = docker_cmd else: # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd, suppress_output=False) + hsystem.system(docker_cmd, suppress_output=suppress_output) ret = None return ret @@ -202,7 +203,6 @@ def _main(parser: argparse.ArgumentParser) -> None: return # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) - _ = in_file_name, out_file_name # Since we need to call a container and passing stdin/stdout is tricky, # we read the input and save it in a temporary file. 
in_lines = hparser.read_file(in_file_name) @@ -229,6 +229,8 @@ def _main(parser: argparse.ArgumentParser) -> None: # cmd_line_opts.append(f"--{arg.replace('_', '-')}") # else: # cmd_line_opts.append(f"--{arg.replace('_', '-')} {value}") + # For stdin/stdout, suppress the output of the container. + suppress_output = in_file_name == "-" or out_file_name == "-" _run_dockerized_llm_transform( tmp_in_file_name, cmd_line_opts, @@ -236,24 +238,16 @@ def _main(parser: argparse.ArgumentParser) -> None: return_cmd=False, force_rebuild=args.dockerized_force_rebuild, use_sudo=args.dockerized_use_sudo, + suppress_output=suppress_output, ) # Run post-transforms outside the container. - valid_prompts = dshlllpr.get_prompt_tags() - prompts = ["code_review", "code_propose_refactoring"] - for prompt in prompts: - hdbg.dassert_in(prompt, valid_prompts) + # 1) _convert_file_names(). + prompts = dshlllpr.get_outside_container_post_transforms("convert_file_names") if args.prompt in prompts: _convert_file_names(in_file_name, tmp_out_file_name) - # + # 2) prettier_on_str(). out_txt = hio.from_file(tmp_out_file_name) - prompts = [ - "md_rewrite", - "md_summarize_short", - "slide_improve", - "slide_colorize", - ] - for prompt in prompts: - hdbg.dassert_in(prompt, valid_prompts) + prompts = dshlllpr.get_outside_container_post_transforms("prettier_on_str") if args.prompt in prompts: # Note that we need to run this outside the `llm_transform` container to # avoid to do docker-in-docker in the `llm_transform` container (which diff --git a/dev_scripts_helpers/thin_client/requirements.txt b/dev_scripts_helpers/thin_client/requirements.txt index 7ae7b9e67..b3c2273d9 100644 --- a/dev_scripts_helpers/thin_client/requirements.txt +++ b/dev_scripts_helpers/thin_client/requirements.txt @@ -14,3 +14,4 @@ tqdm # On Mac and locally on Linux there is an issue related to this package # (see HelpersTask377), so we might want to pin it down. 
# pyyaml == 5.3.1 +pyyaml diff --git a/dev_scripts_helpers/thin_client/setenv.sh b/dev_scripts_helpers/thin_client/setenv.sh index 5ec7537c4..a7b4998df 100755 --- a/dev_scripts_helpers/thin_client/setenv.sh +++ b/dev_scripts_helpers/thin_client/setenv.sh @@ -6,7 +6,8 @@ # # Print the script path. -echo "##> $(realpath "${BASH_SOURCE[0]:-$0}")" +SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]:-$0}") +echo "##> $SCRIPT_PATH" GIT_ROOT_DIR=$(git rev-parse --show-toplevel) echo "GIT_ROOT_DIR=$GIT_ROOT_DIR" @@ -83,7 +84,6 @@ set_path $DEV_SCRIPT_DIR # PYTHONPATH. # ############################################################################# -# Set PYTHONPATH. set_pythonpath $HELPERS_ROOT_DIR # ############################################################################# @@ -101,7 +101,10 @@ set_symlink_permissions . # Project configuration. # ############################################################################# -# - Set specific configuration of the project. +# Set CSFY environment variables. +set_csfy_env_vars + +# Set specific configuration of the project. configure_specific_project print_env_signature diff --git a/helpers/henv.py b/helpers/henv.py index 029f6a25f..a08847db7 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -91,7 +91,7 @@ def get_env_var( """ if env_name not in os.environ: if abort_on_missing: - hdbg.dassert_in(env_name, os.environ, f"Can't find env var '{env_name}' in '{str(os.environ)}'") + hdbg.dassert_in(env_name, os.environ, "Can't find env var '%s' in '%s'", env_name, str(os.environ)) else: return default_value value = os.environ[env_name] @@ -184,8 +184,7 @@ def check_env_vars() -> None: """ env_vars = get_env_vars() for env_var in env_vars: - # TODO(gp): GFI. Use %s instead of str(). 
- hdbg.dassert_in(env_var, os.environ, f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''") + hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) def env_vars_to_string() -> str: @@ -384,7 +383,8 @@ def _get_library_version(lib_name: str) -> str: def _get_package_info() -> Tuple[List[str], int]: - """Get package version information. + """ + Get package version information. Returns: Tuple containing: From ba9adf278d1164aaed5dd0c184bd5dcde61be34c Mon Sep 17 00:00:00 2001 From: saggese Date: Sun, 13 Apr 2025 22:17:34 +0000 Subject: [PATCH 018/193] Update --- dev_scripts_helpers/llms/llm_prompts.py | 5 +++-- template_code.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index b578cb609..7bd4317ef 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -334,11 +334,12 @@ def code_apply_csfy_style1() -> _PROMPT_OUT: system += fr""" Apply the style described below to the Python code without changing the behavior of the code. - Do not print any comment, just the converted code. - ``` {file_content} ``` + Do not remove any code, just format the existing code using the style. + + Do not report any explanation of what you did, just the converted code. 
""" pre_transforms = set() post_transforms = {"remove_code_delimiters"} diff --git a/template_code.py b/template_code.py index 09a11ee0d..d3fb237fa 100644 --- a/template_code.py +++ b/template_code.py @@ -1,10 +1,9 @@ import logging -from typing import Any, Dict, List, Optional, Union +from typing import Dict, Optional import pandas as pd import helpers.hdbg as hdbg -import helpers.hprint as hprint _LOG = logging.getLogger(__name__) @@ -35,7 +34,7 @@ def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: # ############################################################################# -# Main functionality +# Greeter # ############################################################################# @@ -52,7 +51,9 @@ def __init__(self, *, default_greeting: str = DEFAULT_GREETING) -> None: """ self._greeting_cache: Dict[str, str] = {} self._default_greeting = default_greeting - _LOG.debug("Initialized Greeter with default greeting='%s'", default_greeting) + _LOG.debug( + "Initialized Greeter with default greeting='%s'", default_greeting + ) def greet(self, name: str, *, greeting: Optional[str] = None) -> str: """ @@ -84,7 +85,9 @@ def get_greeting_stats(self) -> pd.DataFrame: """ stats = { "total_greetings": len(self._greeting_cache), - "unique_names": len(set(k.split("_")[0] for k in self._greeting_cache.keys())), + "unique_names": len( + set(k.split("_")[0] for k in self._greeting_cache.keys()) + ), } return pd.Series(stats).to_frame().T @@ -93,6 +96,7 @@ def get_greeting_stats(self) -> pd.DataFrame: # Example usage # ############################################################################# + def main() -> None: """ Demonstrate the usage of the Greeter class. 
@@ -109,4 +113,3 @@ def main() -> None: if __name__ == "__main__": main() - From 61dfc3571a5c04272d8cb48459e4f635ff350535 Mon Sep 17 00:00:00 2001 From: saggese Date: Tue, 15 Apr 2025 14:03:47 +0000 Subject: [PATCH 019/193] Improve --- helpers/hprint.py | 3 --- helpers/hversion.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/helpers/hprint.py b/helpers/hprint.py index c794212a6..61f79b549 100644 --- a/helpers/hprint.py +++ b/helpers/hprint.py @@ -1057,6 +1057,3 @@ def config_notebook(sns_set: bool = True) -> None: # Force the linter to keep this import. _ = hwarnin - - -# \ No newline at end of file diff --git a/helpers/hversion.py b/helpers/hversion.py index 731af5390..f33ea5e76 100644 --- a/helpers/hversion.py +++ b/helpers/hversion.py @@ -20,7 +20,6 @@ import helpers.hdbg as hdbg import helpers.hio as hio -import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem @@ -223,4 +222,3 @@ def get_container_version_info() -> str: # txt = hprint.to_info("Container version", txt_tmp) return txt - From f5c9876736f54a290093299421ba1ca1ea571758 Mon Sep 17 00:00:00 2001 From: saggese Date: Tue, 15 Apr 2025 14:42:43 +0000 Subject: [PATCH 020/193] Update --- dev_scripts_helpers/dockerize/dockerized_template.py | 7 +++---- helpers/hversion.py | 10 ++++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dev_scripts_helpers/dockerize/dockerized_template.py b/dev_scripts_helpers/dockerize/dockerized_template.py index def07483b..e4c06980f 100755 --- a/dev_scripts_helpers/dockerize/dockerized_template.py +++ b/dev_scripts_helpers/dockerize/dockerized_template.py @@ -3,8 +3,8 @@ """ Dockerized template. -This script is a template for creating a Dockerized script. -It is intended as a template to explain the process. +This script is a template for creating a Dockerized script. It is +intended as a template to explain the process. 
""" import argparse @@ -41,8 +41,7 @@ def _main(parser: argparse.ArgumentParser) -> None: verbosity=args.log_level, use_exec_path=True, force_white=False ) # FILL THIS. - cmd = ( - ) + cmd = () _LOG.debug("Command: %s", cmd) hdocker.run_dockerized_pandoc( pandoc_cmd, diff --git a/helpers/hversion.py b/helpers/hversion.py index f33ea5e76..f8437ceae 100644 --- a/helpers/hversion.py +++ b/helpers/hversion.py @@ -20,6 +20,7 @@ import helpers.hdbg as hdbg import helpers.hio as hio +import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem @@ -201,12 +202,13 @@ def _check_version(code_version: str, container_version: str) -> bool: - merge origin/master into your branch with `invoke git_merge_master` - pull the latest container with `invoke docker_pull` """ - # Can't use `hprint.indent()` to avoid a dependency on `hprint`. - msg = "\n".join(line.lstrip() for line in msg.split("\n")) - msg = msg.rstrip().lstrip() + msg = hprint.dedent(msg) + # Highlight in red. + # TODO(gp): Use the proper function, if dependencies allow it. 
msg = f"\033[31m{msg}\033[0m" print(msg) - # raise RuntimeError(msg) + if False: + raise RuntimeError(msg) return is_ok From 687ca5b0dd24ee4f60c4bd32216ace521e756341 Mon Sep 17 00:00:00 2001 From: saggese Date: Tue, 15 Apr 2025 14:55:46 +0000 Subject: [PATCH 021/193] Lint --- ....replace_common_files_with_script_links.md | 51 ++++++----- helpers/create_links.py | 53 +++++++---- helpers/henv.py | 36 ++++++-- helpers/hserver.py | 59 ++++++++----- helpers/lib_tasks_print.py | 19 ++-- helpers/repo_config_utils.py | 17 ++-- helpers/stage_linked_file.py | 5 +- helpers/test/test_hserver.py | 87 +++++++++++++------ linters/dockerized_pydeps.py | 9 +- 9 files changed, 213 insertions(+), 123 deletions(-) diff --git a/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md b/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md index 100be9db2..ab19b90f7 100644 --- a/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md +++ b/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md @@ -1,18 +1,15 @@ - - - [Managing Symbolic Links Between Directories](#managing-symbolic-links-between-directories) - * [Define](#define) + * [Summary](#summary) * [Why Do We Need This Approach?](#why-do-we-need-this-approach) + * [Nomenclature](#nomenclature) * [Workflow and Commands](#workflow-and-commands) + [Step 1: Replace Files with Symbolic Links](#step-1-replace-files-with-symbolic-links) + [Step 2: Stage Files for Modification](#step-2-stage-files-for-modification) + [Step 3: Restore Symbolic Links After Modifications](#step-3-restore-symbolic-links-after-modifications) + [Workflow Summary](#workflow-summary) + [Example Directory Structure](#example-directory-structure) - + [Notes and Best Practices](#notes-and-best-practices) - + [Conclusion](#conclusion) @@ -21,8 +18,8 @@ ## Summary - This document describes two scripts, `create_links.py` and - `stage_linked_file.py` used to manage symbolic links between a - source 
directory and a destination directory + `stage_linked_file.py` used to manage symbolic links between a source + directory and a destination directory - These tools simplify workflows where you want to create read-only symbolic links for files, stage modifications, and later restore the links @@ -42,8 +39,8 @@ risk of accidental changes - If modifications are needed, the "staging process" ensures you can work safely on copies without altering the original source files - - After the code has been developed, one can then convert copies of files, back - to links + - After the code has been developed, one can then convert copies of files, + back to links ## Nomenclature @@ -55,13 +52,13 @@ ``` - This convention seems the opposite of `cp foo bar` where a new file called `bar` is created with the content of `foo` - + - Also referring to "source" and "destination" is confusing since it is unclear - if "destination" is the "destination" of the link (i.e., the head of the arrow) - or the "destination" of the operation of copy (the tail of the arrow) - + if "destination" is the "destination" of the link (i.e., the head of the + arrow) or the "destination" of the operation of copy (the tail of the arrow) + - In the rest of this document we will refer to the file being created as - "destination" + "destination" - E.g., `ln -s new_file old_file` ## Workflow and Commands @@ -74,6 +71,7 @@ links to the corresponding files in `src_dir` Command: + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -101,14 +99,14 @@ so all the files are staged for modification - Command: + ```bash > stage_linked_file.py --dst_dir /path/to/dst ``` - What it does: - Finds all the symbolic links in `dst_dir` - - Replaces each symbolic link with a writable copy of the file it points - to + - Replaces each symbolic link with a writable copy of the file it points to - Sets file permissions to `644` (writable) - Why it is important: @@ -118,10 +116,11 @@ ### Step 
3: Restore Symbolic Links After Modifications -- Once you’ve finished modifying the files, you can restore the symbolic links +- Once you've finished modifying the files, you can restore the symbolic links by running `create_links.py` again with the `--replace_links` flag - Command: + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -139,21 +138,21 @@ ### Workflow Summary 1. Set up symbolic links: - ```bash - > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links - ``` + ```bash + > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links + ``` 2. Stage symbolic links for modification: - ``` - > stage_linked_file.py --dst_dir /path/to/dst - ``` + ``` + > stage_linked_file.py --dst_dir /path/to/dst + ``` 3. Modify files as required 4. After modifications, restore the symbolic links: - ``` - > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links - ``` + ``` + > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links + ``` ### Example Directory Structure diff --git a/helpers/create_links.py b/helpers/create_links.py index 9694d3a9d..7cf93a503 100644 --- a/helpers/create_links.py +++ b/helpers/create_links.py @@ -34,7 +34,6 @@ from typing import List, Tuple import helpers.hdbg as hdbg -import helpers.hio as hio import helpers.hparser as hparser _LOG = logging.getLogger(__name__) @@ -127,14 +126,21 @@ def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: return common_files -def _create_single_link(src_file: str, dst_file: str, use_relative_paths: bool, abort_on_first_error: bool) -> None: +def _create_single_link( + src_file: str, + dst_file: str, + use_relative_paths: bool, + abort_on_first_error: bool, +) -> None: """ Create a single symbolic link from dst_file to src_file. 
:param src_file: Source file path :param dst_file: Destination file path where symlink will be created - :param use_relative_paths: If True, create relative symlinks; if False, use absolute paths - :param abort_on_first_error: If True, abort on the first error; if False, continue processing + :param use_relative_paths: If True, create relative symlinks; if + False, use absolute paths + :param abort_on_first_error: If True, abort on the first error; if + False, continue processing """ hdbg.dassert_file_exists(src_file) hdbg.dassert_file_exists(dst_file) @@ -150,15 +156,16 @@ def _create_single_link(src_file: str, dst_file: str, use_relative_paths: bool, # modifications. current_permissions = os.stat(dst_file).st_mode new_permissions = ( - current_permissions - & ~stat.S_IWUSR - & ~stat.S_IWGRP - & ~stat.S_IWOTH + current_permissions & ~stat.S_IWUSR & ~stat.S_IWGRP & ~stat.S_IWOTH ) os.chmod(dst_file, new_permissions) _LOG.debug("Created symlink: %s -> %s", dst_file, link_target) except Exception as e: - msg = "Failed to create symlink %s -> %s with error %s" % (dst_file, link_target, str(e)) + msg = "Failed to create symlink %s -> %s with error %s" % ( + dst_file, + link_target, + str(e), + ) if abort_on_first_error: raise RuntimeError(msg) else: @@ -184,7 +191,9 @@ def _replace_with_links( """ for src_file, dst_file in common_files: - _create_single_link(src_file, dst_file, use_relative_paths, abort_on_first_error) + _create_single_link( + src_file, dst_file, use_relative_paths, abort_on_first_error + ) # ############################################################################# @@ -206,17 +215,20 @@ def _find_symlinks(dst_dir: str) -> List[str]: if os.path.islink(file_path): symlinks.append(file_path) return symlinks - -def _stage_single_link(link: str, target_file: str, abort_on_first_error: bool, dry_run: bool) -> None: + +def _stage_single_link( + link: str, target_file: str, abort_on_first_error: bool, dry_run: bool +) -> None: """ Replace a single symlink 
with a writable copy of the linked file. :param link: The symlink to replace. :param target_file: The file to copy to the symlink location. - :param abort_on_first_error: If True, abort on the first error; if False, - continue processing - :param dry_run: If True, print what will be done without actually doing it. + :param abort_on_first_error: If True, abort on the first error; if + False, continue processing + :param dry_run: If True, print what will be done without actually + doing it. """ # Resolve the original file the symlink points to. target_file = os.readlink(link) @@ -246,7 +258,9 @@ def _stage_single_link(link: str, target_file: str, abort_on_first_error: bool, _LOG.warning(msg) -def _stage_links(symlinks: List[str], abort_on_first_error: bool, dry_run: bool) -> None: +def _stage_links( + symlinks: List[str], abort_on_first_error: bool, dry_run: bool +) -> None: """ Replace symbolic links with writable copies of the linked files. @@ -256,7 +270,6 @@ def _stage_links(symlinks: List[str], abort_on_first_error: bool, dry_run: bool) _stage_single_link(link, abort_on_first_error, dry_run) - # ############################################################################# @@ -301,7 +314,11 @@ def _main(parser: argparse.ArgumentParser) -> None: hdbg.dassert_dir_exists(args.src_dir) hdbg.dassert_dir_exists(args.dst_dir) # - hdbg.dassert_eq(sum([args.replace_links, args.stage_links, args.compare_files]), 1, "You must specify exactly one of --replace_links, --stage_links, or --compare_files.") + hdbg.dassert_eq( + sum([args.replace_links, args.stage_links, args.compare_files]), + 1, + "You must specify exactly one of --replace_links, --stage_links, or --compare_files.", + ) if args.compare_files: # Compare files. 
common_files = _find_common_files(args.src_dir, args.dst_dir) diff --git a/helpers/henv.py b/helpers/henv.py index a08847db7..3abbd2171 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -6,7 +6,6 @@ import logging import os -import re from typing import Any, Dict, List, Tuple, Union import helpers.hdbg as hdbg @@ -91,7 +90,13 @@ def get_env_var( """ if env_name not in os.environ: if abort_on_missing: - hdbg.dassert_in(env_name, os.environ, "Can't find env var '%s' in '%s'", env_name, str(os.environ)) + hdbg.dassert_in( + env_name, + os.environ, + "Can't find env var '%s' in '%s'", + env_name, + str(os.environ), + ) else: return default_value value = os.environ[env_name] @@ -147,7 +152,12 @@ def get_env_vars() -> List[str]: ] # No duplicates. # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. - hdbg.dassert_eq(len(set(env_var_names)),len(env_var_names), f"There are duplicates", str(env_var_names)) + hdbg.dassert_eq( + len(set(env_var_names)), + len(env_var_names), + f"There are duplicates", + str(env_var_names), + ) # Sort. env_var_names = sorted(env_var_names) return env_var_names @@ -165,7 +175,12 @@ def get_secret_env_vars() -> List[str]: ] # No duplicates. # TODO(gp): GFI. Use `hdbg.dassert_no_duplicates()` instead. - hdbg.dassert_eq(len(set(secret_env_var_names)), len(secret_env_var_names), f"There are duplicates", str(secret_env_var_names)) + hdbg.dassert_eq( + len(set(secret_env_var_names)), + len(secret_env_var_names), + f"There are duplicates", + str(secret_env_var_names), + ) # Secret env vars are a subset of the env vars. env_vars = get_env_vars() # TODO(gp): GFI. Use `hdbg.dassert_issubset()` instead. 
@@ -184,7 +199,13 @@ def check_env_vars() -> None: """ env_vars = get_env_vars() for env_var in env_vars: - hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) + hdbg.dassert_in( + env_var, + os.environ, + "env_var='%s' is not in env_vars='%s'", + env_var, + str(os.environ.keys()), + ) def env_vars_to_string() -> str: @@ -326,6 +347,7 @@ def _get_platform_info() -> str: Get platform information as a list of strings. """ import platform + txt_tmp: List[str] = [] uname = platform.uname() txt_tmp.append(f"system={uname.system}") @@ -345,6 +367,7 @@ def _get_psutil_info() -> str: """ try: import psutil + has_psutil = True except ModuleNotFoundError as e: _LOG.warning("psutil is not installed: %s", str(e)) @@ -501,7 +524,8 @@ def env_to_str( repo_config: bool = True, server_config: bool = True, system_signature: bool = True, - env_vars: bool = True) -> str: + env_vars: bool = True, +) -> str: """ Package all the information into a string. """ diff --git a/helpers/hserver.py b/helpers/hserver.py index 3703d92c2..45e40c44d 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -13,8 +13,8 @@ import subprocess from typing import Dict, List, Optional, Tuple -import helpers.repo_config_utils as hrecouti import helpers.hprint as hprint +import helpers.repo_config_utils as hrecouti # This module should depend only on: # - Python standard modules @@ -46,7 +46,8 @@ def _system_to_string(cmd: str) -> Tuple[int, str]: # Redirect stderr to stdout. stderr=subprocess.STDOUT, shell=True, - text=True) + text=True, + ) rc = result.returncode output = result.stdout output = output.strip() @@ -87,7 +88,7 @@ def get_dev_csfy_host_names() -> List[str]: host_names = ("dev1", "dev2", "dev3") return host_names - + def _get_host_name() -> str: """ Return the name of the host (not the machine) on which we are running. 
@@ -110,7 +111,8 @@ def _get_host_name() -> str: def _get_host_os_name() -> str: """ - Return the name of the OS on which we are running (e.g., "Linux", "Darwin"). + Return the name of the OS on which we are running (e.g., "Linux", + "Darwin"). If we are inside a Docker container, we use the name of the OS passed through the `CSFY_HOST_OS_NAME` env var. @@ -155,7 +157,7 @@ def is_host_csfy_server() -> bool: host_name = _get_host_name() ret = host_name in get_dev_csfy_host_names() return ret - + _MAC_OS_VERSION_MAPPING = { "Catalina": "19.", @@ -185,7 +187,7 @@ def get_host_mac_version() -> str: return version raise ValueError(f"Invalid host_os_version='{host_os_version}'") - + def is_host_mac_version(version: str) -> bool: """ Return whether we are running on a Mac with a specific version (e.g., @@ -201,8 +203,8 @@ def is_host_gp_mac() -> bool: """ Return whether we are running on a Mac owned by GP. - This is used to check if we can use a specific feature before releasing - it to all the users. + This is used to check if we can use a specific feature before + releasing it to all the users. """ host_name = _get_host_name() ret = host_name.startswith("gpmac.") @@ -392,8 +394,8 @@ def is_external_linux() -> bool: def is_external_dev() -> bool: """ - Detect whether we are running on an system outside of Causify system - (e.g., a contributor's laptop, an intern's laptop, a non-CSFY machine). + Detect whether we are running on an system outside of Causify system (e.g., + a contributor's laptop, an intern's laptop, a non-CSFY machine). """ ret = is_host_mac() or is_external_linux() return ret @@ -410,7 +412,8 @@ def _get_setup_signature() -> str: Dump all the variables that are used to make a decision about the values of the functions in `_get_setup_settings()`. - This function is used to mock the state of the system for testing purposes. + This function is used to mock the state of the system for testing + purposes. 
""" cmds = [] # is_prod_csfy() @@ -483,7 +486,7 @@ def is_outside_docker_container_on_host_mac() -> bool: def is_inside_docker_container_on_external_linux() -> bool: """ - Return whether we are running on a Docker container on an external Linux. + Return whether we are running on a Docker container on an external Linux. """ ret = is_inside_docker() and is_external_linux() return ret @@ -491,7 +494,7 @@ def is_inside_docker_container_on_external_linux() -> bool: def is_outside_docker_container_on_external_linux() -> bool: """ - Return whether we are running on a Docker container on an external Linux. + Return whether we are running on a Docker container on an external Linux. """ ret = not is_inside_docker() and is_external_linux() return ret @@ -499,7 +502,8 @@ def is_outside_docker_container_on_external_linux() -> bool: def _get_setup_settings() -> List[Tuple[str, bool]]: """ - Return a list of tuples with the name and value of the current server setup. + Return a list of tuples with the name and value of the current server + setup. """ func_names = [ "is_inside_docker_container_on_csfy_server", @@ -527,7 +531,8 @@ def _setup_to_str(setups: List[Tuple[str, bool]]) -> str: """ Return a string representation of the current server setup configuration. - :return: string with each setting on a new line, aligned with padding + :return: string with each setting on a new line, aligned with + padding """ # Find maximum length of setting names. max_len = max(len(name) for name, _ in setups) + 1 @@ -542,9 +547,10 @@ def _dassert_setup_consistency() -> None: """ Check that one and only one setup configuration is true. - This is used to ensure that the setup configuration is one of the expected - ones and uniquely defined. + This is used to ensure that the setup configuration is one of the + expected ones and uniquely defined. """ + def _indent(txt: str, *, num_spaces: int = 2) -> str: """ Add `num_spaces` spaces before each line of the passed string. 
@@ -626,7 +632,7 @@ def docker_needs_sudo() -> bool: def has_docker_privileged_mode() -> bool: """ Return whether the current container supports privileged mode. - + Docker privileged mode gives containers nearly all the same capabilities as the host system's kernel. Privileged mode allows to: @@ -686,14 +692,15 @@ def get_docker_info() -> str: else: has_sibling_containers_support_ = "*undef*" has_docker_dind_support_ = "*undef*" - txt_tmp.append(f"has_sibling_containers_support={has_sibling_containers_support_}") + txt_tmp.append( + f"has_sibling_containers_support={has_sibling_containers_support_}" + ) txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support_}") # txt = hprint.to_info("Docker info", txt_tmp) return txt - # ############################################################################# # Detect Docker functionalities, based on the set-up. # ############################################################################# @@ -787,7 +794,11 @@ def enable_privileged_mode() -> bool: elif is_host_mac(version="Catalina"): # Docker for macOS Catalina supports dind. ret = True - elif is_host_mac(version="Monterey") or is_host_mac(version="Ventura") or is_host_mac(version="Sequoia"): + elif ( + is_host_mac(version="Monterey") + or is_host_mac(version="Ventura") + or is_host_mac(version="Sequoia") + ): # Docker doesn't seem to support dind for these versions of macOS. 
ret = False elif is_prod_csfy(): @@ -825,7 +836,11 @@ def has_docker_sudo() -> bool: def _is_mac_version_with_sibling_containers() -> bool: - return is_host_mac(version="Monterey") or is_host_mac(version="Ventura") or is_host_mac(version="Sequoia") + return ( + is_host_mac(version="Monterey") + or is_host_mac(version="Ventura") + or is_host_mac(version="Sequoia") + ) # TODO(gp): -> use_docker_sibling_container_support diff --git a/helpers/lib_tasks_print.py b/helpers/lib_tasks_print.py index b46e5cfe0..512c09a60 100644 --- a/helpers/lib_tasks_print.py +++ b/helpers/lib_tasks_print.py @@ -77,22 +77,25 @@ def print_tasks(ctx, as_code=False): # type: ignore @task -def print_env(ctx, +def print_env( + ctx, repo_config=True, server_config=True, system_signature=True, - env_vars=True + env_vars=True, ): # type: ignore """ Print the repo configuration. """ _ = ctx - print(henv.env_to_str( - repo_config=repo_config, - server_config=server_config, - system_signature=system_signature, - env_vars=env_vars - )) + print( + henv.env_to_str( + repo_config=repo_config, + server_config=server_config, + system_signature=system_signature, + env_vars=env_vars, + ) + ) # TODO(gp): diff --git a/helpers/repo_config_utils.py b/helpers/repo_config_utils.py index 2d92b4007..f0370a7d5 100644 --- a/helpers/repo_config_utils.py +++ b/helpers/repo_config_utils.py @@ -166,7 +166,8 @@ def get_name(self) -> str: def get_github_repo_account(self) -> str: """ - Return the account name of the repo on GitHub, e.g., `causify-ai`, `gpsaggese`. + Return the account name of the repo on GitHub, e.g., `causify-ai`, + `gpsaggese`. """ value = self._data["repo_info"]["github_repo_account"] return value @@ -177,10 +178,11 @@ def get_repo_short_name(self) -> str: """ value = self._data["repo_info"]["repo_name"] return value - + def get_repo_full_name(self) -> str: """ - Return the full name of the repo, e.g., `causify-ai/amp`, `gpsaggese/notes`. 
+ Return the full name of the repo, e.g., `causify-ai/amp`, + `gpsaggese/notes`. """ github_repo_account = self._data["repo_info"]["github_repo_account"] repo_name = self._data["repo_info"]["repo_name"] @@ -253,15 +255,18 @@ def get_issue_prefix(self) -> str: def get_docker_base_image_name(self) -> str: """ - Return a base name for docker image. E.g., `helpers`. + Return a base name for docker image. + + E.g., `helpers`. """ value = self._data["docker_info"]["docker_image_name"] return value def get_use_sibling_container(self) -> bool: """ - Return whether to use a sibling container or a chilren docker-in-docker approach. - + Return whether to use a sibling container or a chilren docker-in-docker + approach. + This is used in unit tests to test dockerized executables. """ value = self._data["docker_info"]["use_sibling_container"] diff --git a/helpers/stage_linked_file.py b/helpers/stage_linked_file.py index b0ab2cb98..24373aea4 100644 --- a/helpers/stage_linked_file.py +++ b/helpers/stage_linked_file.py @@ -7,9 +7,6 @@ import argparse import logging -import os -import shutil -from typing import List _LOG = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -72,4 +69,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 2ef1f5e9a..c269d63d9 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -9,6 +9,11 @@ _LOG = logging.getLogger(__name__) +# ############################################################################# +# _TestCase1 +# ############################################################################# + + class _TestCase1: # def test_config_func_to_str1(self) -> None: @@ -82,7 +87,7 @@ def setUp(self) -> None: # ############################################################################# -# Test_hserver_inside_ci +# Test_hserver_inside_ci1 # 
############################################################################# @@ -98,7 +103,8 @@ class Test_hserver_inside_ci1(_TestCase1, hunitest.TestCase): def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = None - self.exp_get_docker_info = hprint.dedent(r""" + self.exp_get_docker_info = hprint.dedent( + r""" # Docker info has_docker=True docker_version='28.0.4' @@ -106,8 +112,10 @@ def setUp(self) -> None: has_privileged_mode=True is_inside_docker=True has_sibling_containers_support=True - has_docker_dind_support=True""") - self.exp_get_setup_settings = hprint.dedent(r""" + has_docker_dind_support=True""" + ) + self.exp_get_setup_settings = hprint.dedent( + r""" is_inside_docker_container_on_csfy_server True is_outside_docker_container_on_csfy_server False is_inside_docker_container_on_host_mac False @@ -117,14 +125,15 @@ def setUp(self) -> None: is_dev4 False is_ig_prod False is_prod_csfy False - """) + """ + ) self.exp_get_setup_signature = None self.exp_is_host_csfy_server = False self.exp_is_inside_ci = True # ############################################################################# -# Test_hserver_docker_container_on_csfy_server1 +# Test_hserver_inside_docker_container_on_csfy_server1 # ############################################################################# @@ -132,7 +141,9 @@ def setUp(self) -> None: not hserver.is_inside_docker_container_on_csfy_server(), reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_csfy_server1(_TestCase1, hunitest.TestCase): +class Test_hserver_inside_docker_container_on_csfy_server1( + _TestCase1, hunitest.TestCase +): """ Run tests inside Docker container on a Causify dev server. """ @@ -140,7 +151,8 @@ class Test_hserver_inside_docker_container_on_csfy_server1(_TestCase1, hunitest. 
def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" + self.exp_get_docker_info = hprint.dedent( + r""" # Docker info has_docker=True docker_version='28.0.4' @@ -148,8 +160,10 @@ def setUp(self) -> None: has_privileged_mode=True is_inside_docker=True has_sibling_containers_support=True - has_docker_dind_support=True""") - self.exp_get_setup_settings = hprint.dedent(r""" + has_docker_dind_support=True""" + ) + self.exp_get_setup_settings = hprint.dedent( + r""" is_inside_docker_container_on_csfy_server True is_outside_docker_container_on_csfy_server False is_inside_docker_container_on_host_mac False @@ -159,7 +173,8 @@ def setUp(self) -> None: is_dev4 False is_ig_prod False is_prod_csfy False - """) + """ + ) self.exp_get_setup_signature = "" self.exp_is_host_csfy_server = True self.exp_is_host_mac = False @@ -175,7 +190,9 @@ def setUp(self) -> None: not hserver.is_outside_docker_container_on_csfy_server(), reason="Config not matching", ) -class Test_hserver_outside_docker_container_on_csfy_server1(_TestCase1, hunitest.TestCase): +class Test_hserver_outside_docker_container_on_csfy_server1( + _TestCase1, hunitest.TestCase +): """ Run tests outside Docker container on a Causify dev server. 
""" @@ -183,7 +200,8 @@ class Test_hserver_outside_docker_container_on_csfy_server1(_TestCase1, hunitest def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" + self.exp_get_docker_info = hprint.dedent( + r""" # Docker info has_docker=True docker_version='28.0.4' @@ -191,8 +209,10 @@ def setUp(self) -> None: has_privileged_mode=True is_inside_docker=True has_sibling_containers_support=True - has_docker_dind_support=True""") - self.exp_get_setup_settings = hprint.dedent(r""" + has_docker_dind_support=True""" + ) + self.exp_get_setup_settings = hprint.dedent( + r""" is_inside_docker_container_on_csfy_server False is_outside_docker_container_on_csfy_server True is_inside_docker_container_on_host_mac False @@ -202,7 +222,8 @@ def setUp(self) -> None: is_dev4 False is_ig_prod False is_prod_csfy False - """) + """ + ) self.exp_get_setup_signature = "" self.exp_is_host_csfy_server = True self.exp_is_inside_ci = False @@ -217,7 +238,9 @@ def setUp(self) -> None: not (hserver.is_inside_docker() and hserver.is_host_gp_mac()), reason="Config not matching", ) -class Test_hserver_inside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestCase): +class Test_hserver_inside_docker_container_on_gp_mac1( + _TestCase1, hunitest.TestCase +): """ Run tests inside Docker container on GP's Mac. 
""" @@ -225,15 +248,18 @@ class Test_hserver_inside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestC def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" + self.exp_get_docker_info = hprint.dedent( + r""" # Docker info has_docker=True docker_needs_sudo=False has_privileged_mode=True is_inside_docker=True has_sibling_containers_support=True - has_docker_dind_support=True""") - self.exp_get_setup_settings = hprint.dedent(r""" + has_docker_dind_support=True""" + ) + self.exp_get_setup_settings = hprint.dedent( + r""" is_inside_docker_container_on_csfy_server False is_outside_docker_container_on_csfy_server False is_inside_docker_container_on_host_mac True @@ -242,7 +268,8 @@ def setUp(self) -> None: is_outside_docker_container_on_external_linux False is_dev4 False is_ig_prod False - is_prod_csfy False""") + is_prod_csfy False""" + ) self.exp_is_host_csfy_server = False self.exp_is_host_mac = True self.exp_is_inside_ci = False @@ -257,7 +284,9 @@ def setUp(self) -> None: not (not hserver.is_inside_docker() and hserver.is_host_gp_mac()), reason="Config not matching", ) -class Test_hserver_outside_docker_container_on_gp_mac1(_TestCase1, hunitest.TestCase): +class Test_hserver_outside_docker_container_on_gp_mac1( + _TestCase1, hunitest.TestCase +): """ Run tests outside Docker container on GP's Mac. 
""" @@ -265,15 +294,18 @@ class Test_hserver_outside_docker_container_on_gp_mac1(_TestCase1, hunitest.Test def setUp(self) -> None: super().setUp() self.exp_config_func_to_str = "" - self.exp_get_docker_info = hprint.dedent(r""" + self.exp_get_docker_info = hprint.dedent( + r""" # Docker info has_docker=True docker_needs_sudo=False has_privileged_mode=True is_inside_docker=False has_sibling_containers_support=*undef* - has_docker_dind_support=*undef*""") - self.exp_get_setup_settings = hprint.dedent(r""" + has_docker_dind_support=*undef*""" + ) + self.exp_get_setup_settings = hprint.dedent( + r""" is_inside_docker_container_on_csfy_server False is_outside_docker_container_on_csfy_server False is_inside_docker_container_on_host_mac False @@ -282,7 +314,8 @@ def setUp(self) -> None: is_outside_docker_container_on_external_linux False is_dev4 False is_ig_prod False - is_prod_csfy False""") + is_prod_csfy False""" + ) self.exp_get_setup_signature = "" self.exp_is_host_csfy_server = False self.exp_is_host_mac = True @@ -294,4 +327,4 @@ def setUp(self) -> None: # TODO(gp): Add test mocking the environment variables in _get_setup_signature. # We should have one class for each set up (e.g., outside Mac, outside Linux, -# inside Docker, inside CI, etc.) \ No newline at end of file +# inside Docker, inside CI, etc.) 
diff --git a/linters/dockerized_pydeps.py b/linters/dockerized_pydeps.py index f35e64087..b4be012d5 100755 --- a/linters/dockerized_pydeps.py +++ b/linters/dockerized_pydeps.py @@ -10,9 +10,9 @@ import helpers.hdbg as hdbg import helpers.hdocker as hdocker import helpers.hparser as hparser -import helpers.hsystem as hsystem -import helpers.hserver as hserver import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem _LOG = logging.getLogger(__name__) @@ -78,10 +78,7 @@ def _run_dockerized_pydeps( is_caller_host=is_caller_host, use_sibling_container_for_callee=use_sibling_container_for_callee, ) - cmd = [ - "pydeps" - f"{in_file_path}" - ] + cmd = ["pydeps" f"{in_file_path}"] cmd = " ".join(cmd) executable = hdocker.get_docker_executable(use_sudo) docker_cmd = ( From 9f4543391d50780c78d8608743b0f3bfc2eb938f Mon Sep 17 00:00:00 2001 From: saggese Date: Tue, 15 Apr 2025 16:43:41 +0000 Subject: [PATCH 022/193] Update --- helpers/henv.py | 1 - helpers/hserver.py | 3 --- helpers/repo_config_utils.py | 11 ----------- 3 files changed, 15 deletions(-) diff --git a/helpers/henv.py b/helpers/henv.py index aa0293fd1..a550bc124 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -239,7 +239,6 @@ def env_vars_to_string() -> str: # ############################################################################# # Get Git info. -# Get Git info. 
# ############################################################################# diff --git a/helpers/hserver.py b/helpers/hserver.py index ce15cd3b1..500ec74d9 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -11,8 +11,6 @@ import os import shutil import subprocess -import shutil -import subprocess from typing import Dict, List, Optional, Tuple import helpers.hprint as hprint @@ -408,7 +406,6 @@ def is_inside_ci() -> bool: return ret -# TODO(gp): -> is_inside_docker_container() # TODO(gp): -> is_inside_docker_container() def is_inside_docker() -> bool: """ diff --git a/helpers/repo_config_utils.py b/helpers/repo_config_utils.py index f0370a7d5..700e7be5c 100644 --- a/helpers/repo_config_utils.py +++ b/helpers/repo_config_utils.py @@ -262,17 +262,6 @@ def get_docker_base_image_name(self) -> str: value = self._data["docker_info"]["docker_image_name"] return value - def get_use_sibling_container(self) -> bool: - """ - Return whether to use a sibling container or a chilren docker-in-docker - approach. - - This is used in unit tests to test dockerized executables. - """ - value = self._data["docker_info"]["use_sibling_container"] - assert value in ["True", "False"], f"Invalid boolen value: {value}" - return value == "True" - # s3_bucket_info def get_unit_test_bucket_path(self) -> str: From 7a25409c8010d61a0d8fcdfae3c59a7cf75066de Mon Sep 17 00:00:00 2001 From: saggese Date: Thu, 17 Apr 2025 13:03:32 +0000 Subject: [PATCH 023/193] Update --- helpers/hserver.py | 350 ++++++++++++-------------------------- helpers/lib_tasks_find.py | 4 +- 2 files changed, 107 insertions(+), 247 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 500ec74d9..8286a365b 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -167,185 +167,6 @@ def is_host_csfy_server() -> bool: } -def is_host_mac() -> bool: - """ - Return whether we are running on macOS. 
- """ - host_os_name = _get_host_os_name() - # - ret = host_os_name == "Darwin" - return ret - - -def get_host_mac_version() -> str: - """ - Get the macOS version (e.g., "Catalina", "Monterey", "Ventura"). - """ - host_os_version = _get_host_os_version() - for version, tag in _MAC_OS_VERSION_MAPPING.items(): - if tag in host_os_version: - return version - raise ValueError(f"Invalid host_os_version='{host_os_version}'") - - -def is_host_mac_version(version: str) -> bool: - """ - Return whether we are running on a Mac with a specific version (e.g., - "Catalina", "Monterey", "Ventura"). - """ - assert version in _MAC_OS_VERSION_MAPPING, f"Invalid version='{version}'" - host_mac_version = get_host_mac_version() - ret = version.lower() == host_mac_version.lower() - return ret - - -def is_host_gp_mac() -> bool: - """ - Return whether we are running on a Mac owned by GP. - - This is used to check if we can use a specific feature before - releasing it to all the users. - """ - host_name = _get_host_name() - ret = host_name.startswith("gpmac.") - return ret - - -# We can't use `hsystem` to avoid import cycles. -def _system_to_string(cmd: str) -> Tuple[int, str]: - """ - Run a command and return the output and the return code. - - :param cmd: command to run - :return: tuple of (return code, output) - """ - result = subprocess.run( - cmd, - stdout=subprocess.PIPE, - # Redirect stderr to stdout. - stderr=subprocess.STDOUT, - shell=True, - text=True, - ) - rc = result.returncode - output = result.stdout - output = output.strip() - return rc, output - - -# ############################################################################# -# Host -# ############################################################################# - - -# We can't rely only on the name / version of the host to infer where we are -# running, since inside Docker the name of the host is like `01a7e34a82a5`. 
Of -# course, there is no way to know anything about the host for security reason, -# so we pass this value from the external environment to the container, through -# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_VERSION`). - - -# Sometimes we want to know if: -# - The processor is x86_64 or arm64 -# - The host is Mac or Linux -# - We are running on a Causify machine or on an external machine -# - We are inside CI or not -# TODO(gp): Grep all the use cases in the codebase and use the right function. - - -def get_host_user_name() -> Optional[str]: - """ - Return the name of the user running the host. - """ - return os.environ.get("CSFY_HOST_USER_NAME", None) - - -def get_dev_csfy_host_names() -> List[str]: - """ - Return the names of the Causify dev servers. - """ - host_names = ("dev1", "dev2", "dev3") - return host_names - - -def _get_host_name() -> str: - """ - Return the name of the host (not the machine) on which we are running. - - If we are inside a Docker container, we use the name of the host passed - through the `CSFY_HOST_NAME` env var. - """ - if is_inside_docker(): - host_name = os.environ["CSFY_HOST_NAME"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_name = os.uname()[1] - _LOG.debug("host_name=%s", host_name) - return host_name - - -def _get_host_os_name() -> str: - """ - Return the name of the OS on which we are running (e.g., "Linux", - "Darwin"). - - If we are inside a Docker container, we use the name of the OS passed - through the `CSFY_HOST_OS_NAME` env var. 
- """ - if is_inside_docker(): - host_os_name = os.environ["CSFY_HOST_OS_NAME"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_os_name = os.uname()[0] - _LOG.debug("host_os_name=%s", host_os_name) - return host_os_name - - -def _get_host_os_version() -> str: - """ - Return the version of the OS on which we are running. - - If we are inside a Docker container, we use the version of the OS passed - through the `CSFY_HOST_OS_VERSION` env var. - """ - if is_inside_docker(): - host_os_version = os.environ["CSFY_HOST_OS_VERSION"] - else: - # sysname='Linux' - # nodename='dev1' - # release='5.15.0-1081-aws' - # version='#88~20.04.1-Ubuntu SMP Fri Mar 28 14:17:22 UTC 2025' - # machine='x86_64' - host_os_version = os.uname()[2] - _LOG.debug("host_os_version=%s", host_os_version) - return host_os_version - - -def is_host_csfy_server() -> bool: - """ - Return whether we are running on a Causify dev server. - """ - host_name = _get_host_name() - ret = host_name in get_dev_csfy_host_names() - return ret - - -_MAC_OS_VERSION_MAPPING = { - "Catalina": "19.", - "Monterey": "21.", - "Ventura": "22.", - "Sequoia": "24.", -} - - def is_host_mac() -> bool: """ Return whether we are running on macOS. @@ -406,7 +227,6 @@ def is_inside_ci() -> bool: return ret -# TODO(gp): -> is_inside_docker_container() def is_inside_docker() -> bool: """ Return whether we are inside a container or not. @@ -457,63 +277,63 @@ def is_dev4() -> bool: return is_dev4_ -def is_host_mac(*, version: Optional[str] = None) -> bool: - """ - Return whether we are running on macOS and, optionally, on a specific - version. 
- - :param version: check whether we are running on a certain macOS version (e.g., - `Catalina`, `Monterey`) - """ - _LOG.debug("version=%s", version) - host_os_name = os.uname()[0] - _LOG.debug("os.uname()=%s", str(os.uname())) - csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) - _LOG.debug( - "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name - ) - is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" - if version is None: - # The user didn't request a specific version, so we return whether we - # are running on a Mac or not. - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ - else: - # The user specified a version: if we are not running on a Mac then we - # return False, since we don't even have to check the macOS version. - if not is_mac_: - _LOG.debug("is_mac_=%s", is_mac_) - return False - # Check the macOS version we are running. - if version == "Catalina": - # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: - # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 - macos_tag = "19.6" - elif version == "Monterey": - # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: - # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 - macos_tag = "21." - elif version == "Ventura": - macos_tag = "22." - elif version == "Sequoia": - # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0: - # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64 - macos_tag = "24." 
- else: - raise ValueError(f"Invalid version='{version}'") - _LOG.debug("macos_tag=%s", macos_tag) - host_os_version = os.uname()[2] - # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020; - # root:xnu-6153.141.2~1/RELEASE_X86_64' - csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "") - _LOG.debug( - "host_os_version=%s csfy_host_os_version=%s", - host_os_version, - csfy_host_os_version, - ) - is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version - _LOG.debug("is_mac_=%s", is_mac_) - return is_mac_ +# def is_host_mac(*, version: Optional[str] = None) -> bool: +# """ +# Return whether we are running on macOS and, optionally, on a specific +# version. + +# :param version: check whether we are running on a certain macOS version (e.g., +# `Catalina`, `Monterey`) +# """ +# _LOG.debug("version=%s", version) +# host_os_name = os.uname()[0] +# _LOG.debug("os.uname()=%s", str(os.uname())) +# csfy_host_os_name = os.environ.get("CSFY_HOST_OS_NAME", None) +# _LOG.debug( +# "host_os_name=%s csfy_host_os_name=%s", host_os_name, csfy_host_os_name +# ) +# is_mac_ = host_os_name == "Darwin" or csfy_host_os_name == "Darwin" +# if version is None: +# # The user didn't request a specific version, so we return whether we +# # are running on a Mac or not. +# _LOG.debug("is_mac_=%s", is_mac_) +# return is_mac_ +# else: +# # The user specified a version: if we are not running on a Mac then we +# # return False, since we don't even have to check the macOS version. +# if not is_mac_: +# _LOG.debug("is_mac_=%s", is_mac_) +# return False +# # Check the macOS version we are running. +# if version == "Catalina": +# # Darwin gpmac.local 19.6.0 Darwin Kernel Version 19.6.0: +# # root:xnu-6153.141.2~1/RELEASE_X86_64 x86_64 +# macos_tag = "19.6" +# elif version == "Monterey": +# # Darwin alpha.local 21.5.0 Darwin Kernel Version 21.5.0: +# # root:xnu-8020.121.3~4/RELEASE_ARM64_T6000 arm64 +# macos_tag = "21." +# elif version == "Ventura": +# macos_tag = "22." 
+# elif version == "Sequoia":
+# # Darwin gpmac.local 24.4.0 Darwin Kernel Version 24.4.0:
+# # root:xnu-11417.101.15~1/RELEASE_ARM64_T8112 arm64
+# macos_tag = "24."
+# else:
+# raise ValueError(f"Invalid version='{version}'")
+# _LOG.debug("macos_tag=%s", macos_tag)
+# host_os_version = os.uname()[2]
+# # 'Darwin Kernel Version 19.6.0: Mon Aug 31 22:12:52 PDT 2020;
+# # root:xnu-6153.141.2~1/RELEASE_X86_64'
+# csfy_host_os_version = os.environ.get("CSFY_HOST_VERSION", "")
+# _LOG.debug(
+# "host_os_version=%s csfy_host_os_version=%s",
+# host_os_version,
+# csfy_host_os_version,
+# )
+# is_mac_ = macos_tag in host_os_version or macos_tag in csfy_host_os_version
+# _LOG.debug("is_mac_=%s", is_mac_)
+# return is_mac_
 
 
 def is_prod_csfy() -> bool:
@@ -558,14 +378,14 @@ def is_external_linux() -> bool:
     """
     Detect whether we are running on a non-server/non-CI Linux machine.
 
-    This is true when we run on the machine of an intern, or a non-CSFY
+    This returns true when we run on the machine of an intern, or a non-CSFY
     contributor.
     """
     if is_host_csfy_server() or is_inside_ci():
         # Dev servers and CI are not external Linux systems.
         ret = False
     else:
-        # We need to check the host OS directly.
+        # We need to check if the host is Linux.
         host_os_name = _get_host_os_name()
         ret = host_os_name == "Linux"
     return ret
@@ -573,8 +393,10 @@ def is_external_linux() -> bool:
 
 def is_external_dev() -> bool:
     """
-    Detect whether we are running on an system outside of Causify system (e.g.,
-    a contributor's laptop, an intern's laptop, a non-CSFY machine).
+    Detect whether we are running on a system outside of Causify.
+
+    E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non-CSFY
+    machine.
     """
     ret = is_host_mac() or is_external_linux()
     return ret
@@ -633,6 +455,7 @@ def _get_setup_signature() -> str:
 # - CI
 # - Container
 
+
 def is_inside_docker_container_on_csfy_server() -> bool:
     """
     Return whether we are running on a Docker container on a Causify server.
@@ -643,7 +466,7 @@ def is_inside_docker_container_on_csfy_server() -> bool: def is_outside_docker_container_on_csfy_server() -> bool: """ - Return whether we are running on a Docker container on a Causify server. + Return whether we are running outside a Docker container on a Causify server. """ ret = not is_inside_docker() and is_host_csfy_server() return ret @@ -659,7 +482,7 @@ def is_inside_docker_container_on_host_mac() -> bool: def is_outside_docker_container_on_host_mac() -> bool: """ - Return whether we are running on a Docker container on a Mac host. + Return whether we are running outside of a Docker container on a Mac host. """ ret = not is_inside_docker() and is_host_mac() return ret @@ -675,7 +498,7 @@ def is_inside_docker_container_on_external_linux() -> bool: def is_outside_docker_container_on_external_linux() -> bool: """ - Return whether we are running on a Docker container on an external Linux. + Return whether we are outside of a Docker container on an external Linux. """ ret = not is_inside_docker() and is_external_linux() return ret @@ -685,6 +508,20 @@ def _get_setup_settings() -> List[Tuple[str, bool]]: """ Return a list of tuples with the name and value of the current server setup. + + E.g., + ```bash + is_inside_docker_container_on_csfy_server=True + is_outside_docker_container_on_csfy_server=False + is_inside_docker_container_on_host_mac=False + is_outside_docker_container_on_host_mac=True + is_inside_docker_container_on_external_linux=False + is_outside_docker_container_on_external_linux=True + is_dev4=False + is_ig_prod=False + is_prod_csfy=False + is_inside_ci=False + ``` """ func_names = [ "is_inside_docker_container_on_csfy_server", @@ -733,6 +570,8 @@ def _dassert_setup_consistency() -> None: expected ones and uniquely defined. """ + # We don't want to import `hprint` here because it will cause a circular + # import. 
def _indent(txt: str, *, num_spaces: int = 2) -> str: """ Add `num_spaces` spaces before each line of the passed string. @@ -783,6 +622,12 @@ def _indent(txt: str, *, num_spaces: int = 2) -> str: # ############################################################################# +# Each function below should run without asserting. E.g., when we check if +# docker supports privileged mode, we should check if `docker` is available, +# and then if docker supports privileged mode, instead of asserting if `docker` +# doesn't exist on the system. + + @functools.lru_cache() def has_docker() -> bool: """ @@ -810,6 +655,17 @@ def docker_needs_sudo() -> bool: assert False, "Failed to run docker" +def get_docker_executable() -> str: + """ + Return the docker executable, wrapper with `sudo` if needed. + """ + docker_needs_sudo = docker_needs_sudo() + executable = "docker" + if docker_needs_sudo: + executable = "sudo " + executable + return executable + + @functools.lru_cache() def has_docker_privileged_mode() -> bool: """ @@ -817,11 +673,15 @@ def has_docker_privileged_mode() -> bool: Docker privileged mode gives containers nearly all the same capabilities as the host system's kernel. 
+ Privileged mode allows to: - run Docker-in-Docker - mount filesystems """ - cmd = "docker run --privileged hello-world 2>&1 >/dev/null" + if not has_docker(): + return False + docker_executable = get_docker_executable() + cmd = f"{docker_executable} run --privileged hello-world 2>&1 >/dev/null" rc = os.system(cmd) _print("cmd=%s -> rc=%s" % (cmd, rc)) has_privileged_mode = rc == 0 diff --git a/helpers/lib_tasks_find.py b/helpers/lib_tasks_find.py index 093c91a16..e0fd748f1 100644 --- a/helpers/lib_tasks_find.py +++ b/helpers/lib_tasks_find.py @@ -132,13 +132,13 @@ def find_test_class(ctx, class_name, dir_name=".", pbcopy=True, exact_match=Fals macOS) """ hlitauti.report_task(txt="class_name abs_dir pbcopy") - hdbg.dassert(class_name != "", "You need to specify a class name") + hdbg.dassert_ne(class_name, "", "You need to specify a class name") _ = ctx file_names = _find_test_files(dir_name) res = _find_test_class(class_name, file_names, exact_match) res = " ".join(res) # Print or copy to clipboard. - hsystem.to_pbcopy(res, pbcopy=True) + hsystem.to_pbcopy(res, pbcopy) # ////////////////////////////////////////////////////////////////////////////////// From 0d4bdca8a7b0a0de618331f5601f4d8e927497b4 Mon Sep 17 00:00:00 2001 From: saggese Date: Thu, 17 Apr 2025 13:26:06 +0000 Subject: [PATCH 024/193] Update --- helpers/hserver.py | 129 +++++++++++++++++++---------------- helpers/hsql_test.py | 5 +- helpers/test/test_hserver.py | 20 +++--- pytest.ini | 2 +- 4 files changed, 82 insertions(+), 74 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 8286a365b..bf6e15148 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -659,9 +659,9 @@ def get_docker_executable() -> str: """ Return the docker executable, wrapper with `sudo` if needed. 
""" - docker_needs_sudo = docker_needs_sudo() + docker_needs_sudo_ = docker_needs_sudo() executable = "docker" - if docker_needs_sudo: + if docker_needs_sudo_: executable = "sudo " + executable return executable @@ -688,7 +688,10 @@ def has_docker_privileged_mode() -> bool: return has_privileged_mode -def has_sibling_containers_support() -> bool: +def has_docker_sibling_containers_support() -> bool: + """ + Return whether the current container supports running sibling containers. + """ # We need to be inside a container to run sibling containers. if not is_inside_docker(): return False @@ -698,7 +701,7 @@ def has_sibling_containers_support() -> bool: return False -def has_docker_dind_support() -> bool: +def has_docker_children_containers_support() -> bool: """ Return whether the current container supports Docker-in-Docker. """ @@ -709,6 +712,14 @@ def has_docker_dind_support() -> bool: return has_docker_privileged_mode() +def can_run_docker_from_docker() -> bool: + """ + Return whether we can run docker from docker, either as children or sibling + container. 
+ """ + return has_docker_children_containers_support() or has_docker_sibling_containers_support() + + def get_docker_info() -> str: txt_tmp: List[str] = [] # @@ -729,15 +740,15 @@ def get_docker_info() -> str: txt_tmp.append(f"is_inside_docker={is_inside_docker_}") # if is_inside_docker_: - has_sibling_containers_support_ = has_sibling_containers_support() - has_docker_dind_support_ = has_docker_dind_support() + has_docker_sibling_containers_support_ = has_docker_sibling_containers_support() + has_docker_children_containers_support_ = has_docker_children_containers_support() else: - has_sibling_containers_support_ = "*undef*" - has_docker_dind_support_ = "*undef*" + has_docker_sibling_containers_support_ = "*undef*" + has_docker_children_containers_support_ = "*undef*" txt_tmp.append( - f"has_sibling_containers_support={has_sibling_containers_support_}" + f"has_docker_sibling_containers_support={has_docker_sibling_containers_support_}" ) - txt_tmp.append(f"has_docker_dind_support={has_docker_dind_support_}") + txt_tmp.append(f"has_docker_children_containers_support={has_docker_children_containers_support_}") # txt = hprint.to_info("Docker info", txt_tmp) return txt @@ -753,56 +764,56 @@ def get_docker_info() -> str: # the system. -# TODO(gp): -> has_docker_privileged_mode -@functools.lru_cache() -def has_dind_support() -> bool: - """ - Return whether the current container supports privileged mode. +# # TODO(gp): -> has_docker_privileged_mode +# @functools.lru_cache() +# def has_dind_support() -> bool: +# """ +# Return whether the current container supports privileged mode. - This is needed to use Docker-in-Docker. - """ - _print("is_inside_docker()=%s" % is_inside_docker()) - if not is_inside_docker(): - # Outside Docker there is no privileged mode. - _print("-> ret = False") - return False - # TODO(gp): Not sure this is really needed since we do this check - # after enable_privileged_mode controls if we have dind or not. 
- if _is_mac_version_with_sibling_containers(): - return False - # TODO(gp): This part is not multi-process friendly. When multiple - # processes try to run this code they interfere. A solution is to run `ip - # link` in the entrypoint and create a `has_docker_privileged_mode` file - # which contains the value. - # We rely on the approach from https://stackoverflow.com/questions/32144575 - # to check if there is support for privileged mode. - # Sometimes there is some state left, so we need to clean it up. - # TODO(Juraj): this is slow and inefficient, but works for now. - cmd = "sudo docker run hello-world" - rc = os.system(cmd) - _print("cmd=%s -> rc=%s" % (cmd, rc)) - has_dind = rc == 0 - # dind is supported on both Mac and GH Actions. - # TODO(Juraj): HelpersTask16. - # if check_repo: - # if hserver.is_inside_ci(): - # # Docker-in-docker is needed for GH actions. For all other builds is optional. - # assert has_dind, ( - # f"Expected privileged mode: has_dind={has_dind}\n" - # + hserver.setup_to_str() - # ) - # else: - # only_warning = True - # _raise_invalid_host(only_warning) - # return False - # else: - # csfy_repo_config = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") - # print( - # _WARNING - # + ": Skip checking since CSFY_REPO_CONFIG_CHECK=" - # + f"'{csfy_repo_config}'" - # ) - return has_dind +# This is needed to use Docker-in-Docker. +# """ +# _print("is_inside_docker()=%s" % is_inside_docker()) +# if not is_inside_docker(): +# # Outside Docker there is no privileged mode. +# _print("-> ret = False") +# return False +# # TODO(gp): Not sure this is really needed since we do this check +# # after enable_privileged_mode controls if we have dind or not. +# if _is_mac_version_with_sibling_containers(): +# return False +# # TODO(gp): This part is not multi-process friendly. When multiple +# # processes try to run this code they interfere. 
A solution is to run `ip +# # link` in the entrypoint and create a `has_docker_privileged_mode` file +# # which contains the value. +# # We rely on the approach from https://stackoverflow.com/questions/32144575 +# # to check if there is support for privileged mode. +# # Sometimes there is some state left, so we need to clean it up. +# # TODO(Juraj): this is slow and inefficient, but works for now. +# cmd = "sudo docker run hello-world" +# rc = os.system(cmd) +# _print("cmd=%s -> rc=%s" % (cmd, rc)) +# has_dind = rc == 0 +# # dind is supported on both Mac and GH Actions. +# # TODO(Juraj): HelpersTask16. +# # if check_repo: +# # if hserver.is_inside_ci(): +# # # Docker-in-docker is needed for GH actions. For all other builds is optional. +# # assert has_dind, ( +# # f"Expected privileged mode: has_dind={has_dind}\n" +# # + hserver.setup_to_str() +# # ) +# # else: +# # only_warning = True +# # _raise_invalid_host(only_warning) +# # return False +# # else: +# # csfy_repo_config = os.environ.get("CSFY_REPO_CONFIG_CHECK", "True") +# # print( +# # _WARNING +# # + ": Skip checking since CSFY_REPO_CONFIG_CHECK=" +# # + f"'{csfy_repo_config}'" +# # ) +# return has_dind def _raise_invalid_host(only_warning: bool) -> None: diff --git a/helpers/hsql_test.py b/helpers/hsql_test.py index 83ee3e72b..0d28262e3 100644 --- a/helpers/hsql_test.py +++ b/helpers/hsql_test.py @@ -27,12 +27,9 @@ # ############################################################################# -# TODO(Grisha): Why does it require `ck_infra`? 
-@pytest.mark.requires_ck_infra @pytest.mark.requires_docker_in_docker @pytest.mark.skipif( - not hserver.has_dind_support() - and not hserver.use_docker_sibling_containers(), + not hserver.can_run_docker_from_docker(), reason="Need docker children / sibling support", ) class TestDbHelper(hunitest.TestCase, abc.ABC): diff --git a/helpers/test/test_hserver.py b/helpers/test/test_hserver.py index 722649504..c9de3d01b 100644 --- a/helpers/test/test_hserver.py +++ b/helpers/test/test_hserver.py @@ -110,8 +110,8 @@ def setUp(self) -> None: docker_needs_sudo=False has_privileged_mode=True is_inside_docker=True - has_sibling_containers_support=True - has_docker_dind_support=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True """ ) self.exp_get_setup_settings = hprint.dedent( @@ -160,8 +160,8 @@ def setUp(self) -> None: docker_needs_sudo=False has_privileged_mode=True is_inside_docker=True - has_sibling_containers_support=True - has_docker_dind_support=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True """ ) self.exp_get_setup_settings = hprint.dedent( @@ -210,8 +210,8 @@ def setUp(self) -> None: docker_needs_sudo=False has_privileged_mode=True is_inside_docker=False - has_sibling_containers_support=*undef* - has_docker_dind_support=*undef* + has_docker_sibling_containers_support=*undef* + has_docker_children_containers_support=*undef* """ ) self.exp_get_setup_settings = hprint.dedent( @@ -260,8 +260,8 @@ def setUp(self) -> None: docker_needs_sudo=False has_privileged_mode=True is_inside_docker=True - has_sibling_containers_support=True - has_docker_dind_support=True + has_docker_sibling_containers_support=True + has_docker_children_containers_support=True """ ) self.exp_get_setup_settings = hprint.dedent( @@ -310,8 +310,8 @@ def setUp(self) -> None: docker_needs_sudo=False has_privileged_mode=True is_inside_docker=False - has_sibling_containers_support=*undef* - 
has_docker_dind_support=*undef* + has_docker_sibling_containers_support=*undef* + has_docker_children_containers_support=*undef* """ ) self.exp_get_setup_settings = hprint.dedent( diff --git a/pytest.ini b/pytest.ini index baf5ba608..d33a13dc9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -29,7 +29,7 @@ addopts = markers= requires_ck_infra: require to run inside CK infrastructure requires_ck_aws: require CK AWS connection - requires_docker_in_docker: require docker-in-docker or docker sibling containers + requires_docker_in_docker: require docker children or sibling containers no_container: run without a container, i.e., `invoke` target tests slow: tests that are considered slow superslow: tests that are considered superslow From d848ff099e2fd78b9506345dcaf7467bfc77886f Mon Sep 17 00:00:00 2001 From: Sonya Nikiforova Date: Tue, 15 Apr 2025 20:39:48 +0200 Subject: [PATCH 025/193] Helpers task553 break in slow tests dockerized executables (#569) * HelpersTask553: Fix path * HelpersTask553: Update doc --- dev_scripts_helpers/documentation/lint_notes.py | 2 +- docs/onboarding/intern.onboarding_checklist.reference.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index 062c34d0a..aba54f89e 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -160,7 +160,7 @@ def prettier_on_str( """ _LOG.debug("txt=\n%s", txt) # Save string as input. - tmp_file_name = "tmp.lint_notes.prettier.txt" + tmp_file_name = f"{os.getcwd()}/tmp.lint_notes.prettier.txt" hio.to_file(tmp_file_name, txt) # Call `prettier` in-place. 
prettier(tmp_file_name, tmp_file_name, *args, **kwargs) diff --git a/docs/onboarding/intern.onboarding_checklist.reference.md b/docs/onboarding/intern.onboarding_checklist.reference.md index 498112a0f..95f3d9c6c 100644 --- a/docs/onboarding/intern.onboarding_checklist.reference.md +++ b/docs/onboarding/intern.onboarding_checklist.reference.md @@ -52,6 +52,9 @@ - [ ] [helpers](https://github.com/causify-ai/helpers) - [ ] [tutorials](https://github.com/causify-ai/tutorials) +- [ ] **HiringMeister**: Give the intern write access to the current + Intern-focused project on GH + - [ ] **IT**: @Shayawnn Add the intern to the mailing group `contributors@causify.ai` so that they can send [morning TODO emails](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.team_collaboration.how_to_guide.md#morning-todo-email) From 8250b06b657780bf0a8cd0b67b3eb940f50dedc7 Mon Sep 17 00:00:00 2001 From: Sonya Nikiforova Date: Tue, 15 Apr 2025 21:06:28 +0200 Subject: [PATCH 026/193] Helpers task553 break in slow tests dockerized executables (#570) * HelpersTask553: Fix path * HelpersTask553: Update doc * HelpersTask553: Use tempfile --- dev_scripts_helpers/documentation/lint_notes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index aba54f89e..3c4e4b81f 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -160,7 +160,8 @@ def prettier_on_str( """ _LOG.debug("txt=\n%s", txt) # Save string as input. - tmp_file_name = f"{os.getcwd()}/tmp.lint_notes.prettier.txt" + curr_dir = os.getcwd() + tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name hio.to_file(tmp_file_name, txt) # Call `prettier` in-place. 
prettier(tmp_file_name, tmp_file_name, *args, **kwargs) From 29fd9b4b84a1cf1a5db3b32ba0c261d0e6d355d4 Mon Sep 17 00:00:00 2001 From: Sonya Nikiforova Date: Tue, 15 Apr 2025 21:29:44 +0200 Subject: [PATCH 027/193] HelpersTask530_Update_contributor_feedback_docs (#564) * HelpersTask530: Update contributor feedback doc * HelpersTask530: Improve style * HelpersTask530: Improve style --- .../all.development_documents.reference.md | 2 +- .../ck.hiring_process.how_to_guide.md | 24 +-- .../all.contributor_feedback.how_to_guide.md | 196 +++++++++++++++++ .../all.contributor_scoring.how_to_guide.md | 199 ------------------ 4 files changed, 205 insertions(+), 216 deletions(-) create mode 100644 docs/work_organization/all.contributor_feedback.how_to_guide.md delete mode 100644 docs/work_organization/all.contributor_scoring.how_to_guide.md diff --git a/docs/onboarding/all.development_documents.reference.md b/docs/onboarding/all.development_documents.reference.md index 6581bac8f..9e9c14fb1 100644 --- a/docs/onboarding/all.development_documents.reference.md +++ b/docs/onboarding/all.development_documents.reference.md @@ -41,7 +41,7 @@ This contains the absolute minimal amount of info to start developing ## Project management -- [Contributor Scoring](/docs/work_organization/all.contributor_scoring.how_to_guide.md) +- [Contributor Feedback](/docs/work_organization/all.contributor_feedback.how_to_guide.md) - How we give feedback to contributors - [Code review](/docs/coding/all.code_review.how_to_guide.md) diff --git a/docs/onboarding/ck.hiring_process.how_to_guide.md b/docs/onboarding/ck.hiring_process.how_to_guide.md index 51579fde0..fe3dbc63b 100644 --- a/docs/onboarding/ck.hiring_process.how_to_guide.md +++ b/docs/onboarding/ck.hiring_process.how_to_guide.md @@ -6,7 +6,7 @@ - [HiringMeister](#hiringmeister) - [Step by step](#step-by-step) - [Warm-up tasks](#warm-up-tasks) -- [Intern scoring](#intern-scoring) +- [Giving feedback](#giving-feedback) @@ -203,19 +203,11 @@ 
understanding of our process - [Outsourceable issues gdoc](https://docs.google.com/document/d/1uuezdmcsV9TG2YwXGvEd0PBgWScCpR-QPOcySI5LPIA/edit#heading=h.vcvkivfud7be) -## Intern scoring - -- The interns are scored every 2 weeks -- Scoring criteria and template are defined in detail in - [`all.contributor_scoring.how_to_guide.md`](/docs/work_organization/all.contributor_scoring.how_to_guide.md) - - Not all the criteria are used for scoring the interns - - See, e.g., - [Scoring sheet](https://docs.google.com/spreadsheets/d/1eIzQnUZFiCAei4_vYnNWc_wDRfpSHgCdDmIeqnDm78Y), - [Internships - Feedback 2024](https://docs.google.com/spreadsheets/d/1FCuM3dPP3QZCT7iOHSKXls-maSZ0Mwj7BjsbaUq-T0Y/edit?gid=1816709202#gid=1816709202) - - See also the guidelines for the - [performance feedback for full-time team members](https://docs.google.com/document/d/1wkG7c8LYhCnBINDRTpWcCLEQ5E9cW_A_bt4jr-Vxy64/edit#heading=h.16akje28vcjx), - for what is generally expected of our employees -- The scoring should be done by all members of the hiring team -- The final score of the intern is the average score of all the team members -- The final scores are delivered to the interns every 2 weeks +## Giving feedback + +- Every 2 weeks interns are provided feedback that includes scores given to + their skills and performance +- The process and scoring criteria are defined in + [`all.contributor_feedback.how_to_guide.md`](/docs/work_organization/all.contributor_feedback.how_to_guide.md) +- Scoring should be done by all members of the hiring team - Interns with a low score should be let go diff --git a/docs/work_organization/all.contributor_feedback.how_to_guide.md b/docs/work_organization/all.contributor_feedback.how_to_guide.md new file mode 100644 index 000000000..7eaa8cb60 --- /dev/null +++ b/docs/work_organization/all.contributor_feedback.how_to_guide.md @@ -0,0 +1,196 @@ +# Contributor Feedback + + + +- [Feedback principles](#feedback-principles) +- [Scoring metrics](#scoring-metrics) + * 
[General](#general) + * [Metrics for interns](#metrics-for-interns) + * [Metrics for permanent team members](#metrics-for-permanent-team-members) + + [Roles](#roles) +- [Scoring process](#scoring-process) + + + +## Feedback principles + +- We want to evaluate and provide feedback to our contributors on different + aspects of their work + +- As a way to formalize giving feedback, we assign numerical scores on a variety + of [metrics](#scoring-metrics) + +- Each metric is scored between 1 (poor), 3 (average) and 5 (excellent) + - We consider 4 as acceptable, anything less than 4 as problematic and needs + to improve + +- We don't take non-perfect scores personally but just as a way to understand + what to improve + +- Scoring is anonymous + +- Everyone should be scored by at least 2 people + +- Frequency: + - Every 2 weeks for interns + - Every month for permanent team members + +## Scoring metrics + +### General + +- Metrics should be independent + +- We should provide + - Concrete questions to assess how people do on each metric + - Ways to improve the score (e.g., "read this book!", "do more of this and + less of that") + +- Along with the numerical scores, there should be a possibility to add a + textual note that can be used to provide rationale of the feedback and to + suggest improvements + +### Metrics for interns + +- Sends good TODO emails + - Doesn't forget to send one + - Follows our + [formatting requirements](/docs/work_organization/all.team_collaboration.how_to_guide.md#morning-todo-email) + - Sets realistic ETAs +- Reads docs with attention + - Internalizes our conventions described in the docs +- Able to follow procedures + - Issue and PR-related workflows + - Org processes +- Independence + - Provides solutions rather than questions + - Doesn't need a lot of guidance + - Asks only "good" questions (not something that they should be able to solve + on their own) +- Attention to detail + - Doesn't forget to do small things, including but not limited 
to: + - Follow style conventions + - Apply fixes everywhere appropriate + - Keep the branch up to date + - Make sure there are no tmp files checked in + - Thinks about corner cases while writing code and tests +- Git / GitHub knowledge + - Doesn't run into problems with branches/PRs +- Python knowledge / coding ability + - Writes effective and beautiful code +- Commitment to the project + - Puts in the hours + - This is a minor point: the number of hours doesn't really matter as long + as stuff is done + - On the other hand, if somebody consistently doesn't put enough time to get + stuff done, it can become a problem + - Willing to learn and contribute + - Willing to solve problems +- Productivity + - Quick to successfully complete tasks +- Learns from reviews + - Doesn't repeat the same mistake twice + +### Metrics for permanent team members + +- Quality of code + - Writes elegant code + - Follows our standards and conventions +- Quality of design + - Designs beautiful but simple abstractions + - Adds abstractions only when needed + - Orchestrates software components properly + - Uses design patterns when needed +- Attention to details + - Thinks in terms of corner cases + - Debugs things carefully + - Takes pride in a well-done product (e.g., code, documentation) +- Productivity + - Closes issues effectively without unnecessary iterations +- Makes and achieves ETAs + - Accurately estimates complexity of issues + - Thinks of risks and unknown unknowns, best / average / worst ETAs + - Resolves issues in set ETAs + - Puts in a sufficient amount of hours to make progress +- Autonomy + - Understands specs + - Needs an acceptable level of supervision to execute the tasks + - Does what's right according to our shared way of doing things without + reminders +- Follows our PR process + - Learns from reviews and doesn't make the same mistakes + - Runs Linter consistently before each iteration + - Does a PR / day (even a draft) +- Follows our organizational process + - 
Sends a daily TODO email + - Updates their issues regularly + - Curates GitHub +- Team work + - Helps others on the team when others need help / supervision + - Takes the initiative and goes the extra mile when needed + - Sacrifices for the greater good (e.g., doing stuff that is not fun to do) +- Communication + - Files issues with clear specs + - Explains technical issues and gives updates properly and with clarity + - Reports problems and solutions with proper context + - Speaks and writes English well +- Ability to run a team + - Can juggle multiple topics at once + - Can split the work in issues + - Can provide clear and extensive specs + - Is ok with being interrupted to help team members +- Positive energy + - Has an upbeat approach to working even if sh\*t doesn't work (since things + never work) + - Isn't a + [Negative Nelly](https://www.urbandictionary.com/define.php?term=negative%20nelly) +- Dev %, Data scientist %, Devops % + - This measures how much of each [role](#roles) the team member can cover + +#### Roles + +- We want to determine how comfortable the team member is engaging in different + types of activities + - This is helpful to understand which roles a new hire can play + +- Current roles: + - Data science + - Example of activities: + - Write notebooks + - Do research + - Debug data + - Dev + - Example of activities: + - Write code + - Refactor code + - Architecture code + - Debug code + - Unit test code + - DevOps + - Example of activities: + - Manage / supervise infra + - Airflow + - Docker + - AWS + - Administer Linux + +- E.g., X is a data scientist and has Data science=5, Dev=3, DevOps=1 +- Roles are not mutually exclusive + - A jack-of-all-trades can get a high score for all the roles + +## Scoring process + +- The process is organized and guided by + - HiringMeister for interns + - FeedbackMeister for permanent team members + +- Scoring is done via a Google Form, which is distributed to the scorers in + Asana + +- Scores for each metric 
are averaged in a spreadsheet, which is then made + available to people as feedback + - If there are textual notes accompanying numerical scores, their summary is + also provided + +- Contributors receive an email which includes the feedback and a link to this + doc to help interpret the metrics diff --git a/docs/work_organization/all.contributor_scoring.how_to_guide.md b/docs/work_organization/all.contributor_scoring.how_to_guide.md deleted file mode 100644 index cbffe085e..000000000 --- a/docs/work_organization/all.contributor_scoring.how_to_guide.md +++ /dev/null @@ -1,199 +0,0 @@ -# Contributor Scoring - - - -- [Scoring process](#scoring-process) - * [General](#general) - * [Current process](#current-process) -- [Scoring topics](#scoring-topics) - * [General](#general-1) - * [Current topics](#current-topics) - * [Roles](#roles) - * [More detailed topics for candidate full- and part-time](#more-detailed-topics-for-candidate-full--and-part-time) - + [Technical](#technical) - + [Process-related](#process-related) - - - -## Scoring process - -### General - -- We want to evaluate and provide feedback to our team members on different - aspects of their work. - -- We don't take non-perfect scores personally but just as a way to understand - what to improve. - -- The scoring template is here - [Scoring template](https://docs.google.com/spreadsheets/u/2/d/1WsWT8By2hr1VqB6ulIXf3_Rfa0zE2KHI/edit?usp=drive_web&ouid=106425005676808098789&rtpof=true) - (this is an Excel spreadsheet since you need to upload it and it needs to be a - file and not a Google Sheet). 
- -- Each metric is scored between 1 (poor), 3 (average) and 5 (excellent) - - We consider 4 as acceptable, anything less than 4 as problematic and needs - improve - -- We want to score everyone we work with: - - Initially only people that we supervise, later on anyone - - Feedback is anonymous - - At least 2 persons should score everyone - -- Scoring frequency - - Every 2 weeks for full-time candidates, part-time collaborators - - Every month for full-time team - -### Current process - -- Every scoring needs to happen (e.g., every two weeks): - - Mentor make a copy of the Excel spreadsheet - [Scoring template](https://docs.google.com/spreadsheets/u/2/d/1WsWT8By2hr1VqB6ulIXf3_Rfa0zE2KHI/edit?usp=drive_web&ouid=106425005676808098789&rtpof=true) - - Rename the template "Scoring - {Scorer} - {ScoringDate}" (e.g., "Scoring - - GP - 2023-09-01") - - Fill out the rows for the people that they need to score by looking at the - Mentor column - - Upload your Scoring Excel file - [here](https://docs.google.com/forms/d/e/1FAIpQLSdXhjHo52Roz_ROY-zlkg0YPMHCzoDXmPpCd1x-KmeCtQVd5g/viewform) - - You should see - - - - (For admin use, the source is - [here](https://docs.google.com/forms/d/1IXpcMSrtVI0xO3eNMrzGNJ0zpv-KySQVXjujlSuZlpo/edit) - and - [here](https://drive.google.com/drive/u/1/folders/1r-npms62yEvO90bXq8yZ99MkQk21c6SrQ5PpT1KqLpF1cnUqwJgO8E7cuD2t6zZe2P3hwjbe)) - - One of the integrators (GP, Paul, or somebody else) merges all the scoring - template in a single one, and then creates the averaged score for each - person - - The scores are then distributed anonymously - - Scored team members don't know who / how many mentors scored them - (although they have a clue about at least one mentor) - -## Scoring topics - -### General - -- Topics should be independent - -- We should provide - - Concrete questions to assess how people do on each topic - - Ways to improve the score (e.g., "read this book!", "do more of this and - less of this") - -### Current topics - -- Scoring table 
contains the following fields: - - Quality of code - - Writes elegant code? - - Follows our standards and conventions? - - Designs beautiful abstractions, when needed? - - Quality of design - - Designs beautiful but simple abstractions? - - Adds abstractions only when needed? - - Orchestrates software components properly? - - Uses design patterns, when needed? - - Attention to details - - Thinks in terms of corner cases? - - Debugs things carefully? - - Takes pride in well-done product (e.g., code, documentation)? - - Productivity - - Closes issues effectively without unnecessary iterations? - - It is a qualitative measure of progress per unit of time - - Make and achieve ETAs - - Estimates complexity in bugs? - - Thinks of risks and unknown unknowns, best / average / worst ETAs? - - Resolves issues in set ETAs? - - Puts in a sufficient amount of hours to make progress? - - Autonomy - - Understands specs? - - Needs a lot of supervision to execute the tasks? - - Does what's right according to our shared way of doing things without - reminders? - - Follow our PR process - - Learns from reviews and doesn't make the same mistakes? - - Runs Linter consistently before each iteration? - - Does a PR / day (even draft)? - - Follow our organizational process - - Sends a daily TODO email? - - Updates their issues daily? - - Curates GitHub? - - Team work - - Helps others on the team when others need help / supervision? - - Takes the initiative and goes the extra mile when needed? - - Sacrifices for the greater good (e.g., doing stuff that is not fun to do)? - - Communication - - Files issues with clear specs? - - Explains technical issues and gives updates properly and with clarity? - - Reports problems and solutions with proper context? - - Speaks and writes English well? - - Ability to run a team - - Can juggle multiple topics at once? - - Can split the work in specs? - - Is it ok with being interrupted to help team members? 
- - Positive energy - - Has an upbeat approach to working even if sh\*t doesn't work (since things - never work)? - - Is a - [Negative Nelly](https://www.urbandictionary.com/define.php?term=negative%20nelly)? - - Dev %, Data scientist %, Devops % - - This just measures how much of a role one team member can cover - - See below - -### Roles - -- We want to define how each team-member is comfortable covering several high - level activities. - -- The idea is to understand what roles a new hire can play. - -- Roles are not mutually exclusive - - E.g., a jack-of-all-trades can be 4 on all topics - - E.g., XYZ is a data scientist and has data science=5, dev=3, devops=1 - - Data science - - Example of activities are: - - Write notebooks - - Do research - - Debug data - - Dev - - Example of activities are: - - Write code - - Refactor code - - Architecture code - - Debug code - - Unit test code - - DevOps - - Example of activities are: - - Manage / supervise infra - - Airflow - - Docker - - AWS - - Administer Linux - -### More detailed topics for candidate full- and part-time - -- When interviewing a candidate / collaborator we want to have more topics - -#### Technical - -- Unit testing - - Does he / she write good unit tests? - - Does he / she use coverage to understand what / how to test? -- Git / GitHub knowledge -- Linux knowledge -- Review churning - - Do the reviews take too long? -- Python knowledge / coding ability - -#### Process-related - -- Send good TODO email -- Follow our PR process -- Read the docs with attention -- Follow our organizational process -- Make and achieve ETAs -- Autonomy / Independence - - Does he / she need a lot of supervision to execute the tasks? 
-- Productivity -- Number of hours - - This is a minor metric: the number of hours doesn't really matter as long as - stuff is done - - On the other hand, if somebody consistently doesn't put enough time to get - the needed stuff done, it can become a problem From a5af608162669f8d53ae77a653d95150c2946f09 Mon Sep 17 00:00:00 2001 From: neomisule <119680709+neomisule@users.noreply.github.com> Date: Tue, 15 Apr 2025 17:05:56 -0400 Subject: [PATCH 028/193] HelpersTask464-use-is_header-in-hmarkdown (#552) * HelpersTask464: changed code to use is_header instead of regex directly * HelpersTask464: Fixed the comment and variable name --------- Co-authored-by: Neomi Co-authored-by: Sonya Nikiforova --- helpers/hmarkdown.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index f1db92320..104fb9c24 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -436,23 +436,17 @@ def extract_headers_from_markdown( hdbg.dassert_isinstance(txt, str) hdbg.dassert_lte(1, max_level) header_list: HeaderList = [] - # Parse an header like `# Header1` or `## Header2`. - header_pattern = re.compile(r"^(#+)\s+(.*)") # Process the input file to extract headers. for line_number, line in enumerate(txt.splitlines(), start=1): # TODO(gp): Use the iterator. # Skip the visual separators. if is_markdown_line_separator(line): continue - # TODO(gp): Use is_header - match = header_pattern.match(line) - if match: - # The number of '#' determines level. - level = len(match.group(1)) - if level <= max_level: - title = match.group(2).strip() - header_info = HeaderInfo(level, title, line_number) - header_list.append(header_info) + # Get the header level and title. + is_header_, level, title = is_header(line) + if is_header_ and level <= max_level: + header_info = HeaderInfo(level, title, line_number) + header_list.append(header_info) # Check the header list. 
if sanity_check: check_header_list(header_list) From 0fa19f6c13191923c41a24b48a81e57a868bba5a Mon Sep 17 00:00:00 2001 From: Peeyush Dyavarashetty <32363748+Peeyush4@users.noreply.github.com> Date: Tue, 15 Apr 2025 17:45:46 -0400 Subject: [PATCH 029/193] HelpersTask461 Add tests for remove_code_delimiters (#496) * First test case operation for test_hmarkdown.py * Use assert_equal(dedent=True), set eg 4, 5, set tests 1-7 and removed test_no_code * Removed additional functions * Capitalized Python in comments and set code in inline * Remove hprint.dedent() in test4 in test_hmarkdown.py * Removed & and reduced content in test 3 * Change in comment for test6 --------- Co-authored-by: Sonya Nikiforova --- helpers/test/test_hmarkdown.py | 191 +++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index c25f401c2..8f6d54e60 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1240,3 +1240,194 @@ def _helper_process( # Check output. actual = hio.from_file(write_file) self.assertEqual(actual, "\n".join(expected)) + + +# ############################################################################# +# Test_remove_code_delimiters1 +# ############################################################################# + + +class Test_remove_code_delimiters1(hunitest.TestCase): + + def test1(self) -> None: + """ + Test a basic example. + """ + # Prepare inputs. + content = r""" + ```python + def hello_world(): + print("Hello, World!") + ``` + """ + content = hprint.dedent(content) + # Call function. + act = hmarkdo.remove_code_delimiters(content) + # Check output. + exp = r""" + def hello_world(): + print("Hello, World!") + """ + self.assert_equal(str(act), exp, dedent=True) + + def test2(self) -> None: + """ + Test an example with empty lines at the start and end. + """ + # Prepare inputs. 
+ content = r""" + + ```python + + def check_empty_lines(): + print("Check empty lines are present!") + + ``` + + """ + content = hprint.dedent(content) + # Call function. + act = hmarkdo.remove_code_delimiters(content) + # Check output. + exp = r""" + def check_empty_lines(): + print("Check empty lines are present!") + """ + self.assert_equal(str(act), exp, dedent=True) + + def test3(self) -> None: + """ + Test a markdown with headings, Python and yaml blocks. + """ + # Prepare inputs. + content = r""" + # Section 1 + + This section contains comment and python code. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." + + ```python + def greet(name): + return f"Hello, {name}!" + print(greet("World")) + ``` + + # Section 2 + + Key points below. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + ```yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + ``` + """ + content = hprint.dedent(content) + # Call function. + act = hmarkdo.remove_code_delimiters(content) + # Check output. + exp = r""" + # Section 1 + + This section contains comment and python code. + + > "Knowledge is like a tree, growing stronger with each branch of understanding." + + + def greet(name): + return f"Hello, {name}!" + print(greet("World")) + + + # Section 2 + + Key points below. + + - Case Study 1: Implementation in modern industry + - Case Study 2: Comparative analysis of traditional vs. modern methods + + yaml + future: + - AI integration + - Process optimization + - Sustainable solutions + + """ + self.assert_equal(str(act), exp, dedent=True) + + def test4(self) -> None: + """ + Test another markdown with headings and multiple indent Python blocks. + """ + # Prepare inputs. + content = _get_markdown_example5() + # Call function. + act = hmarkdo.remove_code_delimiters(content) + # Check output. 
+ exp = r""" + - Functions can be declared in the body of another function + - E.g., to hide utility functions in the scope of the function that uses them + + def print_integers(values): + + def _is_integer(value): + try: + return value == int(value) + except: + return False + + for v in values: + if _is_integer(v): + print(v) + + - Hello + """ + self.assert_equal(str(act), exp, dedent=True) + + def test5(self) -> None: + """ + Test an empty string. + """ + # Prepare inputs. + content = "" + # Call function. + act = hmarkdo.remove_code_delimiters(content) + # Check output. + exp = "" + self.assert_equal(str(act), exp, dedent=True) + + def test6(self) -> None: + """ + Test a Python and immediate markdown code block. + """ + # Prepare inputs. + content = r""" + ```python + def no_start_python(): + print("No mention of python at the start")``` + ``` + + ``` + A markdown paragraph contains + delimiters that needs to be removed. + ``` + """ + content = hprint.dedent(content) + # Call function. + act = hmarkdo.remove_code_delimiters(content) + # Check output. + exp = r""" + def no_start_python(): + print("No mention of python at the start") + + + + A markdown paragraph contains + delimiters that needs to be removed. 
+ """ + self.assert_equal(str(act), exp, dedent=True) From 94bd24190da996a7f5de0545e742aff7f53ed932 Mon Sep 17 00:00:00 2001 From: allenmatt10 <157498336+allenmatt10@users.noreply.github.com> Date: Wed, 16 Apr 2025 03:20:01 -0400 Subject: [PATCH 030/193] HelpersTask540_bare_links_capitalized_in_bullet_lists (#562) * HelpersTask540: bare links capitalized in md files * Updated nits * Updated comments --- linters/amp_fix_md_links.py | 11 ++++++ linters/test/test_amp_fix_md_links.py | 56 +++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/linters/amp_fix_md_links.py b/linters/amp_fix_md_links.py index 28bcb4c7f..134a755f5 100644 --- a/linters/amp_fix_md_links.py +++ b/linters/amp_fix_md_links.py @@ -29,6 +29,7 @@ FILE_PATH_REGEX = r"\.{0,2}\w*\/\S+\.[\w\.]+" HTML_LINK_REGEX = r'(.*?)' MD_LINK_REGEX = r"\[(.+)\]\(((?!#).*)\)" +BARE_LINK_REGEX = r"(?()]+)" def _make_path_absolute(path: str) -> str: @@ -345,6 +346,16 @@ def fix_links(file_name: str) -> Tuple[List[str], List[str], List[str]]: fig_pointer, updated_line, file_name, i ) warnings.extend(line_warnings) + # Bare URLs. + bare_link_matches = re.findall(BARE_LINK_REGEX, updated_line) + for bare_link in bare_link_matches: + # Convert bare URLs to Markdown-style links. + new_bare_link = bare_link.replace("Http://", "http://").replace( + "Https://", "https://" + ) + updated_line = updated_line.replace( + bare_link, f"[{new_bare_link}]({new_bare_link})" + ) # Store the updated line. updated_lines.append(updated_line) out_warnings = [w for w in warnings if len(w)] diff --git a/linters/test/test_amp_fix_md_links.py b/linters/test/test_amp_fix_md_links.py index 779584369..1a673d2d2 100644 --- a/linters/test/test_amp_fix_md_links.py +++ b/linters/test/test_amp_fix_md_links.py @@ -345,6 +345,62 @@ def test7(self) -> None: ] self.assertEqual(expected, updated_lines) + def test8(self) -> None: + """ + Test single bare link conversion to Markdown-style link. + """ + # Prepare inputs. 
+ text = r""" + https://gspread-pandas.readthedocs.io/en/latest/configuration.html + """ + file_name = "test_bare_links.md" + file_path = self.write_input_file(text, file_name) + # Run. + _, actual, _ = lafimdli.fix_links(file_path) + # Check. + expected = [ + "[https://gspread-pandas.readthedocs.io/en/latest/configuration.html](https://gspread-pandas.readthedocs.io/en/latest/configuration.html)", + ] + self.assertEqual(expected, actual) + + def test9(self) -> None: + """ + Test bulleted bare link conversion to Markdown-style link. + """ + # Prepare inputs. + text = r""" + - Http://gspread-pandas.readthedocs.io/en/latest/configuration.html + """ + file_name = "test_bare_links.md" + file_path = self.write_input_file(text, file_name) + # Run. + _, actual, _ = lafimdli.fix_links(file_path) + # Check. + expected = [ + "- [http://gspread-pandas.readthedocs.io/en/latest/configuration.html](http://gspread-pandas.readthedocs.io/en/latest/configuration.html)", + ] + self.assertEqual(expected, actual) + + def test10(self) -> None: + """ + Test multiple bare links conversion to Markdown-style links. + """ + # Prepare inputs. + text = r""" + http://github.com/google/styleguide/blob/gh-pages/docguide/style.md + - Https://github.com/causify-ai/tutorials/blob/master/llms/tutorial-openai_new.ipynb + """ + file_name = "test_bare_links.md" + file_path = self.write_input_file(text, file_name) + # Run. + _, actual, _ = lafimdli.fix_links(file_path) + # Check. 
+ expected = [ + "[http://github.com/google/styleguide/blob/gh-pages/docguide/style.md](http://github.com/google/styleguide/blob/gh-pages/docguide/style.md)", + "- [https://github.com/causify-ai/tutorials/blob/master/llms/tutorial-openai_new.ipynb](https://github.com/causify-ai/tutorials/blob/master/llms/tutorial-openai_new.ipynb)", + ] + self.assertEqual(expected, actual) + # ############################################################################# # Test_make_path_absolute From ce335fc4b9101a75c6ebb9cadf2b01bdef4fe2ae Mon Sep 17 00:00:00 2001 From: Vlad Date: Wed, 16 Apr 2025 16:36:38 +0700 Subject: [PATCH 031/193] CmampTask11789_Make_cmamp_GitHub_Actions_workflows_reusable (#505) * CmampTask11789: fix submodules * CmampTask11789: path * CmampTask11789: fix in the common_run_tests.yml * CmampTask11789: depth * CmampTask11789: ssh * CmampTask11789: name * CmampTask11789: swap names * CmampTask11789: change var * CmampTask11789: caller * CmampTask11789: test secrets * CmampTask11789: playing around * CmampTask11789: one more token * CmampTask11789: secrets * CmampTask11789: fix submodules * CmampTask11789: path * CmampTask11789: fix in the common_run_tests.yml * CmampTask11789: depth * CmampTask11789: ssh * CmampTask11789: name * CmampTask11789: swap names * CmampTask11789: change var * CmampTask11789: caller * CmampTask11789: test secrets * CmampTask11789: playing around * CmampTask11789: one more token * CmampTask11789: secrets * CmampTask11789: revert debug and change token * CmampTask11789: fix token * CmampTask11789: revert token * CmampTask11789: script for PYTHONPATH * CmampTask11789: delete ssh update pythonpath * CmampTask11789: simplify * CmampTask11789: recursive * CmampTask11789: lint and delete script * CmampTask11789: comment * CmampTask11789: remove unnecessary comments * CmampTask11789: comment * CmampTask11789: echo --------- Co-authored-by: vlady Co-authored-by: Samarth KaPatel --- .github/workflows/common_linter.yml | 8 ++++++++ 
.github/workflows/common_run_tests.yml | 23 +++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/.github/workflows/common_linter.yml b/.github/workflows/common_linter.yml index a5ff0ca1d..2db48fa6c 100644 --- a/.github/workflows/common_linter.yml +++ b/.github/workflows/common_linter.yml @@ -54,8 +54,16 @@ jobs: # in order to compare the current branch with the master branch. # The `0` means unlimited depth. fetch-depth: 0 + submodules: true token: ${{ secrets.GITHUB_TOKEN }} + # To access modules in `amp` and `helpers_root`, make sure PYTHONPATH includes + # them, just as it's set in `setenv.sh`. + # TODO(Grisha): re-use the approach from `dev_scripts_cmamp/thin_client/setenv.sh`. + - name: Update PYTHONPATH + if: ${{ github.event.repository.name != 'helpers' }} + run: echo "PYTHONPATH=.:helpers_root" >> $GITHUB_ENV + # Install packages that are required to run the job via GH. - name: Install dependencies run: | diff --git a/.github/workflows/common_run_tests.yml b/.github/workflows/common_run_tests.yml index 6684290df..caaacd5a4 100644 --- a/.github/workflows/common_run_tests.yml +++ b/.github/workflows/common_run_tests.yml @@ -50,7 +50,7 @@ jobs: # This is needed to pull the docker image from GHCR. - name: Login to GHCR - run: docker login ghcr.io -u gpsaggese -p ${{ secrets.GITHUB_TOKEN }} + run: docker login ghcr.io -u gpsaggese -p ${{ secrets.GH_ACTION_ACCESS_TOKEN }} # Make everything accessible by any user to avoid permission errors. - name: Cleanup @@ -61,13 +61,20 @@ jobs: - name: Checkout code uses: actions/checkout@v2 with: - submodules: true - token: ${{ secrets.GITHUB_TOKEN }} + submodules: recursive + fetch-depth: 0 + token: ${{ secrets.GH_ACTION_ACCESS_TOKEN }} - # To see the modules in `amp`, PYTHONPATH needs to include `amp` in the - # same way we do in `setenv.sh`. + # To access modules in `amp` and `helpers_root`, make sure PYTHONPATH includes + # them, just as it's set in `setenv.sh`. 
- name: Update PYTHONPATH - run: echo "PYTHONPATH=.:amp" >> $GITHUB_ENV + run: | + PYTHONPATH="$(realpath .)" + # Add all submodule paths (recursively). + SUBMODULES_PATHS=$(git submodule foreach --quiet --recursive 'echo $(pwd)' | paste -sd:) + PYTHONPATH="$PYTHONPATH:$SUBMODULES_PATHS" + # Export final PYTHONPATH to the environment. + echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV # Install packages that are required to run the job via GH. - name: Install dependencies @@ -92,7 +99,7 @@ jobs: CSFY_AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }} CSFY_AWS_DEFAULT_REGION: ${{ env.AWS_DEFAULT_REGION }} CSFY_AWS_S3_BUCKET: ${{ vars.CSFY_AWS_S3_BUCKET }} - GH_ACTION_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_ACTION_ACCESS_TOKEN: ${{ secrets.GH_ACTION_ACCESS_TOKEN }} run: invoke ${{ inputs.test-name }} # In case of manual run, this workflow step will not be skipped and @@ -102,7 +109,7 @@ jobs: if: ${{ always() && github.event_name == 'workflow_dispatch' }} uses: guibranco/github-status-action-v2@main with: - authToken: ${{ secrets.GITHUB_TOKEN }} + authToken: ${{ secrets.GH_ACTION_ACCESS_TOKEN }} context: ${{ inputs.test-name }} description: 'Job is done (manual run)' state: ${{ job.status }} From 466c57245b353ba1e5d42634043869927b1b2725 Mon Sep 17 00:00:00 2001 From: Peeyush Dyavarashetty <32363748+Peeyush4@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:53:56 -0400 Subject: [PATCH 032/193] HelpersTask533 Incorrect header level update after code blocks (#555) * Added code block boundary and its checks in fix_md_headers() * Added testcase and small change in code_boundary comment section * Removed unnecessary files * Using hstring to solve the issue * Remove unnecessary test.txt file * Add comment for variable that informs lines inside code blocks * Add good comment --- linters/amp_check_md_toc_headers.py | 5 +- .../Test_fix_md_headers.test2/output/test.txt | 14 ----- linters/test/test_amp_check_md_toc_headers.py | 52 ++++++++++++++++--- 3 files changed, 49 
insertions(+), 22 deletions(-) delete mode 100644 linters/test/outcomes/Test_fix_md_headers.test2/output/test.txt diff --git a/linters/amp_check_md_toc_headers.py b/linters/amp_check_md_toc_headers.py index e72874634..148e26573 100644 --- a/linters/amp_check_md_toc_headers.py +++ b/linters/amp_check_md_toc_headers.py @@ -17,6 +17,7 @@ import helpers.hdbg as hdbg import helpers.hio as hio import helpers.hparser as hparser +import helpers.hstring as hstring import linters.action as liaction import linters.utils as liutils @@ -41,10 +42,12 @@ def fix_md_headers(lines: List[str], file_name: str) -> List[str]: """ fixed_lines = [] last_header_level = 0 + # Get code block indices to exclude comment symbols from header processing. + code_line_indices = hstring.get_code_block_line_indices(lines) for idx, line in enumerate(lines): fixed_line = line match = HEADER_REGEX.match(line) - if match: + if match and idx not in code_line_indices: # Count the number of leading `#`. current_level = len(match.group(1)) # Capture the rest of the line (after the initial `#` header). 
diff --git a/linters/test/outcomes/Test_fix_md_headers.test2/output/test.txt b/linters/test/outcomes/Test_fix_md_headers.test2/output/test.txt deleted file mode 100644 index a53db1a91..000000000 --- a/linters/test/outcomes/Test_fix_md_headers.test2/output/test.txt +++ /dev/null @@ -1,14 +0,0 @@ -# linter warnings - - -# linted file - - -# Given Header level 1; no change - -## Given Header level 3; change to 2 - -## Given Header level 2; no change - -### Given Header level 4; change to 3 - \ No newline at end of file diff --git a/linters/test/test_amp_check_md_toc_headers.py b/linters/test/test_amp_check_md_toc_headers.py index 78178610c..633364c30 100644 --- a/linters/test/test_amp_check_md_toc_headers.py +++ b/linters/test/test_amp_check_md_toc_headers.py @@ -56,13 +56,16 @@ def test2(self) -> None: lines = hio.from_file(file_path).splitlines() updated_lines = lacmtohe.fix_md_headers(lines, file_path) # Check. - output = "\n".join( - ["# linter warnings", ""] - + [] - + ["", "# linted file", ""] - + updated_lines - ) - self.check_string(output, purify_text=True) + output = """ +# Given Header level 1; no change + +## Given Header level 3; change to 2 + +## Given Header level 2; no change + +### Given Header level 4; change to 3 + """ + self.assertEqual(updated_lines, output.splitlines()) def test3(self) -> None: """ @@ -91,6 +94,41 @@ def test3(self) -> None: updated_lines, txt_with_non_header_pound_signs.splitlines() ) + def test4(self) -> None: + """ + Test Python and bash code along with headers. + """ + txt_with_python_and_bash_code = """ +# Header 1 + +## Header 2 + + ```bash +# Comment. + > i run_fast_tests + ``` +### Side heading 1 + +```python +# Comment. +for i in range(10): + print(i) +``` + +### Side heading 2 + """ + file_name = "txt_with_python_and_bash_code.md" + file_path = self._write_input_file( + txt_with_python_and_bash_code, file_name + ) + # Run. 
+ lines = hio.from_file(file_path).splitlines() + updated_lines = lacmtohe.fix_md_headers(lines, file_path) + # Check. + self.assertEqual( + updated_lines, txt_with_python_and_bash_code.splitlines() + ) + def _write_input_file(self, txt: str, file_name: str) -> str: """ Write test content to a file. From 45ed5d428fd3d8013a90194464aa937b8834ef52 Mon Sep 17 00:00:00 2001 From: Heanh Sok Date: Wed, 16 Apr 2025 11:20:23 -0400 Subject: [PATCH 033/193] CmampTask11895_Build_failure___cmamp___Update_helpers_submodule___14474016777 (#571) * Checkpoint * checkpoint * checkpoint * checkpoint * lint --- helpers/hserver.py | 1 - helpers/hunit_test_utils.py | 2 +- helpers/test/test_repo_config_amp.py | 64 ++++++++++++++-------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index bf6e15148..60441b88b 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -1131,7 +1131,6 @@ def config_func_to_str() -> str: "is_inside_docker", "is_inside_ecs_container", "is_inside_unit_test", - "is_mac", "is_prod_csfy", "run_docker_as_root", "skip_submodules_test", diff --git a/helpers/hunit_test_utils.py b/helpers/hunit_test_utils.py index adb65e302..0d0ddcbb2 100644 --- a/helpers/hunit_test_utils.py +++ b/helpers/hunit_test_utils.py @@ -476,7 +476,7 @@ def execute_only_on_mac(*, version: Optional[str] = None) -> None: def check_env_to_str( self_: Any, exp: str, *, skip_secrets_vars: bool = False ) -> None: - act = henv.env_to_str(add_system_signature=False) + act = henv.env_to_str(system_signature=False) act = hunitest.filter_text("get_name", act) act = hunitest.filter_text("get_repo_map", act) act = hunitest.filter_text("CSFY_HOST_", act) diff --git a/helpers/test/test_repo_config_amp.py b/helpers/test/test_repo_config_amp.py index 41b8734db..f9960a7b5 100644 --- a/helpers/test/test_repo_config_amp.py +++ b/helpers/test/test_repo_config_amp.py @@ -233,39 +233,41 @@ def test_amp_ci(self) -> None: reason="Run only in //cmamp", ) 
def test_cmamp_ci(self) -> None: - # hunteuti.execute_only_on_ci() + hunteuti.execute_only_on_ci() # exp = r""" - # Repo config: - # repo_config.config - get_host_name='github.com' - get_html_dir_to_url_mapping='{'s3://cryptokaizen-html': 'http://172.30.2.44', 's3://cryptokaizen-html/v2': 'http://172.30.2.44/v2'}' - get_invalid_words='[]' - get_docker_base_image_name='cmamp' - # Server config: - # hserver.config - enable_privileged_mode()='True' - get_docker_shared_group()='' - get_docker_user()='' - get_shared_data_dirs()='None' - has_dind_support()='True' - has_docker_sudo()='False' - is_AM_S3_available()='True' - is_CK_S3_available()='True' - is_dev4()='False' - is_dev_csfy()='False' - is_inside_ci()='True' - is_inside_docker()='True' - is_mac(version='Catalina')='False' - is_mac(version='Monterey')='False' - is_mac(version='Sequoia')='False' - is_mac(version='Ventura')='False' - run_docker_as_root()='True' - skip_submodules_test()='False' - use_docker_db_container_name_to_connect()='False' - use_docker_network_mode_host()='False' - use_docker_sibling_containers()='False' - # Env vars: + # Repo config + get_host_name='github.com' + get_html_dir_to_url_mapping='{'s3://cryptokaizen-html': 'http://172.30.2.44', 's3://cryptokaizen-html/v2': 'http://172.30.2.44/v2'}' + get_invalid_words='[]' + get_docker_base_image_name='cmamp' + # Server config + enable_privileged_mode='True' + get_docker_shared_group='' + get_docker_user='' + get_host_user_name='runner' + get_shared_data_dirs='None' + has_dind_support='True' + has_docker_sudo='False' + is_AM_S3_available='True' + is_CK_S3_available='True' + is_dev4='False' + is_dev_csfy='False' + is_external_linux='False' + is_host_mac='False' + is_ig_prod='False' + is_inside_ci='True' + is_inside_docker='True' + is_inside_ecs_container='False' + is_inside_unit_test='True' + is_prod_csfy='False' + run_docker_as_root='True' + skip_submodules_test='False' + use_docker_db_container_name_to_connect='False' + 
use_docker_network_mode_host='False' + use_docker_sibling_containers='False' + use_main_network='False' + # Env vars CSFY_CI='true' CSFY_ECR_BASE_PATH='$CSFY_ECR_BASE_PATH' CSFY_ENABLE_DIND='1' From ffaba479bad15aab471a8c335e718bfc4d180d8d Mon Sep 17 00:00:00 2001 From: neomisule <119680709+neomisule@users.noreply.github.com> Date: Wed, 16 Apr 2025 14:35:21 -0400 Subject: [PATCH 034/193] HelpersTask576: corrected the command typo (#577) Co-authored-by: Neomi Co-authored-by: Sonya Nikiforova --- docs/coding/all.run_unit_tests.how_to_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/coding/all.run_unit_tests.how_to_guide.md b/docs/coding/all.run_unit_tests.how_to_guide.md index 9670b2bc3..8caf7cd3b 100644 --- a/docs/coding/all.run_unit_tests.how_to_guide.md +++ b/docs/coding/all.run_unit_tests.how_to_guide.md @@ -69,7 +69,7 @@ # Run only fast tests. > i run_fast_tests # Run only slow tests. - > i run_slow_test + > i run_slow_tests # Run only superslow tests. > i run_superslow_tests ``` From f33266c4f0b62864e96293934a278b215b4e54ce Mon Sep 17 00:00:00 2001 From: Sandeep Thalapanane <113313930+sandeepthalapanane@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:00:33 -0400 Subject: [PATCH 035/193] HelpersTask280_Markdown_code_blocks_are_reflowed (#573) * HelpersTask280: Change markdown block to text block, add unit tests * HelpersTask280: Revert back code from linter restructuring * HelpersTask280: Add TODO to unit test --------- Co-authored-by: Sonya Nikiforova --- .../all.devops_docker.how_to_guide.md | 8 ++--- .../output/test.txt | 29 +++++++++++++++++++ linters/test/test_amp_dev_scripts.py | 29 ++++++++++++++++++- 3 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt diff --git a/docs/work_tools/all.devops_docker.how_to_guide.md b/docs/work_tools/all.devops_docker.how_to_guide.md index 985f8d303..367ec817c 100644 --- 
a/docs/work_tools/all.devops_docker.how_to_guide.md +++ b/docs/work_tools/all.devops_docker.how_to_guide.md @@ -242,7 +242,7 @@ [`/devops/docker_build/pyproject.toml`](/devops/docker_build/pyproject.toml) file to the `[tool.poetry.dependencies]` section - E.g., to add `pytest-timeout` do: - ```markdown + ```text [tool.poetry.dependencies] ... pytest-timeout = "*" @@ -331,7 +331,7 @@ ``` - The command above will generate `./tmp.requirements.txt` with the list of the imported packages, e.g., - ```markdown + ```text amp==1.1.4 async_solipsism==0.3 beautifulsoup4==4.11.1 @@ -548,8 +548,8 @@ registries, the subsequent step involves pushing the `dev` image to GHCR registry. However, this action currently requires manual execution due to restricted access - - Access to the `causify-ai` packages is limited. To gain access, kindly - reach out to GP, Samarth or Vlad + - Access to the `causify-ai` packages is limited. To gain access, kindly reach + out to GP, Samarth or Vlad - To proceed, perform a Docker login using your GitHub username and PAT (Personal Access Token): ```bash diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt new file mode 100644 index 000000000..d45d7145e --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt @@ -0,0 +1,29 @@ +# linter log +HH:MM:SS - INFO  hdbg.py init_logger:{LINE_NUM} > cmd='linters/base.py --files $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md' +HH:MM:SS - INFO  base.py _run_linter:{LINE_NUM} Using num_threads='serial' since there is only one file to lint +HH:MM:SS - INFO  base.py _lint:{LINE_NUM} +Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md' +//////////////////////////////////////////////////////////////////////////////// +linter_warnings.txt 
+//////////////////////////////////////////////////////////////////////////////// +file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md'] +actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +//////////////////////////////////////////////////////////////////////////////// +$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md' is not referenced in README.md [check_md_reference] +HH:MM:SS - INFO  hdbg.py init_logger:{LINE_NUM} > cmd='./dev_scripts_helpers/documentation/lint_notes.py -i $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md --in_place' [lint_md] + +//////////////////////////////////////////////////////////////////////////////// + +# linter file + + +- [Header1](#header1) + + + +# Header1 + +```text +test text +nothing should be changed +``` diff --git a/linters/test/test_amp_dev_scripts.py b/linters/test/test_amp_dev_scripts.py index eaf2e14ea..f0c3217f9 100644 --- a/linters/test/test_amp_dev_scripts.py +++ b/linters/test/test_amp_dev_scripts.py @@ -21,6 +21,7 @@ class Test_linter_py1(hunitest.TestCase): + def write_input_file(self, txt: str, file_name: str) -> Tuple[str, str]: """ Write test content to the file. @@ -126,6 +127,32 @@ def test_linter_md1(self) -> None: # Check. self.check_string(output, purify_text=True) + # TODO(heanh): Remove the skip when the dockerized executable issue is resolved. 
+ @pytest.mark.slow("About 6 sec") + @pytest.mark.skip( + "Skip due to issue related to dockerized executable. See HelpersTask553." + ) + def test_linter_md2(self) -> None: + """ + Run Linter as executable on Markdown file with a fenced block. + """ + txt = r""" +# Header1 +```text +test text +nothing should be changed +``` + """ + # Run. + file_name = "hello.md" + as_system_call = True + output = self.run_linter(txt, file_name, as_system_call) + # Remove the line: + # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table' + output = hunitest.filter_text("No tags for table", output) + # Check. + self.check_string(output, purify_text=True) + def test_linter_txt1(self) -> None: """ Run Linter as executable on a txt file with empty lines at the end. @@ -510,4 +537,4 @@ def _run_linter( output.extend(txt.split("\n")) # ////////////// output_as_str = "\n".join(output) - return output_as_str + return output_as_str \ No newline at end of file From fb9490d93e6576ee2bac7328817845fce0406f60 Mon Sep 17 00:00:00 2001 From: Indro <69083680+indrayudd@users.noreply.github.com> Date: Thu, 17 Apr 2025 04:58:30 -0400 Subject: [PATCH 036/193] HelpersTask445_Fix_bug_in_removed_blocks_storage_linter (#535) * Functional Commit * Functional Commit * Lint commit + minor bug fix in _execute * Lint commit + minor bugfix in _execute fn * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Reviewer Changes - Functional Commit * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Reviewer Changes - Functional Commit * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Reviewer Changes - Linter commit * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Bug Fix amp_doc_formatter.py to pass slow tests * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Bug Fix amp_doc_formatter.py to pass slow tests * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Make variables clearer and docstrings more comprehensive * 
HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Remove tmp file * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Reviewer changes. * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Test Input Files added. * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Added missed test 9 input * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Reviewer Changes. * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Rearrange test.txt files. * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Reviewer Changes. * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: Add renamed test. * HelpersTask445: Fix typo * HelpersTask445: Fix indent * HelpersTask445_Fix_bug_in_removed_blocks_storage_linter: test_amp_doc_dormatter rearranged. --------- Co-authored-by: Indrayudd Roy Chowdhury Co-authored-by: Indrayudd Roy Chowdhury Co-authored-by: Sonya Nikiforova --- helpers/hstring.py | 27 +++++++ .../TestGetDocstrings.test1/input/test.txt | 18 +++++ helpers/test/test_hstring.py | 30 ++++++++ linters/amp_doc_formatter.py | 64 +++++++++++++++- .../input/test.txt | 26 +++++++ linters/test/test_amp_doc_formatter.py | 76 ++++++++++++++++--- 6 files changed, 228 insertions(+), 13 deletions(-) create mode 100644 helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt create mode 100644 linters/test/outcomes/TestFindUnbalancedBackticks.test1/input/test.txt diff --git a/helpers/hstring.py b/helpers/hstring.py index 678852876..de401a1b9 100644 --- a/helpers/hstring.py +++ b/helpers/hstring.py @@ -89,12 +89,39 @@ def get_docstring_line_indices(lines: List[str]) -> List[int]: # Switch the docstring flag. # pylint: disable=modified-iterating-dict quotes[q] = not quotes[q] + if q in ('"""', "'''") and not quotes[q]: + # A triple-quote has just been closed. + # Reset the triple backticks flag. + quotes["```"] = False if any(quotes.values()): # Store the index if the quotes have been opened but not closed yet. 
docstring_line_indices.append(i) return docstring_line_indices +def get_docstrings(lines: List[str]) -> List[List[int]]: + """ + Get line indices grouped together by the docstring they belong to. + + :param lines: lines from the file to process + :return: grouped lines within docstrings + """ + # Get indices of lines that are within docstrings. + doc_indices = get_docstring_line_indices(lines) + # Group these indices into consecutive docstrings. + docstrings = [] + if doc_indices: + current_docstring = [doc_indices[0]] + for idx in doc_indices[1:]: + if idx == current_docstring[-1] + 1: + current_docstring.append(idx) + else: + docstrings.append(current_docstring) + current_docstring = [idx] + docstrings.append(current_docstring) + return docstrings + + # TODO(gp): GFI. Move to hpython_code.py def get_code_block_line_indices(lines: List[str]) -> List[int]: """ diff --git a/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt b/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt new file mode 100644 index 000000000..8c6bdf3cf --- /dev/null +++ b/helpers/test/outcomes/TestGetDocstrings.test1/input/test.txt @@ -0,0 +1,18 @@ +def func1(): + """ + First function. + + ``` + foo + ``` + """ + + +def func2(): + """ + Second function. 
+ + ``` + foo + ``` + """ \ No newline at end of file diff --git a/helpers/test/test_hstring.py b/helpers/test/test_hstring.py index 252d3dfa1..08418e9b7 100644 --- a/helpers/test/test_hstring.py +++ b/helpers/test/test_hstring.py @@ -1,5 +1,7 @@ +import os from typing import List, Tuple +import helpers.hio as hio import helpers.hstring as hstring import helpers.hunit_test as hunitest @@ -241,3 +243,31 @@ def test_assert_equal1(self) -> None: """ expected = ["```", "Test one."] self.helper(code, expected) + + +# ############################################################################# +# TestGetDocstrings +# ############################################################################# + + +class TestGetDocstrings(hunitest.TestCase): + + def test1(self) -> None: + """ + Test that grouped lines within docstrings are correctly returned. + """ + # Prepare inputs. + test_get_docstring_lines_input_dir = self.get_input_dir() + text_file_path = os.path.join( + test_get_docstring_lines_input_dir, "test.txt" + ) + text = hio.from_file(text_file_path) + lines = text.splitlines() + # Run. + actual = hstring.get_docstrings(lines) + # Check. + expected = [ + [1, 2, 3, 4, 5, 6], + [11, 12, 13, 14, 15, 16], + ] + self.assertEqual(actual, expected) diff --git a/linters/amp_doc_formatter.py b/linters/amp_doc_formatter.py index 7b08a4d5e..191187b02 100644 --- a/linters/amp_doc_formatter.py +++ b/linters/amp_doc_formatter.py @@ -49,6 +49,47 @@ def check_if_possible(self) -> bool: check: bool = hsystem.check_exec(self._executable) return check + @staticmethod + def _find_unbalanced_triple_backticks(file_name: str) -> List[int]: + """ + Check if the file contains contains docstrings with unbalanced triple + backticks. + + E.g., ''' + ``` + ^ Unbalanced backticks here. You'd normally expect it to be + closed below. 
+ ''' + + If the docstring contains triple backticks that are unbalanced (opened but not closed), + the whole docstring should be passed to `docformatter` without removing the code block + wrapped in those backticks. + + :param file_name: file to process + :return: all the starting indices of docstrings where the + unbalanced backticks exist + """ + contents = hio.from_file(file_name) + lines = contents.splitlines() + # Get lines that are within docstrings. + docstrings = hstring.get_docstrings(lines) + idxs_docstrings_with_unbalanced_backticks = [] + # Process each docstring. + for docstring in docstrings: + leftmost_triple_backticks_count = 0 + for idx in docstring: + if lines[idx].lstrip().startswith("```"): + # Count this triple backticks at the leftmost position. + leftmost_triple_backticks_count += 1 + if leftmost_triple_backticks_count % 2 != 0: + # Odd number of leftmost triple backticks in this docstring. + # Append the docstring that has unbalanced triple backticks. + # Convert zero-indexed numbers to one-indexed line numbers. + # This would accurately link it to the part of the code; + # Where the docstring starts on code editors. + idxs_docstrings_with_unbalanced_backticks.append(docstring[0] + 1) + return idxs_docstrings_with_unbalanced_backticks + @staticmethod def _remove_ignored_docstrings(file_name: str) -> Dict[str, str]: """ @@ -149,7 +190,8 @@ def _restore_ignored_docstrings( @staticmethod def _restore_removed_code_blocks( - file_name: str, removed_blocks_storage: Dict[str, List[str]] + file_name: str, + removed_blocks_storage: Dict[str, List[str]], ) -> None: """ Restore code blocks that have been previously removed. @@ -188,9 +230,16 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: if self.skip_if_not_py(file_name): # Apply only to Python files. return [] + # Check for unbalanced backticks. 
+ idxs_docstrings_with_unbalanced_backticks = ( + self._find_unbalanced_triple_backticks(file_name) + ) # Clear and store ignored docstrings and code. _ignored_docstrings = self._remove_ignored_docstrings(file_name) - _removed_code = self._remove_code_blocks(file_name) + if idxs_docstrings_with_unbalanced_backticks: + _removed_code = {} + else: + _removed_code = self._remove_code_blocks(file_name) # Execute docformatter. opts = "--make-summary-multi-line --pre-summary-newline --in-place" cmd = f"{self._executable} {opts} {file_name}" @@ -198,7 +247,16 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: _, output = liutils.tee(cmd, self._executable, abort_on_error=False) # Restore ignored docstrings and code. self._restore_ignored_docstrings(file_name, _ignored_docstrings) - self._restore_removed_code_blocks(file_name, _removed_code) + if _removed_code: + self._restore_removed_code_blocks(file_name, _removed_code) + if idxs_docstrings_with_unbalanced_backticks: + # Append generated warnings. + for start_idx in idxs_docstrings_with_unbalanced_backticks: + output.append( + f"{file_name}:{start_idx}: Found unbalanced triple backticks; " + f"make sure both opening and closing backticks are the leftmost" + f" element of their line" + ) return output diff --git a/linters/test/outcomes/TestFindUnbalancedBackticks.test1/input/test.txt b/linters/test/outcomes/TestFindUnbalancedBackticks.test1/input/test.txt new file mode 100644 index 000000000..f46e4abd8 --- /dev/null +++ b/linters/test/outcomes/TestFindUnbalancedBackticks.test1/input/test.txt @@ -0,0 +1,26 @@ +def func1(): + """ + First function. + + ``` + Valid backticks. + ``` + """ + + +def func2(): + """ + Second function. + + ``` + Missing closing backticks. + """ + + +def func3(): + """ + Third function. + + ``` + Missing closing backticks. 
+ """ \ No newline at end of file diff --git a/linters/test/test_amp_doc_formatter.py b/linters/test/test_amp_doc_formatter.py index 3ca26477a..fa71c10ef 100644 --- a/linters/test/test_amp_doc_formatter.py +++ b/linters/test/test_amp_doc_formatter.py @@ -1,4 +1,5 @@ import os +from typing import List, Tuple import helpers.hio as hio import helpers.hunit_test as hunitest @@ -28,7 +29,7 @@ def test1(self) -> None: Test 2. """ ''' - actual = self._docformatter(text) + actual, _, _ = self._docformatter(text) self.assertEqual(expected.strip(), actual.strip()) def test2(self) -> None: @@ -47,7 +48,7 @@ def test2(self) -> None: This is a test. """ ''' - actual = self._docformatter(text) + actual, _, _ = self._docformatter(text) self.assertEqual(expected.strip(), actual.strip()) def test3(self) -> None: @@ -62,7 +63,7 @@ def test3(self) -> None: This is a test. """ ''' - actual = self._docformatter(text) + actual, _, _ = self._docformatter(text) self.assertEqual(expected.strip(), actual.strip()) def test4(self) -> None: @@ -87,7 +88,7 @@ def sample_method() -> None: """ ''' expected = text - actual = self._docformatter(text) + actual, _, _ = self._docformatter(text) self.assertEqual(expected, actual) def test5(self) -> None: @@ -118,7 +119,7 @@ def sample_method2() -> None: This is a test. """ ''' - actual = self._docformatter(text) + actual, _, _ = self._docformatter(text) self.assertEqual(expected, actual) def test6(self) -> None: @@ -129,19 +130,74 @@ def test6(self) -> None: text_file_path = os.path.join(test6_input_dir, "test.txt") text = hio.from_file(text_file_path) expected = text - actual = self._docformatter(text) + actual, _, _ = self._docformatter(text) self.assertEqual(expected, actual) - def _docformatter(self, text: str) -> str: + def test7(self) -> None: + """ + Test that unbalanced backticks are correctly warned of. + """ + # Prepare inputs. + text = ''' +""" +E.g., ``` +foo +``` +:param x: a parameter that the function takes in +""" + ''' + # Run. 
+ actual_content, actual_warning_list, temp_file = self._docformatter(text) + actual_warnings = "\n".join(actual_warning_list) + expected_warnings = ( + f"{temp_file}:2: Found unbalanced triple backticks; " + f"make sure both opening and closing backticks are " + f"the leftmost element of their line" + ) + # Check. + self.assertEqual(actual_warnings, expected_warnings) + self.assert_equal(actual_content, text, fuzzy_match=True) + + def _docformatter(self, text: str) -> Tuple[str, List[str], str]: """ Run the docformatter on the temp file in scratch space. :param text: content to be formatted - :return: modified content after formatting + :param scratch_dir: directory for temp files + :return: + - modified content after formatting + - warnings + - filepath for temporary file """ scratch_dir = self.get_scratch_space() temp_file = os.path.join(scratch_dir, "temp_file.py") hio.to_file(temp_file, text) - lamdofor._DocFormatter().execute(file_name=temp_file, pedantic=0) + warnings = lamdofor._DocFormatter().execute( + file_name=temp_file, pedantic=0 + ) content: str = hio.from_file(temp_file) - return content + return content, warnings, temp_file + + +# ############################################################################# +# TestFindUnbalancedBackticks +# ############################################################################# + + +class TestFindUnbalancedBackticks(hunitest.TestCase): + + def test1(self) -> None: + """ + Test that the starting indices of docstrings with unbalanced backticks + are correctly returned. + """ + # Prepare inputs. + test_get_docstring_lines_input_dir = self.get_input_dir() + file_path = os.path.join(test_get_docstring_lines_input_dir, "test.txt") + # Run. + actual = lamdofor._DocFormatter._find_unbalanced_triple_backticks( + file_path + ) + # Check. 
+ expected = [12, 21] + self.assertEqual(actual, expected) From fab24cf8564b60e3413938d8ed1bc82214885d52 Mon Sep 17 00:00:00 2001 From: allenmatt10 <157498336+allenmatt10@users.noreply.github.com> Date: Thu, 17 Apr 2025 06:59:36 -0400 Subject: [PATCH 037/193] HelpersTask568_moving_line_in_golden_output (#584) * HelpersTask568: Filtered out glitchy line * Typo fix * Added todo and skip markers back --- .../output/test.txt | 1 - .../output/test.txt | 1 - linters/test/test_amp_dev_scripts.py | 18 ++++++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt index 7c8e81c1a..f795e58c2 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt @@ -9,7 +9,6 @@ linter_warnings.txt file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/tmp.scratch/hello.md'] actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// -$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/tmp.scratch/hello.md: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/tmp.scratch/hello.md' is not referenced in README.md [check_md_reference] HH:MM:SS - INFO  hdbg.py init_logger:{LINE_NUM} > cmd='./dev_scripts_helpers/documentation/lint_notes.py -i $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/tmp.scratch/hello.md 
--in_place' [lint_md] //////////////////////////////////////////////////////////////////////////////// diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt index d45d7145e..412564229 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter_md2/output/test.txt @@ -9,7 +9,6 @@ linter_warnings.txt file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md'] actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// -$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md' is not referenced in README.md [check_md_reference] HH:MM:SS - INFO  hdbg.py init_logger:{LINE_NUM} > cmd='./dev_scripts_helpers/documentation/lint_notes.py -i $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md2/tmp.scratch/hello.md --in_place' [lint_md] //////////////////////////////////////////////////////////////////////////////// diff --git a/linters/test/test_amp_dev_scripts.py b/linters/test/test_amp_dev_scripts.py index f0c3217f9..c310c5e3f 100644 --- a/linters/test/test_amp_dev_scripts.py +++ b/linters/test/test_amp_dev_scripts.py @@ -121,9 +121,12 @@ def test_linter_md1(self) -> None: file_name = "hello.md" as_system_call = True output = self.run_linter(txt, file_name, 
as_system_call) - # Remove the line: - # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table' - output = hunitest.filter_text("No tags for table", output) + # Remove the lines: + # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table'. + # '$GIT_ROOT/linters/test/outcomes/.../hello.md: is not referenced in README.md'. + log_filters = ["No tags for table", "is not referenced in README.md"] + for log_filter in log_filters: + output = hunitest.filter_text(log_filter, output) # Check. self.check_string(output, purify_text=True) @@ -147,9 +150,12 @@ def test_linter_md2(self) -> None: file_name = "hello.md" as_system_call = True output = self.run_linter(txt, file_name, as_system_call) - # Remove the line: - # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table' - output = hunitest.filter_text("No tags for table", output) + # Remove the lines: + # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table'. + # '$GIT_ROOT/linters/test/outcomes/.../hello.md: is not referenced in README.md'. + log_filters = ["No tags for table", "is not referenced in README.md"] + for log_filter in log_filters: + output = hunitest.filter_text(log_filter, output) # Check. 
self.check_string(output, purify_text=True) From 285aa5adf6475fc5c2bfb9a6d09ac961ce71667b Mon Sep 17 00:00:00 2001 From: allenmatt10 <157498336+allenmatt10@users.noreply.github.com> Date: Thu, 17 Apr 2025 07:37:07 -0400 Subject: [PATCH 038/193] HelpersTask334_avoid_empty_lines_in_code (#528) * Before implementing code to replace empty lines * Before implementing code to replace empty lines * Updated code and tests to remove empty lines * Updated golden outcomes * Updated golden outcomes * Updated additional golden outcomes * Updated additional outcomes * Updated test cases to input dir and other nits * HelpersTask51_Improve_latex_toolchain_11_13 (#519) * Improve * Improve * Improve * Helpers task406 create is external dev (#526) * Add is_external_dev() helper to detect local macOS or external Linux environments * Modify is_external_dev() * Made changes as per the PR review * Modify hserver to resolve merge commit * HelpersTask333 Convert VeraCrypt doc to Markdown (#513) * Added base how_to_use_veracrypt and files * Fixed name and position for veracrypt documentation * Set table of contents * Set all lines for code * Linter ran * Again Linter updated code * Set all the errors suggested. Linter is creating issues * Set minor code changes. 
Linter is creating trouble * Original docs set * Bigger comments in multiline * Removed old files * Factored function indices retreival in hstring.py, updated test cases and other nits * Factored function indices retreival in hstring.py, updated test cases and other nits * Removed period at the end of description in doc file * Added helper method, todo and updated comments * Reverted changes to linter doc to debug issue * Updated linter doc again * Updated comments and moved code between files * Updated comments, moved code, and renamed files * Updated some nits * Updated function as a single pass * Added logs * Added logs --------- Co-authored-by: GP Saggese <33238329+gpsaggese@users.noreply.github.com> Co-authored-by: Sandeep Thalapanane <113313930+sandeepthalapanane@users.noreply.github.com> Co-authored-by: Peeyush Dyavarashetty <32363748+Peeyush4@users.noreply.github.com> Co-authored-by: Sonya Nikiforova --- .../all.developing_linter.how_to_guide.md | 3 + linters/amp_remove_empty_lines_in_function.py | 134 ++++++++++++++++++ linters/base.py | 6 + .../output/test.txt | 2 +- .../output/test.txt | 2 +- .../output/test.txt | 2 +- .../output/test.txt | 2 +- .../output/test.txt | 2 +- .../output/test.txt | 2 +- .../output/test.txt | 2 +- .../input/test.txt | 24 ++++ .../output/test.txt | 22 +++ .../input/test.txt | 40 ++++++ .../output/test.txt | 36 +++++ .../input/test.txt | 61 ++++++++ .../output/test.txt | 53 +++++++ .../input/test.txt | 22 +++ .../output/test.txt | 22 +++ ...test_amp_remove_empty_lines_in_function.py | 54 +++++++ 19 files changed, 484 insertions(+), 7 deletions(-) create mode 100644 linters/amp_remove_empty_lines_in_function.py create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test1/input/test.txt create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test1/output/test.txt create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test2/input/test.txt create mode 100644 
linters/test/outcomes/Test_remove_empty_lines.test2/output/test.txt create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test3/input/test.txt create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test3/output/test.txt create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test4/input/test.txt create mode 100644 linters/test/outcomes/Test_remove_empty_lines.test4/output/test.txt create mode 100644 linters/test/test_amp_remove_empty_lines_in_function.py diff --git a/docs/coding/all.developing_linter.how_to_guide.md b/docs/coding/all.developing_linter.how_to_guide.md index 9f6209d18..935abd276 100644 --- a/docs/coding/all.developing_linter.how_to_guide.md +++ b/docs/coding/all.developing_linter.how_to_guide.md @@ -125,6 +125,9 @@ - A wrapper around [`jupytext`](https://jupytext.readthedocs.io) - Keeps paired `.ipynb` and `.py` files synchronized +- `amp_remove_empty_lines_in_function.py` + - Cleans up empty lines inside functions and methods + ### Checking if the files are in accordance with our rules (non-modifying) - `amp_check_file_size.py` diff --git a/linters/amp_remove_empty_lines_in_function.py b/linters/amp_remove_empty_lines_in_function.py new file mode 100644 index 000000000..7f1150c47 --- /dev/null +++ b/linters/amp_remove_empty_lines_in_function.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +""" +Remove empty lines within a function. + +Import as: + +import linters.amp_remove_empty_lines_in_function as larelinfu +""" +import argparse +import logging +import re +from typing import List + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hparser as hparser +import helpers.hstring as hstring +import linters.action as liaction +import linters.utils as liutils + +_LOG = logging.getLogger(__name__) + + +def _remove_empty_lines(text: str) -> List[str]: + """ + Process file to remove empty lines in functions. 
+ + :param text: file to process + :return: formatted file without empty lines in functions + """ + lines = text.splitlines() + # Extract indices of docstrings. + docstring_indices = set(hstring.get_docstring_line_indices(lines)) + cleaned_file = [] + inside_function = False + base_indent = 0 + for i, line in enumerate(lines): + stripped = line.strip() + # Match lines that define a function, for example, 'def func1():' or 'def func2(a, b):'. + match = re.match(r"(\s*)def\s+\w+", line) + if match: + inside_function = True + base_indent = len(match.group(1)) + _LOG.debug( + "Function header found at line %d with base indentation %d", + i, + base_indent, + ) + current_indent = len(line) - len(line.lstrip()) + if inside_function and i in docstring_indices and stripped == "": + # Keep empty lines inside the docstring. + cleaned_file.append("") + continue + if inside_function and stripped == "": + # Remove empty lines inside the function. + _LOG.debug("Removing empty line found at line %d inside function.", i) + continue + if inside_function and stripped != "" and current_indent <= base_indent: + # Retain trailing empty lines after the function, + # as Python doesn't distinguish between indented and non-indented empty lines, + # so we preserve them manually to avoid accidental removal. + if match: + # Retain empty lines between the previous function and the current function. + k = len(cleaned_file) + while lines[i - 1] == "": + if cleaned_file[k - 1] == "": + # Check if no functions precede the current one, + # then the empty lines are already retained. + i -= 1 + k -= 1 + else: + # Retain empty lines between two functions. + cleaned_file.append("") + i -= 1 + else: + # Retain empty lines between the previous function and the surrounding code. 
+ while lines[i - 1] == "": + cleaned_file.append("") + i -= 1 + inside_function = False + base_indent = 0 + cleaned_file.append(line) + return cleaned_file + + +# ############################################################################# +# _RemoveEmptyLines +# ############################################################################# + + +class _RemoveEmptyLines(liaction.Action): + + def check_if_possible(self) -> bool: + return True + + def _execute(self, file_name: str, pedantic: int) -> List[str]: + _ = pedantic + if self.skip_if_not_py(file_name): + # Apply only to Python files. + return [] + # Remove empty lines from functions in the file. + file_content = hio.from_file(file_name) + updated_lines = _remove_empty_lines(file_content) + # Save the updated file with cleaned functions. + liutils.write_file_back( + file_name, file_content.split("\n"), updated_lines + ) + return [] + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "files", + nargs="+", + action="store", + type=str, + help="files to process", + ) + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level) + action = _RemoveEmptyLines() + action.run(args.files) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/linters/base.py b/linters/base.py index da501edf5..349d30043 100755 --- a/linters/base.py +++ b/linters/base.py @@ -53,6 +53,7 @@ import linters.amp_normalize_import as lamnoimp import linters.amp_processjupytext as lampproc import linters.amp_pylint as lamppyli +import linters.amp_remove_empty_lines_in_function as larelinfu import linters.amp_warn_incorrectly_formatted_todo as lawifoto import linters.utils as liutils @@ -217,6 +218,11 @@ def _get_files_to_lint(args: argparse.Namespace) -> List[str]: # "Reflows, 
capitalizes and adds punctuation to comment lines", # lamficom._FixComment, # pylint: disable=protected-access # ), + ( + "remove_empty_lines_in_function", + "Removes empty lines in functions", + larelinfu._RemoveEmptyLines, # pylint: disable=protected-access + ), ( "black", "Runs `black` to format the code", diff --git a/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/output/test.txt index a69249827..0f2c6f89f 100644 --- a/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/output/test.txt @@ -7,7 +7,7 @@ Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_DevToolsTask linter_warnings.txt //////////////////////////////////////////////////////////////////////////////// file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/tmp.scratch/input.py'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// 
linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/tmp.scratch/input.py:{LINE_NUM}: [C0209(consider-using-f-string), ] Formatting a regular string which could be an f-string [pylint] diff --git a/linters/test/outcomes/Test_linter_py1.test_linter1/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter1/output/test.txt index 0fe5e1515..bcc5c7c3e 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter1/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter1/output/test.txt @@ -7,7 +7,7 @@ Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter1/tmp. linter_warnings.txt //////////////////////////////////////////////////////////////////////////////// file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter1/tmp.scratch/input.py'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter1/tmp.scratch/input.py: 'helpers.hcache' is imported multiple times [normalize_imports] 
$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter1/tmp.scratch/input.py:{LINE_NUM}: SyntaxWarning: invalid escape sequence '\s'm = re.search("\s", txt) [doc_formatter] diff --git a/linters/test/outcomes/Test_linter_py1.test_linter2/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter2/output/test.txt index 08576edff..6cda88b78 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter2/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter2/output/test.txt @@ -1,6 +1,6 @@ # linter log file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter2/tmp.scratch/input.py'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter2/tmp.scratch/input.py: 'helpers.hcache' is imported multiple times [normalize_imports] $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter2/tmp.scratch/input.py:{LINE_NUM}: SyntaxWarning: invalid escape sequence '\s'm = re.search("\s", txt) [doc_formatter] diff --git 
a/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/output/test.txt index 568c45536..3d1ba8d4b 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/output/test.txt @@ -7,7 +7,7 @@ Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_ipynb linter_warnings.txt //////////////////////////////////////////////////////////////////////////////// file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/tmp.scratch/input.ipynb'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// HH:MM:SS - INFO  hdbg.py init_logger:{LINE_NUM} > cmd='./dev_scripts_helpers/notebooks/add_toc_to_notebook.py --input_files $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/tmp.scratch/input.ipynb' [add_toc_to_notebook] $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/tmp.scratch/input.ipynb:1: All 
notebook filenames start with `Master_` or match: `\S+Task\d+_...` [check_filename] diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt index f795e58c2..e2038a36b 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter_md1/output/test.txt @@ -7,7 +7,7 @@ Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/t linter_warnings.txt //////////////////////////////////////////////////////////////////////////////// file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/tmp.scratch/hello.md'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// HH:MM:SS - INFO  hdbg.py init_logger:{LINE_NUM} > cmd='./dev_scripts_helpers/documentation/lint_notes.py -i $GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_md1/tmp.scratch/hello.md --in_place' [lint_md] diff --git 
a/linters/test/outcomes/Test_linter_py1.test_linter_txt1/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_txt1/output/test.txt index 2fb8b8343..0ec4ff5d0 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter_txt1/output/test.txt +++ b/linters/test/outcomes/Test_linter_py1.test_linter_txt1/output/test.txt @@ -7,7 +7,7 @@ Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_txt1/ linter_warnings.txt //////////////////////////////////////////////////////////////////////////////// file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_txt1/tmp.scratch/test.txt'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_txt2/output/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_txt2/output/test.txt index c224e3e9b..54b8b9d08 100644 --- a/linters/test/outcomes/Test_linter_py1.test_linter_txt2/output/test.txt 
+++ b/linters/test/outcomes/Test_linter_py1.test_linter_txt2/output/test.txt @@ -7,7 +7,7 @@ Linting file: '$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_txt2/ linter_warnings.txt //////////////////////////////////////////////////////////////////////////////// file_paths=1 ['$GIT_ROOT/linters/test/outcomes/Test_linter_py1.test_linter_txt2/tmp.scratch/test.txt'] -actions=24 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_function', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// diff --git a/linters/test/outcomes/Test_remove_empty_lines.test1/input/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test1/input/test.txt new file mode 100644 index 000000000..a4fb58020 --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test1/input/test.txt @@ -0,0 +1,24 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + +def add(a: int, b: int) -> int: + """ + Return the sum of 2 numbers. 
+ + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + + print("Inside function add()") + + return a + b + + +result = add(2, 3) + +print("Result: ", result) +print("Outside function") \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test1/output/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test1/output/test.txt new file mode 100644 index 000000000..9164e6664 --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test1/output/test.txt @@ -0,0 +1,22 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + +def add(a: int, b: int) -> int: + """ + Return the sum of 2 numbers. + + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + print("Inside function add()") + return a + b + + +result = add(2, 3) + +print("Result: ", result) +print("Outside function") \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test2/input/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test2/input/test.txt new file mode 100644 index 000000000..88f516fd2 --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test2/input/test.txt @@ -0,0 +1,40 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + + +class Calculator: + + def add(self, a: int, b: int) -> int: + """ + Return the sum of 2 numbers. + + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + + print("Inside function add()") + + return a + b + + def subtract(self, a: int, b: int) -> int: + """ + Return the difference of 2 numbers. 
+ + :param a: first number + :param b: second number + :return: difference of 2 numbers + """ + + print("Inside function subtract()") + + return a - b + + +calc = Calculator() + +print(calc.add(10, 5)) +print(calc.subtract(10, 5)) \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test2/output/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test2/output/test.txt new file mode 100644 index 000000000..27088dad2 --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test2/output/test.txt @@ -0,0 +1,36 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + + +class Calculator: + + def add(self, a: int, b: int) -> int: + """ + Return the sum of 2 numbers. + + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + print("Inside function add()") + return a + b + + def subtract(self, a: int, b: int) -> int: + """ + Return the difference of 2 numbers. + + :param a: first number + :param b: second number + :return: difference of 2 numbers + """ + print("Inside function subtract()") + return a - b + + +calc = Calculator() + +print(calc.add(10, 5)) +print(calc.subtract(10, 5)) \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test3/input/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test3/input/test.txt new file mode 100644 index 000000000..27563c80e --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test3/input/test.txt @@ -0,0 +1,61 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + +def check_largest(a: int, b: int) -> None: + """ + Display largest among two numbers. 
+ + :param a: first number to compare + :param b: second number to compare + """ + + print("Inside check_largest()") + if a > b: + + print(a, "is largest") + elif b > a: + + print(b, "is largest") + else: + + print("Both are same") + +class Calculator: + + def add(self, a: int, b: int) -> int: + """ + Return the sum of 2 numbers. + + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + + print("Inside function add()") + + return a + b + + def subtract(self, a: int, b: int) -> int: + """ + Return the difference of 2 numbers. + + :param a: first number + :param b: second number + :return: difference of 2 numbers + """ + + print("Inside function subtract()") + + return a - b + +a = 10 +b = 5 + +check_largest(a, b) + +calc = Calculator() +print(calc.add(a, b)) +print(calc.subtract(a, b)) \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test3/output/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test3/output/test.txt new file mode 100644 index 000000000..7445c8c1a --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test3/output/test.txt @@ -0,0 +1,53 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + +def check_largest(a: int, b: int) -> None: + """ + Display largest among two numbers. + + :param a: first number to compare + :param b: second number to compare + """ + print("Inside check_largest()") + if a > b: + print(a, "is largest") + elif b > a: + print(b, "is largest") + else: + print("Both are same") + +class Calculator: + + def add(self, a: int, b: int) -> int: + """ + Return the sum of 2 numbers. + + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + print("Inside function add()") + return a + b + + def subtract(self, a: int, b: int) -> int: + """ + Return the difference of 2 numbers. 
+ + :param a: first number + :param b: second number + :return: difference of 2 numbers + """ + print("Inside function subtract()") + return a - b + +a = 10 +b = 5 + +check_largest(a, b) + +calc = Calculator() +print(calc.add(a, b)) +print(calc.subtract(a, b)) \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test4/input/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test4/input/test.txt new file mode 100644 index 000000000..9164e6664 --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test4/input/test.txt @@ -0,0 +1,22 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + +def add(a: int, b: int) -> int: + """ + Return the sum of 2 numbers. + + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + print("Inside function add()") + return a + b + + +result = add(2, 3) + +print("Result: ", result) +print("Outside function") \ No newline at end of file diff --git a/linters/test/outcomes/Test_remove_empty_lines.test4/output/test.txt b/linters/test/outcomes/Test_remove_empty_lines.test4/output/test.txt new file mode 100644 index 000000000..9164e6664 --- /dev/null +++ b/linters/test/outcomes/Test_remove_empty_lines.test4/output/test.txt @@ -0,0 +1,22 @@ +import os +import numpy as np +import pandas as pd + +from typing import List, Tuple + +def add(a: int, b: int) -> int: + """ + Return the sum of 2 numbers. 
+ + :param a: first number + :param b: second number + :return: sum of 2 numbers + """ + print("Inside function add()") + return a + b + + +result = add(2, 3) + +print("Result: ", result) +print("Outside function") \ No newline at end of file diff --git a/linters/test/test_amp_remove_empty_lines_in_function.py b/linters/test/test_amp_remove_empty_lines_in_function.py new file mode 100644 index 000000000..50defbd2b --- /dev/null +++ b/linters/test/test_amp_remove_empty_lines_in_function.py @@ -0,0 +1,54 @@ +import os + +import helpers.hio as hio +import helpers.hunit_test as hunitest +import linters.amp_remove_empty_lines_in_function as larelinfu + + +# ############################################################################# +# Test_remove_empty_lines +# ############################################################################# + + +class Test_remove_empty_lines(hunitest.TestCase): + + def test1(self) -> None: + """ + Test cleaning empty lines in a single function. + """ + self._run_test() + + def test2(self) -> None: + """ + Test cleaning empty lines in methods inside a class. + """ + self._run_test() + + def test3(self) -> None: + """ + Test cleaning empty lines in methods and functions. + """ + self._run_test() + + def test4(self) -> None: + """ + Test cleaning empty lines in methods and functions without any empty + lines. + """ + self._run_test() + + def _run_test(self) -> None: + """ + Helper to run test cases. + """ + # Prepare inputs. + test_input_dir = self.get_input_dir() + text_file_path = os.path.join(test_input_dir, "test.txt") + text = hio.from_file(text_file_path) + # Run. + actual = larelinfu._remove_empty_lines(text) + # Check. 
+ test_output_dir = self.get_output_dir() + output_file_path = os.path.join(test_output_dir, "test.txt") + expected = hio.from_file(output_file_path) + self.assert_equal(expected, "\n".join(actual)) From 79e11f9d2633263700d2aa4c6f992be8c4f87554 Mon Sep 17 00:00:00 2001 From: allenmatt10 <157498336+allenmatt10@users.noreply.github.com> Date: Thu, 17 Apr 2025 15:20:19 -0400 Subject: [PATCH 039/193] HelpersTask585: Moved inputs to dir (#587) --- .../input/test.txt | 11 + .../input/test.txt | 124 ++++++++ .../input/test.txt | 124 ++++++++ .../input/test.txt | 36 +++ .../input/test.txt | 9 + .../input/test.txt | 5 + .../input/test.txt | 15 + .../input/test.txt | 8 + linters/test/test_amp_dev_scripts.py | 273 +++--------------- 9 files changed, 369 insertions(+), 236 deletions(-) create mode 100644 linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter1/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter2/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter_md1/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter_md2/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter_txt1/input/test.txt create mode 100644 linters/test/outcomes/Test_linter_py1.test_linter_txt2/input/test.txt diff --git a/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/input/test.txt new file mode 100644 index 000000000..1ae016e73 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_DevToolsTask408/input/test.txt @@ -0,0 +1,11 @@ + +import logging + +import helpers.hdbg as hdbg + +_LOG = logging.getLogger(__name__) + +s = "hello" +a = "Checking {}".format(s) +_LOG.debug("Checking '%s'.", s) 
+hdbg.dassert(s.startswith("h"), "Checking '%s'.", s) \ No newline at end of file diff --git a/linters/test/outcomes/Test_linter_py1.test_linter1/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter1/input/test.txt new file mode 100644 index 000000000..d36e95e21 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter1/input/test.txt @@ -0,0 +1,124 @@ + +from typing import Any, List +import helpers.hdbg as hdbg +import helpers.hcache as hcac +import helpers.hio as io +import nltk +import pandas as pd +import python +import tqdm.autonotebook as tqdm + +# hcac._get_cache_types() +hcac._get_cache_types() +x = "hcac._get_cache_types()" + +def func(a: str, lst: List[str]) -> Any: + """First comment line.""" + import helpers.hcache as hcache + hcache._get_cache_types() + for i in tqdm.tqdm(lst): + a += "string {}".format(i) + return a + +def func2(df: pd.DataFrame, a: str) -> pd.DataFrame: + """ + Generate "random returns". Use lag + noise as predictor. + + ``` + git@github.com:alphamatic/amp + https://github.com/alphamatic/amp + ``` + + The stage names refer to Node objects, which are not json serializable. + We don't use io.dassert_is_valid_file_name(). + + E.g., + ``` + PostgreSQL 11.5 on x86_64-pc-linux-gnu + compiled by gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit + ``` + """ + io.dassert_is_valid_file_name("test.py") + b = """ + Before separating line. + ########################################################################## + Comments inside string. + ########################################################################## + """ + result_df = df.loc[a+b:] + return result_df + +def func3(a: str) -> str: + """ + Generate "random returns". Use lag + noise as predictor. + """ + if a is not None: + assert isinstance(a, str), (f"You passed '{a}' or type '{type(a)}'" + "instead of str") + ## [C0330(bad-continuation), ] Wrong hanging indentation before + ## block (add 4 spaces). 
+ return a + + +# ############################################################################# +# New part. +# ############################################################################# + + +class MyClass: + """ + Contains all of the logic to construct the standard bars from chapter 2. + This class shouldn't be used directly. We have added functions to the + package such as get_dollar_bars which will create an instance of this class + and then construct the standard bars, to return to the user. + + This is because we wanted to simplify the logic as much as possible, + for the end user. + """ + @staticmethod + def _private_static_method(a: str) -> str: + """ + For reference, let + + - N = 2 + - M = 3 + """ + return a + + def _private_regular_method(self, a: str) -> str: + """ + Read csv file(s) or pd.DataFrame in batches and then constructs the + financial data structure in the form of a DataFrame. The csv file or + DataFrame must have only 3 columns: date_time, price, & volume. + """ + # Returning + return a + +############################################################################## +# New part 2. +############################################################################## + +class TestReplaceShortImportInCode: + def _helper(self, actual: str, expected: str) -> None: + """ + ...... + """ + assert expected == actual + + def test1(self) -> None: + """ + No matches. + """ + code = "import test as te" + expected = code + self._helper(code, expected) + +# Comment before initializing. 
+class TestAnother(): + pass + +if __name__ == "main": + txt = "hello" + m = re.search("\s", txt) + n = nltk.word_tokenize(txt) + hdbg.dassert_path_exists("filename.txt") \ No newline at end of file diff --git a/linters/test/outcomes/Test_linter_py1.test_linter2/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter2/input/test.txt new file mode 100644 index 000000000..d36e95e21 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter2/input/test.txt @@ -0,0 +1,124 @@ + +from typing import Any, List +import helpers.hdbg as hdbg +import helpers.hcache as hcac +import helpers.hio as io +import nltk +import pandas as pd +import python +import tqdm.autonotebook as tqdm + +# hcac._get_cache_types() +hcac._get_cache_types() +x = "hcac._get_cache_types()" + +def func(a: str, lst: List[str]) -> Any: + """First comment line.""" + import helpers.hcache as hcache + hcache._get_cache_types() + for i in tqdm.tqdm(lst): + a += "string {}".format(i) + return a + +def func2(df: pd.DataFrame, a: str) -> pd.DataFrame: + """ + Generate "random returns". Use lag + noise as predictor. + + ``` + git@github.com:alphamatic/amp + https://github.com/alphamatic/amp + ``` + + The stage names refer to Node objects, which are not json serializable. + We don't use io.dassert_is_valid_file_name(). + + E.g., + ``` + PostgreSQL 11.5 on x86_64-pc-linux-gnu + compiled by gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit + ``` + """ + io.dassert_is_valid_file_name("test.py") + b = """ + Before separating line. + ########################################################################## + Comments inside string. + ########################################################################## + """ + result_df = df.loc[a+b:] + return result_df + +def func3(a: str) -> str: + """ + Generate "random returns". Use lag + noise as predictor. 
+ """ + if a is not None: + assert isinstance(a, str), (f"You passed '{a}' or type '{type(a)}'" + "instead of str") + ## [C0330(bad-continuation), ] Wrong hanging indentation before + ## block (add 4 spaces). + return a + + +# ############################################################################# +# New part. +# ############################################################################# + + +class MyClass: + """ + Contains all of the logic to construct the standard bars from chapter 2. + This class shouldn't be used directly. We have added functions to the + package such as get_dollar_bars which will create an instance of this class + and then construct the standard bars, to return to the user. + + This is because we wanted to simplify the logic as much as possible, + for the end user. + """ + @staticmethod + def _private_static_method(a: str) -> str: + """ + For reference, let + + - N = 2 + - M = 3 + """ + return a + + def _private_regular_method(self, a: str) -> str: + """ + Read csv file(s) or pd.DataFrame in batches and then constructs the + financial data structure in the form of a DataFrame. The csv file or + DataFrame must have only 3 columns: date_time, price, & volume. + """ + # Returning + return a + +############################################################################## +# New part 2. +############################################################################## + +class TestReplaceShortImportInCode: + def _helper(self, actual: str, expected: str) -> None: + """ + ...... + """ + assert expected == actual + + def test1(self) -> None: + """ + No matches. + """ + code = "import test as te" + expected = code + self._helper(code, expected) + +# Comment before initializing. 
+class TestAnother(): + pass + +if __name__ == "main": + txt = "hello" + m = re.search("\s", txt) + n = nltk.word_tokenize(txt) + hdbg.dassert_path_exists("filename.txt") \ No newline at end of file diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/input/test.txt new file mode 100644 index 000000000..0ea16f851 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter_ipynb1/input/test.txt @@ -0,0 +1,36 @@ + +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import re\n", + "\n", + "# TODO: Fix.\n", + "res = re.findall(r\"[a-z]+\", \"some text\")\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_md1/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_md1/input/test.txt new file mode 100644 index 000000000..ff88bee03 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter_md1/input/test.txt @@ -0,0 +1,9 @@ + +# Good. +- Good time management + 1. 
choose the right tasks + - Avoid non-essential tasks + +## Bad +- Hello + - World \ No newline at end of file diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_md2/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_md2/input/test.txt new file mode 100644 index 000000000..36d7a6c01 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter_md2/input/test.txt @@ -0,0 +1,5 @@ + +# Header1 +```text +test text +nothing should be changed \ No newline at end of file diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_txt1/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_txt1/input/test.txt new file mode 100644 index 000000000..349610237 --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter_txt1/input/test.txt @@ -0,0 +1,15 @@ + +//src/linters/test/test_precommit.py +//src/linters/utils.py +//src/zenhub_stats/notebooks/stats.py +//src/zenhub_stats/stats.py +//src/zenhub_stats/test/test_stats.py +//src/zenhub_stats/zenhub_typing/__init__.py +//src/zenhub_stats/zenhub_typing/issue.py + + + + + + + diff --git a/linters/test/outcomes/Test_linter_py1.test_linter_txt2/input/test.txt b/linters/test/outcomes/Test_linter_py1.test_linter_txt2/input/test.txt new file mode 100644 index 000000000..fa8f36dce --- /dev/null +++ b/linters/test/outcomes/Test_linter_py1.test_linter_txt2/input/test.txt @@ -0,0 +1,8 @@ + +//src/linters/test/test_precommit.py +//src/linters/utils.py +//src/zenhub_stats/notebooks/stats.py +//src/zenhub_stats/stats.py +//src/zenhub_stats/test/test_stats.py +//src/zenhub_stats/zenhub_typing/__init__.py +//src/zenhub_stats/zenhub_typing/issue.py \ No newline at end of file diff --git a/linters/test/test_amp_dev_scripts.py b/linters/test/test_amp_dev_scripts.py index c310c5e3f..2030bb6fb 100644 --- a/linters/test/test_amp_dev_scripts.py +++ b/linters/test/test_amp_dev_scripts.py @@ -20,6 +20,11 @@ # ############################################################################# +# 
############################################################################# +# Test_linter_py1 +# ############################################################################# + + class Test_linter_py1(hunitest.TestCase): def write_input_file(self, txt: str, file_name: str) -> Tuple[str, str]: @@ -75,11 +80,12 @@ def test_linter1(self) -> None: """ Run Linter as executable on Python code. """ - txt = self._get_horrible_python_code1() + # Get input. + text = self._get_input_text() # Run. file_name = "input.py" as_system_call = True - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Check. self.check_string(output, purify_text=True) @@ -88,11 +94,12 @@ def test_linter2(self) -> None: """ Run Linter as library on Python code. """ - txt = self._get_horrible_python_code1() + # Get input. + text = self._get_input_text() # Run. file_name = "input.py" as_system_call = False - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Check. self.check_string(output, purify_text=True) @@ -107,20 +114,12 @@ def test_linter_md1(self) -> None: """ Run Linter as executable on Markdown. """ - txt = r""" -# Good. -- Good time management - 1. choose the right tasks - - Avoid non-essential tasks - -## Bad -- Hello - - World - """ + # Get input. + text = self._get_input_text() # Run. file_name = "hello.md" as_system_call = True - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Remove the lines: # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table'. # '$GIT_ROOT/linters/test/outcomes/.../hello.md: is not referenced in README.md'. @@ -139,17 +138,12 @@ def test_linter_md2(self) -> None: """ Run Linter as executable on Markdown file with a fenced block. """ - txt = r""" -# Header1 -```text -test text -nothing should be changed -``` - """ + # Get input. 
+ text = self._get_input_text() # Run. file_name = "hello.md" as_system_call = True - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Remove the lines: # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table'. # '$GIT_ROOT/linters/test/outcomes/.../hello.md: is not referenced in README.md'. @@ -165,26 +159,12 @@ def test_linter_txt1(self) -> None: The content of txt files is not linted, see DevToolsTask553. """ - txt = r""" -//src/linters/test/test_precommit.py -//src/linters/utils.py -//src/zenhub_stats/notebooks/stats.py -//src/zenhub_stats/stats.py -//src/zenhub_stats/test/test_stats.py -//src/zenhub_stats/zenhub_typing/__init__.py -//src/zenhub_stats/zenhub_typing/issue.py - - - - - - - -""" + # Get input. + text = self._get_input_text() # Run. file_name = "test.txt" as_system_call = True - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Remove the line: # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table' output = hunitest.filter_text("No tags for table", output) @@ -197,19 +177,12 @@ def test_linter_txt2(self) -> None: The content of txt files is not linted, see DevToolsTask553. """ - txt = r""" -//src/linters/test/test_precommit.py -//src/linters/utils.py -//src/zenhub_stats/notebooks/stats.py -//src/zenhub_stats/stats.py -//src/zenhub_stats/test/test_stats.py -//src/zenhub_stats/zenhub_typing/__init__.py -//src/zenhub_stats/zenhub_typing/issue.py -""" + # Get input. + text = self._get_input_text() # Run. 
file_name = "test.txt" as_system_call = True - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Remove the line: # '12-16_14:59 ^[[33mWARNING^[[0m: _refresh_toc :138 : No tags for table' output = hunitest.filter_text("No tags for table", output) @@ -221,22 +194,12 @@ def test_DevToolsTask408(self) -> None: """ Test pylint's string formatting warnings. """ - txt = """ -import logging - -import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - -s = "hello" -a = "Checking {}".format(s) -_LOG.debug("Checking '%s'.", s) -hdbg.dassert(s.startswith("h"), "Checking '%s'.", s) -""" + # Get input. + text = self._get_input_text() # Run. file_name = "input.py" as_system_call = True - output = self.run_linter(txt, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Check. self.check_string(output, purify_text=True) @@ -245,11 +208,12 @@ def test_linter_ipynb1(self) -> None: """ Run Linter as executable on a notebook. """ - contents_ipynb = self._get_ipynb_contents1() + # Get input. + text = self._get_input_text() # Run. file_name = "input.ipynb" as_system_call = True - output = self.run_linter(contents_ipynb, file_name, as_system_call) + output = self.run_linter(text, file_name, as_system_call) # Check. 
self.check_string(output, purify_text=True) @@ -313,176 +277,6 @@ def test_linter_ipynb_paired2(self) -> None: # ######################################################################### - @staticmethod - def _get_horrible_python_code1() -> str: - txt = r''' -from typing import Any, List -import helpers.hdbg as hdbg -import helpers.hcache as hcac -import helpers.hio as io -import nltk -import pandas as pd -import python -import tqdm.autonotebook as tqdm - -# hcac._get_cache_types() -hcac._get_cache_types() -x = "hcac._get_cache_types()" - -def func(a: str, lst: List[str]) -> Any: - """First comment line.""" - import helpers.hcache as hcache - hcache._get_cache_types() - for i in tqdm.tqdm(lst): - a += "string {}".format(i) - return a - -def func2(df: pd.DataFrame, a: str) -> pd.DataFrame: - """ - Generate "random returns". Use lag + noise as predictor. - - ``` - git@github.com:alphamatic/amp - https://github.com/alphamatic/amp - ``` - - The stage names refer to Node objects, which are not json serializable. - We don't use io.dassert_is_valid_file_name(). - - E.g., - ``` - PostgreSQL 11.5 on x86_64-pc-linux-gnu - compiled by gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-9), 64-bit - ``` - """ - io.dassert_is_valid_file_name("test.py") - b = """ - Before separating line. - ########################################################################## - Comments inside string. - ########################################################################## - """ - result_df = df.loc[a+b:] - return result_df - -def func3(a: str) -> str: - """ - Generate "random returns". Use lag + noise as predictor. - """ - if a is not None: - assert isinstance(a, str), (f"You passed '{a}' or type '{type(a)}'" - "instead of str") - ## [C0330(bad-continuation), ] Wrong hanging indentation before - ## block (add 4 spaces). - return a - - -# ############################################################################# -# New part. 
-# ############################################################################# - - -class MyClass: - """ - Contains all of the logic to construct the standard bars from chapter 2. - This class shouldn't be used directly. We have added functions to the - package such as get_dollar_bars which will create an instance of this class - and then construct the standard bars, to return to the user. - - This is because we wanted to simplify the logic as much as possible, - for the end user. - """ - @staticmethod - def _private_static_method(a: str) -> str: - """ - For reference, let - - - N = 2 - - M = 3 - """ - return a - - def _private_regular_method(self, a: str) -> str: - """ - Read csv file(s) or pd.DataFrame in batches and then constructs the - financial data structure in the form of a DataFrame. The csv file or - DataFrame must have only 3 columns: date_time, price, & volume. - """ - # Returning - return a - -############################################################################## -# New part 2. -############################################################################## - -class TestReplaceShortImportInCode: - def _helper(self, actual: str, expected: str) -> None: - """ - ...... - """ - assert expected == actual - - def test1(self) -> None: - """ - No matches. - """ - code = "import test as te" - expected = code - self._helper(code, expected) - -# Comment before initializing. 
-class TestAnother(): - pass - -if __name__ == "main": - txt = "hello" - m = re.search("\s", txt) - n = nltk.word_tokenize(txt) - hdbg.dassert_path_exists("filename.txt") - ''' - return txt - - @staticmethod - def _get_ipynb_contents1() -> str: - contents_ipynb = r""" -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import re\n", - "\n", - "# TODO: Fix.\n", - "res = re.findall(r\"[a-z]+\", \"some text\")\n", - "\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} - """ - return contents_ipynb - def _run_linter( self, file_name: str, @@ -543,4 +337,11 @@ def _run_linter( output.extend(txt.split("\n")) # ////////////// output_as_str = "\n".join(output) - return output_as_str \ No newline at end of file + return output_as_str + + def _get_input_text(self) -> str: + # Prepare input. + test_input_dir = self.get_input_dir() + text_file_path = os.path.join(test_input_dir, "test.txt") + text = hio.from_file(text_file_path) + return text From 2b3eb49ed961d65f763a54cea9bdcb2821269578 Mon Sep 17 00:00:00 2001 From: Indro <69083680+indrayudd@users.noreply.github.com> Date: Thu, 17 Apr 2025 15:29:35 -0400 Subject: [PATCH 040/193] HelpersTask563_Unify_line_numbering_in_custom_Linter_warnings_for_consistent_navigation (#589) * HelpersTask563: to in 5 Linter files. * HelpersTask563: Change tests to match new convention. 
--------- Co-authored-by: Indrayudd Roy Chowdhury Co-authored-by: Sonya Nikiforova --- linters/amp_check_import.py | 6 +---- linters/amp_check_md_toc_headers.py | 2 +- linters/amp_check_merge_conflict.py | 2 +- linters/amp_fix_md_links.py | 2 +- .../amp_warn_incorrectly_formatted_todo.py | 3 +-- .../Test_fix_links.test1/output/test.txt | 24 +++++++++---------- .../Test_fix_links.test3/output/test.txt | 10 ++++---- .../Test_fix_links.test5/output/test.txt | 8 +++---- .../output/test.txt | 2 +- 9 files changed, 27 insertions(+), 32 deletions(-) diff --git a/linters/amp_check_import.py b/linters/amp_check_import.py index e65bed2d3..b9b6ee458 100644 --- a/linters/amp_check_import.py +++ b/linters/amp_check_import.py @@ -25,14 +25,11 @@ def _check_import(file_name: str, line_num: int, line: str) -> str: # The maximum length of an 'import as'. MAX_LEN_IMPORT = 8 - msg = "" - if liutils.is_init_py(file_name): # In **init**.py we can import in weird ways. (e.g., the evil # `from ... import *`). return msg - m = re.match(r"\s*from\s+(\S+)\s+import\s+.*", line) if m: if m.group(1) != "typing": @@ -69,11 +66,10 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: return [] output = [] lines = hio.from_file(file_name).split("\n") - for i, line in enumerate(lines): + for i, line in enumerate(lines, start=1): msg = _check_import(file_name, i, line) if msg: output.append(msg) - return output diff --git a/linters/amp_check_md_toc_headers.py b/linters/amp_check_md_toc_headers.py index 148e26573..6a48b5fa9 100644 --- a/linters/amp_check_md_toc_headers.py +++ b/linters/amp_check_md_toc_headers.py @@ -84,7 +84,7 @@ def verify_toc_position(lines: List[str], file_name: str) -> List[str]: warnings = [] # Check for the start TOC markers. toc_start_found: bool = False - for line_num, line in enumerate(lines): + for line_num, line in enumerate(lines, start=1): # Check for the start of TOC markers. 
stripped_line = line.strip() if TOC_REGEX.match(stripped_line): diff --git a/linters/amp_check_merge_conflict.py b/linters/amp_check_merge_conflict.py index cf4308504..ab3d7adb3 100644 --- a/linters/amp_check_merge_conflict.py +++ b/linters/amp_check_merge_conflict.py @@ -57,7 +57,7 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: return [] output = [] # Check the file lines for merge conflict markers. - for i, line in enumerate(lines): + for i, line in enumerate(lines, start=1): msg = _check_merge_conflict(file_name, i, line) if msg: # Store the warning message. diff --git a/linters/amp_fix_md_links.py b/linters/amp_fix_md_links.py index 134a755f5..328a2996e 100644 --- a/linters/amp_fix_md_links.py +++ b/linters/amp_fix_md_links.py @@ -304,7 +304,7 @@ def fix_links(file_name: str) -> Tuple[List[str], List[str], List[str]]: docstring_line_indices = hstring.get_docstring_line_indices(lines) updated_lines: List[str] = [] warnings: List[str] = [] - for i, line in enumerate(lines): + for i, line in enumerate(lines, start=1): updated_line = line # Check the formatting. # HTML-style links. 
diff --git a/linters/amp_warn_incorrectly_formatted_todo.py b/linters/amp_warn_incorrectly_formatted_todo.py index 2668ad9e2..ba6464ff6 100644 --- a/linters/amp_warn_incorrectly_formatted_todo.py +++ b/linters/amp_warn_incorrectly_formatted_todo.py @@ -64,11 +64,10 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: return [] lines = hio.from_file(file_name).split("\n") output = [] - for i, line in enumerate(lines): + for i, line in enumerate(lines, start=1): msg = _warn_incorrectly_formatted_todo(file_name, i, line) if msg: output.append(msg) - return output diff --git a/linters/test/outcomes/Test_fix_links.test1/output/test.txt b/linters/test/outcomes/Test_fix_links.test1/output/test.txt index 95fe78283..9fc30cf08 100644 --- a/linters/test/outcomes/Test_fix_links.test1/output/test.txt +++ b/linters/test/outcomes/Test_fix_links.test1/output/test.txt @@ -1,16 +1,16 @@ # linter warnings -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:37: '/helpersssss/hhhhgit.py' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:76: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:79: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:82: '/import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:85: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:85: '/iiimport_check/example/output/basicccc.png' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:88: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' 
-$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:91: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:95: '/import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:98: '../../import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:101: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:101: '/iiimport_check/example/output/basicccc.png' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:38: '/helpersssss/hhhhgit.py' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:77: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:80: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:83: '/import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:86: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:86: '/iiimport_check/example/output/basicccc.png' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:89: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:92: 
'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:96: '/import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:99: '../../import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:102: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:102: '/iiimport_check/example/output/basicccc.png' does not exist # linted file - Markdown-style link with a text label diff --git a/linters/test/outcomes/Test_fix_links.test3/output/test.txt b/linters/test/outcomes/Test_fix_links.test3/output/test.txt index bbc60211b..78ee63a22 100644 --- a/linters/test/outcomes/Test_fix_links.test3/output/test.txt +++ b/linters/test/outcomes/Test_fix_links.test3/output/test.txt @@ -1,9 +1,9 @@ # linter warnings -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:0: '/docs/markdown_example.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:2: '/docs/html_example.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:4: '/missing_markdown.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:6: '/missing_html.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:12: '/nested.md)' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:1: '/docs/markdown_example.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:3: '/docs/html_example.md' does not exist 
+$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:5: '/missing_markdown.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:7: '/missing_html.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:13: '/nested.md)' does not exist # linted file Markdown link: [Valid Markdown Link](/docs/markdown_example.md) diff --git a/linters/test/outcomes/Test_fix_links.test5/output/test.txt b/linters/test/outcomes/Test_fix_links.test5/output/test.txt index 4f2847d44..75fd16dcf 100644 --- a/linters/test/outcomes/Test_fix_links.test5/output/test.txt +++ b/linters/test/outcomes/Test_fix_links.test5/output/test.txt @@ -1,8 +1,8 @@ # linter warnings -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:0: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:2: '/docs/markdown_exam.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:4: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:6: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:1: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:3: '/docs/markdown_exam.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:5: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist 
+$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:7: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist # linted file Markdown link: [Valid Markdown and header Link]($GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md#introduction) diff --git a/linters/test/outcomes/Test_verify_toc_postion.test1/output/test.txt b/linters/test/outcomes/Test_verify_toc_postion.test1/output/test.txt index af5f57da7..d0357e171 100644 --- a/linters/test/outcomes/Test_verify_toc_postion.test1/output/test.txt +++ b/linters/test/outcomes/Test_verify_toc_postion.test1/output/test.txt @@ -1,5 +1,5 @@ # linter warnings -/app/linters/test/outcomes/Test_verify_toc_postion.test1/tmp.scratch/test.md:3: Content found before TOC. +/app/linters/test/outcomes/Test_verify_toc_postion.test1/tmp.scratch/test.md:4: Content found before TOC. # linted file From 4d4b2aa4dd4971666514c904db9707c86ca15c7e Mon Sep 17 00:00:00 2001 From: Krishna P Taduri <40231735+tkpratardan@users.noreply.github.com> Date: Fri, 18 Apr 2025 07:47:35 -0400 Subject: [PATCH 041/193] HelpersTask207_create_index_for_readme (#237) * checkpoint * Move file to dev_scripts_helpers/documentation * edit docstring and change print statements to log * pass content directly to generate_summary_for_file * add backticks in docstring for var names * edit generate_markdown_index docstring * moved summary insertion * add unit test * removed update and generate from generate_readme_index.py * add unit tests * lint and change prompt to guarantee * add golden files for tests * add openai module to env * removed openai install * lint * lint * fix import openai * implemented refresh * lint * implement pytest.importorskip * golden files for tests * nits and edit test cases * refresh golden files * nits on test case docstring * change example output * refactor target_path to dir_path * refactor target_path to dir_path on tests * HelpersTask207: 
Lint; update comments --------- Co-authored-by: aangelo9 <153690899+aangelo9@users.noreply.github.com> Co-authored-by: aangelo9 Co-authored-by: sonniki --- .../documentation/generate_readme_index.py | 272 ++++++++++++++++ .../output/test.txt | 21 ++ .../output/test.txt | 25 ++ .../output/test.txt | 17 + .../output/test.txt | 21 ++ .../test/test_generate_readme_index.py | 308 ++++++++++++++++++ 6 files changed, 664 insertions(+) create mode 100644 dev_scripts_helpers/documentation/generate_readme_index.py create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test1/output/test.txt create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test2/output/test.txt create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test3/output/test.txt create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test4/output/test.txt create mode 100644 dev_scripts_helpers/documentation/test/test_generate_readme_index.py diff --git a/dev_scripts_helpers/documentation/generate_readme_index.py b/dev_scripts_helpers/documentation/generate_readme_index.py new file mode 100644 index 000000000..0fce0439d --- /dev/null +++ b/dev_scripts_helpers/documentation/generate_readme_index.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python + +""" +Generate a Markdown index in the README file. + +Usage: + generate_readme_index.py --index_mode {generate,refresh} [--dir_path DIR_PATH] [--model MODEL] + +This script creates or updates a README file with an index of Markdown files in a given directory, +including their relative paths and summaries. + +Example output: + +# README for `dir_path` + +Below is a list of all Markdown files found under `dir_path`. + +## Markdown Index + +- **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Introduces the repository, its purpose, and how to navigate the documentation. 
import argparse
import logging
import os
import re
from typing import Dict, List

import helpers.hgit as hgit
import helpers.hio as hio
import helpers.hopenai as hopenai

_LOG = logging.getLogger(__name__)


def _get_existing_summaries(
    dir_path: str, markdown_files: List[str]
) -> Dict[str, str]:
    """
    Extract and filter summaries from the existing README file.

    Only summaries for Markdown files that still exist in the codebase
    are retained.

    :param dir_path: directory path where README.md file is located
    :param markdown_files: all Markdown file paths
    :return: summaries of existing files
    """
    readme_path = os.path.join(dir_path, "README.md")
    content = hio.from_file(readme_path)
    content = content.strip()
    # The named groups are required by the `match.group(...)` calls below;
    # without them `re.compile()` raises `re.error`.
    pattern = re.compile(
        # Matches **File Name**: file_name.md.
        r"- \*\*File Name\*\*: (?P<file_name>.+?)"
        # Matches **Relative Path**: [path](link).
        r"\*\*Relative Path\*\*: \[(?P<rel_path>[^\]]+)\]\([^)]+\)\s*"
        # Matches **Summary**: content.
        r"\*\*Summary\*\*: (?P<summary>.*?)(?=\n- \*\*File Name|\Z)",
        re.DOTALL,
    )
    summaries: Dict[str, str] = {}
    for match in pattern.finditer(content):
        # Parse content.
        rel_path = match.group("rel_path").strip()
        summary = match.group("summary").strip().replace("\n", " ")
        if rel_path in markdown_files:
            # Store summaries of the files that still exist.
            summaries[rel_path] = summary
        else:
            # The file was removed from the codebase: drop its stale entry.
            _LOG.debug("Deleting summary for %s", rel_path)
    return summaries


def _generate_summary_for_file(file_path: str, model: str) -> str:
    """
    Generate a two-line summary for a given Markdown file.

    :param file_path: full path to the Markdown file
    :param model: name of the model for summary generation, e.g.,
        "placeholder", "gpt-4o-mini"
        - "placeholder" model inserts a dummy summary instead of generating one
    :return: a short summary of a file
    """
    if model == "placeholder":
        # Skip OpenAI API usage.
        _LOG.debug("Using placeholder summary for %s", file_path)
        summary = f"Placeholder summary for {file_path}"
        return summary
    _LOG.debug("Generating summary for: %s", file_path)
    content = hio.from_file(file_path)
    prompt = (
        "Summarize the following content in exactly two lines. "
        "Do not include any introduction or list markers. "
        "Just return the summary itself, nothing else.\n\n"
        f"{content}"
    )
    summary = hopenai.get_completion(user_prompt=prompt, model=model)
    summary = str(summary.strip())
    return summary


def _build_index_lines(
    dir_path: str,
    markdown_files: List[str],
    summaries: Dict[str, str],
    model: str,
) -> str:
    """
    Construct the Markdown index content to write into README.

    :param dir_path: directory path for labelling README
    :param markdown_files: all Markdown file paths
    :param summaries: Markdown file paths and their summaries
    :param model: name of the model for summary generation, e.g.,
        "placeholder", "gpt-4o-mini"
        - "placeholder" model inserts a dummy summary instead of generating one
    :return: formatted Markdown files index
    """
    # File starter.
    if dir_path == hgit.find_git_root():
        lines = [
            "# README for the repository",
            "",
            "Below is a list of all Markdown files found in the repository.",
        ]
    else:
        rel_path = os.path.relpath(dir_path)
        lines = [
            f"# README for `{rel_path}`",
            "",
            f"Below is a list of all Markdown files found under `{rel_path}`.",
        ]
    lines.extend(
        [
            "",
            "## Markdown Index",
            "",
        ]
    )
    for file_path in markdown_files:
        if file_path not in summaries:
            # Create a new summary for the file.
            summary = _generate_summary_for_file(file_path, model=model)
        else:
            # Get the existing summary for the file.
            summary = summaries[file_path]
        # Construct the info paragraph in the README format.
        lines.append(
            f"- **File Name**: {file_path} \n"
            f"  **Relative Path**: [{file_path}]({file_path}) \n"
            f"  **Summary**: {summary} \n"
        )
    content = "\n".join(lines)
    return content


def list_markdown_files(dir_path: str) -> List[str]:
    """
    List all Markdown files in the given directory.

    :param dir_path: directory path to search
    :return: paths of all Markdown files found, relative to `dir_path`
    """
    markdown_files = []
    for root, _, files in os.walk(dir_path):
        for file in files:
            if file.endswith(".md") and file.lower() != "readme.md":
                # Get markdown files and ignore README.md.
                rel_path = os.path.relpath(os.path.join(root, file), dir_path)
                markdown_files.append(rel_path)
    markdown_files = sorted(markdown_files)
    return markdown_files


def generate_markdown_index(
    dir_path: str,
    markdown_files: List[str],
    index_mode: str,
    *,
    model: str = "placeholder",
) -> str:
    """
    Generate the full Markdown index content to be written into README.

    Depending on the index mode, this function either creates new summaries
    for all Markdown files (`generate`) or only for newly added ones
    (`refresh`). Summaries are created using a provided model or a
    placeholder string.

    :param dir_path: directory path to index Markdown files
    :param markdown_files: all Markdown file paths
    :param index_mode: method of dealing with the existing README file
        - "generate": overwrite with the index generated from scratch
        - "refresh": remove obsolete entries and add missing ones
    :param model: LLM model to use for summarization
    :return: complete Markdown index content
    """
    if index_mode == "generate":
        # Start with an empty summary.
        summaries: Dict[str, str] = {}
    elif index_mode == "refresh":
        # Retrieve summaries from the existing README.
        summaries = _get_existing_summaries(dir_path, markdown_files)
    else:
        raise ValueError(
            f"Invalid index_mode='{index_mode}'. Expected 'generate' or 'refresh'."
        )
    content = _build_index_lines(
        dir_path, markdown_files, model=model, summaries=summaries
    )
    return content
+ _LOG.debug("No Markdown files found; skipping index generation.") + + +if __name__ == "__main__": + _main() diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test1/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test1/output/test.txt new file mode 100644 index 000000000..b0d135177 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test1/output/test.txt @@ -0,0 +1,21 @@ +# README for `test/outcomes/Test_generate_readme_index.test1/tmp.scratch` + +Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test1/tmp.scratch`. + +## Markdown Index + +- **File Name**: docs/guide/setup.md + **Relative Path**: [docs/guide/setup.md](docs/guide/setup.md) + **Summary**: Placeholder summary for docs/guide/setup.md + +- **File Name**: docs/guide/usage.md + **Relative Path**: [docs/guide/usage.md](docs/guide/usage.md) + **Summary**: Placeholder summary for docs/guide/usage.md + +- **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Placeholder summary for docs/intro.md + +- **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Placeholder summary for welcome.md diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test2/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test2/output/test.txt new file mode 100644 index 000000000..3ffa1990d --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test2/output/test.txt @@ -0,0 +1,25 @@ +# README for `test/outcomes/Test_generate_readme_index.test2/tmp.scratch` + +Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test2/tmp.scratch`. 
+ +## Markdown Index + +- **File Name**: docs/guide/new_file.md + **Relative Path**: [docs/guide/new_file.md](docs/guide/new_file.md) + **Summary**: Placeholder summary for docs/guide/new_file.md + +- **File Name**: docs/guide/setup.md + **Relative Path**: [docs/guide/setup.md](docs/guide/setup.md) + **Summary**: Provides step-by-step instructions to set up the development environment. Essential for onboarding new contributors and initializing project dependencies. + +- **File Name**: docs/guide/usage.md + **Relative Path**: [docs/guide/usage.md](docs/guide/usage.md) + **Summary**: Describes how to use the project's key features and available commands. Helps users understand how to interact with the system effectively. + +- **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Offers an overview of the project's purpose, goals, and core components. Ideal as a starting point for readers new to the repository. + +- **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Welcomes readers to the repository and outlines the structure of documentation. Encourages contributors to explore and engage with the content. diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test3/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test3/output/test.txt new file mode 100644 index 000000000..abdd57694 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test3/output/test.txt @@ -0,0 +1,17 @@ +# README for `test/outcomes/Test_generate_readme_index.test3/tmp.scratch` + +Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test3/tmp.scratch`. + +## Markdown Index + +- **File Name**: docs/guide/setup.md + **Relative Path**: [docs/guide/setup.md](docs/guide/setup.md) + **Summary**: Provides step-by-step instructions to set up the development environment. 
Essential for onboarding new contributors and initializing project dependencies. + +- **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Offers an overview of the project's purpose, goals, and core components. Ideal as a starting point for readers new to the repository. + +- **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Welcomes readers to the repository and outlines the structure of documentation. Encourages contributors to explore and engage with the content. \ No newline at end of file diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test4/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test4/output/test.txt new file mode 100644 index 000000000..5f8971ec8 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_generate_readme_index.test4/output/test.txt @@ -0,0 +1,21 @@ +# README for `test/outcomes/Test_generate_readme_index.test4/tmp.scratch` + +Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test4/tmp.scratch`. + +## Markdown Index + +- **File Name**: docs/guide/new_file.md + **Relative Path**: [docs/guide/new_file.md](docs/guide/new_file.md) + **Summary**: Placeholder summary for docs/guide/new_file.md + +- **File Name**: docs/guide/usage.md + **Relative Path**: [docs/guide/usage.md](docs/guide/usage.md) + **Summary**: Describes how to use the project's key features and available commands. Helps users understand how to interact with the system effectively. + +- **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Offers an overview of the project's purpose, goals, and core components. Ideal as a starting point for readers new to the repository. 
+ +- **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Welcomes readers to the repository and outlines the structure of documentation. Encourages contributors to explore and engage with the content. \ No newline at end of file diff --git a/dev_scripts_helpers/documentation/test/test_generate_readme_index.py b/dev_scripts_helpers/documentation/test/test_generate_readme_index.py new file mode 100644 index 000000000..1789da748 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/test_generate_readme_index.py @@ -0,0 +1,308 @@ +import os +import textwrap + +import pytest + +pytest.importorskip( + "openai" +) # noqa: E402 # pylint: disable=wrong-import-position + +import dev_scripts_helpers.documentation.generate_readme_index as dshdgrein +import helpers.hio as hio +import helpers.hunit_test as hunitest + + +# ############################################################################# +# Test_list_markdown_files +# ############################################################################# + + +class Test_list_markdown_files(hunitest.TestCase): + + def test1(self) -> None: + """ + Test retrieving all Markdown files in a directory. + """ + # Sample nested documents. + file_structure = { + "welcome.md": "# welcome page", + "docs/intro.md": "# Introduction", + "docs/guide/setup.md": "# Setup Guide", + "docs/guide/usage.md": "# Usage Guide", + } + # Expected output. + expected = [ + "docs/guide/setup.md", + "docs/guide/usage.md", + "docs/intro.md", + "welcome.md", + ] + dir_path = self.get_scratch_space() + for path, content in file_structure.items(): + self._write_input_file(content, path) + # Run. + actual = dshdgrein.list_markdown_files(dir_path) + # Check. + self.assertEqual(actual, expected) + + def test2(self) -> None: + """ + Test that non-Markdown files are ignored. + """ + # Sample nested documents. 
+ file_structure = { + "welcome.md": "# welcome page", + "docs/intro.md": "# Introduction", + "docs/guide/setup.md": "# Setup Guide", + "docs/guide/build.py": "Build setup", + "docs/guide/usage.md": "# Usage Guide", + } + # Expected output. + expected = [ + "docs/guide/setup.md", + "docs/guide/usage.md", + "docs/intro.md", + "welcome.md", + ] + dir_path = self.get_scratch_space() + for path, content in file_structure.items(): + self._write_input_file(content, path) + # Run. + actual = dshdgrein.list_markdown_files(dir_path) + # Check. + self.assertEqual(actual, expected) + + def test3(self) -> None: + """ + Test that the existing README is ignored. + """ + # Sample nested documents. + file_structure = { + "welcome.md": "# welcome page", + "docs/intro.md": "# Introduction", + "docs/guide/setup.md": "# Setup Guide", + "README.md": "# Markdown Index", + } + dir_path = self.get_scratch_space() + for path, content in file_structure.items(): + self._write_input_file(content, path) + # Expected output. + expected = ["docs/guide/setup.md", "docs/intro.md", "welcome.md"] + # Run. + actual = dshdgrein.list_markdown_files(dir_path) + # Check. + self.assertEqual(actual, expected) + + def test4(self) -> None: + """ + Test for empty directory. + """ + dir_path = self.get_scratch_space() + # Expected output. + expected = [] + # Run. + actual = dshdgrein.list_markdown_files(dir_path) + # Check. + self.assertEqual(actual, expected) + + def _write_input_file(self, txt: str, file_name: str) -> str: + """ + Write test content to a file in the scratch space. + + :param txt: the content of the file + :param file_name: the name of the file + :return: the path to the file with the test content + """ + txt = txt.strip() + # Get file path to write. + dir_name = self.get_scratch_space() + file_path = os.path.join(dir_name, file_name) + file_path = os.path.abspath(file_path) + # Create the file. 
+ hio.to_file(file_path, txt) + return file_path + + +# ############################################################################# +# Test_generate_readme_index +# ############################################################################# + + +class Test_generate_readme_index(hunitest.TestCase): + + def test1(self) -> None: + """ + Test generating README from scratch using placeholder summary. + """ + # Prepare inputs. + dir_path = self.get_scratch_space() + markdown_files = [ + "docs/guide/setup.md", + "docs/guide/usage.md", + "docs/intro.md", + "welcome.md", + ] + index_mode = "generate" + model = "placeholder" + # Run. + actual = dshdgrein.generate_markdown_index( + dir_path=dir_path, + markdown_files=markdown_files, + index_mode=index_mode, + model=model, + ) + # Check. + self.check_string(actual) + + def test2(self) -> None: + """ + Test refreshing README by adding a new file. + """ + # Prepare inputs. + existing_content = """ + # README for `test/outcomes/Test_generate_readme_index.test2/tmp.scratch` + + Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test2/tmp.scratch`. + + ## Markdown Index + + - **File Name**: docs/guide/setup.md + **Relative Path**: [docs/guide/setup.md](docs/guide/setup.md) + **Summary**: Provides step-by-step instructions to set up the development environment. Essential for onboarding new contributors and initializing project dependencies. + + - **File Name**: docs/guide/usage.md + **Relative Path**: [docs/guide/usage.md](docs/guide/usage.md) + **Summary**: Describes how to use the project's key features and available commands. Helps users understand how to interact with the system effectively. + + - **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Offers an overview of the project's purpose, goals, and core components. Ideal as a starting point for readers new to the repository. 
+ + - **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Welcomes readers to the repository and outlines the structure of documentation. Encourages contributors to explore and engage with the content. + + """ + dir_path = self._write_readme(existing_content) + markdown_files = [ + "docs/guide/new_file.md", + "docs/guide/setup.md", + "docs/guide/usage.md", + "docs/intro.md", + "welcome.md", + ] + index_mode = "refresh" + model = "placeholder" + # Run. + actual = dshdgrein.generate_markdown_index( + dir_path=dir_path, + markdown_files=markdown_files, + index_mode=index_mode, + model=model, + ) + # Check. + self.check_string(actual) + + def test3(self) -> None: + """ + Test refreshing README by removing an obsolete file. + """ + # Prepare inputs. + existing_content = """ + # README for `test/outcomes/Test_generate_readme_index.test3/tmp.scratch` + + Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test3/tmp.scratch`. + + ## Markdown Index + + - **File Name**: docs/guide/setup.md + **Relative Path**: [docs/guide/setup.md](docs/guide/setup.md) + **Summary**: Provides step-by-step instructions to set up the development environment. Essential for onboarding new contributors and initializing project dependencies. + + - **File Name**: docs/guide/usage.md + **Relative Path**: [docs/guide/usage.md](docs/guide/usage.md) + **Summary**: Describes how to use the project's key features and available commands. Helps users understand how to interact with the system effectively. + + - **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Offers an overview of the project's purpose, goals, and core components. Ideal as a starting point for readers new to the repository. + + - **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Welcomes readers to the repository and outlines the structure of documentation. 
Encourages contributors to explore and engage with the content. + + """ + dir_path = self._write_readme(existing_content) + markdown_files = ["docs/guide/setup.md", "docs/intro.md", "welcome.md"] + index_mode = "refresh" + model = "placeholder" + # Run. + actual = dshdgrein.generate_markdown_index( + dir_path=dir_path, + markdown_files=markdown_files, + index_mode=index_mode, + model=model, + ) + # Check. + self.check_string(actual) + + def test4(self) -> None: + """ + Test refreshing README by adding a new file and removing another. + """ + # Prepare inputs. + existing_content = """ + # README for `test/outcomes/Test_generate_readme_index.test4/tmp.scratch` + + Below is a list of all Markdown files found under `test/outcomes/Test_generate_readme_index.test4/tmp.scratch`. + + ## Markdown Index + + - **File Name**: docs/guide/setup.md + **Relative Path**: [docs/guide/setup.md](docs/guide/setup.md) + **Summary**: Provides step-by-step instructions to set up the development environment. Essential for onboarding new contributors and initializing project dependencies. + + - **File Name**: docs/guide/usage.md + **Relative Path**: [docs/guide/usage.md](docs/guide/usage.md) + **Summary**: Describes how to use the project's key features and available commands. Helps users understand how to interact with the system effectively. + + - **File Name**: docs/intro.md + **Relative Path**: [docs/intro.md](docs/intro.md) + **Summary**: Offers an overview of the project's purpose, goals, and core components. Ideal as a starting point for readers new to the repository. + + - **File Name**: welcome.md + **Relative Path**: [welcome.md](welcome.md) + **Summary**: Welcomes readers to the repository and outlines the structure of documentation. Encourages contributors to explore and engage with the content. 
+ + """ + dir_path = self._write_readme(existing_content) + markdown_files = [ + "docs/guide/new_file.md", + "docs/guide/usage.md", + "docs/intro.md", + "welcome.md", + ] + index_mode = "refresh" + model = "placeholder" + # Run. + actual = dshdgrein.generate_markdown_index( + dir_path=dir_path, + markdown_files=markdown_files, + index_mode=index_mode, + model=model, + ) + # Check. + self.check_string(actual) + + def _write_readme(self, content: str) -> str: + """ + Create a README file with content. + + :param content: the content to write into the README file + :return: the path to the directory containing the README + """ + content = textwrap.dedent(content) + dir_path = self.get_scratch_space() + readme_path = os.path.join(dir_path, "README.md") + hio.to_file(readme_path, content) + return dir_path From bce5247da014845961f58952664e9ff9a94f7786 Mon Sep 17 00:00:00 2001 From: aangelo9 <153690899+aangelo9@users.noreply.github.com> Date: Fri, 18 Apr 2025 13:19:38 -0400 Subject: [PATCH 042/193] HelpersTask541_Add_skip_files_option_to_Linter (#572) * implement --skip_files * move --skip_files arg * add skip_test param on lint task * fix _filter_files call * revert lib_tasks_lint and nits * remove import Optional * lib_tasks_lint nits * base nits --- helpers/lib_tasks_lint.py | 7 +++++++ linters/base.py | 22 ++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index e6a136d01..cce9b3a02 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -184,6 +184,7 @@ def lint( # type: ignore stage="prod", version="", files="", + skip_files="", dir_name="", modified=False, last_commit=False, @@ -200,6 +201,9 @@ def lint( # type: ignore # To lint specific files: > i lint --files="dir1/file1.py dir2/file2.md" + # To lint the files changed in the last commit, excluding specific files: + > i lint --last-commit --skip-files="dir1/file1.py dir2/file2.md" + # To lint all the files 
in the current dir using only formatting actions: > i lint --dir-name . --only-format @@ -213,6 +217,7 @@ def lint( # type: ignore :param stage: the image stage to use (e.g., "prod", "dev", "local") :param version: the version of the container to use :param files: specific files to lint (e.g. "dir1/file1.py dir2/file2.md") + :param skip_files: specific files to skip during linting (e.g. "dir1/file1.py dir2/file2.md") :param dir_name: name of the dir where all files should be linted :param modified: lint the files modified in the current git client :param last_commit: lint the files modified in the previous commit @@ -251,6 +256,8 @@ def lint( # type: ignore lint_cmd_opts.append("--branch") else: raise ValueError("No file selection arguments are specified") + if len(skip_files) > 0: + lint_cmd_opts.append(f"--skip_files {skip_files}") # lint_cmd_opts.append(f"--num_threads {num_threads}") # Add the action selection argument, if needed. diff --git a/linters/base.py b/linters/base.py index 349d30043..83e852332 100755 --- a/linters/base.py +++ b/linters/base.py @@ -65,17 +65,19 @@ # ############################################################################# -def _filter_files(file_paths: List[str]) -> List[str]: +def _filter_files(file_paths: List[str], file_paths_to_skip: List[str]) -> List[str]: """ - Filter the list of files to be linted. + Filter the list of files by removing invalid or excluded ones. 
The following files are skipped: - Files that do not exist - Non-files (directories) - Ipynb checkpoints - Input and output files in unit tests + - Files explicitly excluded by the user - :param file_paths: all the original files to be linted + :param file_paths: all the original files to validate and filter + :param file_paths_to_skip: files to exclude from processing :return: files that passed the filters """ file_paths_to_keep: List[str] = [] @@ -88,6 +90,8 @@ def _filter_files(file_paths: List[str]) -> List[str]: is_valid &= ".ipynb_checkpoints/" not in file_path # Skip input and output files used in unit tests. is_valid &= not liutils.is_test_input_output_file(file_path) + # Skip files explicitly excluded by user. + is_valid &= file_path not in file_paths_to_skip if is_valid: file_paths_to_keep.append(file_path) else: @@ -127,8 +131,12 @@ def _get_files_to_lint(args: argparse.Namespace) -> List[str]: cmd = f"find {dir_name} -name '*' -type f" _, output = hsystem.system_to_string(cmd) file_paths = output.split("\n") + file_paths_to_skip: List[str] = [] + if args.skip_files: + # Get the files to skip during linting. + file_paths_to_skip = args.skip_files # Remove files that should not be linted. - file_paths = _filter_files(file_paths) + file_paths = _filter_files(file_paths, file_paths_to_skip) if len(file_paths) < 1: _LOG.warning("No files that can be linted were found") return file_paths @@ -450,6 +458,12 @@ def _parse() -> argparse.ArgumentParser: action="store_true", help="Select files modified in the current branch with respect to master", ) + parser.add_argument( + "--skip_files", + nargs="+", + type=str, + help="Files to skip during linting" + ) # Action selection. 
parser.add_argument( "--only_format", From fe610b79e77e030e0ac62bf71585f7253c8dfd7d Mon Sep 17 00:00:00 2001 From: Peeyush Dyavarashetty <32363748+Peeyush4@users.noreply.github.com> Date: Fri, 18 Apr 2025 13:24:38 -0400 Subject: [PATCH 043/193] HelpersTask545 Unstage tmp files added by Linter (#592) * Remove staging tmp.scratch dir files * Changes requested --------- Co-authored-by: Sonya Nikiforova --- linters/base.py | 4 +++- linters/utils.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/linters/base.py b/linters/base.py index 83e852332..c63af99d8 100755 --- a/linters/base.py +++ b/linters/base.py @@ -370,8 +370,10 @@ def _lint( # Annotate each lint with a [tag] specifying the action name. cur_action_lints = [lnt + f" [{action_name}]" for lnt in cur_action_lints] lints.extend(cur_action_lints) - if not hserver.is_inside_ci(): + in_tmp_scratch_dir = liutils.is_under_tmp_scratch_dir(file_path) + if not hserver.is_inside_ci() and not in_tmp_scratch_dir: # Stage the linted file for commit if Linter was run manually (not within CI). + # Skip staging files in `tmp.scratch` dir as they are temporary. cmd = f"git add {file_path}" hsystem.system(cmd) return lints diff --git a/linters/utils.py b/linters/utils.py index 75483cec1..30117c961 100644 --- a/linters/utils.py +++ b/linters/utils.py @@ -119,6 +119,7 @@ def tee(cmd: str, executable: str, abort_on_error: bool) -> Tuple[int, List[str] # ############################################################################# + # TODO(gp): Move in a more general file: probably system_interaction. def _is_under_dir(file_name: str, dir_name: str) -> bool: """ @@ -128,6 +129,13 @@ def _is_under_dir(file_name: str, dir_name: str) -> bool: return dir_name in subdir_names +def is_under_tmp_scratch_dir(file_name: str) -> bool: + """ + Return whether a file is under the temporary scratch directory. 
+ """ + return _is_under_dir(file_name, "tmp.scratch") + + def is_under_test_dir(file_name: str) -> bool: """ Return whether a file is under a test directory (which is called "test"). From dd456efa90ccd303938cba8cfdebcd5d283dfa0e Mon Sep 17 00:00:00 2001 From: Heanh Sok Date: Fri, 18 Apr 2025 16:17:02 -0400 Subject: [PATCH 044/193] HelpersTask282_Rename_DIR_PREFIX_to_DIR_SUFFIX (#567) * checkpoint * checkpoint * checkpoint * checkpoint * checkpoint --- dev_scripts_helpers/thin_client/build.py | 10 +++--- .../thin_client/test_super_repo.sh | 10 +++--- .../thin_client/thin_client_utils.py | 18 +++++----- dev_scripts_helpers/thin_client/tmux.py | 36 ++++++++----------- helpers/repo_config_utils.py | 18 ++++++++++ 5 files changed, 50 insertions(+), 42 deletions(-) diff --git a/dev_scripts_helpers/thin_client/build.py b/dev_scripts_helpers/thin_client/build.py index 1fa3c2c36..68f7a25e4 100755 --- a/dev_scripts_helpers/thin_client/build.py +++ b/dev_scripts_helpers/thin_client/build.py @@ -17,15 +17,12 @@ import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem +import helpers.repo_config_utils as hrecouti _LOG = logging.getLogger(__name__) SCRIPT_PATH = os.path.abspath(__file__) -# This is specific of this repo. -# To customize: xyz -DIR_PREFIX = "helpers" - def _system(cmd: str) -> None: print(hprint.frame(cmd)) @@ -46,8 +43,9 @@ def _main(parser: argparse.ArgumentParser) -> None: raise RuntimeError( "AWS CLI is not installed. Please install it and try again." ) + dir_suffix = hrecouti.get_repo_config().get_dir_suffix() # Create the virtual environment. - venv_dir = tcu.get_venv_dir(DIR_PREFIX) + venv_dir = tcu.get_venv_dir(dir_suffix) # Double check that the dir is in home. hdbg.dassert( venv_dir.startswith(os.environ["HOME"] + "/src/venv"), @@ -69,7 +67,7 @@ def _main(parser: argparse.ArgumentParser) -> None: activate_cmd = f"source {venv_dir}/bin/activate" _system(activate_cmd) # Install the requirements. 
- thin_environ_dir = tcu.get_thin_environment_dir(DIR_PREFIX) + thin_environ_dir = tcu.get_thin_environment_dir(dir_suffix) requirements_path = os.path.join(thin_environ_dir, "requirements.txt") tmp_requirements_path = os.path.join(thin_environ_dir, "tmp.requirements.txt") shutil.copy(requirements_path, tmp_requirements_path) diff --git a/dev_scripts_helpers/thin_client/test_super_repo.sh b/dev_scripts_helpers/thin_client/test_super_repo.sh index 93b75528b..d70b2cddf 100755 --- a/dev_scripts_helpers/thin_client/test_super_repo.sh +++ b/dev_scripts_helpers/thin_client/test_super_repo.sh @@ -1,18 +1,18 @@ #!/bin/bash -xe -DIR_PREFIX="sports_analytics" +DIR_SUFFIX="sports_analytics" -# dev_scripts_{DIR_PREFIX}/thin_client/build.py +# dev_scripts_{DIR_SUFFIX}/thin_client/build.py # Test helpers setenv. (cd helpers_root; source dev_scripts_helpers/thin_client/setenv.sh) # Test super-repo setenv. -source dev_scripts_${DIR_PREFIX}/thin_client/setenv.sh +source dev_scripts_${DIR_SUFFIX}/thin_client/setenv.sh # Test tmux. -dev_scripts_${DIR_PREFIX}/thin_client/tmux.py --create_global_link -dev_scripts_${DIR_PREFIX}/thin_client/tmux.py --index 1 +dev_scripts_${DIR_SUFFIX}/thin_client/tmux.py --create_global_link +dev_scripts_${DIR_SUFFIX}/thin_client/tmux.py --index 1 # Test building image. 
i docker_build_local_image --version 1.0.0 && i docker_tag_local_image_as_dev --version 1.0.0 diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.py b/dev_scripts_helpers/thin_client/thin_client_utils.py index bcad5ce42..e43c6187b 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.py +++ b/dev_scripts_helpers/thin_client/thin_client_utils.py @@ -45,15 +45,15 @@ def get_home_dir() -> str: return home_dir -def get_thin_environment_dir(dir_prefix: str) -> str: +def get_thin_environment_dir(dir_suffix: str) -> str: git_root_dir = get_git_root_dir() - thin_environ_dir = f"{git_root_dir}/dev_scripts_{dir_prefix}/thin_client" + thin_environ_dir = f"{git_root_dir}/dev_scripts_{dir_suffix}/thin_client" return thin_environ_dir -def get_venv_dir(dir_prefix: str) -> str: +def get_venv_dir(dir_suffix: str) -> str: home_dir = get_home_dir() - venv_dir = f"{home_dir}/src/venv/client_venv.{dir_prefix}" + venv_dir = f"{home_dir}/src/venv/client_venv.{dir_suffix}" return venv_dir @@ -235,7 +235,7 @@ def _create_repo_tmux( def create_tmux_session( parser: argparse.ArgumentParser, script_path: str, - dir_prefix: str, + dir_suffix: str, setenv_path: str, # TODO(Juraj): deprecate the var, the behavior is now inferred. has_subrepo: bool, @@ -244,7 +244,7 @@ def create_tmux_session( Creates a new tmux session or attaches to an existing one. This function checks if a tmux session with the given name (derived from - `dir_prefix` and `index` argument) already exists. If it does, the function + `dir_suffix` and `index` argument) already exists. If it does, the function either attaches to the existing session or destroys it and creates a new one, based on the `force_restart` argument. If the session does not exist, a new one is created. @@ -254,7 +254,7 @@ def create_tmux_session( :param parser: Argument parser object. :param script_path: Path to the script file. - :param dir_prefix: Prefix for the directory and tmux session name. 
+    :param dir_suffix: Suffix for the directory and tmux session name.
-sys.path.append("helpers_root/dev_scripts_helpers/thin_client") -import thin_client_utils as tcu - -#sys.path.append("helpers_root/helpers") -#import helpers.hdbg as hdbg - -_LOG = logging.getLogger(__name__) - - if __name__ == "__main__": parser = tcu.create_parser(__doc__) - if _HAS_SUBREPO: - # To customize: xyz - dir_prefix = "xyz" - else: - # `helpers` has no super-repo. - dir_prefix = "helpers" - setenv_path = os.path.join(f"dev_scripts_{dir_prefix}", "thin_client", - "setenv.sh") + dir_suffix = hrecouti.get_repo_config().get_dir_suffix() + setenv_path = os.path.join( + f"dev_scripts_{dir_suffix}", "thin_client", "setenv.sh" + ) tcu.create_tmux_session( - parser, _SCRIPT_PATH, dir_prefix, setenv_path, _HAS_SUBREPO + parser, _SCRIPT_PATH, dir_suffix, setenv_path, _HAS_SUBREPO ) diff --git a/helpers/repo_config_utils.py b/helpers/repo_config_utils.py index 700e7be5c..9edc92f87 100644 --- a/helpers/repo_config_utils.py +++ b/helpers/repo_config_utils.py @@ -318,6 +318,24 @@ def get_html_dir_to_url_mapping(self) -> Dict[str, str]: } return dir_to_url + def get_dir_suffix(self) -> str: + """ + Return the suffix of the dev_scripts_{dir_suffix} dir for the repo. + + E.g., `helpers` for `dev_scripts_helpers` in //helpers repo. + """ + value = self._data["runnable_dir_info"]["dir_suffix"] + return value + + def use_helpers_as_nested_module(self) -> bool: + """ + Return whether the helpers repo is used as a nested module. + """ + value = bool( + self._data["runnable_dir_info"]["use_helpers_as_nested_module"] + ) + return value + # TODO(gp): Add functions for container_registry_info. # Utils. 
From 49a9ccd6d04e69591336767cf8eb81496bf189e4 Mon Sep 17 00:00:00 2001 From: Sonya Nikiforova Date: Fri, 18 Apr 2025 22:29:54 +0200 Subject: [PATCH 045/193] Helpers task591 document intern offboarding procedure (#594) * HelpersTask591: Update offboarding docs * HelpersTask591: Improve style --- .../admin.onboarding_process.reference.md | 90 ------------------- .../ck.hiring_process.how_to_guide.md | 24 ++++- .../ck.offboarding_process.how_to_guide.md | 28 ++++++ 3 files changed, 50 insertions(+), 92 deletions(-) delete mode 100644 docs/onboarding/admin.onboarding_process.reference.md create mode 100644 docs/onboarding/ck.offboarding_process.how_to_guide.md diff --git a/docs/onboarding/admin.onboarding_process.reference.md b/docs/onboarding/admin.onboarding_process.reference.md deleted file mode 100644 index 7396025c4..000000000 --- a/docs/onboarding/admin.onboarding_process.reference.md +++ /dev/null @@ -1,90 +0,0 @@ -# Untitled - - - -- [On-boarding Process](#on-boarding-process) -- [Off-boarding process](#off-boarding-process) - - - -## On-boarding Process - -1. A candidate (intern or full-time candidate) submits a request - -- The `ResearchMeister` gets the notification from the submission Gsheet and - creates an Asana task under `Not started` with: - - Name (nickname) email - - E.g, `Yuanxuan ****** ` - - Put the form links, the GitHub account, the LinkedIn / CV link, etc. in the - Asana description (all the info that we typically need to access quickly) - - Google form: ? - - CV / LinkedIn: ? - - GitHub handle: ? - - Email: ? - - Devops candidate: Yes / no - - Intern vs Full-time candidate: ? - - Quick review if the person has the skills we are looking for - - Assign to a shepherd - - If interns -> Samarth/Sonaal - - If full-time candidate -> shepherd - - If not clear ask GP - -2. 
The shepherd reviews the CV to get a sense if they are decent or not - - It posts a quick summary of pros and cons - - We want to increase the quality of the collaborators, so if there is a red - flag we can decide to not on-board - - If you are uncertain, ask more people to take a look - - The goal is to avoid on-boarding collaborators that will likely disappoint - us - -3. If the candidate is a no go, GP sends an email of rejection -4. The on-boarding shepherd is in charge of updating the Asana task with every - interesting event -5. We start the on-boarding process as per - - Update the - [Contributor List](https://docs.google.com/spreadsheets/d/1eRZJaj5-1g6W7w_Ay4UhJEdtAvrTTM1V94cKj6_Vwoc/edit#gid=1253964093) - - Copy the information from - [Contributor Info](https://docs.google.com/spreadsheets/d/13Mxj5ZIydMQHSmJUDCpURB5w-50RPXC0AjgKWYcMZnw/edit#gid=2038824432) - response sheet to the - [Contributor List](https://docs.google.com/spreadsheets/d/1eRZJaj5-1g6W7w_Ay4UhJEdtAvrTTM1V94cKj6_Vwoc/edit#gid=1253964093) - as it is a master sheet for all the collaborators - - Ping GP on the Asana task for that collaborator for invitation to the repo - - Add Contributor email as Commenter to the - [KaizenFlow - Contributors gdrive](https://drive.google.com/drive/u/0/folders/1LXwKpmaFWJI-887IoA50sVC8-dw_1L8I) - -6. 
When the collaborator is ready to be on-boarded, file an issue like "On-board - " - - The content of the issue will be the following checklist (note that GitHub - needs full paths to point to the documentation from an issue) - ``` - - [ ] Acknowledge the pledge to put effort and time in the project - - [ ] Fork, star, watch the KaizenFlow repo so that GitHub promotes our repo (we gotta spread the work) - - [ ] Follow the [quick start for development](/docs/onboarding/kaizenflow.prepare_for_development.how_to_guide.md) - - [ ] Set up the [development environment](/docs/onboarding/intern.set_up_development_on_laptop.how_to_guide.md) - - [ ] Read and start internalizing [KaizenFlow Python coding style guide](/docs/coding/all.coding_style.how_to_guide.md) - - [ ] Read about your [first code review](/docs/coding/all.submit_code_for_review.how_to_guide.md) - - [ ] Peruse the map of [all the documentation](/docs/onboarding/all.development_documents.reference.md) - - [ ] Learn about our [org process](/docs/work_organization/all.team_collaboration.how_to_guide.md) - - [ ] Get assigned a warm-up issue - - If you are graduating soon and you would like to get a full-time job in - one of the companies in the KaizenFlow ecosystem reach out to GP at - gp@causify.ai - ``` - - Admins need to keep track on the progress being made by the collaborator. - -7. We score candidates every two weeks - - [Public gsheet](https://docs.google.com/spreadsheets/d/1a8ypuO2ODOzjp9BaRN23HWa5P7ruTKs_gHZWaPYsvy4) - - [Private gsheet](https://docs.google.com/spreadsheets/d/1Qr2-Uo8YxkShrfGY43PV_E1W1Trkc-c5SiNelxLBKwE) - -## Off-boarding process - -A list of things to do to off-board an intern / collaborator - -1. [ ] Remove from [GitHub](https://github.com/kaizen-ai/kaizenflow) -2. [ ] Remove from - [contributors](https://groups.google.com/u/0/a/crypto-kaizen.com/g/contributors/members)@ -3. [ ] Remove from the Slack workspace -4. [ ] Remove Gdocs access: - 1. 
[KaizenFlow](https://drive.google.com/drive/folders/1-aaFlPtlbJ-pUL-c5GQbjFgZRp9ZNRUk?usp=sharing) - 2. [Process](https://drive.google.com/drive/folders/1sJDqCjM1Q_nq8diyZDiWO8mVBQW5Wg_X?usp=sharing) - 3. [Crypto-tech](https://drive.google.com/drive/folders/1zawE6IEBDpWLTbpK-03z75f5pu_T9Jba?usp=sharing) diff --git a/docs/onboarding/ck.hiring_process.how_to_guide.md b/docs/onboarding/ck.hiring_process.how_to_guide.md index fe3dbc63b..fa1a75ff4 100644 --- a/docs/onboarding/ck.hiring_process.how_to_guide.md +++ b/docs/onboarding/ck.hiring_process.how_to_guide.md @@ -65,7 +65,7 @@ - Members of the hiring team alternate being the HiringMeister for 2 weeks - To see who is the HiringMeister now, refer to - [Rotation Meisters](https://docs.google.com/spreadsheets/d/1Ab6a3BVeLX1l1B3_A6rNY9pHRsofeoCw2ip2dkQ6SdA) + [Rotation Meisters](https://docs.google.com/spreadsheets/d/12OhDW4hzSLekorrri2WfRkV8h3JcnB8WQd1JEL_n0D8/edit) - HiringMeister's duties include: - Organizing the hiring and onboarding process (see [below](#step-by-step) for more details) @@ -113,7 +113,7 @@ ``` - We use this Asana task to communicate about the applicant -- HiringMeister: send an email to the applicant with a link to the +- HiringMeister/GP: send an email to the applicant with a link to the [questionnaire](https://docs.google.com/forms/d/e/1FAIpQLScWAavYiYj1IfWGP1QEv2jqjKvQKnFjseryhzmIIHZKnZ4HkA/viewform) to gather information about them - Responses are available @@ -130,11 +130,31 @@ profile, we should proceed further in the process - It's ok to ask more team members to take a look - If the candidate is a no-go, GP sends an email of rejection + - If we decide to onboard the candidate, continue with the steps below + +- HiringMeister/GP: send an email asking to confirm if they are still interested + and ready to go + - Proceed with the steps below only if they respond with a confirmation + +- HiringMeister/GP: send invitations to GitHub repos with `write` permissions: + - 
[`helpers`](https://github.com/causify-ai/helpers/settings/access) + - [`tutorials`](https://github.com/causify-ai/tutorials/settings/access) - HiringMeister: create a GitHub issue for onboarding the intern - Follow the instructions in [`intern.onboarding_checklist.reference.md`](/docs/onboarding/intern.onboarding_checklist.reference.md) +- HiringMeister: update the Asana task for the intern + - Move to the "Onboarding" section + - Add a section to track the intern's issues and PRs + + ```verbatim + GitHub: + Onboarding issue: + Issues: + PRs: + ``` + - HiringMeister: regularly check the updates made by the intern in the onboarding issue and help resolve any errors they face - This "light" onboarding process should take 2-3 days max diff --git a/docs/onboarding/ck.offboarding_process.how_to_guide.md b/docs/onboarding/ck.offboarding_process.how_to_guide.md new file mode 100644 index 000000000..3f8d29ade --- /dev/null +++ b/docs/onboarding/ck.offboarding_process.how_to_guide.md @@ -0,0 +1,28 @@ +# Offboarding process + + + +- [Offboarding for interns](#offboarding-for-interns) +- [Offboarding for permanent team members](#offboarding-for-permanent-team-members) + + + +## Offboarding for interns + +- [ ] Send a good-bye email +- [ ] Remove access to GitHub repos + - [ ] [helpers](https://github.com/causify-ai/helpers) + - [ ] [tutorials](https://github.com/causify-ai/tutorials) +- [ ] Remove from the GitHub Intern-focused project +- [ ] Remove from the `contributors@causify.ai` mailing group +- [ ] Remove from the intern Telegram channel +- [ ] If applicable, remove access to the Google Drive +- [ ] If applicable, deprecate AWS credentials and access to the + `causify-data-collaborators` bucket +- [ ] Move the Asana task corresponding to the intern to the "Let go" section +- [ ] Update the + [Access Tracker](https://docs.google.com/spreadsheets/d/1UQEHzWhgnQ6s1NK3qr03a0-CgDuLmXBqhSc_-ISeLx4/edit?gid=0#gid=0) + gdoc + - Put "No" in the columns that they no longer have 
access to + +## Offboarding for permanent team members From 3964b917350d2a70d5bd3d07cdeff764af1aba77 Mon Sep 17 00:00:00 2001 From: Peeyush Dyavarashetty <32363748+Peeyush4@users.noreply.github.com> Date: Sat, 19 Apr 2025 15:19:25 -0400 Subject: [PATCH 046/193] Push load and save labels in sync_gh_issue_labels (#593) Co-authored-by: Sonya Nikiforova --- .../github/sync_gh_issue_labels.py | 94 ++++++++++--------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/dev_scripts_helpers/github/sync_gh_issue_labels.py b/dev_scripts_helpers/github/sync_gh_issue_labels.py index 51f8940f0..35cf69ec7 100755 --- a/dev_scripts_helpers/github/sync_gh_issue_labels.py +++ b/dev_scripts_helpers/github/sync_gh_issue_labels.py @@ -63,6 +63,53 @@ def __init__(self, name: str, description: str, color: str): def __repr__(self): return f"label(name='{self.name}', description='{self.description}', color='{self.color}')" + # ######################################################################### + # Label loading/saving + # ######################################################################### + + @staticmethod + def load_labels(path: str) -> List["Label"]: + """ + Load labels from label inventory manifest file. + + :param path: path to label inventory manifest file + :return: label objects + """ + with open(path, "r", encoding="utf-8") as file: + yaml_data = yaml.safe_load(file) + labels = [ + Label( + name=item["name"], + description=item["description"], + color=item["color"], + ) + for item in yaml_data + ] + return labels + + @staticmethod + def save_labels(labels: List["Label"], path: str) -> None: + """ + Save labels to the label inventory manifest file. 
+ + :param labels: label objects + :param path: path to save the label inventory manifest file to + """ + with open(path, "w", encoding="utf-8") as file: + labels_data = [ + Label( + name=label.name, + description=label.description if label.description else None, + color=label.color, + ).to_dict() + for label in labels + ] + # Set `default_flow_style=False` to use block style instead of + # flow style for better readability. + yaml.dump( + labels_data, file, default_flow_style=False, sort_keys=False + ) + @property def name(self) -> str: return self._name @@ -88,49 +135,6 @@ def to_dict(self) -> Dict[str, str]: } -# TODO(*): GSI. Move to `Label` class as static method. -def _load_labels(path: str) -> List[Label]: - """ - Load labels from label inventory manifest file. - - :param path: path to label inventory manifest file - :return: label objects - """ - with open(path, "r") as file: - yaml_data = yaml.safe_load(file) - labels = [ - Label( - name=item["name"], - description=item["description"], - color=item["color"], - ) - for item in yaml_data - ] - return labels - - -# TODO(*): GSI. Move to `Label` class as static method. -def _save_labels(labels: List[Label], path: str) -> None: - """ - Save labels to the label inventory manifest file. - - :param labels: label objects - :param path: path to save the label inventory manifest file to - """ - with open(path, "w") as file: - labels_data = [ - Label( - name=label.name, - description=label.description if label.description else None, - color=label.color, - ).to_dict() - for label in labels - ] - # Set `default_flow_style=False` to use block style instead of - # flow style for better readability. 
- yaml.dump(labels_data, file, default_flow_style=False, sort_keys=False) - - def _parse() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter @@ -179,7 +183,7 @@ def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) # Load labels from label inventory manifest file. - labels = _load_labels(args.input_file) + labels = Label.load_labels(args.input_file) labels_map = {label.name: label for label in labels} token = os.environ[args.token_env_var] hdbg.dassert(token) @@ -196,7 +200,7 @@ def _main(parser: argparse.ArgumentParser) -> None: git_root_dir = hgit.get_client_root(False) file_name = f"tmp.labels.{args.owner}.{args.repo}.yaml" file_path = f"{git_root_dir}/{file_name}" - _save_labels(current_labels, file_path) + Label.save_labels(current_labels, file_path) _LOG.info("Labels backed up to %s", file_path) else: _LOG.warning("Skipping saving labels as per user request") From d120611296b8018c44be4d8915f51dfc365089e3 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Tue, 22 Apr 2025 12:06:25 -0400 Subject: [PATCH 047/193] Improve bounty hunters doc --- ..._hunters.onboarding_checklist.reference.md | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 docs/onboarding/bounty_hunters.onboarding_checklist.reference.md diff --git a/docs/onboarding/bounty_hunters.onboarding_checklist.reference.md b/docs/onboarding/bounty_hunters.onboarding_checklist.reference.md new file mode 100644 index 000000000..9c419085d --- /dev/null +++ b/docs/onboarding/bounty_hunters.onboarding_checklist.reference.md @@ -0,0 +1,57 @@ +- Source: + [`bounty.onboarding_checklist.reference.md`](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/bounty.onboarding_checklist.reference.md) + +### Org + +- [ ] **Collaborator**: Fork the repos + - [ ] [helpers](https://github.com/causify-ai/helpers) + - [ 
] [tutorials](https://github.com/causify-ai/tutorials) + +- [ ] **Collaborator**: File an issue with this checklist + - The title is "Onboarding {{Name}}" + - The issue should be assigned to the collaborator + +- [ ] **Collaborator**: Update this GitHub issue if you face any problems. If applicable, do a PR proposing improvements to the checklist (or any other docs), since this will allow us to improve the process as we move forward + +- [ ] **Collaborator**: Post your laptop's OS (Windows, Linux, Mac) in the comments of this issue + +- [ ] **Collaborator**: Confirm access to the public GH repos + - [ ] [helpers](https://github.com/causify-ai/helpers) + - [ ] [tutorials](https://github.com/causify-ai/tutorials) + +### IT setup + +- [ ] **Collaborator**: Set up the development environment following instructions in [`intern.set_up_development_on_laptop.how_to_guide.md`](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/intern.set_up_development_on_laptop.how_to_guide.md) + +### Must-read + +- [ ] **Collaborator**: Carefully study all the documents in [the must-read list](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/all.dev_must_read_checklist.reference.md) + - [ ] [General rules of collaboration](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.team_collaboration.how_to_guide.md) + - [ ] [Coding style guide](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.coding_style.how_to_guide.md) + - [ ] [How to write unit tests](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.write_unit_tests.how_to_guide.md) + - [ ] [How to run unit tests](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.run_unit_tests.how_to_guide.md) + - [ ] [Creating a Jupyter Notebook](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md) + - [ ] [What to do before opening a 
PR](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.submit_code_for_review.how_to_guide.md) + - [ ] [Code review process](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.code_review.how_to_guide.md) + - [ ] [Git workflows and best practices](https://github.com/causify-ai/helpers/blob/master/docs/work_tools/git/all.git.how_to_guide.md) + - [ ] [GitHub organization](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.use_github.how_to_guide.md) + - [ ] [Tips for writing documentation](https://github.com/causify-ai/helpers/blob/master/docs/documentation_meta/all.writing_docs.how_to_guide.md) + - They will help you get up to speed with our practices and development style + - Read them carefully one by one + - Ask questions + - Memorize / internalize all the information + - Take notes + - Mark the reading as done + - Open a GH issue/PR to propose improvements to the documentation + +### Final checks + +- [ ] **Collaborator**: Exercise all the important parts of the systems + - [ ] Create a GitHub issue + - [ ] Check out and pull the latest version of the repo code + - [ ] Create a branch + - [ ] Run regressions (`i run_fast_tests`) + - [ ] Run Linter (`i lint --files="..."`) + - [ ] Start a Docker container (`i docker_bash`) + - [ ] Start a Jupyter server (`i docker_jupyter`) + - [ ] Do a PR From c95e8e9db66cc6c777597f1d63a181424e28ee8f Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Fri, 25 Apr 2025 20:45:06 -0400 Subject: [PATCH 048/193] Improve --- .../documentation/transform_notes.py | 4 ++-- helpers/hmarkdown.py | 20 +++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 06fa76aa0..5b646a0c9 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -84,10 +84,10 @@ def _main(parser: 
argparse.ArgumentParser) -> None: txt = "\n".join(txt) txt = hmarkdo.remove_formatting(txt) hparser.write_file(txt, out_file_name) - elif cmd == "md_fix_chatgpt_math_syntax": + elif cmd == "md_fix_chatgpt_output": txt = hparser.read_file(in_file_name) txt = "\n".join(txt) - txt = hmarkdo.fix_chatgpt_math_syntax(txt) + txt = hmarkdo.fix_chatgpt_output(txt) hparser.write_file(txt, out_file_name) else: assert 0, f"Invalid cmd='{cmd}'" diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 104fb9c24..db9471fcc 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -60,7 +60,7 @@ def is_header(line: str) -> Tuple[bool, int, str]: - The level of the header (0 if not a header) - The title of the header (empty string if not a header) """ - hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) + #hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) m = re.match(r"(#+)\s+(.*)", line) is_header_ = bool(m) if m: @@ -274,7 +274,7 @@ def remove_formatting(txt: str) -> str: return txt -def fix_chatgpt_math_syntax(txt: str) -> str: +def fix_chatgpt_output(txt: str) -> str: # Replace \( ... \) math syntax with $ ... $. txt = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", txt) # Replace \[ ... \] math syntax with $$ ... $$, handling multiline equations. @@ -283,6 +283,22 @@ def fix_chatgpt_math_syntax(txt: str) -> str: txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) # Replace \mid with `|`. txt = re.sub(r"\\mid", r"|", txt) + # E.g.,`` • Description Logics (DLs) are a family`` + # Replace `•` with `-` + txt = re.sub(r"•\s+", r"- ", txt) + # Replace `\t` with 2 spaces + txt = re.sub(r"\t", r" ", txt) + # Remove `⸻`. + txt = re.sub(r"⸻", r"", txt) + # “ + txt = re.sub(r"“", r'"', txt) + # ” + txt = re.sub(r"”", r'"', txt) + # ’ + txt = re.sub(r"’", r"'", txt) + # Remove empty spaces at beginning / end of Latex equations $...$. 
+ # E.g., $ \text{Student} $ becomes $\text{Student}$ + txt = re.sub(r'\$\s+(.*?)\s\$', r'$\1$', txt) return txt From dab6cbb131faaac8a32d42fd3b8031f9bcae0591 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sat, 26 Apr 2025 11:47:27 -0400 Subject: [PATCH 049/193] Update --- .../documentation/dockerized_prettier.py | 12 +-- .../documentation/transform_notes.py | 14 ++- dev_scripts_helpers/llms/llm_transform.py | 26 +++-- .../llms/test/test_llm_transform.py | 33 +++++- helpers/hmarkdown.py | 6 ++ helpers/hparser.py | 100 ++++++++++++++++-- 6 files changed, 161 insertions(+), 30 deletions(-) diff --git a/dev_scripts_helpers/documentation/dockerized_prettier.py b/dev_scripts_helpers/documentation/dockerized_prettier.py index ed89a97b1..80de06a11 100755 --- a/dev_scripts_helpers/documentation/dockerized_prettier.py +++ b/dev_scripts_helpers/documentation/dockerized_prettier.py @@ -45,8 +45,7 @@ def _parse() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) - parser.add_argument("-i", "--input", action="store", required=True) - parser.add_argument("-o", "--output", action="store", default="") + hparser.add_input_output_args(parser) hparser.add_dockerized_script_arg(parser) hparser.add_verbosity_arg(parser) return parser @@ -55,18 +54,19 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: # Parse everything that can be parsed and returns the rest. 
args, cmd_opts = parser.parse_known_args() + in_file_name, out_file_name = hparser.parse_input_output_args( + args, clear_screen=True + ) if not cmd_opts: cmd_opts = [] hdbg.init_logger( verbosity=args.log_level, use_exec_path=True, force_white=False ) _LOG.debug("cmd_opts: %s", cmd_opts) - if not args.output: - args.output = args.input hdocker.run_dockerized_prettier( - args.input, + in_file_name, cmd_opts, - args.output, + out_file_name, force_rebuild=args.dockerized_force_rebuild, use_sudo=args.dockerized_use_sudo, ) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 5b646a0c9..dbeca6e33 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -28,6 +28,7 @@ """ import argparse +import hashlib import logging import helpers.hdbg as hdbg @@ -61,7 +62,13 @@ def _main(parser: argparse.ArgumentParser) -> None: in_file_name, out_file_name = hparser.parse_input_output_args( args, clear_screen=True ) - if cmd == "toc": + if cmd == "test": + # Compute the hash of a string to test the flow. 
+ txt = hparser.read_file(in_file_name) + txt = "\n".join(txt) + txt = hashlib.sha256(txt.encode("utf-8")).hexdigest() + hparser.write_file(txt, out_file_name) + elif cmd == "toc": txt = hparser.read_file(in_file_name) max_level = 3 header_list = hmarkdo.extract_headers_from_markdown( @@ -89,6 +96,11 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = "\n".join(txt) txt = hmarkdo.fix_chatgpt_output(txt) hparser.write_file(txt, out_file_name) + elif cmd == "md_clean_up": + txt = hparser.read_file(in_file_name) + txt = "\n".join(txt) + txt = hmarkdo.md_clean_up(txt) + hparser.write_file(txt, out_file_name) else: assert 0, f"Invalid cmd='{cmd}'" diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 4927ba424..651bc985a 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -203,17 +203,21 @@ def _main(parser: argparse.ArgumentParser) -> None: return # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) - # Since we need to call a container and passing stdin/stdout is tricky, - # we read the input and save it in a temporary file. - in_lines = hparser.read_file(in_file_name) - if in_file_name == "-": - tmp_in_file_name = "tmp.llm_transform.in.txt" - in_txt = "\n".join(in_lines) - hio.to_file(tmp_in_file_name, in_txt) - else: - tmp_in_file_name = in_file_name - # - tmp_out_file_name = "tmp.llm_transform.out.txt" + # # Since we need to call a container and passing stdin/stdout is tricky, + # # we read the input and save it in a temporary file. 
+ # in_lines = hparser.read_file(in_file_name) + # if in_file_name == "-": + # tmp_in_file_name = "tmp.llm_transform.in.txt" + # in_txt = "\n".join(in_lines) + # hio.to_file(tmp_in_file_name, in_txt) + # else: + # tmp_in_file_name = in_file_name + # # + # tmp_out_file_name = "tmp.llm_transform.out.txt" + tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( + in_file_name, "llm_transform" + ) + # TODO(gp): We should just automatically pass-through the options. cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] if args.fast_model: diff --git a/dev_scripts_helpers/llms/test/test_llm_transform.py b/dev_scripts_helpers/llms/test/test_llm_transform.py index a13fb7236..54d3256fd 100644 --- a/dev_scripts_helpers/llms/test/test_llm_transform.py +++ b/dev_scripts_helpers/llms/test/test_llm_transform.py @@ -49,9 +49,9 @@ def setup_test(self) -> Tuple[str, str, str]: out_file_name = os.path.join(self.get_scratch_space(), "output.md") return script, in_file_name, out_file_name - def test1(self) -> None: + def test_md_rewrite1(self) -> None: """ - Run the `llm_transform.py` script with a specific prompt tag and verify + Run the `llm_transform.py` script with the prompt `md_rewrite` and verify the output. """ script, in_file_name, out_file_name = self.setup_test() @@ -63,6 +63,8 @@ def test1(self) -> None: hsystem.system(cmd) # Check. self.assertTrue(os.path.exists(out_file_name)) + # TODO(gp): We should be able to check the output once we have CmampTask10710 + # fixed and we can run dind. if False: act = hio.from_file(out_file_name) exp = r""" @@ -74,10 +76,33 @@ def test1(self) -> None: """ self.assert_equal(act, exp, dedent=True) + def test_test1(self) -> None: + """ + Run the `llm_transform.py` script with the prompt `test` and verify + the output. + """ + script, in_file_name, out_file_name = self.setup_test() + # Run test. 
+ prompt_tag = "test" + cmd = f"{script} -i {in_file_name} -o {out_file_name} -p {prompt_tag}" + hsystem.system(cmd) + # Check. + self.assertTrue(os.path.exists(out_file_name)) + act = hio.from_file(out_file_name) + exp = r""" + - If there is no pattern we can try learning, measure if learning works and, in + the worst case, conclude that it does not work + - If we can find the solution in one step or program the solution, machine + learning is not the recommended technique, but it still works + - Without data we cannot do anything: data is all that matters + """ + self.assert_equal(act, exp, dedent=True) + + # TODO(gp): This can be enabled once we can mock the OpenAI interactions. @pytest.mark.skip(reason="Run manually since it needs OpenAI credentials") - def test2(self) -> None: + def test_all_prompts1(self) -> None: """ - Run the `llm_transform.py` script with various prompt tags and print + Run the `llm_transform.py` script with all the prompt tags and print the output. """ script, in_file_name, out_file_name = self.setup_test() diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index db9471fcc..37156e277 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -302,6 +302,12 @@ def fix_chatgpt_output(txt: str) -> str: return txt +def md_clean_up(txt: str) -> str: + # Remove dot at the end of each line. 
+ txt = re.sub(r'\.\s*$', '', txt, flags=re.MULTILINE) + return txt + + # ############################################################################# # Header processing # ############################################################################# diff --git a/helpers/hparser.py b/helpers/hparser.py index bdd7c6ea0..5f574c35d 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -166,6 +166,8 @@ def parse_dst_dir_arg(args: argparse.Namespace) -> Tuple[str, bool]: # ############################################################################# + + def add_action_arg( parser: argparse.ArgumentParser, valid_actions: List[str], @@ -262,24 +264,75 @@ def mark_action(action: str, actions: List[str]) -> Tuple[bool, List[str]]: # Command line options for input/output processing. # ############################################################################# +# For non-dockerized scripts the following idiom is used: +# +# ```python +# # Add input/output arguments to parser. +# hparser.add_input_output_args(parser) +# # Handle input/output arguments, including stdin/stdout. +# in_file_name, out_file_name = hparser.parse_input_output_args(args) +# ... +# # Read input file, handling stdin. +# in_lines = hparser.read_file(in_file_name) +# ... +# # Write output, handling stdout. +# hparser.write_file(txt, out_file_name) +# ``` +# See helpers_root/dev_scripts_helpers/coding_tools/transform_skeleton.py as an +# example. + +# For dockerized scripts the following idiom is used inside the wrapper, which +# calls the dockerized script: +# +# ```python +# # Add input/output arguments to parser. +# hparser.add_input_output_args(parser) +# # Handle input/output arguments, including stdin/stdout. +# in_file_name, out_file_name = hparser.parse_input_output_args(args) +# tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( +# in_file_name, "llm_transform") +# ... +# # For stdin/stdout, suppress the output of the container. 
+# suppress_output = in_file_name == "-" or out_file_name == "-" +# _run_dockerized_llm_transform( +# tmp_in_file_name, +# cmd_line_opts, +# tmp_out_file_name, +# return_cmd=False, +# force_rebuild=args.dockerized_force_rebuild, +# use_sudo=args.dockerized_use_sudo, +# suppress_output=suppress_output, +# ) +# ... +# # Write output, handling stdout. +# hparser.write_file(txt, out_file_name) +# ``` +# +# See helpers_root/dev_scripts_helpers/llms/llm_transform.py as an example. + def add_input_output_args( parser: argparse.ArgumentParser, *, in_default: Optional[str] = None, + in_required: bool = True, out_default: Optional[str] = None, + out_required: bool = False, ) -> argparse.ArgumentParser: """ - Add options to parse input and output file name. + Add options to parse input and output file name, and handle stdin / stdout. :param in_default: default file to be used for input - If `None`, it must be specified by the user - :param in_default: same as `in_default` but for output + :param in_required: whether the input file is required + :param out_default: default file to be used for output + - If `None`, it must be specified by the user + :param out_required: whether the output file is required """ parser.add_argument( "-i", "--in_file_name", - required=(in_default is None), + required=in_required, type=str, default=in_default, help="Input file or `-` for stdin", @@ -287,7 +340,7 @@ def add_input_output_args( parser.add_argument( "-o", "--out_file_name", - required=(out_default is None), + required=out_required, type=str, default=out_default, help="Output file or `-` for stdout", @@ -299,22 +352,28 @@ def parse_input_output_args( args: argparse.Namespace, *, clear_screen: bool = False ) -> Tuple[str, str]: """ + Parse input and output file name, handling stdin / stdout. + :return input and output file name. 
""" in_file_name = args.in_file_name out_file_name = args.out_file_name if out_file_name is None: + # If the output file is not specified, use the input file name, i.e., + # in place. out_file_name = in_file_name - # Print summary. + # Print summary. If we are using stdin / stdout, don't print anything since + # we don't want to pollute the output. if in_file_name != "-": if clear_screen: os.system("clear") _LOG.info(hprint.to_str("in_file_name")) _LOG.info(hprint.to_str("out_file_name")) + return in_file_name, out_file_name -# TODO(gp): -> from_file for symmetry for hio. +# TODO(gp): GFI -> from_file for symmetry for hio. def read_file(file_name: str) -> List[str]: """ Read file or stdin (represented by `-`), returning an array of lines. @@ -334,7 +393,7 @@ def read_file(file_name: str) -> List[str]: return txt -# TODO(gp): -> to_file for symmetry for hio. +# TODO(gp): GFI -> to_file for symmetry for hio. def write_file(txt: Union[str, List[str]], file_name: str) -> None: """ Write txt in a file or stdout (represented by `-`). @@ -350,6 +409,31 @@ def write_file(txt: Union[str, List[str]], file_name: str) -> None: f.write("\n".join(txt)) _LOG.info("Written file '%s'", file_name) + +def adapt_input_output_args_for_dockerized_scripts( + in_file_name: str, tag: str +) -> Tuple[str, str]: + """ + Adapt input and output file name for dockerized scripts. + + Since we need to call a container and passing stdin/stdout is tricky, + we read the input and save it in a temporary file. + + :param tag: tag to be used for the temporary file name (e.g., `llm_transform`) + """ + # Since we need to call a container and passing stdin/stdout is tricky, + # we read the input and save it in a temporary file. 
+ in_lines = read_file(in_file_name) + if in_file_name == "-": + tmp_in_file_name = f"tmp.{tag}.in.txt" + in_txt = "\n".join(in_lines) + hio.to_file(tmp_in_file_name, in_txt) + else: + tmp_in_file_name = in_file_name + # + tmp_out_file_name = f"tmp.{tag}.out.txt" + return tmp_in_file_name, tmp_out_file_name + # ############################################################################# # Command line options for parallel processing. @@ -581,4 +665,4 @@ def add_prompt_arg( action="store_true", help="Use a fast LLM model vs a high-quality one", ) - return parser + return parser \ No newline at end of file From b776fa702eb150c64ea7b64d80cdd4115e287d26 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 27 Apr 2025 11:03:00 -0400 Subject: [PATCH 050/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../git/git_hooks/commit-msg.py | 5 +- .../git/git_hooks/pre-commit.py | 20 ++++--- dev_scripts_helpers/llms/llm_prompts.py | 41 +++++++++++---- dev_scripts_helpers/llms/llm_transform.py | 2 +- .../llms/test/test_llm_transform.py | 52 +++++++++++++------ helpers/henv.py | 4 +- helpers/hserver.py | 29 +++++------ helpers/hunit_test_utils.py | 4 +- helpers/lib_tasks_docker.py | 4 +- 9 files changed, 105 insertions(+), 56 deletions(-) diff --git a/dev_scripts_helpers/git/git_hooks/commit-msg.py b/dev_scripts_helpers/git/git_hooks/commit-msg.py index 8883f4959..cdf492a5a 100755 --- a/dev_scripts_helpers/git/git_hooks/commit-msg.py +++ b/dev_scripts_helpers/git/git_hooks/commit-msg.py @@ -40,9 +40,10 @@ def _main(): ) sys.exit(1) # Read pre-commit output. 
- get_git_root_dir = dshgghout.get_git_root_dir() + #get_git_root_dir = dshgghout.get_git_root_dir() precommit_output_path = ( - f"{get_git_root_dir}/.git/hooks/tmp.precommit_output.txt" + #f"{get_git_root_dir}/.git/hooks/tmp.precommit_output.txt" + f"tmp.precommit_output.txt" ) try: with open(precommit_output_path, "r") as f: diff --git a/dev_scripts_helpers/git/git_hooks/pre-commit.py b/dev_scripts_helpers/git/git_hooks/pre-commit.py index 269f805ca..fdd86b00a 100755 --- a/dev_scripts_helpers/git/git_hooks/pre-commit.py +++ b/dev_scripts_helpers/git/git_hooks/pre-commit.py @@ -16,6 +16,7 @@ # NOTE: This file should depend only on Python standard libraries. import logging +import os import pathlib import sys from typing import List @@ -34,7 +35,7 @@ def _write_output_to_file(lines: List[str]) -> None: :param lines: pre-commit output lines """ - out_path = pathlib.Path(".git/hooks/tmp.precommit_output.txt") + out_path = pathlib.Path("tmp.precommit_output.txt") with out_path.open("w") as f: for line in lines: f.write(line + "\n") @@ -53,15 +54,20 @@ def _write_output_to_file(lines: List[str]) -> None: # dshgghout.check_file_size() lines.append("- 'check_file_size' passed") - # - dshgghout.check_words() - lines.append("- 'check_words' passed") + # TODO(gp): Disabled for now since it's too strict. + # dshgghout.check_words() + # lines.append("- 'check_words' passed") # dshgghout.check_python_compile() lines.append("- 'check_python_compile' passed") - # - dshgghout.check_gitleaks() - lines.append("- 'check_gitleaks' passed") + assert os.path.exists(".git") + if os.path.isdir(".git"): + dshgghout.check_gitleaks() + lines.append("- 'check_gitleaks' passed") + else: + # TODO(gp): Fix HelpersTask622. 
+ _LOG.warning("Skipping 'check_gitleaks' since we are in a submodule") + lines.append("- 'check_gitleaks' skipped") print( "\n" + dshgghout.color_highlight( diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 7bd4317ef..c7c4a8fa6 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1,5 +1,6 @@ import ast import functools +import hashlib import logging import os import re @@ -44,10 +45,14 @@ def get_prompt_tags() -> List[str]: matched_functions = sorted(matched_functions) return matched_functions + # Store the prompts that need a certain post-transforms to be applied outside # the container. OUTSIDE_CONTAINER_POST_TRANSFORMS = {} + +# TODO(gp): We should embed this outside_container_post_transforms in the +# prompts. if not OUTSIDE_CONTAINER_POST_TRANSFORMS: OUTSIDE_CONTAINER_POST_TRANSFORMS = { # These are all the prompts with post_transforms with @@ -82,6 +87,17 @@ def get_outside_container_post_transforms(transform_name: str) -> Set[str]: _PROMPT_OUT = Tuple[str, Set[str], Set[str]] + +def test() -> _PROMPT_OUT: + """ + This is just needed as a placeholder to test the flow. + """ + system = "" + pre_transforms = set() + post_transforms = set() + return system, pre_transforms, post_transforms + + _CONTEXT = r""" You are a proficient Python coder who pays attention to detail. I will pass you a chunk of Python code. @@ -562,16 +578,21 @@ def run_prompt( "Not all pre_transforms were run: %s", pre_transforms, ) - # We need to import this here since we have this package only when running - # inside a Dockerized executable. We don't want an import to this file - # assert since openai is not available in the local dev environment. 
- import helpers.hopenai as hopenai - - response = hopenai.get_completion( - txt, system_prompt=system_prompt, model=model, print_cost=True - ) - # _LOG.debug(hprint.to_str("response")) - txt_out = hopenai.response_to_txt(response) + if prompt_tag == "test": + txt = "\n".join(txt) + txt_out = hashlib.sha256(txt.encode("utf-8")).hexdigest() + else: + # We need to import this here since we have this package only when + # running inside a Dockerized executable. We don't want an import to + # this file assert since openai is not available in the local dev + # environment. + import helpers.hopenai as hopenai + + response = hopenai.get_completion( + txt, system_prompt=system_prompt, model=model, print_cost=True + ) + # _LOG.debug(hprint.to_str("response")) + txt_out = hopenai.response_to_txt(response) hdbg.dassert_isinstance(txt_out, str) # Run post-transforms. if _to_run("remove_code_delimiters", post_transforms): diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 651bc985a..70ce1a083 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -17,7 +17,7 @@ > llm_transform.py -i input.txt -o output.txt -p list # Code review -> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring +> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_review # Propose refactoring > llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring diff --git a/dev_scripts_helpers/llms/test/test_llm_transform.py b/dev_scripts_helpers/llms/test/test_llm_transform.py index 54d3256fd..8ea5dcf2f 100644 --- a/dev_scripts_helpers/llms/test/test_llm_transform.py +++ b/dev_scripts_helpers/llms/test/test_llm_transform.py @@ -29,7 +29,7 @@ class Test_llm_transform1(hunitest.TestCase): Run the script `llm_transform.py` in a Docker container. 
""" - def setup_test(self) -> Tuple[str, str, str]: + def setup_test(self, txt_id: str) -> Tuple[str, str, str]: """ Set up the test environment by creating an input markdown file and determining the script and output file paths. @@ -37,12 +37,20 @@ def setup_test(self) -> Tuple[str, str, str]: :returns: A tuple containing the script path, input file path, and output file path. """ - txt = r""" - - If there is no pattern we can try learning, measure if learning works and, in the worst case, conclude that it does not work - - If we can find the solution in one step or program the solution, machine learning is not the recommended technique, but it still works - - Without data we cannot do anything: data is all that matters - """ - txt = hprint.dedent(txt) + if txt_id == 0: + txt = r""" + - If there is no pattern we can try learning, measure if learning works and, in the worst case, conclude that it does not work + - If we can find the solution in one step or program the solution, machine learning is not the recommended technique, but it still works + - Without data we cannot do anything: data is all that matters + """ + txt = hprint.dedent(txt) + elif txt_id == 1: + txt = r""" + hello + """ + txt = hprint.dedent(txt) + else: + raise ValueError(f"Invalid txt_id: {txt_id}") in_file_name = os.path.join(self.get_scratch_space(), "input.md") hio.to_file(in_file_name, txt) script = hsystem.find_file_in_repo("llm_transform.py") @@ -54,7 +62,7 @@ def test_md_rewrite1(self) -> None: Run the `llm_transform.py` script with the prompt `md_rewrite` and verify the output. """ - script, in_file_name, out_file_name = self.setup_test() + script, in_file_name, out_file_name = self.setup_test(txt_id=0) # Run test. # We use this prompt since it doesn't call OpenAI, but it exercises all # the code. @@ -81,7 +89,7 @@ def test_test1(self) -> None: Run the `llm_transform.py` script with the prompt `test` and verify the output. 
""" - script, in_file_name, out_file_name = self.setup_test() + script, in_file_name, out_file_name = self.setup_test(txt_id=1) # Run test. prompt_tag = "test" cmd = f"{script} -i {in_file_name} -o {out_file_name} -p {prompt_tag}" @@ -90,11 +98,25 @@ def test_test1(self) -> None: self.assertTrue(os.path.exists(out_file_name)) act = hio.from_file(out_file_name) exp = r""" - - If there is no pattern we can try learning, measure if learning works and, in - the worst case, conclude that it does not work - - If we can find the solution in one step or program the solution, machine - learning is not the recommended technique, but it still works - - Without data we cannot do anything: data is all that matters + 1ad0d344ac10cac079e4eed01074c5e6ca29da2f91ce99bfaea890479aace045 + """ + self.assert_equal(act, exp, dedent=True) + + def test_test2(self) -> None: + """ + Run the `llm_transform.py` script with the prompt `test` through stdin. + """ + script, in_file_name, out_file_name = self.setup_test(txt_id=1) + # Run test. + prompt_tag = "test" + txt = "hello" + cmd = f"echo {txt} | {script} -i - -o {out_file_name} -p {prompt_tag}" + hsystem.system(cmd) + # Check. + self.assertTrue(os.path.exists(out_file_name)) + act = hio.from_file(out_file_name) + exp = r""" + 1ad0d344ac10cac079e4eed01074c5e6ca29da2f91ce99bfaea890479aace045 """ self.assert_equal(act, exp, dedent=True) @@ -105,7 +127,7 @@ def test_all_prompts1(self) -> None: Run the `llm_transform.py` script with all the prompt tags and print the output. """ - script, in_file_name, out_file_name = self.setup_test() + script, in_file_name, out_file_name = self.setup_test(txt_id=0) # Run test. transforms = dshlllpr.get_transforms() for prompt_tag in transforms: diff --git a/helpers/henv.py b/helpers/henv.py index a550bc124..668a4b2b7 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -126,10 +126,10 @@ def get_env_vars() -> List[str]: "CSFY_HOST_NAME", # The OS of the host running Docker. 
"CSFY_HOST_OS_NAME", + # The version of the host running Docker. + "CSFY_HOST_OS_VERSION", # The name of the user running the host. "CSFY_HOST_USER_NAME", - # The version of the host running Docker. - "CSFY_HOST_VERSION", # Whether to check if certain property of the repo are as expected or not. "CSFY_REPO_CONFIG_CHECK", # Path to use for `repo_config.py`. E.g., used when running `helpers` diff --git a/helpers/hserver.py b/helpers/hserver.py index 60441b88b..abec35291 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -63,7 +63,7 @@ def _system_to_string(cmd: str) -> Tuple[int, str]: # running, since inside Docker the name of the host is like `01a7e34a82a5`. Of # course, there is no way to know anything about the host for security reason, # so we pass this value from the external environment to the container, through -# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_VERSION`). +# env vars (e.g., `CSFY_HOST_NAME`, `CSFY_HOST_OS_NAME`, `CSFY_HOST_OS_VERSION`). # Sometimes we want to know if: @@ -844,16 +844,16 @@ def enable_privileged_mode() -> bool: ret = True elif is_inside_ci(): ret = True - elif is_host_mac(version="Catalina"): - # Docker for macOS Catalina supports dind. - ret = True - elif ( - is_host_mac(version="Monterey") - or is_host_mac(version="Ventura") - or is_host_mac(version="Sequoia") - ): - # Docker doesn't seem to support dind for these versions of macOS. - ret = False + elif is_host_mac(): + mac_version = get_host_mac_version() + if mac_version == "Catalina": + # Docker for macOS Catalina supports dind. + ret = True + elif mac_version in ("Monterey", "Ventura", "Sequoia"): + # Docker doesn't seem to support dind for these versions of macOS. 
+ ret = False + else: + raise ValueError(f"Invalid version='{version}'") elif is_prod_csfy(): ret = False else: @@ -889,11 +889,8 @@ def has_docker_sudo() -> bool: def _is_mac_version_with_sibling_containers() -> bool: - return ( - is_host_mac(version="Monterey") - or is_host_mac(version="Ventura") - or is_host_mac(version="Sequoia") - ) + mac_version = get_host_mac_version() + return mac_version in ("Monterey", "Ventura", "Sequoia") # TODO(gp): -> use_docker_sibling_container_support diff --git a/helpers/hunit_test_utils.py b/helpers/hunit_test_utils.py index 0d0ddcbb2..21a64dd08 100644 --- a/helpers/hunit_test_utils.py +++ b/helpers/hunit_test_utils.py @@ -468,7 +468,9 @@ def execute_only_on_dev_csfy() -> None: def execute_only_on_mac(*, version: Optional[str] = None) -> None: - is_host_mac_ = hserver.is_host_mac(version=version) + is_host_mac_ = hserver.is_host_mac() + if version: + is_host_mac_ = hserver.is_host_mac_version(version) if not is_host_mac_: pytest.skip(f"Only run on Mac with version={version}") diff --git a/helpers/lib_tasks_docker.py b/helpers/lib_tasks_docker.py index a3355fa63..0b9d2b4d7 100644 --- a/helpers/lib_tasks_docker.py +++ b/helpers/lib_tasks_docker.py @@ -523,7 +523,7 @@ def _generate_docker_compose_file( # ``` csfy_host_os_name = os.uname()[0] csfy_host_name = os.uname()[1] - csfy_host_version = os.uname()[2] + csfy_host_os_version = os.uname()[2] csfy_host_user_name = getpass.getuser() # We assume that we don't use this code inside a container, since otherwise # we would need to distinguish the container style (see @@ -563,8 +563,8 @@ def _generate_docker_compose_file( f"CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", f"CSFY_HOST_NAME={csfy_host_name}", f"CSFY_HOST_OS_NAME={csfy_host_os_name}", + f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", f"CSFY_HOST_USER_NAME={csfy_host_user_name}", - f"CSFY_HOST_VERSION={csfy_host_version}", "CSFY_REPO_CONFIG_CHECK=True", # Use inferred path for `repo_config.py`. 
"CSFY_REPO_CONFIG_PATH=", From 289d52f9fc4bafb706d97770f5aa1183e87e6ebf Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 27 Apr 2025 11:12:20 -0400 Subject: [PATCH 051/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/git/git_hooks/commit-msg.py | 2 -- dev_scripts_helpers/llms/llm_transform.py | 11 ----------- helpers/hserver.py | 1 + 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/dev_scripts_helpers/git/git_hooks/commit-msg.py b/dev_scripts_helpers/git/git_hooks/commit-msg.py index cdf492a5a..7db50f963 100755 --- a/dev_scripts_helpers/git/git_hooks/commit-msg.py +++ b/dev_scripts_helpers/git/git_hooks/commit-msg.py @@ -40,9 +40,7 @@ def _main(): ) sys.exit(1) # Read pre-commit output. - #get_git_root_dir = dshgghout.get_git_root_dir() precommit_output_path = ( - #f"{get_git_root_dir}/.git/hooks/tmp.precommit_output.txt" f"tmp.precommit_output.txt" ) try: diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 70ce1a083..e209ad895 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -203,17 +203,6 @@ def _main(parser: argparse.ArgumentParser) -> None: return # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) - # # Since we need to call a container and passing stdin/stdout is tricky, - # # we read the input and save it in a temporary file. 
- # in_lines = hparser.read_file(in_file_name) - # if in_file_name == "-": - # tmp_in_file_name = "tmp.llm_transform.in.txt" - # in_txt = "\n".join(in_lines) - # hio.to_file(tmp_in_file_name, in_txt) - # else: - # tmp_in_file_name = in_file_name - # # - # tmp_out_file_name = "tmp.llm_transform.out.txt" tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( in_file_name, "llm_transform" ) diff --git a/helpers/hserver.py b/helpers/hserver.py index abec35291..900d44e6b 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -227,6 +227,7 @@ def is_inside_ci() -> bool: return ret +# TODO(gp): -> is_inside_docker_container() def is_inside_docker() -> bool: """ Return whether we are inside a container or not. From ffcebb7b65c687dc67a2648ec76c07994d539394 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 27 Apr 2025 11:14:43 -0400 Subject: [PATCH 052/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../git/git_hooks/commit-msg.py | 4 +--- dev_scripts_helpers/llms/llm_prompts.py | 22 +++++++++---------- dev_scripts_helpers/llms/llm_transform.py | 7 +++--- .../llms/test/test_llm_transform.py | 8 +++---- linters/base.py | 2 +- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/dev_scripts_helpers/git/git_hooks/commit-msg.py b/dev_scripts_helpers/git/git_hooks/commit-msg.py index 7db50f963..86c42b5f4 100755 --- a/dev_scripts_helpers/git/git_hooks/commit-msg.py +++ b/dev_scripts_helpers/git/git_hooks/commit-msg.py @@ -40,9 +40,7 @@ def _main(): ) sys.exit(1) # Read pre-commit output. 
- precommit_output_path = ( - f"tmp.precommit_output.txt" - ) + precommit_output_path = f"tmp.precommit_output.txt" try: with open(precommit_output_path, "r") as f: precommit_output = f.read().strip() diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index c7c4a8fa6..726d10914 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -57,17 +57,17 @@ def get_prompt_tags() -> List[str]: OUTSIDE_CONTAINER_POST_TRANSFORMS = { # These are all the prompts with post_transforms with # `convert_to_vim_cfile`. - "convert_file_names": - ["code_review", + "convert_file_names": [ + "code_review", "code_review_and_find_missing_docstrings", "code_propose_refactoring", - ], - "prettier_on_str": - ["md_rewrite", - "md_summarize_short", - "slide_improve", - "slide_colorize", - ] + ], + "prettier_on_str": [ + "md_rewrite", + "md_summarize_short", + "slide_improve", + "slide_colorize", + ], } valid_prompts = get_prompt_tags() for _, prompts in OUTSIDE_CONTAINER_POST_TRANSFORMS.items(): @@ -347,7 +347,7 @@ def code_apply_csfy_style1() -> _PROMPT_OUT: system = _CONTEXT file_name = "template_code.py" file_content = hio.from_file(file_name) - system += fr""" + system += rf""" Apply the style described below to the Python code without changing the behavior of the code. 
``` @@ -373,7 +373,7 @@ def code_apply_csfy_style2() -> _PROMPT_OUT: - Always use imperative in comments - Remove empty spaces in functions - Add type hints, when missing - - Use * before mandatory parameters + - Use * before mandatory parameters - Make local functions private - Convert .format() to f-string unless it’s a _LOG """ diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index e209ad895..69a073a93 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -203,10 +203,11 @@ def _main(parser: argparse.ArgumentParser) -> None: return # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) - tmp_in_file_name, tmp_out_file_name = hparser.adapt_input_output_args_for_dockerized_scripts( - in_file_name, "llm_transform" + tmp_in_file_name, tmp_out_file_name = ( + hparser.adapt_input_output_args_for_dockerized_scripts( + in_file_name, "llm_transform" + ) ) - # TODO(gp): We should just automatically pass-through the options. cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] if args.fast_model: diff --git a/dev_scripts_helpers/llms/test/test_llm_transform.py b/dev_scripts_helpers/llms/test/test_llm_transform.py index 8ea5dcf2f..6f4658ffb 100644 --- a/dev_scripts_helpers/llms/test/test_llm_transform.py +++ b/dev_scripts_helpers/llms/test/test_llm_transform.py @@ -59,8 +59,8 @@ def setup_test(self, txt_id: str) -> Tuple[str, str, str]: def test_md_rewrite1(self) -> None: """ - Run the `llm_transform.py` script with the prompt `md_rewrite` and verify - the output. + Run the `llm_transform.py` script with the prompt `md_rewrite` and + verify the output. """ script, in_file_name, out_file_name = self.setup_test(txt_id=0) # Run test. @@ -86,8 +86,8 @@ def test_md_rewrite1(self) -> None: def test_test1(self) -> None: """ - Run the `llm_transform.py` script with the prompt `test` and verify - the output. 
+ Run the `llm_transform.py` script with the prompt `test` and verify the + output. """ script, in_file_name, out_file_name = self.setup_test(txt_id=1) # Run test. diff --git a/linters/base.py b/linters/base.py index c63af99d8..0e1d69ed6 100755 --- a/linters/base.py +++ b/linters/base.py @@ -488,7 +488,7 @@ def _parse() -> argparse.ArgumentParser: parser.add_argument( "--num_threads", action="store", - default="-1", + default="serial", help="Number of threads to use ('serial' to run serially, -1 to use " "all CPUs)", ) From 07b79bef8362a6ce6a5a8a27c2c974be7505baab Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 27 Apr 2025 11:18:36 -0400 Subject: [PATCH 053/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 2 +- ....replace_common_files_with_script_links.md | 1 + helpers/hdocker.py | 176 ++++++++++-------- helpers/henv.py | 1 - helpers/hmarkdown.py | 6 +- helpers/hparser.py | 6 +- helpers/hserver.py | 30 ++- helpers/lib_tasks_lint.py | 2 +- linters/base.py | 9 +- pytest.ini | 2 +- 10 files changed, 127 insertions(+), 108 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 726d10914..c574bae1e 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -310,7 +310,7 @@ def code_use_f_strings() -> _PROMPT_OUT: system += r""" Use f-strings (formatted string literals) instead of % formatting and format strings. Do not print any comment, just the converted code. - + For instance, convert: "Hello, %s. You are %d years old." 
% (name, age) to diff --git a/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md b/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md index ab19b90f7..32b676040 100644 --- a/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md +++ b/docs/work_tools/dev_system/all.replace_common_files_with_script_links.md @@ -138,6 +138,7 @@ ### Workflow Summary 1. Set up symbolic links: + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` diff --git a/helpers/hdocker.py b/helpers/hdocker.py index aeffea761..9337bbff0 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -132,10 +132,11 @@ def volume_rm(volume_name: str, use_sudo: bool) -> None: # ############################################################################# - + def get_current_arch() -> str: """ - Return the architecture that we are running on (e.g., arm64, aarch64, x86_64). + Return the architecture that we are running on (e.g., arm64, aarch64, + x86_64). """ cmd = "uname -m" _, current_arch = hsystem.system_to_one_line(cmd) @@ -149,7 +150,7 @@ def _is_compatible_arch(val1: str, val2: str) -> bool: hdbg.dassert_in(val2, valid_arch) if val1 == val2: return True - compatible_sets = [{'x86_64', 'amd64'}, {'aarch64', 'arm64'}] + compatible_sets = [{"x86_64", "amd64"}, {"aarch64", "arm64"}] for comp_set in compatible_sets: if {val1, val2}.issubset(comp_set): return True @@ -170,8 +171,8 @@ def check_image_compatibility_with_current_arch( :param use_sudo: Whether to use sudo for Docker commands. :param pull_image_if_needed: Whether to pull the image if it doesn't exist. - :param assert_on_error: Whether to raise an error if the image is not - compatible with the current architecture. + :param assert_on_error: Whether to raise an error if the image is + not compatible with the current architecture. 
""" _LOG.debug(hprint.func_signature_to_str()) hdbg.dassert_ne(image_name, "") @@ -194,9 +195,7 @@ def check_image_compatibility_with_current_arch( hdbg.dfatal("Image '%s' not found", image_name) # Check the image architecture. executable = get_docker_executable(use_sudo) - cmd = ( - f"{executable} inspect {image_name}" + r" --format '{{.Architecture}}'" - ) + cmd = f"{executable} inspect {image_name}" + r" --format '{{.Architecture}}'" _, image_arch = hsystem.system_to_one_line(cmd) _LOG.debug(hprint.to_str("image_arch")) # Check architecture compatibility. @@ -314,11 +313,11 @@ def get_docker_base_cmd(use_sudo: bool) -> List[str]: docker_executable = get_docker_executable(use_sudo) # Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. vars_to_pass = [ - v for v in os.environ.keys() if - # TODO(gp): We should only pass the `CSFY_` vars. - v.startswith("AM_") or - v.startswith("CK_") or - v.startswith("CSFY_") + v + for v in os.environ.keys() + if + # TODO(gp): We should only pass the `CSFY_` vars. + v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") ] vars_to_pass.append("OPENAI_API_KEY") vars_to_pass = sorted(vars_to_pass) @@ -328,7 +327,7 @@ def get_docker_base_cmd(use_sudo: bool) -> List[str]: docker_executable, "run --rm", "--user $(id -u):$(id -g)", - vars_to_pass_as_str + vars_to_pass_as_str, ] return docker_cmd @@ -397,7 +396,7 @@ def build_container_image( f"{docker_executable} build", f"-f {temp_dockerfile}", f"-t {image_name_out}", - #"--platform linux/aarch64", + # "--platform linux/aarch64", ] if not use_cache: cmd.append("--no-cache") @@ -437,12 +436,12 @@ def _dassert_valid_path(file_path: str, is_input: bool) -> None: # but we assume that at the least the directory should be already # present. 
dir_name = os.path.normpath(os.path.dirname(file_path)) - hio.create_dir(dir_name, incremental=True) + hio.create_dir(dir_name, incremental=True) hdbg.dassert( - os.path.exists(file_path) - or os.path.exists(dir_name), + os.path.exists(file_path) or os.path.exists(dir_name), "Invalid path: '%s' and '%s' don't exist", - file_path, dir_name + file_path, + dir_name, ) @@ -658,12 +657,14 @@ def run_dockerized_prettier( bash_cmd += f" > {out_file_path}" # Build the Docker command. docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - " --entrypoint ''", - f"--workdir {callee_mount_path} --mount {mount}", - f"{container_image}", - f'bash -c "{bash_cmd}"' - ]) + docker_cmd.extend( + [ + " --entrypoint ''", + f"--workdir {callee_mount_path} --mount {mount}", + f"{container_image}", + f'bash -c "{bash_cmd}"', + ] + ) docker_cmd = " ".join(docker_cmd) if return_cmd: ret = docker_cmd @@ -770,7 +771,7 @@ def convert_pandoc_cmd_to_arguments(cmd: str) -> Dict[str, Any]: args, unknown_args = parser.parse_known_args(cmd) _LOG.debug(hprint.to_str("args unknown_args")) # Filter out the option terminator if present. - # Remove the `--` option terminator to treat `--option-after-terminator` as a regular argument, not as an option. + # Remove the `--` option terminator to treat `--option-after-terminator` as a regular argument, not as an option. unknown_args = [arg for arg in unknown_args if arg != "--"] # Return all the arguments in a dictionary with names that match the # function signature of `run_dockerized_pandoc()`. 
@@ -997,11 +998,13 @@ def run_dockerized_pandoc( # input.md -o output.md \ # -s --toc docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - f"--workdir {callee_mount_path} --mount {mount}", - f"{container_image}", - f"{pandoc_cmd}", - ]) + docker_cmd.extend( + [ + f"--workdir {callee_mount_path} --mount {mount}", + f"{container_image}", + f"{pandoc_cmd}", + ] + ) docker_cmd = " ".join(docker_cmd) if return_cmd: ret = docker_cmd @@ -1068,15 +1071,15 @@ def run_dockerized_markdown_toc( # --workdir /app --mount type=bind,source=.,target=/app \ # tmp.markdown_toc \ # -i ./test.md - bash_cmd = ( - f"/usr/local/bin/markdown-toc {cmd_opts_as_str} -i {in_file_path}" - ) + bash_cmd = f"/usr/local/bin/markdown-toc {cmd_opts_as_str} -i {in_file_path}" docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - f"--workdir {callee_mount_path} --mount {mount}", - f"{container_image}", - f'bash -c "{bash_cmd}"' - ]) + docker_cmd.extend( + [ + f"--workdir {callee_mount_path} --mount {mount}", + f"{container_image}", + f'bash -c "{bash_cmd}"', + ] + ) docker_cmd = " ".join(docker_cmd) # TODO(gp): Note that `suppress_output=False` seems to hang the call. hsystem.system(docker_cmd) @@ -1208,7 +1211,7 @@ def run_dockerized_latex( texlive-latex-extra \ lmodern \ tikzit - + RUN rm -rf /var/lib/apt/lists/* \ && apt-get clean @@ -1272,11 +1275,13 @@ def run_dockerized_latex( _LOG.debug(hprint.to_str("latex_cmd")) # docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - f"--workdir {callee_mount_path} --mount {mount}", - f"{container_image}", - f"{latex_cmd}" - ]) + docker_cmd.extend( + [ + f"--workdir {callee_mount_path} --mount {mount}", + f"{container_image}", + f"{latex_cmd}", + ] + ) docker_cmd = " ".join(docker_cmd) # TODO(gp): Factor this out. 
if return_cmd: @@ -1302,7 +1307,7 @@ def run_basic_latex( """ _LOG.debug(hprint.func_signature_to_str()) # - #hdbg.dassert_file_extension(input_file_name, "tex") + # hdbg.dassert_file_extension(input_file_name, "tex") hdbg.dassert_file_exists(in_file_name) hdbg.dassert_file_extension(out_file_name, "pdf") # There is a horrible bug in pdflatex that if the input file is not the last @@ -1407,12 +1412,14 @@ def run_dockerized_imagemagick( cmd_opts_as_str = " ".join(cmd_opts) cmd = f"magick {cmd_opts_as_str} {in_file_path} {out_file_path}" docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - "--entrypoint ''", - f"--workdir {callee_mount_path} --mount {mount}", - container_image, - f'bash -c "{cmd}"' - ]) + docker_cmd.extend( + [ + "--entrypoint ''", + f"--workdir {callee_mount_path} --mount {mount}", + container_image, + f'bash -c "{cmd}"', + ] + ) docker_cmd = " ".join(docker_cmd) # TODO(gp): Factor this out. if return_cmd: @@ -1518,12 +1525,14 @@ def run_dockerized_plantuml( ) plantuml_cmd = f"plantuml -t{dst_ext} -o {out_file_path} {in_file_path}" docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - " --entrypoint ''", - f"--workdir {callee_mount_path} --mount {mount}", - f"{container_image}", - f'bash -c "{plantuml_cmd}"' - ]) + docker_cmd.extend( + [ + " --entrypoint ''", + f"--workdir {callee_mount_path} --mount {mount}", + f"{container_image}", + f'bash -c "{plantuml_cmd}"', + ] + ) docker_cmd = " ".join(docker_cmd) hsystem.system(docker_cmd) @@ -1574,15 +1583,15 @@ def run_dockerized_mermaid( is_caller_host=is_caller_host, use_sibling_container_for_callee=use_sibling_container_for_callee, ) - mermaid_cmd = ( - f" -i {in_file_path} -o {out_file_path}" - ) + mermaid_cmd = f" -i {in_file_path} -o {out_file_path}" docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - f"--workdir {callee_mount_path} --mount {mount}", - container_image, - mermaid_cmd, - ]) + docker_cmd.extend( + [ + f"--workdir {callee_mount_path} --mount 
{mount}", + container_image, + mermaid_cmd, + ] + ) docker_cmd = " ".join(docker_cmd) hsystem.system(docker_cmd) @@ -1596,8 +1605,8 @@ def run_dockerized_mermaid2( use_sudo: bool = False, ) -> None: """ - Run `mermaid` in a Docker container, building the container from scratch and - using a puppeteer config. + Run `mermaid` in a Docker container, building the container from scratch + and using a puppeteer config. """ _LOG.debug(hprint.func_signature_to_str()) # Build the container, if needed. @@ -1673,12 +1682,14 @@ def run_dockerized_mermaid2( ) # TODO(gp): Factor out building the docker cmd. docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - "--entrypoint ''", - f"--workdir {callee_mount_path} --mount {mount}", - container_image, - f'bash -c "{mermaid_cmd}"' - ]) + docker_cmd.extend( + [ + "--entrypoint ''", + f"--workdir {callee_mount_path} --mount {mount}", + container_image, + f'bash -c "{mermaid_cmd}"', + ] + ) docker_cmd = " ".join(docker_cmd) hsystem.system(docker_cmd) @@ -1742,19 +1753,20 @@ def run_dockerized_graphviz( ) cmd_opts = " ".join(cmd_opts) graphviz_cmd = [ - "dot" - f"{cmd_opts}", + "dot" f"{cmd_opts}", "-T png", "-Gdpi=300", f"-o {out_file_path}", - in_file_path + in_file_path, ] graphviz_cmd = " ".join(graphviz_cmd) docker_cmd = get_docker_base_cmd(use_sudo) - docker_cmd.extend([ - f"--workdir {callee_mount_path} --mount {mount}", - container_image, - graphviz_cmd, - ]) + docker_cmd.extend( + [ + f"--workdir {callee_mount_path} --mount {mount}", + container_image, + graphviz_cmd, + ] + ) docker_cmd = " ".join(docker_cmd) hsystem.system(docker_cmd) diff --git a/helpers/henv.py b/helpers/henv.py index 668a4b2b7..6c18fa5b3 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -371,7 +371,6 @@ def _get_psutil_info() -> str: except ModuleNotFoundError as e: _LOG.warning("psutil is not installed: %s", str(e)) has_psutil = False - txt_tmp = [] if has_psutil: txt_tmp.append(f"cpu count={psutil.cpu_count()}") diff --git 
a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 37156e277..c5c6deaab 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -60,7 +60,7 @@ def is_header(line: str) -> Tuple[bool, int, str]: - The level of the header (0 if not a header) - The title of the header (empty string if not a header) """ - #hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) + # hdbg.dassert(not is_markdown_line_separator(line), "line='%s'", line) m = re.match(r"(#+)\s+(.*)", line) is_header_ = bool(m) if m: @@ -298,13 +298,13 @@ def fix_chatgpt_output(txt: str) -> str: txt = re.sub(r"’", r"'", txt) # Remove empty spaces at beginning / end of Latex equations $...$. # E.g., $ \text{Student} $ becomes $\text{Student}$ - txt = re.sub(r'\$\s+(.*?)\s\$', r'$\1$', txt) + txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) return txt def md_clean_up(txt: str) -> str: # Remove dot at the end of each line. - txt = re.sub(r'\.\s*$', '', txt, flags=re.MULTILINE) + txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) return txt diff --git a/helpers/hparser.py b/helpers/hparser.py index 5f574c35d..c9afcc63c 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -166,8 +166,6 @@ def parse_dst_dir_arg(args: argparse.Namespace) -> Tuple[str, bool]: # ############################################################################# - - def add_action_arg( parser: argparse.ArgumentParser, valid_actions: List[str], @@ -409,7 +407,7 @@ def write_file(txt: Union[str, List[str]], file_name: str) -> None: f.write("\n".join(txt)) _LOG.info("Written file '%s'", file_name) - + def adapt_input_output_args_for_dockerized_scripts( in_file_name: str, tag: str ) -> Tuple[str, str]: @@ -665,4 +663,4 @@ def add_prompt_arg( action="store_true", help="Use a fast LLM model vs a high-quality one", ) - return parser \ No newline at end of file + return parser diff --git a/helpers/hserver.py b/helpers/hserver.py index 900d44e6b..ad36dc820 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ 
-379,8 +379,8 @@ def is_external_linux() -> bool: """ Detect whether we are running on a non-server/non-CI Linux machine. - This returns true when we run on the machine of an intern, or a non-CSFY - contributor. + This returns true when we run on the machine of an intern, or a non- + CSFY contributor. """ if is_host_csfy_server() or is_inside_ci(): # Dev servers and CI are not external Linux systems. @@ -395,9 +395,9 @@ def is_external_linux() -> bool: def is_external_dev() -> bool: """ Detect whether we are running on an system outside of Causify. - - E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non-CSFY - machine. + + E.g., a Linux / Mac contributor's laptop, an intern's laptop, a non- + CSFY machine. """ ret = is_host_mac() or is_external_linux() return ret @@ -467,7 +467,8 @@ def is_inside_docker_container_on_csfy_server() -> bool: def is_outside_docker_container_on_csfy_server() -> bool: """ - Return whether we are running outside a Docker container on a Causify server. + Return whether we are running outside a Docker container on a Causify + server. """ ret = not is_inside_docker() and is_host_csfy_server() return ret @@ -718,7 +719,10 @@ def can_run_docker_from_docker() -> bool: Return whether we can run docker from docker, either as children or sibling container. 
""" - return has_docker_children_containers_support() or has_docker_sibling_containers_support() + return ( + has_docker_children_containers_support() + or has_docker_sibling_containers_support() + ) def get_docker_info() -> str: @@ -741,15 +745,21 @@ def get_docker_info() -> str: txt_tmp.append(f"is_inside_docker={is_inside_docker_}") # if is_inside_docker_: - has_docker_sibling_containers_support_ = has_docker_sibling_containers_support() - has_docker_children_containers_support_ = has_docker_children_containers_support() + has_docker_sibling_containers_support_ = ( + has_docker_sibling_containers_support() + ) + has_docker_children_containers_support_ = ( + has_docker_children_containers_support() + ) else: has_docker_sibling_containers_support_ = "*undef*" has_docker_children_containers_support_ = "*undef*" txt_tmp.append( f"has_docker_sibling_containers_support={has_docker_sibling_containers_support_}" ) - txt_tmp.append(f"has_docker_children_containers_support={has_docker_children_containers_support_}") + txt_tmp.append( + f"has_docker_children_containers_support={has_docker_children_containers_support_}" + ) # txt = hprint.to_info("Docker info", txt_tmp) return txt diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index cce9b3a02..153700b9e 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -190,7 +190,7 @@ def lint( # type: ignore last_commit=False, branch=False, # It needs to be a string to allow the user to specify "serial". 
- num_threads="-1", + num_threads="serial", only_format=False, only_check=False, ): diff --git a/linters/base.py b/linters/base.py index 0e1d69ed6..997bf6560 100755 --- a/linters/base.py +++ b/linters/base.py @@ -65,7 +65,9 @@ # ############################################################################# -def _filter_files(file_paths: List[str], file_paths_to_skip: List[str]) -> List[str]: +def _filter_files( + file_paths: List[str], file_paths_to_skip: List[str] +) -> List[str]: """ Filter the list of files by removing invalid or excluded ones. @@ -461,10 +463,7 @@ def _parse() -> argparse.ArgumentParser: help="Select files modified in the current branch with respect to master", ) parser.add_argument( - "--skip_files", - nargs="+", - type=str, - help="Files to skip during linting" + "--skip_files", nargs="+", type=str, help="Files to skip during linting" ) # Action selection. parser.add_argument( diff --git a/pytest.ini b/pytest.ini index d33a13dc9..47551c146 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,7 +6,7 @@ norecursedirs = dev_scripts/old helpers/old im/ib/data/extract/gateway - notebooks + notebooks #ignore = .git im/ib/data/extract/gateway #ignore-glob = notebooks* old/* From 373ec57e6061e0d15a2e9a8688b1762f18414058 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 27 Apr 2025 21:34:25 -0400 Subject: [PATCH 054/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../documentation/lint_notes.py | 1 + .../documentation/notes_to_pdf.py | 1 + .../documentation/transform_notes.py | 10 +++++---- helpers/hmarkdown.py | 21 +++++++++---------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index 3c4e4b81f..1dcdff260 100755 
--- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -94,6 +94,7 @@ def _preprocess(txt: str) -> str: return txt_new_as_str +# TODO(gp): Move this somewhere else. # TODO(gp): Remove the code path using non dockerized executable, after fix for # CmampTask10710. def prettier( diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index 740f58f23..1c6363b59 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -206,6 +206,7 @@ def _render_images(file_name: str, prefix: str) -> str: out = "\n".join(out) file3 = f"{prefix}.render_image2.txt" hio.to_file(file3, out) + _LOG.info("Remove commented code and saved file='%s'", file3) # file_out = file3 return file_out diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index dbeca6e33..db49c4fae 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -31,6 +31,7 @@ import hashlib import logging +import dev_scripts_helpers.documentation.lint_notes as dshdlino import helpers.hdbg as hdbg import helpers.hlatex as hlatex import helpers.hmarkdown as hmarkdo @@ -91,15 +92,16 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = "\n".join(txt) txt = hmarkdo.remove_formatting(txt) hparser.write_file(txt, out_file_name) - elif cmd == "md_fix_chatgpt_output": + elif cmd == "md_clean_up": txt = hparser.read_file(in_file_name) txt = "\n".join(txt) - txt = hmarkdo.fix_chatgpt_output(txt) + txt = hmarkdo.md_clean_up(txt) + txt = dshdlino.prettier_on_str(txt) hparser.write_file(txt, out_file_name) - elif cmd == "md_clean_up": + elif cmd == "md_format": txt = hparser.read_file(in_file_name) txt = "\n".join(txt) - txt = hmarkdo.md_clean_up(txt) + txt = dshdlino.prettier_on_str(txt) hparser.write_file(txt, 
out_file_name) else: assert 0, f"Invalid cmd='{cmd}'" diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index c5c6deaab..ff4a98c7e 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -26,17 +26,18 @@ # ############################################################################# -def is_markdown_line_separator(line: str, min_repeats: int = 3) -> bool: +def is_markdown_line_separator(line: str, min_repeats: int = 5) -> bool: """ Check if the given line is a Markdown separator. - This function determines if a line consists of repeated characters (`#`, `/`, `-`, `=`) - that would indicate a markdown separator. + This function determines if a line consists of repeated characters (`#`, + `/`, `-`, `=`) that would indicate a markdown separator. :param line: the current line of text being processed - :param min_repeats: the minimum number of times the characters have to be repeated to be - considered a separator, e.g., if `min_repeats` = 2, then `##`, `###`, `//` are - considered to be line separators, but `#`, `/` are not + :param min_repeats: the minimum number of times the characters have to be + repeated to be considered a separator, e.g., if `min_repeats` = 2, then + `##`, `###`, `//` are considered to be line separators, but `#`, `/` are + not :return: true if the line is a separator """ separator_pattern = rf""" @@ -274,7 +275,7 @@ def remove_formatting(txt: str) -> str: return txt -def fix_chatgpt_output(txt: str) -> str: +def md_clean_up(txt: str) -> str: # Replace \( ... \) math syntax with $ ... $. txt = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", txt) # Replace \[ ... \] math syntax with $$ ... $$, handling multiline equations. @@ -296,13 +297,11 @@ def fix_chatgpt_output(txt: str) -> str: txt = re.sub(r"”", r'"', txt) # ’ txt = re.sub(r"’", r"'", txt) + # → + txt = re.sub(r"→", r"$\\rightarrow$", txt) # Remove empty spaces at beginning / end of Latex equations $...$. 
# E.g., $ \text{Student} $ becomes $\text{Student}$ txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) - return txt - - -def md_clean_up(txt: str) -> str: # Remove dot at the end of each line. txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) return txt From 21b1921f669d0479e2fb2bacec8453784ec3ed54 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Wed, 30 Apr 2025 20:17:46 -0400 Subject: [PATCH 055/193] Update --- dev_scripts_helpers/llms/llm_prompts.py | 292 ++++++++++++------ .../llms/test/test_llm_prompts.py | 76 +++++ 2 files changed, 274 insertions(+), 94 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index c574bae1e..89eb1fcc0 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -13,6 +13,9 @@ _LOG = logging.getLogger(__name__) +# ############################################################################# +# get_prompt_tags() +# ############################################################################# @functools.lru_cache(maxsize=1) def get_prompt_tags() -> List[str]: @@ -46,6 +49,9 @@ def get_prompt_tags() -> List[str]: return matched_functions +# ############################################################################# + + # Store the prompts that need a certain post-transforms to be applied outside # the container. OUTSIDE_CONTAINER_POST_TRANSFORMS = {} @@ -58,10 +64,11 @@ def get_prompt_tags() -> List[str]: # These are all the prompts with post_transforms with # `convert_to_vim_cfile`. "convert_file_names": [ - "code_review", + "code_review_correctness", "code_review_and_find_missing_docstrings", "code_propose_refactoring", ], + # remove_code_delimiters "prettier_on_str": [ "md_rewrite", "md_summarize_short", @@ -104,95 +111,71 @@ def test() -> _PROMPT_OUT: """ -def code_comment() -> _PROMPT_OUT: +def code_add_comments() -> _PROMPT_OUT: """ Add comments to Python code. 
""" system = _CONTEXT system += r""" - Every a chunk of 4 or 5 lines of code add comment explaining the code. - Comments should go before the logical chunk of code they describe. - Comments should be in imperative form, a full English phrase, and end with a - period. - """ - # You are a proficient Python coder and write English very well. - # Given the Python code passed below, improve or add comments to the code. - # Comments must be for every logical chunk of 4 or 5 lines of Python code. - # Do not comment every single line of code and especially logging statements. - # Each comment should be in imperative form, a full English phrase, and end - # with a period. + - Every a chunk of 4 or 5 lines of code add comment explaining the code + - Comments should go before the logical chunk of code they describe + - Comments should be in imperative form, a full English phrase, and end with a + period `.` + - Do not comment every single line of code and especially logging statements + """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms -def code_docstring() -> _PROMPT_OUT: - """ - Add a REST docstring to Python code. - """ +def code_add_docstrings() -> _PROMPT_OUT: + ''' + Add or complete a REST docstring to Python code. Each function should have a + docstring that describes the function, its parameters, and its return value. + ''' system = _CONTEXT - system += r""" - Add a docstring to the function passed. + system += r''' + Make sure each function as a REST docstring - The first comment should be in imperative mode and fit in a single line of - less than 80 characters. + less than 80 characters - To describe the parameters use the REST style, which requires each parameter to be prepended with :param - """ + + An example of a correct docstring is: + + def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: + """ + Format a greeting message with the given name. 
+ + :param name: the name to include in the greeting + :param greeting: the base greeting message to use + :return: formatted greeting + """ + ''' pre_transforms = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms -def code_type_hints() -> _PROMPT_OUT: +def code_fix_type_hints() -> _PROMPT_OUT: system = _CONTEXT system += r""" - You will add type hints to the function passed. + Add type hints to the Python code passed. """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms -def _get_code_unit_test_prompt(num_tests: int) -> str: - system = _CONTEXT - system += r""" - You will write a unit test suite for the function passed. - - Write {num_tests} unit tests for the function passed - Just output the Python code - Use the following style for the unit tests: - When calling the function passed assume it's under the module called uut and the user has called `import uut as uut` - ``` - act = call to the function passed - exp = expected code - self.assert_equal(act, exp) - ``` - """ - return system - - -def code_unit_test() -> _PROMPT_OUT: - system = _get_code_unit_test_prompt(5) - pre_transforms = set() - post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms - - -def code_1_unit_test() -> _PROMPT_OUT: - system = _get_code_unit_test_prompt(1) - pre_transforms = set() - post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms - - -def code_review() -> _PROMPT_OUT: +def code_review_correctness() -> _PROMPT_OUT: system = _CONTEXT system += r""" - You will review the code and make sure it is correct. - You will also make sure that the code is clean and readable. - You will also make sure that the code is efficient. - You will also make sure that the code is robust. - You will also make sure that the code is maintainable. 
+ You will review the code and make sure it is: + - correct + - clean and readable + - efficient + - robust + - maintainable Do not print any comment, besides for each point of improvement, you will print the line number and the proposed improvement in the following style: @@ -203,7 +186,7 @@ def code_review() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -# TODO(gp): This is kind of expensive and we should use a linter stage. +# TODO(gp): This is kind of expensive and we should use a procedural linter stage. def code_review_and_find_missing_docstrings() -> _PROMPT_OUT: """ Find missing docstrings in Python code. @@ -278,14 +261,16 @@ def code_apply_linter_issues() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_fix_string() -> _PROMPT_OUT: +def code_fix_log_string() -> _PROMPT_OUT: """ Fix the log statements to use % formatting. """ system = _CONTEXT system += r""" - Use % formatting instead of f-strings (formatted string literals). - Do not print any comment, just the converted code. + Fix logging statements and dassert statements by using % formatting instead + of f-strings (formatted string literals). + + Do not print any comment, but just the converted code. For instance, convert: _LOG.info(f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}'") @@ -302,14 +287,16 @@ def code_fix_string() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_use_f_strings() -> _PROMPT_OUT: +def code_fix_by_using_f_strings() -> _PROMPT_OUT: """ - Use f-strings, like `f"Hello, {name}. You are {age} years old."`. + Fix code to use f-strings, like `f"Hello, {name}. You are {age} years old."`. """ system = _CONTEXT system += r""" Use f-strings (formatted string literals) instead of % formatting and format - strings. Do not print any comment, just the converted code. + strings. + + Do not print any comment, but just the converted code. For instance, convert: "Hello, %s. You are %d years old." 
% (name, age) @@ -321,14 +308,15 @@ def code_use_f_strings() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_use_perc_strings() -> _PROMPT_OUT: +def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. """ system = _CONTEXT system += r""" Use % formatting instead of f-strings (formatted string literals). - Do not print any comment, just the converted code. + + Do not print any comment, but just the converted code. For instance, convert: f"Hello, {name}. You are {age} years old." @@ -340,9 +328,9 @@ def code_use_perc_strings() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_apply_csfy_style1() -> _PROMPT_OUT: +def code_apply_csfy_style() -> _PROMPT_OUT: """ - Apply the csfy style to the code. + Apply the style to the code using template code in `template_code.py`. """ system = _CONTEXT file_name = "template_code.py" @@ -355,28 +343,114 @@ def code_apply_csfy_style1() -> _PROMPT_OUT: ``` Do not remove any code, just format the existing code using the style. - Do not report any explanation of what you did, just the converted code. + Do not report any explanation of what you did, but just the converted code. """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms -def code_apply_csfy_style2() -> _PROMPT_OUT: +def code_fix_from_imports() -> _PROMPT_OUT: """ - Apply the csfy style to the code. + Fix code to use imports instead of "from import" statements. 
""" system = _CONTEXT system += r""" - Apply the following style to the code: - - Convert docstrings into REST docstrings - - Always use imperative in comments - - Remove empty spaces in functions - - Add type hints, when missing - - Use * before mandatory parameters - - Make local functions private - - Convert .format() to f-string unless it’s a _LOG + Replace any Python "from import" statement like: + from X import Y + with the form: + import X + and then replace the uses of Y with X.Y + + For instance, replace: + from langchain_openai import OpenAIEmbeddings + with: + import langchain_openai + and then replace the uses of OpenAIEmbeddings with langchain_openai.OpenAIEmbeddings + """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: + """ + Fix code missing the star before optional parameters. + """ + system = _CONTEXT + system += r""" + When you find a Python function with optional parameters, add a star after + the mandatory parameters and before the optional parameters, and make sure + that the function is called with the correct number of arguments. + + For instance, replace: + def reflow_label(label: str, max_length: int = 10) -> str: + + reflow_label("Hello, world!", 10) + + with + def reflow_label(label: str, *, max_length: int = 10) -> str: + + reflow_label("Hello, world!", max_length=10) + """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_fix_csfy_style() -> _PROMPT_OUT: + """ + Apply the csfy style to the code. 
+ """ + function_names = ["code_add_comments", + "code_add_docstrings", + "code_fix_type_hints"] + system_prompts = [] + for function_name in function_names: + system, pre_transforms, post_transforms = eval(function_name)() + system_prompts.append(system) + hdbg.dassert_eq(pre_transforms, set()) + hdbg.dassert_eq(post_transforms, {"remove_code_delimiters"}) + system = "\n\n".join(system_prompts) + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +# ############################################################################# + + +def _get_code_unit_test_prompt(num_tests: int) -> str: + system = _CONTEXT + system += rf""" + - You will write a unit test suite for the function passed. + + - Write {num_tests} unit tests for the function passed + - Just output the Python code + - Use the following style for the unit tests: + - When calling the function passed assume it's under the module called uut + and the user has called `import uut as uut` + ``` + act = call to the function passed + exp = expected code + self.assert_equal(act, exp) + ``` """ + return system + + +def code_unit_test() -> _PROMPT_OUT: + system = _get_code_unit_test_prompt(5) + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_1_unit_test() -> _PROMPT_OUT: + system = _get_code_unit_test_prompt(1) + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms # ############################################################################# @@ -433,16 +507,17 @@ def slide_colorize() -> _PROMPT_OUT: I will give you markdown text in the next prompt - Do not change the text or the structure of the text - - You will use multiple colors using pandoc \textcolor{COLOR}{text} to highlight - only the most important phrases in the text—those that are key to understanding - the main points. 
Keep the highlights minimal and avoid over-marking. Focus on - critical concepts, key data, or essential takeaways rather than full sentences - or excessive details. - - You can use the following colors in the given order: red, orange, green, teal, cyan, blue, violet, brown + - You will use multiple colors using pandoc \textcolor{COLOR}{text} to + highlight only the most important phrases in the text—those that are key to + understanding the main points. Keep the highlights minimal and avoid + over-marking. Focus on critical concepts, key data, or essential takeaways + rather than full sentences or excessive details. + - You can use the following colors in the given order: red, orange, green, + teal, cyan, blue, violet, brown - You can highlight only 4 words or phrases in the text - Print only the markdown without any explanation + Print only the markdown without any explanation. """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -455,10 +530,12 @@ def slide_colorize_points() -> _PROMPT_OUT: I will give you markdown text in the next prompt - Do not change the text or the structure of the text - - You will highlight with \textcolor{COLOR}{text} the bullet point at the first level, without highlighting the - character - - You can use the following colors in the given order: red, orange, green, teal, cyan, blue, violet, brown + - You will highlight with \textcolor{COLOR}{text} the bullet point at the + first level, without highlighting the - character + - You can use the following colors in the given order: red, orange, green, + teal, cyan, blue, violet, brown - Print only the markdown without any explanation + Print only the markdown without any explanation. """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -473,6 +550,17 @@ def slide_colorize_points() -> _PROMPT_OUT: def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: """ Convert the text passed to a string representing a vim cfile. 
+ + E.g., + ``` + 57: The docstring should use more detailed type annotations for ... + 98-104: Simplify the hash computation logic with a helper ... + ``` + become: + ``` + test.py:57: The docstring should use more detailed type annotations for ... + test.py:98: Simplify the hash computation logic with a helper ... + ``` """ ret_out = [] for line in txt.split("\n"): @@ -500,7 +588,7 @@ def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: # ``` regex = re.compile( r""" - ^(\d+)-\d+: # Line number(s) followed by colon + ^(\d+)-\d+: # Line number(s) followed by colon \s* # Space (.*)$ # Rest of line """, @@ -539,6 +627,15 @@ def _convert_to_vim_cfile(txt: str, in_file_name: str, out_file_name: str) -> st return txt_out +def _annotate_with_cfile(txt: str) -> str: + """ + Given code and a corresponding cfile, annotate the code with the cfile. + + Use TODO(*): to + """ + + + # ############################################################################# # run_prompt() # ############################################################################# @@ -557,6 +654,13 @@ def run_prompt( ) -> Optional[str]: """ Run the prompt passed and apply the transforms to the response. + + :param prompt_tag: tag of the prompt to run + :param txt: text to run the prompt on + :param model: model to use + :param in_file_name: name of the input file + :param out_file_name: name of the output file + :return: transformed text """ _LOG.debug(hprint.to_str("prompt_tag model in_file_name out_file_name")) # Get the info corresponding to the prompt tag. 
diff --git a/dev_scripts_helpers/llms/test/test_llm_prompts.py b/dev_scripts_helpers/llms/test/test_llm_prompts.py index 6190ed552..60725e0ec 100644 --- a/dev_scripts_helpers/llms/test/test_llm_prompts.py +++ b/dev_scripts_helpers/llms/test/test_llm_prompts.py @@ -60,3 +60,79 @@ def test1(self) -> None: _LOG.debug(hprint.to_str("prompt_tags")) # self.assertGreater(len(prompt_tags), 0) + + +# ############################################################################# +# Test_prompt_tags1 +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_ci() or hserver.is_dev_csfy(), + reason="Disabled because of CmampTask10710", +) +class Test_prompt_tags1(hunitest.TestCase): + + def test1(self) -> None: + prompt_tags = dshlllpr.get_prompt_tags() + _LOG.debug(hprint.to_str("prompt_tags")) + # + self.assertGreater(len(prompt_tags), 0) + + +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_ci() or hserver.is_dev_csfy(), + reason="Disabled because of CmampTask10710", +) +class Test_run_prompt1(hunitest.TestCase): + + def _run_prompt(self, prompt_tag: str, input_txt: str, exp_output: str) -> None: + # Prepare the input. + input_txt = hprint.dedent(input_txt) + model = "gpt-4o" + in_file_name = "test.py" + out_file_name = "test.py" + # Run the prompt. + act_output = dshlllpr.run_prompt( + prompt_tag, input_txt, model, in_file_name, out_file_name + ) + # Check the output. + exp_output = hprint.dedent(exp_output) + self.assert_equal(act_output, exp_output, fuzzy_match=True) + + # TODO(gp): Add one tests for each prompt. 
+ + def test_code_fix_from_imports1(self) -> None: + prompt_tag = "code_fix_from_imports" + txt = """ + from bs4 import BeautifulSoup + + start_soup = BeautifulSoup(start_response.content, "html.parser") + """ + exp_output = """ + import bs4 + + start_soup = bs4.BeautifulSoup(start_response.content, "html.parser") + """ + self._run_prompt(prompt_tag, txt, exp_output) + + def test_code_fix_star_before_optional_parameters1(self) -> None: + prompt_tag = "code_fix_star_before_optional_parameters" + txt = """ + def transform(input: str, value: str, output: Optional[str] = None) -> str: + print(f"input={input}, value={value}, output={output}") + + transform("input", "value") + transform("input", "value", "output") + """ + exp_output = """ + def transform(input: str, value: str, *, output: Optional[str] = None) -> str: + print(f"input={input}, value={value}, output={output}") + + transform("input", "value") + transform("input", "value", output="output") + """ + self._run_prompt(prompt_tag, txt, exp_output) \ No newline at end of file From efca5e7cf50d8cb63b10626c51888fcf5748c079 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Wed, 30 Apr 2025 21:05:32 -0400 Subject: [PATCH 056/193] Update --- dev_scripts_helpers/llms/llm_prompts.py | 311 +++++++++++----------- dev_scripts_helpers/llms/llm_transform.py | 7 + 2 files changed, 166 insertions(+), 152 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 89eb1fcc0..892a88c74 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -13,10 +13,12 @@ _LOG = logging.getLogger(__name__) + # ############################################################################# # get_prompt_tags() # ############################################################################# + @functools.lru_cache(maxsize=1) def get_prompt_tags() -> List[str]: """ @@ -65,7 +67,6 @@ def get_prompt_tags() -> List[str]: # `convert_to_vim_cfile`. 
"convert_file_names": [ "code_review_correctness", - "code_review_and_find_missing_docstrings", "code_propose_refactoring", ], # remove_code_delimiters @@ -95,6 +96,12 @@ def get_outside_container_post_transforms(transform_name: str) -> Set[str]: _PROMPT_OUT = Tuple[str, Set[str], Set[str]] +_CONTEXT = r""" +You are a proficient Python coder who pays attention to detail. +I will pass you a chunk of Python code. +""" + + def test() -> _PROMPT_OUT: """ This is just needed as a placeholder to test the flow. @@ -105,13 +112,7 @@ def test() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -_CONTEXT = r""" -You are a proficient Python coder who pays attention to detail. -I will pass you a chunk of Python code. -""" - - -def code_add_comments() -> _PROMPT_OUT: +def code_fix_comments() -> _PROMPT_OUT: """ Add comments to Python code. """ @@ -128,7 +129,7 @@ def code_add_comments() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_add_docstrings() -> _PROMPT_OUT: +def code_fix_docstrings() -> _PROMPT_OUT: ''' Add or complete a REST docstring to Python code. Each function should have a docstring that describes the function, its parameters, and its return value. @@ -167,100 +168,6 @@ def code_fix_type_hints() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_review_correctness() -> _PROMPT_OUT: - system = _CONTEXT - system += r""" - You will review the code and make sure it is: - - correct - - clean and readable - - efficient - - robust - - maintainable - - Do not print any comment, besides for each point of improvement, you will - print the line number and the proposed improvement in the following style: - : - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - return system, pre_transforms, post_transforms - - -# TODO(gp): This is kind of expensive and we should use a procedural linter stage. 
-def code_review_and_find_missing_docstrings() -> _PROMPT_OUT: - """ - Find missing docstrings in Python code. - """ - system = _CONTEXT - system += r""" - You will review the code and find missing docstrings. - - Do not print any comment, only print the line number in the following style: - : - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - return system, pre_transforms, post_transforms - - -# def code_review_and_fix() -> _PROMPT_OUT: -# system = _CONTEXT -# system += r""" -# You will review the code and make sure it is correct and readable. - -# You will print the code with the proposed improvements, minimizing the -# number of changes to the code that are not strictly needed. -# """ -# pre_transforms = {"add_line_numbers"} -# post_transforms = {"convert_to_vim_cfile"} -# return system, pre_transforms, post_transforms - - -def code_propose_refactoring() -> _PROMPT_OUT: - system = _CONTEXT - system += r""" - You will review the code and look for opportunities to refactor the code, - by removing redundancy and copy-paste code. - - Do not print any comment, besides for each point of improvement, you will - print the line number and the proposed improvement in the following style: - : - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - return system, pre_transforms, post_transforms - - -def code_refactor_and_fix() -> _PROMPT_OUT: - system = _CONTEXT - system += r""" - You will review the code and look for opportunities to refactor the code, - by removing redundancy and copy-paste code, and apply refactoring to remove - redundancy in the code, minimizing the number of changes to the code that - are not needed. 
- """ - pre_transforms = set() - post_transforms = set() - return system, pre_transforms, post_transforms - - -def code_apply_linter_issues() -> _PROMPT_OUT: - system = _CONTEXT - system += r""" - I will pass you Python code and a list of linting errors in the format - ::: - - You will fix the code according to the linting errors passed, minimizing the - number of changes to the code that are not needed. - -tutorial_github/github_utils.py:105: [W0718(broad-exception-caught), get_github_contributors] Catching too general exception Exception [pylint] -tutorial_github/github_utils.py:106: [W1203(logging-fstring-interpolation), get_github_contributors] Use lazy % formatting in logging functions [pylint] - """ - pre_transforms = set() - post_transforms = set() - return system, pre_transforms, post_transforms - - def code_fix_log_string() -> _PROMPT_OUT: """ Fix the log statements to use % formatting. @@ -293,8 +200,10 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: """ system = _CONTEXT system += r""" - Use f-strings (formatted string literals) instead of % formatting and format - strings. + Fix statements like + raise ValueError(f"Unsupported data_source='{data_source}'") + by using f-strings (formatted string literals) instead of % formatting and + format strings. Do not print any comment, but just the converted code. @@ -328,28 +237,6 @@ def code_fix_by_using_perc_strings() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_apply_csfy_style() -> _PROMPT_OUT: - """ - Apply the style to the code using template code in `template_code.py`. - """ - system = _CONTEXT - file_name = "template_code.py" - file_content = hio.from_file(file_name) - system += rf""" - Apply the style described below to the Python code without changing the - behavior of the code. - ``` - {file_content} - ``` - Do not remove any code, just format the existing code using the style. - - Do not report any explanation of what you did, but just the converted code. 
- """ - pre_transforms = set() - post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms - - def code_fix_from_imports() -> _PROMPT_OUT: """ Fix code to use imports instead of "from import" statements. @@ -402,9 +289,15 @@ def code_fix_csfy_style() -> _PROMPT_OUT: """ Apply the csfy style to the code. """ - function_names = ["code_add_comments", - "code_add_docstrings", - "code_fix_type_hints"] + function_names = ["code_fix_comments", + "code_fix_docstrings", + "code_fix_type_hints", + "code_fix_log_string", + "code_fix_from_imports", + "code_fix_by_using_f_strings", + "code_fix_by_using_perc_strings", + "code_fix_star_before_optional_parameters", + ] system_prompts = [] for function_name in function_names: system, pre_transforms, post_transforms = eval(function_name)() @@ -420,6 +313,95 @@ def code_fix_csfy_style() -> _PROMPT_OUT: # ############################################################################# +def code_review_correctness() -> _PROMPT_OUT: + system = _CONTEXT + system += r""" + You will review the code and make sure it is: + - correct + - clean and readable + - efficient + - robust + - maintainable + + Do not print any comment, besides for each point of improvement, you will + print the line number and the proposed improvement in the following style: + : + """ + pre_transforms = {"add_line_numbers"} + post_transforms = {"convert_to_vim_cfile"} + return system, pre_transforms, post_transforms + + +def code_propose_refactoring() -> _PROMPT_OUT: + system = _CONTEXT + system += r""" + You will review the code and look for opportunities to refactor the code, + by removing redundancy and copy-paste code. 
+ + Do not print any comment, besides for each point of improvement, you will + print the line number and the proposed improvement in the following style: + : + """ + pre_transforms = {"add_line_numbers"} + post_transforms = {"convert_to_vim_cfile"} + return system, pre_transforms, post_transforms + + +def code_refactor_and_fix() -> _PROMPT_OUT: + system = _CONTEXT + system += r""" + You will review the code and look for opportunities to refactor the code, + by removing redundancy and copy-paste code, and apply refactoring to remove + redundancy in the code, minimizing the number of changes to the code that + are not needed. + """ + pre_transforms = set() + post_transforms = set() + return system, pre_transforms, post_transforms + + +def code_apply_linter_issues() -> _PROMPT_OUT: + system = _CONTEXT + system += r""" + I will pass you Python code and a list of linting errors in the format + ::: + + You will fix the code according to the linting errors passed, minimizing the + number of changes to the code that are not needed. + +tutorial_github/github_utils.py:105: [W0718(broad-exception-caught), get_github_contributors] Catching too general exception Exception [pylint] +tutorial_github/github_utils.py:106: [W1203(logging-fstring-interpolation), get_github_contributors] Use lazy % formatting in logging functions [pylint] + """ + pre_transforms = set() + post_transforms = set() + return system, pre_transforms, post_transforms + + +def code_apply_csfy_style() -> _PROMPT_OUT: + """ + Apply the style to the code using template code in `template_code.py`. + """ + system = _CONTEXT + file_name = "template_code.py" + file_content = hio.from_file(file_name) + system += rf""" + Apply the style described below to the Python code without changing the + behavior of the code. + ``` + {file_content} + ``` + Do not remove any code, just format the existing code using the style. + + Do not report any explanation of what you did, but just the converted code. 
+ """ + pre_transforms = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +# ############################################################################# + + def _get_code_unit_test_prompt(num_tests: int) -> str: system = _CONTEXT system += rf""" @@ -546,22 +528,8 @@ def slide_colorize_points() -> _PROMPT_OUT: # Transforms. # ############################################################################# - -def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: - """ - Convert the text passed to a string representing a vim cfile. - E.g., - ``` - 57: The docstring should use more detailed type annotations for ... - 98-104: Simplify the hash computation logic with a helper ... - ``` - become: - ``` - test.py:57: The docstring should use more detailed type annotations for ... - test.py:98: Simplify the hash computation logic with a helper ... - ``` - """ +def _extract_vim_cfile_lines(txt: str) -> List[str]: ret_out = [] for line in txt.split("\n"): _LOG.debug(hprint.to_str("line")) @@ -601,9 +569,32 @@ def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: else: _LOG.warning("Can't parse line: '%s'", line) continue - ret_out.append(f"{in_file_name}:{line_number}: {description}") + ret_out.append((line_number, description)) + return ret_out + + +def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: + """ + Convert the text passed to a string representing a vim cfile. + + E.g., + ``` + 57: The docstring should use more detailed type annotations for ... + 98-104: Simplify the hash computation logic with a helper ... + ``` + become: + ``` + test.py:57: The docstring should use more detailed type annotations for ... + test.py:98: Simplify the hash computation logic with a helper ... + ``` + """ + ret_out = _extract_vim_cfile_lines(txt) + # Append the file name to the description. 
+ ret_out2 = [] + for line_number, description in ret_out: + ret_out2.append(f"{in_file_name}:{line_number}: {description}") # Save the output. - txt_out = "\n".join(ret_out) + txt_out = "\n".join(ret_out2) return txt_out @@ -627,13 +618,29 @@ def _convert_to_vim_cfile(txt: str, in_file_name: str, out_file_name: str) -> st return txt_out -def _annotate_with_cfile(txt: str) -> str: +# TODO(gp): This should become an invoke, where we read a file and a cfile and +# inject TODOs. +def _annotate_with_cfile(txt: str, txt_cfile: str) -> str: """ - Given code and a corresponding cfile, annotate the code with the cfile. - - Use TODO(*): to + Annotate a file `txt` with TODOs from the cfile `txt_cfile`. """ - + ret_out = _extract_vim_cfile_lines(txt_cfile) + # Convert ret_out to a dict. + ret_out_dict = {} + for line_number, line in ret_out: + if line_number not in ret_out_dict: + ret_out_dict[line_number] = [line] + else: + ret_out_dict[line_number].append(line) + # Annotate the code. + txt_out = [] + for line_number, line in txt: + if line_number in ret_out_dict: + for todo in ret_out_dict[line_number]: + txt_out.append(f"# TODO(*): {todo}") + else: + txt_out.append(line) + return "\n".join(txt_out) # ############################################################################# diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 69a073a93..d073394ec 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -23,6 +23,13 @@ > llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring """ +# TODO(gp): There are different modes to run the script +# - process the input and write the transformed output +# - process the input and extract a cfile with the required changes +# - apply changes to the input from a cfile (e.g., from a previous run or the +# linter) + + import argparse import logging import os From 
6326f7d325d43ad65fb888b372200c7329cf5e23 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 08:08:00 -0400 Subject: [PATCH 057/193] Update --- .../llms/dockerized_llm_apply_cfile.py | 141 +++++++++++ dev_scripts_helpers/llms/llm_apply_cfile.py | 218 +++++++++++++++++ dev_scripts_helpers/llms/llm_prompts.py | 223 +++++++++++------- 3 files changed, 500 insertions(+), 82 deletions(-) create mode 100755 dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py create mode 100755 dev_scripts_helpers/llms/llm_apply_cfile.py diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py new file mode 100755 index 000000000..1d808bc8c --- /dev/null +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +""" +This script is designed to run a transformation script using LLMs. It requires +certain dependencies to be present (e.g., `openai`) and thus it is executed +within a Docker container. + +To use this script, you need to provide the input file, output file, and +the type of transformation to apply. +""" + +import argparse +import logging +import re +from typing import List, Tuple + +import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hparser as hparser + +_LOG = logging.getLogger(__name__) + + +def _parse_cfile(cfile: str) -> List[Tuple[str, str]]: + """ + Read and parse a cfile. + + :param cfile: path to the cfile + :return: list of tuples, each containing a line number and a transform, e.g., + [(file_name, line_number, transform), ...] + """ + # Read the cfile. + cfile_lines = hio.from_file(cfile) + # Parse the cfile. 
+ for line in cfile_lines: + _LOG.debug("line=%s", line) + # Parse the lines of the cfile, like + # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: First word of the docstring should not be `This` [doc_formatter] + # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] + # extracting the file name, line number, and transform. + regex = r"^(.+):(\d+): (.*)$" + match = re.match(regex, line) + hdbg.dassert_in(match, "Failed to parse line: %s", line) + # Extract the file name, line number, and transform. + file_name = match.group(1) + line_number = match.group(2) + transform = match.group(3) + # Add values to the list. + cfile_lines.append((file_name, line_number, transform)) + return cfile_lines + + +def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model: str) -> None: + """ + Apply the transforms to the file. + + :param cfile_lines: list of tuples, each containing a file name, line + number, and transform + :param model: model to use for the transformation + """ + # Create a dict from file to line number to transform. + file_to_line_to_transform = {} + for file_name, line_number, transform in cfile_lines: + if file_name not in file_to_line_to_transform: + file_to_line_to_transform[file_name] = [] + file_to_line_to_transform[file_name].append((line_number, transform)) + # Apply the transforms to the file. + for file_name, line_to_transform in file_to_line_to_transform.items(): + # Read the file. + txt_in = hio.from_file(file_name) + # Prepare the instructions for the prompt. + instructions = "\n".join( + [f"{line_number}: {transform}" for line_number, transform in line_to_transform] + ) + # Transform the file using the instructions. + txt_out = dshlllpr.run_prompt(prompt_tag, txt_in, model, instructions=instructions, in_file_name="", out_file_name="") + # Write the file. 
+ hio.to_file(txt_out, file_name) + + + +# # TODO(gp): This should become an invoke or a command, where we read a file +# and a cfile and inject TODOs in the code. +# def _annotate_with_cfile(txt: str, txt_cfile: str) -> str: +# """ +# Annotate a file `txt` with TODOs from the cfile `txt_cfile`. +# """ +# ret_out = _extract_vim_cfile_lines(txt_cfile) +# # Convert ret_out to a dict. +# ret_out_dict = {} +# for line_number, line in ret_out: +# if line_number not in ret_out_dict: +# ret_out_dict[line_number] = [line] +# else: +# ret_out_dict[line_number].append(line) +# # Annotate the code. +# txt_out = [] +# for line_number, line in txt: +# if line_number in ret_out_dict: +# for todo in ret_out_dict[line_number]: +# txt_out.append(f"# TODO(*): {todo}") +# else: +# txt_out.append(line) +# return "\n".join(txt_out) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--cfile", + type=str, + required=True, + help="Path to the cfile", + ) + hparser.add_prompt_arg(parser) + hparser.add_verbosity_arg(parser, log_level="CRITICAL") + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + # TODO(gp): Factor this out. + if args.fast_model: + model = "gpt-4o-mini" + else: + model = "gpt-4o" + # Apply the transforms. 
+ cfile_lines = _parse_cfile(args.cfile) + _apply_transforms(cfile_lines, args.prompt_tag, model) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py new file mode 100755 index 000000000..5b98e8b05 --- /dev/null +++ b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 + +""" +Read cfile input and implement a transform for each line of the cfile using LLMs. + +The script `dockerized_llm_apply_cfile.py` is executed within a Docker container to ensure +all dependencies are met. The Docker container is built dynamically if +necessary. The script requires an OpenAI API key to be set in the environment. + +Examples +# Basic Usage +> llm_apply_cfile.py -i cfile.txt +""" + + +import argparse +import logging +import os +import re +from typing import List, Optional + +import dev_scripts_helpers.documentation.lint_notes as dshdlino +import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hparser as hparser +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + + +def _parse() -> argparse.ArgumentParser: + """ + Same interface as `dockerized_llm_apply_cfile.py`. + """ + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--cfile", + type=str, + required=True, + help="Path to the cfile", + ) + hparser.add_prompt_arg(parser) + hparser.add_dockerized_script_arg(parser) + # Use CRITICAL to avoid logging anything. 
+ hparser.add_verbosity_arg(parser, log_level="CRITICAL") + return parser + + +def _run_dockerized_llm_apply_cfile( + in_file_path: str, + cmd_opts: List[str], + out_file_path: str, + *, + return_cmd: bool = False, + force_rebuild: bool = False, + use_sudo: bool = False, + suppress_output: bool = False, +) -> Optional[str]: + """ + Run dockerized_llm_transform.py in a Docker container with all its + dependencies. + """ + _LOG.debug(hprint.func_signature_to_str()) + # + hdbg.dassert_in("OPENAI_API_KEY", os.environ) + hdbg.dassert_isinstance(cmd_opts, list) + # Build the container, if needed. + container_image = "tmp.llm_transform" + dockerfile = r""" + FROM python:3.12-alpine + + # Install Bash. + #RUN apk add --no-cache bash + + # Set Bash as the default shell. + #SHELL ["/bin/bash", "-c"] + + # Install pip packages. + RUN pip install --upgrade pip + RUN pip install --no-cache-dir PyYAML + + RUN pip install --no-cache-dir openai + """ + container_image = hdocker.build_container_image( + container_image, dockerfile, force_rebuild, use_sudo + ) + # Convert files to Docker paths. 
+ is_caller_host = not hserver.is_inside_docker() + use_sibling_container_for_callee = True + caller_mount_path, callee_mount_path, mount = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + in_file_path = hdocker.convert_caller_to_callee_docker_path( + in_file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + out_file_path = hdocker.convert_caller_to_callee_docker_path( + out_file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=False, + is_input=False, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + helpers_root = hgit.find_helpers_root() + helpers_root = hdocker.convert_caller_to_callee_docker_path( + helpers_root, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=False, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + git_root = hgit.find_git_root() + # TODO(gp): -> llm_apply_cfile.py + script = hsystem.find_file_in_repo( + "dockerized_llm_transform.py", root_dir=git_root + ) + script = hdocker.convert_caller_to_callee_docker_path( + script, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + cmd_opts_as_str = " ".join(cmd_opts) + cmd = f" {script} -i {in_file_path} -o {out_file_path} {cmd_opts_as_str}" + docker_cmd = hdocker.get_docker_base_cmd(use_sudo) + docker_cmd.extend( + [ + f"-e PYTHONPATH={helpers_root}", + f"--workdir {callee_mount_path}", + f"--mount {mount}", + container_image, + cmd, + ] + ) + docker_cmd = " ".join(docker_cmd) + if return_cmd: + ret = docker_cmd + else: + # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
+ hsystem.system(docker_cmd, suppress_output=suppress_output) + ret = None + return ret + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger( + verbosity=args.log_level, use_exec_path=True, force_white=False + ) + if args.prompt == "list": + print("# Available prompt tags:") + print("\n".join(dshlllpr.get_prompt_tags())) + return + # TODO(gp): We should just automatically pass-through the options. + cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] + if args.fast_model: + cmd_line_opts.append("--fast_model") + if args.debug: + cmd_line_opts.append("-d") + # cmd_line_opts = [] + # for arg in vars(args): + # if arg not in ["input", "output"]: + # value = getattr(args, arg) + # if isinstance(value, bool): + # if value: + # cmd_line_opts.append(f"--{arg.replace('_', '-')}") + # else: + # cmd_line_opts.append(f"--{arg.replace('_', '-')} {value}") + # For stdin/stdout, suppress the output of the container. + suppress_output = False + _run_dockerized_llm_apply_cfile( + args.cfile, + cmd_line_opts, + return_cmd=False, + force_rebuild=args.dockerized_force_rebuild, + use_sudo=args.dockerized_use_sudo, + suppress_output=suppress_output, + ) + # Run post-transforms outside the container. + # # 1) _convert_file_names(). + # prompts = dshlllpr.get_outside_container_post_transforms("convert_file_names") + # if args.prompt in prompts: + # _convert_file_names(in_file_name, tmp_out_file_name) + # # 2) prettier_on_str(). + # out_txt = hio.from_file(tmp_out_file_name) + # prompts = dshlllpr.get_outside_container_post_transforms("prettier_on_str") + # if args.prompt in prompts: + # # Note that we need to run this outside the `llm_transform` container to + # # avoid to do docker-in-docker in the `llm_transform` container (which + # # doesn't support that). + # out_txt = dshdlino.prettier_on_str(out_txt) + # Read the output from the container and write it to the output file from + # command line (e.g., `-` for stdout). 
+ #hparser.write_file(out_txt, out_file_name) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 892a88c74..698f28917 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -112,6 +112,11 @@ def test() -> _PROMPT_OUT: return system, pre_transforms, post_transforms +# ############################################################################# +# Fix. +# ############################################################################# + + def code_fix_comments() -> _PROMPT_OUT: """ Add comments to Python code. @@ -130,10 +135,12 @@ def code_fix_comments() -> _PROMPT_OUT: def code_fix_docstrings() -> _PROMPT_OUT: - ''' - Add or complete a REST docstring to Python code. Each function should have a - docstring that describes the function, its parameters, and its return value. - ''' + """ + Add or complete a REST docstring to Python code. + + Each function should have a docstring that describes the function, + its parameters, and its return value. + """ system = _CONTEXT system += r''' Make sure each function as a REST docstring @@ -141,9 +148,9 @@ def code_fix_docstrings() -> _PROMPT_OUT: less than 80 characters - To describe the parameters use the REST style, which requires each parameter to be prepended with :param - - An example of a correct docstring is: + An example of a correct docstring is: + ``` def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: """ Format a greeting message with the given name. @@ -152,6 +159,7 @@ def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: :param greeting: the base greeting message to use :return: formatted greeting """ + ``` ''' pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -162,6 +170,25 @@ def code_fix_type_hints() -> _PROMPT_OUT: system = _CONTEXT system += r""" Add type hints to the Python code passed. 
+ + For example, convert: + ``` + def process_data(data, threshold=0.5): + results = [] + for item in data: + if item > threshold: + results.append(item) + return results + ``` + to: + ``` + def process_data(data: List[float], threshold: float = 0.5) -> List[float]: + results: List[float] = [] + for item in data: + if item > threshold: + results.append(item) + return results + ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -180,14 +207,22 @@ def code_fix_log_string() -> _PROMPT_OUT: Do not print any comment, but just the converted code. For instance, convert: + ``` _LOG.info(f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}'") + ``` to + ``` _LOG.info("env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) + ``` For instance, convert: + ``` hdbg.dassert_in(env_var, os.environ, f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''") + ``` to + ``` hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) + ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -196,21 +231,29 @@ def code_fix_log_string() -> _PROMPT_OUT: def code_fix_by_using_f_strings() -> _PROMPT_OUT: """ - Fix code to use f-strings, like `f"Hello, {name}. You are {age} years old."`. + Fix code to use f-strings, like `f"Hello, {name}. + + You are {age} years old."`. """ system = _CONTEXT system += r""" - Fix statements like - raise ValueError(f"Unsupported data_source='{data_source}'") + Fix statements like: + ``` + raise ValueError(f"Unsupported data_source='{data_source}'") + ``` by using f-strings (formatted string literals) instead of % formatting and format strings. - + Do not print any comment, but just the converted code. For instance, convert: + ``` "Hello, %s. You are %d years old." % (name, age) + ``` to + ``` f"Hello, {name}. You are {age} years old." 
+ ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -219,7 +262,9 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. + + You are %d years old." % (name, age)`. """ system = _CONTEXT system += r""" @@ -228,9 +273,13 @@ def code_fix_by_using_perc_strings() -> _PROMPT_OUT: Do not print any comment, but just the converted code. For instance, convert: + ``` f"Hello, {name}. You are {age} years old." + ``` to + ``` "Hello, %s. You are %d years old." % (name, age) + ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -243,17 +292,21 @@ def code_fix_from_imports() -> _PROMPT_OUT: """ system = _CONTEXT system += r""" - Replace any Python "from import" statement like: - from X import Y - with the form: - import X - and then replace the uses of Y with X.Y + Replace any Python "from import" statement like `from X import Y` with the + form `import X` and then replace the uses of `Y` with `X.Y` For instance, replace: + ``` from langchain_openai import OpenAIEmbeddings + ``` with: - import langchain_openai - and then replace the uses of OpenAIEmbeddings with langchain_openai.OpenAIEmbeddings + ``` + import langchain_openai + ``` + Then replace the uses of `OpenAIEmbeddings` with: + ``` + langchain_openai.OpenAIEmbeddings + ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -271,14 +324,17 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: that the function is called with the correct number of arguments. 
For instance, replace: + ``` def reflow_label(label: str, max_length: int = 10) -> str: reflow_label("Hello, world!", 10) - - with + ``` + with the following: + ``` def reflow_label(label: str, *, max_length: int = 10) -> str: reflow_label("Hello, world!", max_length=10) + ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -289,15 +345,17 @@ def code_fix_csfy_style() -> _PROMPT_OUT: """ Apply the csfy style to the code. """ - function_names = ["code_fix_comments", - "code_fix_docstrings", - "code_fix_type_hints", - "code_fix_log_string", - "code_fix_from_imports", - "code_fix_by_using_f_strings", - "code_fix_by_using_perc_strings", - "code_fix_star_before_optional_parameters", - ] + # > grep "def code_fix" ./dev_scripts_helpers/llms/llm_prompts.py | awk '{print $2 }' + function_names = [ + "code_fix_comments", + "code_fix_docstrings", + "code_fix_type_hints", + "code_fix_log_string", + "code_fix_by_using_f_strings", + "code_fix_by_using_perc_strings", + "code_fix_from_imports", + "code_fix_star_before_optional_parameters", + ] system_prompts = [] for function_name in function_names: system, pre_transforms, post_transforms = eval(function_name)() @@ -311,6 +369,8 @@ def code_fix_csfy_style() -> _PROMPT_OUT: # ############################################################################# +# Review. 
+# ############################################################################# def code_review_correctness() -> _PROMPT_OUT: @@ -347,7 +407,7 @@ def code_propose_refactoring() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_refactor_and_fix() -> _PROMPT_OUT: +def code_remove_redundancy() -> _PROMPT_OUT: system = _CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, @@ -360,23 +420,6 @@ def code_refactor_and_fix() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_apply_linter_issues() -> _PROMPT_OUT: - system = _CONTEXT - system += r""" - I will pass you Python code and a list of linting errors in the format - ::: - - You will fix the code according to the linting errors passed, minimizing the - number of changes to the code that are not needed. - -tutorial_github/github_utils.py:105: [W0718(broad-exception-caught), get_github_contributors] Catching too general exception Exception [pylint] -tutorial_github/github_utils.py:106: [W1203(logging-fstring-interpolation), get_github_contributors] Use lazy % formatting in logging functions [pylint] - """ - pre_transforms = set() - post_transforms = set() - return system, pre_transforms, post_transforms - - def code_apply_csfy_style() -> _PROMPT_OUT: """ Apply the style to the code using template code in `template_code.py`. @@ -400,6 +443,34 @@ def code_apply_csfy_style() -> _PROMPT_OUT: # ############################################################################# +# Apply transforms. +# ############################################################################# + + +def code_apply_linter_instructions() -> _PROMPT_OUT: + """ + Apply the transforms passed in a cfile to the code. 
+ """ + system = _CONTEXT + system += r""" + I will pass you Python code and a list of linting errors in the format + :: + + For example: + 105: [W0718(broad-exception-caught), get_github_contributors] Catching too general exception Exception [pylint] + 106: [W1203(logging-fstring-interpolation), get_github_contributors] Use lazy % formatting in logging functions [pylint] + + You will fix the code according to the linting errors passed, minimizing the + number of changes to the code that are not needed. + """ + pre_transforms = {"add_line_numbers"} + post_transforms = {"remove_line_numbers"} + return system, pre_transforms, post_transforms + + +# ############################################################################# +# Unit tests. +# ############################################################################# def _get_code_unit_test_prompt(num_tests: int) -> str: @@ -528,7 +599,7 @@ def slide_colorize_points() -> _PROMPT_OUT: # Transforms. # ############################################################################# - + def _extract_vim_cfile_lines(txt: str) -> List[str]: ret_out = [] for line in txt.split("\n"): @@ -576,7 +647,7 @@ def _extract_vim_cfile_lines(txt: str) -> List[str]: def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: """ Convert the text passed to a string representing a vim cfile. - + E.g., ``` 57: The docstring should use more detailed type annotations for ... @@ -618,38 +689,15 @@ def _convert_to_vim_cfile(txt: str, in_file_name: str, out_file_name: str) -> st return txt_out -# TODO(gp): This should become an invoke, where we read a file and a cfile and -# inject TODOs. -def _annotate_with_cfile(txt: str, txt_cfile: str) -> str: - """ - Annotate a file `txt` with TODOs from the cfile `txt_cfile`. - """ - ret_out = _extract_vim_cfile_lines(txt_cfile) - # Convert ret_out to a dict. 
- ret_out_dict = {} - for line_number, line in ret_out: - if line_number not in ret_out_dict: - ret_out_dict[line_number] = [line] - else: - ret_out_dict[line_number].append(line) - # Annotate the code. - txt_out = [] - for line_number, line in txt: - if line_number in ret_out_dict: - for todo in ret_out_dict[line_number]: - txt_out.append(f"# TODO(*): {todo}") - else: - txt_out.append(line) - return "\n".join(txt_out) - - # ############################################################################# # run_prompt() # ############################################################################# -# Apply transforms to the response. def _to_run(action: str, transforms: Set[str]) -> bool: + """ + Return True if the action should be run. + """ if action in transforms: transforms.remove(action) return True @@ -657,7 +705,10 @@ def _to_run(action: str, transforms: Set[str]) -> bool: def run_prompt( - prompt_tag: str, txt: str, model: str, in_file_name: str, out_file_name: str + prompt_tag: str, txt: str, model: str, + *, + instructions: Optional[str] = None, + in_file_name: str = "", out_file_name: str = "", ) -> Optional[str]: """ Run the prompt passed and apply the transforms to the response. @@ -665,14 +716,15 @@ def run_prompt( :param prompt_tag: tag of the prompt to run :param txt: text to run the prompt on :param model: model to use - :param in_file_name: name of the input file - :param out_file_name: name of the output file + :param instructions: instructions to add to the system prompt + (e.g., line numbers and transforms to apply to each file) + :param in_file_name: name of the input file (needed only for cfile) + :param out_file_name: name of the output file (needed only for cfile) :return: transformed text """ - _LOG.debug(hprint.to_str("prompt_tag model in_file_name out_file_name")) + _LOG.debug(hprint.func_signature_to_str()) # Get the info corresponding to the prompt tag. 
prompt_tags = get_prompt_tags() - _LOG.debug(hprint.to_str("prompt_tags")) hdbg.dassert_in(prompt_tag, prompt_tags) python_cmd = f"{prompt_tag}()" system_prompt, pre_transforms, post_transforms = eval(python_cmd) @@ -680,16 +732,21 @@ def run_prompt( hdbg.dassert_isinstance(pre_transforms, set) hdbg.dassert_isinstance(post_transforms, set) system_prompt = hprint.dedent(system_prompt) - # Run pre-transforms. + # 1) Run pre-transforms. if _to_run("add_line_numbers", pre_transforms): txt = hmarkdo.add_line_numbers(txt) + if _to_run("add_instructions", pre_transforms): + hdbg.dassert_is_not(instructions, None) + system_prompt = "The instructions are:\n" + system_prompt hdbg.dassert_eq( len(pre_transforms), 0, "Not all pre_transforms were run: %s", pre_transforms, ) + # 2) Run the prompt. if prompt_tag == "test": + # Compute the hash of the text. txt = "\n".join(txt) txt_out = hashlib.sha256(txt.encode("utf-8")).hexdigest() else: @@ -705,7 +762,7 @@ def run_prompt( # _LOG.debug(hprint.to_str("response")) txt_out = hopenai.response_to_txt(response) hdbg.dassert_isinstance(txt_out, str) - # Run post-transforms. + # 3) Run post-transforms. 
if _to_run("remove_code_delimiters", post_transforms): txt_out = hmarkdo.remove_code_delimiters(txt_out) if _to_run("remove_end_of_line_periods", post_transforms): @@ -713,6 +770,8 @@ def run_prompt( if _to_run("remove_empty_lines", post_transforms): txt_out = hmarkdo.remove_empty_lines(txt_out) if _to_run("convert_to_vim_cfile", post_transforms): + hdbg.dassert_ne(in_file_name, "") + hdbg.dassert_ne(out_file_name, "") txt_out = _convert_to_vim_cfile(txt_out, in_file_name, out_file_name) hdbg.dassert_eq( len(post_transforms), From 48a21846aa4262c86fc4df75e931c58323324e07 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 08:35:15 -0400 Subject: [PATCH 058/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../llms/dockerized_llm_apply_cfile.py | 28 +++++++-- dev_scripts_helpers/llms/llm_apply_cfile.py | 18 ++---- dev_scripts_helpers/llms/llm_prompts.py | 58 +++---------------- dev_scripts_helpers/llms/llm_transform.py | 6 +- .../llms/test/test_llm_apply_cfile.py | 54 +++++++++++++++++ 5 files changed, 91 insertions(+), 73 deletions(-) create mode 100644 dev_scripts_helpers/llms/test/test_llm_apply_cfile.py diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index 1d808bc8c..03a22bed6 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -18,11 +18,12 @@ import helpers.hdbg as hdbg import helpers.hio as hio import helpers.hparser as hparser +import helpers.hsystem as hsystem _LOG = logging.getLogger(__name__) -def _parse_cfile(cfile: str) -> List[Tuple[str, str]]: +def _parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: """ Read and parse a cfile. 
@@ -32,23 +33,29 @@ def _parse_cfile(cfile: str) -> List[Tuple[str, str]]: """ # Read the cfile. cfile_lines = hio.from_file(cfile) + cfile_lines = cfile_lines.split("\n") + # + ret = [] # Parse the cfile. for line in cfile_lines: _LOG.debug("line=%s", line) + hdbg.dassert_isinstance(line, str) # Parse the lines of the cfile, like # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: First word of the docstring should not be `This` [doc_formatter] # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] # extracting the file name, line number, and transform. regex = r"^(.+):(\d+): (.*)$" match = re.match(regex, line) - hdbg.dassert_in(match, "Failed to parse line: %s", line) + if match is None: + _LOG.debug("Failed to parse line '%s'", line) + continue # Extract the file name, line number, and transform. file_name = match.group(1) line_number = match.group(2) transform = match.group(3) # Add values to the list. - cfile_lines.append((file_name, line_number, transform)) - return cfile_lines + ret.append((file_name, line_number, transform)) + return ret def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model: str) -> None: @@ -65,10 +72,19 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model if file_name not in file_to_line_to_transform: file_to_line_to_transform[file_name] = [] file_to_line_to_transform[file_name].append((line_number, transform)) + # + _LOG.info("Files to transform: %s", len(file_to_line_to_transform.keys())) + _LOG.info("Total number of transform: %s", len(cfile_lines)) # Apply the transforms to the file. for file_name, line_to_transform in file_to_line_to_transform.items(): + _LOG.info("Applying transforms to file '%s'", file_name) + # Look for file in the current directory. 
+ cmd = f'find -path "*/{file_name}"' + _, act_file_name = hsystem.system_to_one_line(cmd) + _LOG.debug("Found file '%s' -> '%s'", file_name, act_file_name) # Read the file. - txt_in = hio.from_file(file_name) + hdbg.dassert_path_exists(act_file_name) + txt_in = hio.from_file(act_file_name) # Prepare the instructions for the prompt. instructions = "\n".join( [f"{line_number}: {transform}" for line_number, transform in line_to_transform] @@ -134,7 +150,7 @@ def _main(parser: argparse.ArgumentParser) -> None: model = "gpt-4o" # Apply the transforms. cfile_lines = _parse_cfile(args.cfile) - _apply_transforms(cfile_lines, args.prompt_tag, model) + _apply_transforms(cfile_lines, args.prompt, model) if __name__ == "__main__": diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py index 5b98e8b05..e2f57992f 100755 --- a/dev_scripts_helpers/llms/llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -57,7 +57,6 @@ def _parse() -> argparse.ArgumentParser: def _run_dockerized_llm_apply_cfile( in_file_path: str, cmd_opts: List[str], - out_file_path: str, *, return_cmd: bool = False, force_rebuild: bool = False, @@ -78,10 +77,10 @@ def _run_dockerized_llm_apply_cfile( FROM python:3.12-alpine # Install Bash. - #RUN apk add --no-cache bash + RUN apk add --no-cache bash # Set Bash as the default shell. - #SHELL ["/bin/bash", "-c"] + SHELL ["/bin/bash", "-c"] # Install pip packages. 
RUN pip install --upgrade pip @@ -107,15 +106,6 @@ def _run_dockerized_llm_apply_cfile( is_caller_host=is_caller_host, use_sibling_container_for_callee=use_sibling_container_for_callee, ) - out_file_path = hdocker.convert_caller_to_callee_docker_path( - out_file_path, - caller_mount_path, - callee_mount_path, - check_if_exists=False, - is_input=False, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) helpers_root = hgit.find_helpers_root() helpers_root = hdocker.convert_caller_to_callee_docker_path( helpers_root, @@ -129,7 +119,7 @@ def _run_dockerized_llm_apply_cfile( git_root = hgit.find_git_root() # TODO(gp): -> llm_apply_cfile.py script = hsystem.find_file_in_repo( - "dockerized_llm_transform.py", root_dir=git_root + "dockerized_llm_apply_cfile.py", root_dir=git_root ) script = hdocker.convert_caller_to_callee_docker_path( script, @@ -141,7 +131,7 @@ def _run_dockerized_llm_apply_cfile( use_sibling_container_for_callee=use_sibling_container_for_callee, ) cmd_opts_as_str = " ".join(cmd_opts) - cmd = f" {script} -i {in_file_path} -o {out_file_path} {cmd_opts_as_str}" + cmd = f" {script} --cfile {in_file_path} {cmd_opts_as_str}" docker_cmd = hdocker.get_docker_base_cmd(use_sudo) docker_cmd.extend( [ diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 698f28917..f7e742704 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -251,9 +251,6 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: "Hello, %s. You are %d years old." % (name, age) ``` to - ``` - f"Hello, {name}. You are {age} years old." - ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -273,13 +270,7 @@ def code_fix_by_using_perc_strings() -> _PROMPT_OUT: Do not print any comment, but just the converted code. For instance, convert: - ``` - f"Hello, {name}. You are {age} years old." - ``` to - ``` - "Hello, %s. 
You are %d years old." % (name, age) - ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -296,17 +287,8 @@ def code_fix_from_imports() -> _PROMPT_OUT: form `import X` and then replace the uses of `Y` with `X.Y` For instance, replace: - ``` - from langchain_openai import OpenAIEmbeddings - ``` with: - ``` - import langchain_openai - ``` Then replace the uses of `OpenAIEmbeddings` with: - ``` - langchain_openai.OpenAIEmbeddings - ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -324,17 +306,7 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: that the function is called with the correct number of arguments. For instance, replace: - ``` - def reflow_label(label: str, max_length: int = 10) -> str: - - reflow_label("Hello, world!", 10) - ``` with the following: - ``` - def reflow_label(label: str, *, max_length: int = 10) -> str: - - reflow_label("Hello, world!", max_length=10) - ``` """ pre_transforms = set() post_transforms = {"remove_code_delimiters"} @@ -430,9 +402,6 @@ def code_apply_csfy_style() -> _PROMPT_OUT: system += rf""" Apply the style described below to the Python code without changing the behavior of the code. - ``` - {file_content} - ``` Do not remove any code, just format the existing code using the style. Do not report any explanation of what you did, but just the converted code. @@ -483,11 +452,6 @@ def _get_code_unit_test_prompt(num_tests: int) -> str: - Use the following style for the unit tests: - When calling the function passed assume it's under the module called uut and the user has called `import uut as uut` - ``` - act = call to the function passed - exp = expected code - self.assert_equal(act, exp) - ``` """ return system @@ -649,15 +613,7 @@ def _convert_to_vim_cfile_str(txt: str, in_file_name: str) -> str: Convert the text passed to a string representing a vim cfile. E.g., - ``` - 57: The docstring should use more detailed type annotations for ... 
- 98-104: Simplify the hash computation logic with a helper ... - ``` become: - ``` - test.py:57: The docstring should use more detailed type annotations for ... - test.py:98: Simplify the hash computation logic with a helper ... - ``` """ ret_out = _extract_vim_cfile_lines(txt) # Append the file name to the description. @@ -705,10 +661,13 @@ def _to_run(action: str, transforms: Set[str]) -> bool: def run_prompt( - prompt_tag: str, txt: str, model: str, + prompt_tag: str, + txt: str, + model: str, *, instructions: Optional[str] = None, - in_file_name: str = "", out_file_name: str = "", + in_file_name: str = "", + out_file_name: str = "", ) -> Optional[str]: """ Run the prompt passed and apply the transforms to the response. @@ -716,10 +675,11 @@ def run_prompt( :param prompt_tag: tag of the prompt to run :param txt: text to run the prompt on :param model: model to use - :param instructions: instructions to add to the system prompt - (e.g., line numbers and transforms to apply to each file) + :param instructions: instructions to add to the system prompt (e.g., + line numbers and transforms to apply to each file) :param in_file_name: name of the input file (needed only for cfile) - :param out_file_name: name of the output file (needed only for cfile) + :param out_file_name: name of the output file (needed only for + cfile) :return: transformed text """ _LOG.debug(hprint.func_signature_to_str()) diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index d073394ec..82cc22f14 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -24,10 +24,8 @@ """ # TODO(gp): There are different modes to run the script -# - process the input and write the transformed output -# - process the input and extract a cfile with the required changes -# - apply changes to the input from a cfile (e.g., from a previous run or the -# linter) +# - run the script to process input and write transformed 
output +# - run the script to process input and extract a cfile import argparse diff --git a/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py b/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py new file mode 100644 index 000000000..4adc0cad2 --- /dev/null +++ b/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py @@ -0,0 +1,54 @@ +import logging +import os +from typing import Tuple + +import pytest + +import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_llm_transform1 +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_ci() or hserver.is_dev_csfy(), + reason="Disabled because of CmampTask10710", +) +class Test_llm_apply_cfile1(hunitest.TestCase): + """ + Run the script `llm_transform.py` in a Docker container. 
+ """ + pass + # i lint --files dev_scripts_helpers/llms/llm_prompts.py + + # llm_apply_cfile.py --cfile linter_warnings.txt -p code_apply_linter_instructions -v DEBUG + + +# cmd line='./linters/base.py --files dev_scripts_helpers/llms/llm_prompts.py --num_threads serial' +# file_paths=1 ['dev_scripts_helpers/llms/llm_prompts.py'] +# actions=25 ['add_python_init_files', 'add_toc_to_notebook', 'fix_md_links', 'lint_md', 'check_md_toc_headers', 'autoflake', 'fix_whitespaces', 'doc_formatter', 'isort', 'class_method_order', 'normalize_imports', 'format_separating_line', 'add_class_frames', 'remove_empty_lines_in_func +# tion', 'black', 'process_jupytext', 'check_file_size', 'check_filename', 'check_merge_conflict', 'check_import', 'warn_incorrectly_formatted_todo', 'check_md_reference', 'flake8', 'pylint', 'mypy'] +# //////////////////////////////////////////////////////////////////////////////// +# dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: First word of the docstring should not be `This` [doc_formatter] +# dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:111: error: Need type annotation for "post_transforms" (hint: "post_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:132: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:164: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:193: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:227: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] 
[mypy] +# dev_scripts_helpers/llms/llm_prompts.py:255: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:275: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:293: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:311: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:390: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] +# dev_scripts_helpers/llms/llm_prompts.py:391: error: Need type annotation for "post_transforms" (hint: "post_transforms: set[] = ...") [var-annotated] [mypy] \ No newline at end of file From 43a5c997b823547e7bb4e17eee254499710d3144 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 08:42:25 -0400 Subject: [PATCH 059/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py | 8 ++++++-- dev_scripts_helpers/llms/llm_prompts.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index 03a22bed6..ee4329094 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -14,6 +14,8 @@ import re from typing import List, Tuple +import tqdm + import dev_scripts_helpers.llms.llm_prompts as dshlllpr import 
helpers.hdbg as hdbg import helpers.hio as hio @@ -76,7 +78,7 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model _LOG.info("Files to transform: %s", len(file_to_line_to_transform.keys())) _LOG.info("Total number of transform: %s", len(cfile_lines)) # Apply the transforms to the file. - for file_name, line_to_transform in file_to_line_to_transform.items(): + for file_name, line_to_transform in tqdm.tqdm(file_to_line_to_transform.items()): _LOG.info("Applying transforms to file '%s'", file_name) # Look for file in the current directory. cmd = f'find -path "*/{file_name}"' @@ -89,10 +91,12 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model instructions = "\n".join( [f"{line_number}: {transform}" for line_number, transform in line_to_transform] ) + print(instructions) + assert 0 # Transform the file using the instructions. txt_out = dshlllpr.run_prompt(prompt_tag, txt_in, model, instructions=instructions, in_file_name="", out_file_name="") # Write the file. - hio.to_file(txt_out, file_name) + hio.to_file(act_file_name, txt_out) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index f7e742704..72d5dc6ea 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -433,7 +433,7 @@ def code_apply_linter_instructions() -> _PROMPT_OUT: number of changes to the code that are not needed. 
""" pre_transforms = {"add_line_numbers"} - post_transforms = {"remove_line_numbers"} + post_transforms = set() return system, pre_transforms, post_transforms From 7aa83c6450ba93d84d1c604a6f14256f28b080f6 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 08:50:43 -0400 Subject: [PATCH 060/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../llms/dockerized_llm_apply_cfile.py | 2 -- dev_scripts_helpers/llms/llm_prompts.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index ee4329094..af44d98b1 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -91,8 +91,6 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model instructions = "\n".join( [f"{line_number}: {transform}" for line_number, transform in line_to_transform] ) - print(instructions) - assert 0 # Transform the file using the instructions. txt_out = dshlllpr.run_prompt(prompt_tag, txt_in, model, instructions=instructions, in_file_name="", out_file_name="") # Write the file. 
diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 72d5dc6ea..6e6d2e875 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -429,11 +429,14 @@ def code_apply_linter_instructions() -> _PROMPT_OUT: 105: [W0718(broad-exception-caught), get_github_contributors] Catching too general exception Exception [pylint] 106: [W1203(logging-fstring-interpolation), get_github_contributors] Use lazy % formatting in logging functions [pylint] - You will fix the code according to the linting errors passed, minimizing the - number of changes to the code that are not needed. + You will fix the code according to the linting errors passed, print the + modified code, minimizing the number of changes to the code that are not + needed. + + Do not print any discussion, but just the converted code. """ - pre_transforms = {"add_line_numbers"} - post_transforms = set() + pre_transforms = {"add_line_numbers", "add_instructions"} + post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -716,6 +719,7 @@ def run_prompt( # environment. 
import helpers.hopenai as hopenai + _LOG.debug(hprint.to_str("system_prompt")) response = hopenai.get_completion( txt, system_prompt=system_prompt, model=model, print_cost=True ) From 2c543ab3af5ea0f74f8b60625392028414ea18a6 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 09:05:16 -0400 Subject: [PATCH 061/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 19 +++++++++++-------- helpers/hmarkdown.py | 3 +++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 6e6d2e875..20f11ebfb 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -97,9 +97,9 @@ def get_outside_container_post_transforms(transform_name: str) -> Set[str]: _CONTEXT = r""" -You are a proficient Python coder who pays attention to detail. -I will pass you a chunk of Python code. -""" + You are a proficient Python coder who pays attention to detail. + I will pass you a chunk of Python code. + """ def test() -> _PROMPT_OUT: @@ -425,10 +425,6 @@ def code_apply_linter_instructions() -> _PROMPT_OUT: I will pass you Python code and a list of linting errors in the format :: - For example: - 105: [W0718(broad-exception-caught), get_github_contributors] Catching too general exception Exception [pylint] - 106: [W1203(logging-fstring-interpolation), get_github_contributors] Use lazy % formatting in logging functions [pylint] - You will fix the code according to the linting errors passed, print the modified code, minimizing the number of changes to the code that are not needed. 
@@ -699,8 +695,15 @@ def run_prompt( if _to_run("add_line_numbers", pre_transforms): txt = hmarkdo.add_line_numbers(txt) if _to_run("add_instructions", pre_transforms): + # Add the specific instructions to the system prompt. + # E.g., + # The instructions are: + # 52: in private function `_parse`:D401: First line should be in imperative mood; try rephrasing (found 'Same') [doc_formatter] + # 174: error: Missing return statement [return] [mypy] + # 192: [W1201(logging-not-lazy), _convert_file_names] Use lazy % formatting in logging functions [pylint] + system_prompt = hprint.dedent(system_prompt) hdbg.dassert_is_not(instructions, None) - system_prompt = "The instructions are:\n" + system_prompt + system_prompt += "\nThe instructions are:\n" + instructions + "\n\n" hdbg.dassert_eq( len(pre_transforms), 0, diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index ff4a98c7e..fc7497bc1 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -252,6 +252,9 @@ def remove_code_delimiters(txt: str) -> str: # Replace the ```python and ``` delimiters with empty strings. txt_out = txt.replace("```python", "").replace("```", "") txt_out = txt_out.strip() + # Remove the numbers at the beginning of the line, if needed + # E.g., `3: """` -> `"""`. 
+ txt_out = re.sub(r"(^\d+: )", "", txt_out, flags=re.MULTILINE) return txt_out From 74d837dd5aa303b8b1a38c980dc158eee34223a3 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 09:06:34 -0400 Subject: [PATCH 062/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/git/git_hooks/utils.py | 2 +- dev_scripts_helpers/llms/llm_transform.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev_scripts_helpers/git/git_hooks/utils.py b/dev_scripts_helpers/git/git_hooks/utils.py index bd18a258c..a4c46301b 100644 --- a/dev_scripts_helpers/git/git_hooks/utils.py +++ b/dev_scripts_helpers/git/git_hooks/utils.py @@ -216,7 +216,7 @@ def check_author(abort_on_error: bool = True) -> None: user_email = user_email.lstrip().rstrip() cmd = f"{_GIT_BINARY_PATH} config --show-origin {var}" _system_to_string(cmd, verbose=verbose) - print(f"user_email='{user_email}") + print(f"user_email='{user_email}'") # Check. error = False if not user_email.endswith("@gmail.com"): diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 82cc22f14..3987ffade 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -50,7 +50,7 @@ def _parse() -> argparse.ArgumentParser: """ - Same interface as `dockerized_llm_transform.py`. + Use the same argparse parser for `dockerized_llm_transform.py`. 
""" parser = argparse.ArgumentParser( description=__doc__, @@ -186,12 +186,12 @@ def _convert_file_names(in_file_name: str, out_file_name: str) -> str: if line.strip() == "": continue # E.g., the format is like - # ``` + # # /app/helpers_root/r.py:1: Change the shebang line to `#!/usr/bin/env python3` to e - # ``` - _LOG.debug("before: " + hprint.to_str("line in_file_name")) + # + _LOG.debug("before: %s", hprint.to_str("line in_file_name")) line = re.sub(r"^.*(:\d+:.*)$", rf"{in_file_name}\1", line) - _LOG.debug("after: " + hprint.to_str("line")) + _LOG.debug("after: %s", hprint.to_str("line")) txt_out.append(line) txt_out = "\n".join(txt_out) hio.to_file(out_file_name, txt_out) From 33727afb2a767aeeed938a535f7eb4bb875bc2e3 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 11:20:48 -0400 Subject: [PATCH 063/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../llms/dockerized_llm_apply_cfile.py | 4 +- .../llms/dockerized_llm_transform.py | 4 +- dev_scripts_helpers/llms/llm_apply_cfile.py | 1 - dev_scripts_helpers/llms/llm_prompts.py | 41 ++++++++++++------- .../llms/test/test_llm_prompts.py | 5 ++- 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index af44d98b1..f31149a73 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -92,7 +92,9 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model [f"{line_number}: {transform}" for line_number, transform in line_to_transform] ) # Transform the file using the instructions. 
- txt_out = dshlllpr.run_prompt(prompt_tag, txt_in, model, instructions=instructions, in_file_name="", out_file_name="") + txt_out = dshlllpr.run_prompt(prompt_tag, txt_in, model, + instructions=instructions, + in_file_name="", out_file_name="") # Write the file. hio.to_file(act_file_name, txt_out) diff --git a/dev_scripts_helpers/llms/dockerized_llm_transform.py b/dev_scripts_helpers/llms/dockerized_llm_transform.py index 8d646da21..431833913 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_transform.py +++ b/dev_scripts_helpers/llms/dockerized_llm_transform.py @@ -47,7 +47,9 @@ def _main(parser: argparse.ArgumentParser) -> None: model = "gpt-4o-mini" else: model = "gpt-4o" - txt_tmp = dshlllpr.run_prompt(prompt_tag, txt_tmp, model, in_file_name, out_file_name) + txt_tmp = dshlllpr.run_prompt(prompt_tag, txt_tmp, model, + in_file_name=in_file_name, + out_file_name=out_file_name) if txt_tmp is not None: # Write file, if needed. res = [] diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py index e2f57992f..4f54d0e7b 100755 --- a/dev_scripts_helpers/llms/llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -12,7 +12,6 @@ > llm_apply_cfile.py -i cfile.txt """ - import argparse import logging import os diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 20f11ebfb..c865fe974 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -67,7 +67,7 @@ def get_prompt_tags() -> List[str]: # `convert_to_vim_cfile`. "convert_file_names": [ "code_review_correctness", - "code_propose_refactoring", + "code_review_refactoring", ], # remove_code_delimiters "prettier_on_str": [ @@ -346,6 +346,9 @@ def code_fix_csfy_style() -> _PROMPT_OUT: def code_review_correctness() -> _PROMPT_OUT: + """ + Review the code for correctness. 
+ """ system = _CONTEXT system += r""" You will review the code and make sure it is: @@ -364,7 +367,10 @@ def code_review_correctness() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_propose_refactoring() -> _PROMPT_OUT: +def code_review_refactoring() -> _PROMPT_OUT: + """ + Review the code for refactoring opportunities. + """ system = _CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, @@ -379,7 +385,12 @@ def code_propose_refactoring() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_remove_redundancy() -> _PROMPT_OUT: +# ############################################################################# +# Transform the code. +# ############################################################################# + + +def code_transform_remove_redundancy() -> _PROMPT_OUT: system = _CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, @@ -392,7 +403,7 @@ def code_remove_redundancy() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -def code_apply_csfy_style() -> _PROMPT_OUT: +def code_transform_apply_csfy_style() -> _PROMPT_OUT: """ Apply the style to the code using template code in `template_code.py`. """ @@ -400,10 +411,14 @@ def code_apply_csfy_style() -> _PROMPT_OUT: file_name = "template_code.py" file_content = hio.from_file(file_name) system += rf""" - Apply the style described below to the Python code without changing the - behavior of the code. + Apply the style described below to the Python code + + ``` + {file_content} + ``` + Do not remove any code, just format the existing code using the style. - + Do not change the behavior of the code. Do not report any explanation of what you did, but just the converted code. 
""" pre_transforms = set() @@ -411,12 +426,7 @@ def code_apply_csfy_style() -> _PROMPT_OUT: return system, pre_transforms, post_transforms -# ############################################################################# -# Apply transforms. -# ############################################################################# - - -def code_apply_linter_instructions() -> _PROMPT_OUT: +def code_transform_apply_linter_instructions() -> _PROMPT_OUT: """ Apply the transforms passed in a cfile to the code. """ @@ -441,6 +451,7 @@ def code_apply_linter_instructions() -> _PROMPT_OUT: # ############################################################################# +# TODO(gp): Probably obsolete since Cursor can do it. def _get_code_unit_test_prompt(num_tests: int) -> str: system = _CONTEXT system += rf""" @@ -455,14 +466,14 @@ def _get_code_unit_test_prompt(num_tests: int) -> str: return system -def code_unit_test() -> _PROMPT_OUT: +def code_write_unit_test() -> _PROMPT_OUT: system = _get_code_unit_test_prompt(5) pre_transforms = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms -def code_1_unit_test() -> _PROMPT_OUT: +def code_write_1_unit_test() -> _PROMPT_OUT: system = _get_code_unit_test_prompt(1) pre_transforms = set() post_transforms = {"remove_code_delimiters"} diff --git a/dev_scripts_helpers/llms/test/test_llm_prompts.py b/dev_scripts_helpers/llms/test/test_llm_prompts.py index 60725e0ec..62a743ed8 100644 --- a/dev_scripts_helpers/llms/test/test_llm_prompts.py +++ b/dev_scripts_helpers/llms/test/test_llm_prompts.py @@ -97,7 +97,8 @@ def _run_prompt(self, prompt_tag: str, input_txt: str, exp_output: str) -> None: out_file_name = "test.py" # Run the prompt. act_output = dshlllpr.run_prompt( - prompt_tag, input_txt, model, in_file_name, out_file_name + prompt_tag, input_txt, model, in_file_name=in_file_name, + out_file_name=out_file_name ) # Check the output. 
exp_output = hprint.dedent(exp_output) @@ -135,4 +136,4 @@ def transform(input: str, value: str, *, output: Optional[str] = None) -> str: transform("input", "value") transform("input", "value", output="output") """ - self._run_prompt(prompt_tag, txt, exp_output) \ No newline at end of file + self._run_prompt(prompt_tag, txt, exp_output) From 23c99bc7b64cd8f16a89ea747dd8ed6ac99e69d7 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 12:42:51 -0400 Subject: [PATCH 064/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/misc/extract_bounties.py | 116 +++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100755 dev_scripts_helpers/misc/extract_bounties.py diff --git a/dev_scripts_helpers/misc/extract_bounties.py b/dev_scripts_helpers/misc/extract_bounties.py new file mode 100755 index 000000000..fc307affc --- /dev/null +++ b/dev_scripts_helpers/misc/extract_bounties.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python + +""" +Given the list of potential bounties + +> curl -L -o output.md "https://docs.google.com/document/d/1xPgQ2tWXQuVWKkGVONjOGd5j14mXSmGeY_4d1_sGzAE/export?format=markdown" +> grep "## " output.md +## **Template** +## **LLMs** +### **Capture / replay interactions with OpenAI** +### **Extend hopenai.py using LangChain for Multiple Models** +### **TO\_FILE: Add progress bar to get\_completion** +### **Create an evaluation suite to compare LLM prompting and models** + +Extract sections from a markdown file and create files based on section hierarchy. 
+ +This script processes a markdown file to extract level 2 (##) and level 3 (###) sections, +then creates files named with the format: level3_section,level2_section.txt + +Examples: +# Process a markdown file and create files in default 'output' directory +> extract_markdown_sections.py input.md + +# Process a markdown file and create files in specified directory +> extract_markdown_sections.py input.md --dst_dir my_output +""" + +import argparse +import logging +import re +from typing import List, Tuple + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hparser as hparser +import helpers.hsystem as hsystem +_LOG = logging.getLogger(__name__) + + +def _clean_up(line: str) -> str: + # Remove the ** and TO\_FILE + line = line.replace("**", "").replace("TO\_FILE:", "") + # Remove \` and \_. + line = line.replace(r"\`", "'").replace(r"\_", "_") + # + line = line.strip() + return line + + +def _extract_sections(markdown_content: str) -> List[Tuple[str, str]]: + """ + Extract sections from markdown content. + + :param markdown_content: string containing the markdown content + :return: list of tuples containing (level2_section, level3_section) + """ + sections = [] + current_level2 = None + # Split content into lines + lines = markdown_content.split('\n') + for line in lines: + # Check for level 2 headers (##). + level2_match = re.match(r'^##\s+(.+)$', line) + if level2_match: + current_level2 = level2_match.group(1).strip() + current_level2 = _clean_up(current_level2) + continue + # Check for level 3 headers (###). + level3_match = re.match(r'^###\s+(.+)$', line) + if level3_match and current_level2: + level3_section = level3_match.group(1).strip() + level3_section = _clean_up(level3_section) + sections.append((current_level2, level3_section)) + return sections + + +def _parse() -> argparse.ArgumentParser: + """ + Parse command-line arguments. 
+ + :return: argument parser with all command-line arguments + """ + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--input_file", action="store", required=True, help="Path to the markdown file to process") + parser.add_argument("--output_file", action="store", required=False, help="Path to the output file to process") + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + """ + Execute the main logic of the script. + + :param parser: argument parser with command-line arguments + """ + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + # Read the input file. + content = hio.from_file(args.input_file) + # Extract the sections. + sections = _extract_sections(content) + _LOG.info("Processed %d sections from %s", len(sections), args.input_file) + # Convert the list of tuples into a string separated by commas. + strs = [f"{level3}\t{level2}" for level2, level3 in sections] + txt = "\n".join(strs) + # Create a file with the string. 
+ if args.output_file: + hio.to_file(args.output_file, txt) + else: + hsystem.to_pbcopy(txt, pbcopy=True) + + +if __name__ == "__main__": + _main(_parse()) From c3c0231ba479b9b499b70a933f47b6858beea616 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 12:45:12 -0400 Subject: [PATCH 065/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/misc/extract_bounties.py | 27 ++++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/dev_scripts_helpers/misc/extract_bounties.py b/dev_scripts_helpers/misc/extract_bounties.py index fc307affc..a10ac06d6 100755 --- a/dev_scripts_helpers/misc/extract_bounties.py +++ b/dev_scripts_helpers/misc/extract_bounties.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ -Given the list of potential bounties +Given the list of potential bounties. > curl -L -o output.md "https://docs.google.com/document/d/1xPgQ2tWXQuVWKkGVONjOGd5j14mXSmGeY_4d1_sGzAE/export?format=markdown" > grep "## " output.md @@ -34,6 +34,7 @@ import helpers.hio as hio import helpers.hparser as hparser import helpers.hsystem as hsystem + _LOG = logging.getLogger(__name__) @@ -57,16 +58,16 @@ def _extract_sections(markdown_content: str) -> List[Tuple[str, str]]: sections = [] current_level2 = None # Split content into lines - lines = markdown_content.split('\n') + lines = markdown_content.split("\n") for line in lines: # Check for level 2 headers (##). - level2_match = re.match(r'^##\s+(.+)$', line) + level2_match = re.match(r"^##\s+(.+)$", line) if level2_match: current_level2 = level2_match.group(1).strip() current_level2 = _clean_up(current_level2) continue # Check for level 3 headers (###). 
- level3_match = re.match(r'^###\s+(.+)$', line) + level3_match = re.match(r"^###\s+(.+)$", line) if level3_match and current_level2: level3_section = level3_match.group(1).strip() level3_section = _clean_up(level3_section) @@ -83,8 +84,18 @@ def _parse() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) - parser.add_argument("--input_file", action="store", required=True, help="Path to the markdown file to process") - parser.add_argument("--output_file", action="store", required=False, help="Path to the output file to process") + parser.add_argument( + "--input_file", + action="store", + required=True, + help="Path to the markdown file to process", + ) + parser.add_argument( + "--output_file", + action="store", + required=False, + help="Path to the output file to process or stdout/clipboard", + ) hparser.add_verbosity_arg(parser) return parser @@ -106,11 +117,11 @@ def _main(parser: argparse.ArgumentParser) -> None: strs = [f"{level3}\t{level2}" for level2, level3 in sections] txt = "\n".join(strs) # Create a file with the string. 
- if args.output_file: + if args.output_file: hio.to_file(args.output_file, txt) else: hsystem.to_pbcopy(txt, pbcopy=True) if __name__ == "__main__": - _main(_parse()) + _main(_parse()) From 2e274fa8903cb56f68e21110bb9eac1e4771ed47 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 20:33:06 -0400 Subject: [PATCH 066/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/misc/extract_bounties.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/misc/extract_bounties.py b/dev_scripts_helpers/misc/extract_bounties.py index a10ac06d6..6296ea3cf 100755 --- a/dev_scripts_helpers/misc/extract_bounties.py +++ b/dev_scripts_helpers/misc/extract_bounties.py @@ -94,7 +94,8 @@ def _parse() -> argparse.ArgumentParser: "--output_file", action="store", required=False, - help="Path to the output file to process or stdout/clipboard", + default=None, + help="Path to the output file to process", ) hparser.add_verbosity_arg(parser) return parser From f49a2e80124c128c7b717a21ae184a76aee684e6 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 20:52:55 -0400 Subject: [PATCH 067/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../documentation/notes_to_pdf.py | 4 +- .../llms/dockerized_llm_apply_cfile.py | 33 ++++++++++----- .../llms/dockerized_llm_transform.py | 10 +++-- dev_scripts_helpers/llms/llm_apply_cfile.py | 8 ++-- dev_scripts_helpers/llms/llm_transform.py | 4 +- .../llms/test/test_llm_apply_cfile.py | 13 ++---- .../llms/test/test_llm_prompts.py | 42 
++++++++++++------- 7 files changed, 66 insertions(+), 48 deletions(-) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index 1c6363b59..21ac86df9 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -300,7 +300,7 @@ def _run_pandoc_to_pdf( "latex_abbrevs.sty", ) hdbg.dassert_file_exists(latex_file) - #cmd = f"cp -f {latex_file} ." + # cmd = f"cp -f {latex_file} ." cmd = f"cp -f {latex_file} {out_dir}" _ = _system(cmd) # @@ -327,7 +327,7 @@ def _run_pandoc_to_pdf( else: _LOG.warning("Skipping: run latex again") # Remove `latex_abbrevs.sty`. - #os.remove("latex_abbrevs.sty") + # os.remove("latex_abbrevs.sty") # Get the path of the output file created by Latex. file_out = os.path.basename(file_name).replace(".tex", ".pdf") file_out = os.path.join(out_dir, file_out) diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index f31149a73..985076e93 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -36,7 +36,7 @@ def _parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: # Read the cfile. cfile_lines = hio.from_file(cfile) cfile_lines = cfile_lines.split("\n") - # + # ret = [] # Parse the cfile. for line in cfile_lines: @@ -60,12 +60,14 @@ def _parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: return ret -def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model: str) -> None: +def _apply_transforms( + cfile_lines: List[Tuple[str, str]], prompt_tag: str, model: str +) -> None: """ Apply the transforms to the file. 
- :param cfile_lines: list of tuples, each containing a file name, line - number, and transform + :param cfile_lines: list of tuples, each containing a file name, + line number, and transform :param model: model to use for the transformation """ # Create a dict from file to line number to transform. @@ -74,11 +76,13 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model if file_name not in file_to_line_to_transform: file_to_line_to_transform[file_name] = [] file_to_line_to_transform[file_name].append((line_number, transform)) - # + # _LOG.info("Files to transform: %s", len(file_to_line_to_transform.keys())) _LOG.info("Total number of transform: %s", len(cfile_lines)) # Apply the transforms to the file. - for file_name, line_to_transform in tqdm.tqdm(file_to_line_to_transform.items()): + for file_name, line_to_transform in tqdm.tqdm( + file_to_line_to_transform.items() + ): _LOG.info("Applying transforms to file '%s'", file_name) # Look for file in the current directory. cmd = f'find -path "*/{file_name}"' @@ -89,17 +93,24 @@ def _apply_transforms(cfile_lines: List[Tuple[str, str]], prompt_tag: str, model txt_in = hio.from_file(act_file_name) # Prepare the instructions for the prompt. instructions = "\n".join( - [f"{line_number}: {transform}" for line_number, transform in line_to_transform] + [ + f"{line_number}: {transform}" + for line_number, transform in line_to_transform + ] ) # Transform the file using the instructions. - txt_out = dshlllpr.run_prompt(prompt_tag, txt_in, model, - instructions=instructions, - in_file_name="", out_file_name="") + txt_out = dshlllpr.run_prompt( + prompt_tag, + txt_in, + model, + instructions=instructions, + in_file_name="", + out_file_name="", + ) # Write the file. hio.to_file(act_file_name, txt_out) - # # TODO(gp): This should become an invoke or a command, where we read a file # and a cfile and inject TODOs in the code. 
# def _annotate_with_cfile(txt: str, txt_cfile: str) -> str: diff --git a/dev_scripts_helpers/llms/dockerized_llm_transform.py b/dev_scripts_helpers/llms/dockerized_llm_transform.py index 431833913..9afe1cb29 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_transform.py +++ b/dev_scripts_helpers/llms/dockerized_llm_transform.py @@ -47,9 +47,13 @@ def _main(parser: argparse.ArgumentParser) -> None: model = "gpt-4o-mini" else: model = "gpt-4o" - txt_tmp = dshlllpr.run_prompt(prompt_tag, txt_tmp, model, - in_file_name=in_file_name, - out_file_name=out_file_name) + txt_tmp = dshlllpr.run_prompt( + prompt_tag, + txt_tmp, + model, + in_file_name=in_file_name, + out_file_name=out_file_name, + ) if txt_tmp is not None: # Write file, if needed. res = [] diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py index 4f54d0e7b..f4d194af1 100755 --- a/dev_scripts_helpers/llms/llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """ -Read cfile input and implement a transform for each line of the cfile using LLMs. +Read cfile input and implement a transform for each line of the cfile using +LLMs. The script `dockerized_llm_apply_cfile.py` is executed within a Docker container to ensure all dependencies are met. 
The Docker container is built dynamically if @@ -15,15 +16,12 @@ import argparse import logging import os -import re from typing import List, Optional -import dev_scripts_helpers.documentation.lint_notes as dshdlino import dev_scripts_helpers.llms.llm_prompts as dshlllpr import helpers.hdbg as hdbg import helpers.hdocker as hdocker import helpers.hgit as hgit -import helpers.hio as hio import helpers.hparser as hparser import helpers.hprint as hprint import helpers.hserver as hserver @@ -200,7 +198,7 @@ def _main(parser: argparse.ArgumentParser) -> None: # out_txt = dshdlino.prettier_on_str(out_txt) # Read the output from the container and write it to the output file from # command line (e.g., `-` for stdout). - #hparser.write_file(out_txt, out_file_name) + # hparser.write_file(out_txt, out_file_name) if __name__ == "__main__": diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 3987ffade..68c4d6230 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -186,9 +186,9 @@ def _convert_file_names(in_file_name: str, out_file_name: str) -> str: if line.strip() == "": continue # E.g., the format is like - # + # ``` # /app/helpers_root/r.py:1: Change the shebang line to `#!/usr/bin/env python3` to e - # + # ``` _LOG.debug("before: %s", hprint.to_str("line in_file_name")) line = re.sub(r"^.*(:\d+:.*)$", rf"{in_file_name}\1", line) _LOG.debug("after: %s", hprint.to_str("line")) diff --git a/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py b/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py index 4adc0cad2..419fa9f39 100644 --- a/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/test/test_llm_apply_cfile.py @@ -1,22 +1,15 @@ import logging -import os -from typing import Tuple import pytest -import dev_scripts_helpers.llms.llm_prompts as dshlllpr -import helpers.hdbg as hdbg -import helpers.hio as hio -import helpers.hprint as 
hprint import helpers.hserver as hserver -import helpers.hsystem as hsystem import helpers.hunit_test as hunitest _LOG = logging.getLogger(__name__) # ############################################################################# -# Test_llm_transform1 +# Test_llm_apply_cfile1 # ############################################################################# @@ -28,7 +21,7 @@ class Test_llm_apply_cfile1(hunitest.TestCase): """ Run the script `llm_transform.py` in a Docker container. """ - pass + # i lint --files dev_scripts_helpers/llms/llm_prompts.py # llm_apply_cfile.py --cfile linter_warnings.txt -p code_apply_linter_instructions -v DEBUG @@ -51,4 +44,4 @@ class Test_llm_apply_cfile1(hunitest.TestCase): # dev_scripts_helpers/llms/llm_prompts.py:293: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] # dev_scripts_helpers/llms/llm_prompts.py:311: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] # dev_scripts_helpers/llms/llm_prompts.py:390: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] -# dev_scripts_helpers/llms/llm_prompts.py:391: error: Need type annotation for "post_transforms" (hint: "post_transforms: set[] = ...") [var-annotated] [mypy] \ No newline at end of file +# dev_scripts_helpers/llms/llm_prompts.py:391: error: Need type annotation for "post_transforms" (hint: "post_transforms: set[] = ...") [var-annotated] [mypy] diff --git a/dev_scripts_helpers/llms/test/test_llm_prompts.py b/dev_scripts_helpers/llms/test/test_llm_prompts.py index 62a743ed8..989c14d86 100644 --- a/dev_scripts_helpers/llms/test/test_llm_prompts.py +++ b/dev_scripts_helpers/llms/test/test_llm_prompts.py @@ -83,26 +83,16 @@ def test1(self) -> None: # ############################################################################# +# ############################################################################# 
+# Test_run_prompt1 +# ############################################################################# + + @pytest.mark.skipif( hserver.is_inside_ci() or hserver.is_dev_csfy(), reason="Disabled because of CmampTask10710", ) class Test_run_prompt1(hunitest.TestCase): - - def _run_prompt(self, prompt_tag: str, input_txt: str, exp_output: str) -> None: - # Prepare the input. - input_txt = hprint.dedent(input_txt) - model = "gpt-4o" - in_file_name = "test.py" - out_file_name = "test.py" - # Run the prompt. - act_output = dshlllpr.run_prompt( - prompt_tag, input_txt, model, in_file_name=in_file_name, - out_file_name=out_file_name - ) - # Check the output. - exp_output = hprint.dedent(exp_output) - self.assert_equal(act_output, exp_output, fuzzy_match=True) # TODO(gp): Add one tests for each prompt. @@ -126,6 +116,7 @@ def test_code_fix_star_before_optional_parameters1(self) -> None: def transform(input: str, value: str, output: Optional[str] = None) -> str: print(f"input={input}, value={value}, output={output}") + transform("input", "value") transform("input", "value", "output") """ @@ -133,7 +124,28 @@ def transform(input: str, value: str, output: Optional[str] = None) -> str: def transform(input: str, value: str, *, output: Optional[str] = None) -> str: print(f"input={input}, value={value}, output={output}") + transform("input", "value") transform("input", "value", output="output") """ self._run_prompt(prompt_tag, txt, exp_output) + + def _run_prompt( + self, prompt_tag: str, input_txt: str, exp_output: str + ) -> None: + # Prepare the input. + input_txt = hprint.dedent(input_txt) + model = "gpt-4o" + in_file_name = "test.py" + out_file_name = "test.py" + # Run the prompt. + act_output = dshlllpr.run_prompt( + prompt_tag, + input_txt, + model, + in_file_name=in_file_name, + out_file_name=out_file_name, + ) + # Check the output. 
+ exp_output = hprint.dedent(exp_output) + self.assert_equal(act_output, exp_output, fuzzy_match=True) From d0cb8d7cd6ebdc5113c585f5527e7a1a6e244d21 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 21:04:26 -0400 Subject: [PATCH 068/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../documentation/lint_notes.py | 2 - .../documentation/notes_to_pdf.py | 4 +- dev_scripts_helpers/git/git_hooks/utils.py | 4 +- .../llms/dockerized_llm_apply_cfile.py | 13 ++--- .../llms/dockerized_llm_transform.py | 5 +- dev_scripts_helpers/llms/llm_prompts.py | 48 +++++++++---------- 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index 1dcdff260..d13dce0d3 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -103,7 +103,6 @@ def prettier( *, print_width: int = 80, use_dockerized_prettier: bool = True, - **kwargs: Any, ) -> None: """ Format the given text using Prettier. @@ -259,7 +258,6 @@ def _refresh_toc( txt: str, *, use_dockerized_markdown_toc: bool = True, - **kwargs: Any, ) -> str: """ Refresh the table of contents (TOC) in the given text. diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index 21ac86df9..12d558976 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -133,7 +133,7 @@ def _filter_by_lines(file_name: str, filter_by_lines: str, prefix: str) -> str: # E.g., filter_by_lines='1:10'. 
m = re.match(r"^(\S+):(\S+)$", filter_by_lines) hdbg.dassert(m, "Invalid filter_by_lines='%s'", filter_by_lines) - start_line, end_line = m.group(1), m.group(2) + start_line, end_line = m.groups() if start_line.lower() == "none": start_line = 1 else: @@ -737,4 +737,4 @@ def _main(parser: argparse.ArgumentParser) -> None: if __name__ == "__main__": - _main(_parse()) + _main(_parse()) \ No newline at end of file diff --git a/dev_scripts_helpers/git/git_hooks/utils.py b/dev_scripts_helpers/git/git_hooks/utils.py index a4c46301b..d315bb33c 100644 --- a/dev_scripts_helpers/git/git_hooks/utils.py +++ b/dev_scripts_helpers/git/git_hooks/utils.py @@ -356,7 +356,8 @@ def _check_words_in_text( val = m.group(1) _LOG.debug(" -> found '%s'", val) val = caesar(val, _CAESAR_STEP) - violation = f"{file_name}:{i+1}: Found '{val}'" + i_tmp = i + 1 + violation = f"{file_name}:{i_tmp}: Found '{val}'" violations.append(violation) return violations @@ -485,7 +486,6 @@ def check_gitleaks(abort_on_error: bool = True) -> None: """ func_name = _report() git_root_dir = get_git_root_dir() - # > docker run -v /data/heanhs/src/helpers2:/app zricethezav/gitleaks:latest -c /app/.github/gitleaks-rules.toml git /app --pre-commit --staged cmd = f""" docker run -v {git_root_dir}:/app zricethezav/gitleaks:latest -c /app/.github/gitleaks-rules.toml git /app --pre-commit --staged --verbose """ diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index 985076e93..132c63a3b 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -1,9 +1,8 @@ #!/usr/bin/env python3 """ -This script is designed to run a transformation script using LLMs. It requires -certain dependencies to be present (e.g., `openai`) and thus it is executed -within a Docker container. +Run a transformation script using LLMs. 
It requires certain dependencies to be +present (e.g., `openai`) and thus it is executed within a Docker container. To use this script, you need to provide the input file, output file, and the type of transformation to apply. @@ -43,8 +42,10 @@ def _parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: _LOG.debug("line=%s", line) hdbg.dassert_isinstance(line, str) # Parse the lines of the cfile, like - # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: First word of the docstring should not be `This` [doc_formatter] - # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for "pre_transforms" (hint: "pre_transforms: set[] = ...") [var-annotated] [mypy] + # ``` + # dev_scripts_helpers/llms/llm_prompts.py:106: in public function `test`:D404: ... + # dev_scripts_helpers/llms/llm_prompts.py:110: error: Need type annotation for ... + # ``` # extracting the file name, line number, and transform. regex = r"^(.+):(\d+): (.*)$" match = re.match(regex, line) @@ -61,7 +62,7 @@ def _parse_cfile(cfile: str) -> List[Tuple[str, str, str]]: def _apply_transforms( - cfile_lines: List[Tuple[str, str]], prompt_tag: str, model: str + cfile_lines: List[Tuple[str, str, str]], prompt_tag: str, model: str ) -> None: """ Apply the transforms to the file. diff --git a/dev_scripts_helpers/llms/dockerized_llm_transform.py b/dev_scripts_helpers/llms/dockerized_llm_transform.py index 9afe1cb29..62931dfa5 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_transform.py +++ b/dev_scripts_helpers/llms/dockerized_llm_transform.py @@ -1,9 +1,8 @@ #!/usr/bin/env python3 """ -This script is designed to run a transformation script using LLMs. It requires -certain dependencies to be present (e.g., `openai`) and thus it is executed -within a Docker container. +Run transformations using LLMs. It requires certain dependencies to be present +(e.g., `openai`) and thus it is executed within a Docker container. 
To use this script, you need to provide the input file, output file, and the type of transformation to apply. diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index c865fe974..109a5a3c3 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -104,11 +104,11 @@ def get_outside_container_post_transforms(transform_name: str) -> Set[str]: def test() -> _PROMPT_OUT: """ - This is just needed as a placeholder to test the flow. + Placeholder to test the flow. """ system = "" - pre_transforms = set() - post_transforms = set() + pre_transforms: Set[str] = set() + post_transforms: Set[str] = set() return system, pre_transforms, post_transforms @@ -129,7 +129,7 @@ def code_fix_comments() -> _PROMPT_OUT: period `.` - Do not comment every single line of code and especially logging statements """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -161,7 +161,7 @@ def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: """ ``` ''' - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -190,7 +190,7 @@ def process_data(data: List[float], threshold: float = 0.5) -> List[float]: return results ``` """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -224,7 +224,7 @@ def code_fix_log_string() -> _PROMPT_OUT: hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) ``` """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -252,7 +252,7 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: ``` to """ - pre_transforms 
= set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -272,7 +272,7 @@ def code_fix_by_using_perc_strings() -> _PROMPT_OUT: For instance, convert: to """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -290,7 +290,7 @@ def code_fix_from_imports() -> _PROMPT_OUT: with: Then replace the uses of `OpenAIEmbeddings` with: """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -308,7 +308,7 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: For instance, replace: with the following: """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -335,7 +335,7 @@ def code_fix_csfy_style() -> _PROMPT_OUT: hdbg.dassert_eq(pre_transforms, set()) hdbg.dassert_eq(post_transforms, {"remove_code_delimiters"}) system = "\n\n".join(system_prompts) - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -398,8 +398,8 @@ def code_transform_remove_redundancy() -> _PROMPT_OUT: redundancy in the code, minimizing the number of changes to the code that are not needed. """ - pre_transforms = set() - post_transforms = set() + pre_transforms: Set[str] = set() + post_transforms: Set[str] = set() return system, pre_transforms, post_transforms @@ -421,7 +421,7 @@ def code_transform_apply_csfy_style() -> _PROMPT_OUT: Do not change the behavior of the code. Do not report any explanation of what you did, but just the converted code. 
""" - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -468,14 +468,14 @@ def _get_code_unit_test_prompt(num_tests: int) -> str: def code_write_unit_test() -> _PROMPT_OUT: system = _get_code_unit_test_prompt(5) - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms def code_write_1_unit_test() -> _PROMPT_OUT: system = _get_code_unit_test_prompt(1) - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -487,12 +487,12 @@ def md_rewrite() -> _PROMPT_OUT: system = r""" You are a proficient technical writer. - Rewrite the text passed as if you were writing a technical document to increase - clarity and readability. + Rewrite the text passed as if you were writing a technical document to + increase clarity and readability. Maintain the structure of the text as much as possible, in terms of bullet points and their indentation """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -503,7 +503,7 @@ def md_summarize_short() -> _PROMPT_OUT: Summarize the text in less than 30 words. """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -519,7 +519,7 @@ def slide_improve() -> _PROMPT_OUT: You will convert the following markdown text into bullet points Make sure that the text is clean and readable """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = { "remove_code_delimiters", "remove_end_of_line_periods", @@ -546,7 +546,7 @@ def slide_colorize() -> _PROMPT_OUT: Print only the markdown without any explanation. 
""" - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -564,7 +564,7 @@ def slide_colorize_points() -> _PROMPT_OUT: Print only the markdown without any explanation. """ - pre_transforms = set() + pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms From 862deeeedbeeaaffd0778137d776eb7c45fca10e Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 21:07:10 -0400 Subject: [PATCH 069/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/documentation/notes_to_pdf.py | 2 +- dev_scripts_helpers/llms/test/test_llm_prompts.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index 12d558976..3bb1e881d 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -737,4 +737,4 @@ def _main(parser: argparse.ArgumentParser) -> None: if __name__ == "__main__": - _main(_parse()) \ No newline at end of file + _main(_parse()) diff --git a/dev_scripts_helpers/llms/test/test_llm_prompts.py b/dev_scripts_helpers/llms/test/test_llm_prompts.py index 989c14d86..32a446d56 100644 --- a/dev_scripts_helpers/llms/test/test_llm_prompts.py +++ b/dev_scripts_helpers/llms/test/test_llm_prompts.py @@ -116,7 +116,6 @@ def test_code_fix_star_before_optional_parameters1(self) -> None: def transform(input: str, value: str, output: Optional[str] = None) -> str: print(f"input={input}, value={value}, output={output}") - transform("input", "value") transform("input", "value", "output") """ @@ -124,7 
+123,6 @@ def transform(input: str, value: str, output: Optional[str] = None) -> str: def transform(input: str, value: str, *, output: Optional[str] = None) -> str: print(f"input={input}, value={value}, output={output}") - transform("input", "value") transform("input", "value", output="output") """ From 240a77ed8ecb05ede46dc0887f8dc671b800b0de Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 1 May 2025 21:20:12 -0400 Subject: [PATCH 070/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- .../documentation/lint_notes.py | 4 ++++ .../llms/dockerized_llm_apply_cfile.py | 2 +- dev_scripts_helpers/llms/llm_prompts.py | 14 ++++++------- dev_scripts_helpers/llms/llm_transform.py | 2 +- .../llms/test/test_llm_prompts.py | 21 ------------------- helpers/hdocker.py | 6 +++--- helpers/hserver.py | 2 +- helpers/stage_linked_file.py | 2 +- 8 files changed, 18 insertions(+), 35 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index d13dce0d3..8f22ca726 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -103,6 +103,8 @@ def prettier( *, print_width: int = 80, use_dockerized_prettier: bool = True, + # TODO(gp): Remove this. + **kwargs: Any, ) -> None: """ Format the given text using Prettier. @@ -258,6 +260,8 @@ def _refresh_toc( txt: str, *, use_dockerized_markdown_toc: bool = True, + # TODO(gp): Remove this. + **kwargs: Any, ) -> str: """ Refresh the table of contents (TOC) in the given text. 
diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index 132c63a3b..bf8d32955 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -72,7 +72,7 @@ def _apply_transforms( :param model: model to use for the transformation """ # Create a dict from file to line number to transform. - file_to_line_to_transform = {} + file_to_line_to_transform: Dict[str, Tuple[int, str]] = {} for file_name, line_number, transform in cfile_lines: if file_name not in file_to_line_to_transform: file_to_line_to_transform[file_name] = [] diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 109a5a3c3..b8fd62e6d 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -83,7 +83,7 @@ def get_prompt_tags() -> List[str]: hdbg.dassert_in(prompt, valid_prompts) -def get_outside_container_post_transforms(transform_name: str) -> Set[str]: +def get_outside_container_post_transforms(transform_name: str) -> Dict[str, List[str]]: hdbg.dassert_in(transform_name, OUTSIDE_CONTAINER_POST_TRANSFORMS.keys()) return OUTSIDE_CONTAINER_POST_TRANSFORMS[transform_name] @@ -330,10 +330,10 @@ def code_fix_csfy_style() -> _PROMPT_OUT: ] system_prompts = [] for function_name in function_names: - system, pre_transforms, post_transforms = eval(function_name)() + system, pre_transforms_tmp, post_transforms_tmp = eval(function_name)() system_prompts.append(system) - hdbg.dassert_eq(pre_transforms, set()) - hdbg.dassert_eq(post_transforms, {"remove_code_delimiters"}) + hdbg.dassert_eq(pre_transforms_tmp, set()) + hdbg.dassert_eq(post_transforms_tmp, {"remove_code_delimiters"}) system = "\n\n".join(system_prompts) pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -574,7 +574,7 @@ def slide_colorize_points() -> _PROMPT_OUT: # 
############################################################################# -def _extract_vim_cfile_lines(txt: str) -> List[str]: +def _extract_vim_cfile_lines(txt: str) -> List[Tuple[int, str]]: ret_out = [] for line in txt.split("\n"): _LOG.debug(hprint.to_str("line")) @@ -675,7 +675,7 @@ def run_prompt( txt: str, model: str, *, - instructions: Optional[str] = None, + instructions: str = "", in_file_name: str = "", out_file_name: str = "", ) -> Optional[str]: @@ -713,7 +713,7 @@ def run_prompt( # 174: error: Missing return statement [return] [mypy] # 192: [W1201(logging-not-lazy), _convert_file_names] Use lazy % formatting in logging functions [pylint] system_prompt = hprint.dedent(system_prompt) - hdbg.dassert_is_not(instructions, None) + hdbg.dassert_ne(instructions, "") system_prompt += "\nThe instructions are:\n" + instructions + "\n\n" hdbg.dassert_eq( len(pre_transforms), diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 68c4d6230..4ec23e5cf 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -171,7 +171,7 @@ def _run_dockerized_llm_transform( return ret -def _convert_file_names(in_file_name: str, out_file_name: str) -> str: +def _convert_file_names(in_file_name: str, out_file_name: str) -> None: """ Convert the files from inside the container to outside. 
diff --git a/dev_scripts_helpers/llms/test/test_llm_prompts.py b/dev_scripts_helpers/llms/test/test_llm_prompts.py index 32a446d56..5b71d1fb8 100644 --- a/dev_scripts_helpers/llms/test/test_llm_prompts.py +++ b/dev_scripts_helpers/llms/test/test_llm_prompts.py @@ -62,27 +62,6 @@ def test1(self) -> None: self.assertGreater(len(prompt_tags), 0) -# ############################################################################# -# Test_prompt_tags1 -# ############################################################################# - - -@pytest.mark.skipif( - hserver.is_inside_ci() or hserver.is_dev_csfy(), - reason="Disabled because of CmampTask10710", -) -class Test_prompt_tags1(hunitest.TestCase): - - def test1(self) -> None: - prompt_tags = dshlllpr.get_prompt_tags() - _LOG.debug(hprint.to_str("prompt_tags")) - # - self.assertGreater(len(prompt_tags), 0) - - -# ############################################################################# - - # ############################################################################# # Test_run_prompt1 # ############################################################################# diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 65f51a73b..cfe35b150 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -12,7 +12,7 @@ import re import shlex import time -from typing import Any, Dict, List, Optional, Tuple +from typing import cast, Any, Dict, List, Optional, Tuple import helpers.hdbg as hdbg import helpers.hgit as hgit @@ -141,7 +141,7 @@ def get_current_arch() -> str: cmd = "uname -m" _, current_arch = hsystem.system_to_one_line(cmd) _LOG.debug(hprint.to_str("current_arch")) - return current_arch + return cast(str, current_arch) def _is_compatible_arch(val1: str, val2: str) -> bool: @@ -1444,7 +1444,7 @@ def dockerized_tikz_to_bitmap( """ _LOG.debug(hprint.func_signature_to_str()) # Convert tikz file to PDF. 
- latex_cmd_opts = [] + latex_cmd_opts: List[str] = [] run_latex_again = False file_out = hio.change_file_extension(in_file_path, ".pdf") run_basic_latex( diff --git a/helpers/hserver.py b/helpers/hserver.py index ad36dc820..4df1f4858 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -81,7 +81,7 @@ def get_host_user_name() -> Optional[str]: return os.environ.get("CSFY_HOST_USER_NAME", None) -def get_dev_csfy_host_names() -> List[str]: +def get_dev_csfy_host_names() -> Tuple[str]: """ Return the names of the Causify dev servers. """ diff --git a/helpers/stage_linked_file.py b/helpers/stage_linked_file.py index 24373aea4..5f9945834 100644 --- a/helpers/stage_linked_file.py +++ b/helpers/stage_linked_file.py @@ -54,7 +54,7 @@ # _LOG.error(f"Error staging link {link}: {e}") -def main(): +def main() -> None: parser = argparse.ArgumentParser( description="Stage symbolic links for modification." ) From 892b8641a63bfa53ee5e4d890a2b1577bfbe8c6e Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Fri, 2 May 2025 13:19:17 -0400 Subject: [PATCH 071/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 21 +- dev_scripts_helpers/misc/get_url_titles.py | 239 +++++++++++++++++++++ 2 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 dev_scripts_helpers/misc/get_url_titles.py diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index b8fd62e6d..0cb37e513 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -4,7 +4,7 @@ import logging import os import re -from typing import List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set, Tuple import helpers.hdbg as hdbg import helpers.hio as hio @@ 
-569,6 +569,25 @@ def slide_colorize_points() -> _PROMPT_OUT: return system, pre_transforms, post_transforms +# ############################################################################# + + +def scratch_categorize_topics() -> _PROMPT_OUT: + system = r""" + For each of the following title of article, find the best topic among the following ones + + LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, Marketing and Sales, Probabilistic Programming, Code Refactoring, Open Source + + Only print + - the first 2 words of the title + - a separator | + - the topic + and don't print any explanation + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + # ############################################################################# # Transforms. 
# ############################################################################# diff --git a/dev_scripts_helpers/misc/get_url_titles.py b/dev_scripts_helpers/misc/get_url_titles.py new file mode 100644 index 000000000..174bb7806 --- /dev/null +++ b/dev_scripts_helpers/misc/get_url_titles.py @@ -0,0 +1,239 @@ +import requests +from bs4 import BeautifulSoup + +def get_page_title(url): + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + soup = BeautifulSoup(response.text, 'html.parser') + title_tag = soup.find('title') + return title_tag.string.strip() if title_tag else "No tag found" + except requests.RequestException as e: + return f"Request failed: {e}" + + +import requests +from html.parser import HTMLParser + +class TitleParser(HTMLParser): + def __init__(self): + super().__init__() + self.in_title = False + self.title = None + + def handle_starttag(self, tag, attrs): + if tag.lower() == 'title': + self.in_title = True + + def handle_data(self, data): + if self.in_title and self.title is None: + self.title = data.strip() + + def handle_endtag(self, tag): + if tag.lower() == 'title': + self.in_title = False + +def get_title_streaming(url): + try: + with requests.get(url, stream=True, timeout=10) as r: + r.raise_for_status() + parser = TitleParser() + for chunk in r.iter_content(chunk_size=1024, decode_unicode=True): + parser.feed(chunk) + if parser.title: + break + return parser.title if parser.title else "No <title> tag found" + except requests.RequestException as e: + return f"Request failed: {e}" + + + +if __name__ == "__main__": + # Example list of URLs + files = """ +https://news.ycombinator.com/item?id=34336386 +https://news.ycombinator.com/item?id=29671450 +https://news.ycombinator.com/item?id=22778089 +https://news.ycombinator.com/item?id=23331989 +https://news.ycombinator.com/item?id=34801636 +https://news.ycombinator.com/item?id=30371723 +https://news.ycombinator.com/item?id=26953352 
+https://news.ycombinator.com/item?id=23209142 +https://news.ycombinator.com/item?id=30228261 +https://news.ycombinator.com/item?id=25950838 +https://news.ycombinator.com/item?id=32799789 +https://news.ycombinator.com/item?id=29315107 +https://news.ycombinator.com/item?id=30984662 +https://news.ycombinator.com/item?id=22168822 +https://news.ycombinator.com/item?id=22652141 +https://news.ycombinator.com/item?id=25279814 +https://news.ycombinator.com/item?id=22106367 +https://news.ycombinator.com/item?id=22446148 +https://news.ycombinator.com/item?id=24487135 +https://news.ycombinator.com/item?id=33696486 +https://news.ycombinator.com/item?id=14265051 +https://news.ycombinator.com/item?id=21534990 +https://news.ycombinator.com/item?id=29347885 +https://news.ycombinator.com/item?id=29876742 +https://news.ycombinator.com/item?id=23550758 +https://news.ycombinator.com/item?id=22504133 +https://news.ycombinator.com/item?id=23339830 +https://news.ycombinator.com/item?id=23755675 +https://news.ycombinator.com/item?id=26872904 +https://news.ycombinator.com/item?id=27760919 +https://news.ycombinator.com/item?id=21614533 +https://news.ycombinator.com/item?id=26602156 +https://news.ycombinator.com/item?id=22291417 +https://news.ycombinator.com/from?site=a16z.com&next=29816846 +https://news.ycombinator.com/item?id=27855145 +https://news.ycombinator.com/item?id=26930667 +https://news.ycombinator.com/item?id=29711042 +https://news.ycombinator.com/item?id=26580746 +https://news.ycombinator.com/item?id=24601579 +https://news.ycombinator.com/item?id=22161830 +https://news.ycombinator.com/item?id=26612321 +https://news.ycombinator.com/item?id=32081943 +https://news.ycombinator.com/item?id=22962869 +https://news.ycombinator.com/item?id=27350264 +https://news.ycombinator.com/item?id=29677238 +https://news.ycombinator.com/item?id=31441516 +https://news.ycombinator.com/item?id=26164790 +https://news.ycombinator.com/item?id=22291189 +https://news.ycombinator.com/item?id=25575505 
+https://news.ycombinator.com/item?id=23549929 +https://news.ycombinator.com/item?id=26524876 +https://news.ycombinator.com/item?id=27593772 +https://news.ycombinator.com/item?id=27768211 +https://news.ycombinator.com/item?id=42405323 +https://news.ycombinator.com/item?id=35506009 +https://news.ycombinator.com/item?id=22033129 +https://news.ycombinator.com/item?id=30970720 +https://news.ycombinator.com/item?id=22278339 +https://news.ycombinator.com/item?id=30247159 +https://news.ycombinator.com/item?id=29367687 +https://news.ycombinator.com/item?id=25107285 +https://news.ycombinator.com/item?id=26225373 +https://news.ycombinator.com/item?id=31212542 +https://news.ycombinator.com/item?id=21505305 +https://news.ycombinator.com/item?id=25874374 +https://news.ycombinator.com/item?id=22827275 +https://news.ycombinator.com/item?id=26058440 +https://news.ycombinator.com/item?id=29899156 +https://news.ycombinator.com/item?id=34322033 +https://news.ycombinator.com/item?id=36015815 +https://news.ycombinator.com/item?id=22925484 +https://news.ycombinator.com/item?id=32937876 +https://news.ycombinator.com/item?id=34934216 +https://news.ycombinator.com/item?id=25445493 +https://news.ycombinator.com/item?id=21404292 +https://news.ycombinator.com/item?id=34821414 +https://news.ycombinator.com/item?id=33942597 +https://news.ycombinator.com/item?id=27763965 +https://news.ycombinator.com/item?id=23018805 +https://news.ycombinator.com/item?id=23593165 +https://news.ycombinator.com/item?id=31114554 +https://news.ycombinator.com/item?id=26053323 +https://news.ycombinator.com/item?id=25550240 +https://news.ycombinator.com/item?id=24949736 +https://news.ycombinator.com/item?id=29353904 +https://news.ycombinator.com/item?id=22207006 +https://news.ycombinator.com/item?id=22731317 +https://news.ycombinator.com/item?id=27805904 +https://news.ycombinator.com/item?id=28640429 +https://news.ycombinator.com/item?id=31168069 +https://news.ycombinator.com/item?id=31699032 
+https://news.ycombinator.com/item?id=31123683 +https://news.ycombinator.com/item?id=23921610 +https://news.ycombinator.com/item?id=35020814 +https://news.ycombinator.com/item?id=21959874 +https://news.ycombinator.com/item?id=22895842 +https://news.ycombinator.com/item?id=33625367 +https://news.ycombinator.com/item?id=22429124 +https://news.ycombinator.com/item?id=26036790 +https://news.ycombinator.com/item?id=37059479 +https://news.ycombinator.com/item?id=30060765 +https://news.ycombinator.com/item?id=21610687 +https://news.ycombinator.com/item?id=25716581 +https://news.ycombinator.com/item?id=30822339 +https://news.ycombinator.com/item?id=22094355 +https://news.ycombinator.com/item?id=26034053 +https://news.ycombinator.com/item?id=27695574 +https://news.ycombinator.com/item?id=31286890 +https://news.ycombinator.com/item?id=36154622 +https://news.ycombinator.com/item?id=28155196 +https://news.ycombinator.com/item?id=34843094 +https://news.ycombinator.com/item?id=33477056 +https://news.ycombinator.com/item?id=26747743 +https://news.ycombinator.com/item?id=22059601 +https://news.ycombinator.com/item?id=34391045 +https://news.ycombinator.com/item?id=42174181 +https://news.ycombinator.com/item?id=34152100 +https://news.ycombinator.com/item?id=35697627 +https://news.ycombinator.com/item?id=31455919 +https://news.ycombinator.com/item?id=31200989 +https://news.ycombinator.com/item?id=34752489 +https://news.ycombinator.com/item?id=42357273 +https://news.ycombinator.com/item?id=21481461 +https://news.ycombinator.com/item?id=30120731 +https://news.ycombinator.com/item?id=21442330 +https://news.ycombinator.com/item?id=26899531 +https://news.ycombinator.com/item?id=34857287 +https://news.ycombinator.com/item?id=26799702 +https://news.ycombinator.com/item?id=24059441 +https://news.ycombinator.com/item?id=34165789 +https://news.ycombinator.com/item?id=25428621 +https://news.ycombinator.com/item?id=23626908 +https://news.ycombinator.com/item?id=31431224 
+https://news.ycombinator.com/item?id=21411893 +https://news.ycombinator.com/item?id=36079115 +https://news.ycombinator.com/item?id=23725829 +https://news.ycombinator.com/item?id=33985969 +https://news.ycombinator.com/item?id=22270464 +https://news.ycombinator.com/item?id=30925223 +https://news.ycombinator.com/item?id=22325975 +https://news.ycombinator.com/item?id=30046272 +https://news.ycombinator.com/item?id=32390730 +https://news.ycombinator.com/item?id=28704164 +https://news.ycombinator.com/item?id=23151144 +https://news.ycombinator.com/item?id=22492381 +https://news.ycombinator.com/item?id=22340720 +https://news.ycombinator.com/item?id=31958536 +https://news.ycombinator.com/item?id=39094343 +https://news.ycombinator.com/item?id=26631467 +https://news.ycombinator.com/item?id=31945564 +https://news.ycombinator.com/item?id=27736304 +https://news.ycombinator.com/item?id=23026750 +https://news.ycombinator.com/item?id=22544563 +https://news.ycombinator.com/item?id=21564990 +https://news.ycombinator.com/item?id=27099536 +https://news.ycombinator.com/item?id=22082860 +https://news.ycombinator.com/item?id=28006894 +https://news.ycombinator.com/item?id=21706451 +https://news.ycombinator.com/item?id=35343791 +https://news.ycombinator.com/item?id=28045342 +https://news.ycombinator.com/item?id=29583792 +https://news.ycombinator.com/item?id=33001191 +https://news.ycombinator.com/item?id=34032872 +https://news.ycombinator.com/item?id=25304257 +https://news.ycombinator.com/item?id=29361004 +https://news.ycombinator.com/item?id=22627736 +https://news.ycombinator.com/item?id=25789336 +https://news.ycombinator.com/item?id=26762206 +https://news.ycombinator.com/item?id=34906378 +https://news.ycombinator.com/item?id=25789073 +https://news.ycombinator.com/item?id=34261656 +https://news.ycombinator.com/item?id=31335105 +https://news.ycombinator.com/item?id=9638748 +https://news.ycombinator.com/item?id=26247052 +https://news.ycombinator.com/item?id=42902936 
+https://news.ycombinator.com/item?id=24958215 +https://news.ycombinator.com/item?id=36092179 +https://news.ycombinator.com/item?id=37202009 + """ + url_list = files.split("\n") + import time + for url in url_list: + #title = get_page_title(url) + title = get_title_streaming(url) + print("%s,%s" % (url, title)) + time.sleep(2) From 3899dff2aa16236ff88e4baedf441c17acac7b62 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 2 May 2025 21:02:12 -0400 Subject: [PATCH 072/193] Improve --- .../chatgpt/run_simple_chatgpt.py | 3 +- .../coding_tools/traceback_to_cfile.py | 2 +- .../coding_tools/transform_skeleton.py | 3 +- .../documentation/dockerized_prettier.py | 4 +- .../extract_headers_from_markdown.py | 4 +- .../documentation/render_images.py | 2 +- .../documentation/run_pandoc.py | 2 +- .../documentation/transform_notes.py | 4 +- .../llms/dockerized_llm_transform.py | 2 +- dev_scripts_helpers/llms/llm_transform.py | 4 +- .../system_tools/extract_cfile.py | 67 +++++++++++++++++++ .../system_tools/remove_escape_chars.py | 3 +- helpers/hparser.py | 13 ++++ 13 files changed, 91 insertions(+), 22 deletions(-) create mode 100755 dev_scripts_helpers/system_tools/extract_cfile.py diff --git a/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py b/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py index 1d15763ca..6f4f12308 100755 --- a/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py +++ b/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py @@ -61,8 +61,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - print("cmd line: %s" % hdbg.get_command_line()) - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hparser.init_logger_for_input_output_transform(args) # in_file_name, out_file_name = hparser.parse_input_output_args( args, clear_screen=True diff --git a/dev_scripts_helpers/coding_tools/traceback_to_cfile.py b/dev_scripts_helpers/coding_tools/traceback_to_cfile.py index 
c2c10850b..37bda94ca 100755 --- a/dev_scripts_helpers/coding_tools/traceback_to_cfile.py +++ b/dev_scripts_helpers/coding_tools/traceback_to_cfile.py @@ -56,7 +56,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=False) + hparser.init_logger_for_input_output_transform(args) # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args( args, clear_screen=True diff --git a/dev_scripts_helpers/coding_tools/transform_skeleton.py b/dev_scripts_helpers/coding_tools/transform_skeleton.py index 0fb78c7de..3b63d02fd 100755 --- a/dev_scripts_helpers/coding_tools/transform_skeleton.py +++ b/dev_scripts_helpers/coding_tools/transform_skeleton.py @@ -34,8 +34,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - print("cmd line: %s" % hdbg.get_command_line()) - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hparser.init_logger_for_input_output_transform(args) # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) _ = in_file_name, out_file_name diff --git a/dev_scripts_helpers/documentation/dockerized_prettier.py b/dev_scripts_helpers/documentation/dockerized_prettier.py index 80de06a11..650ccffd3 100755 --- a/dev_scripts_helpers/documentation/dockerized_prettier.py +++ b/dev_scripts_helpers/documentation/dockerized_prettier.py @@ -54,14 +54,12 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: # Parse everything that can be parsed and returns the rest. 
args, cmd_opts = parser.parse_known_args() + hparser.init_logger_for_input_output_transform(args) in_file_name, out_file_name = hparser.parse_input_output_args( args, clear_screen=True ) if not cmd_opts: cmd_opts = [] - hdbg.init_logger( - verbosity=args.log_level, use_exec_path=True, force_white=False - ) _LOG.debug("cmd_opts: %s", cmd_opts) hdocker.run_dockerized_prettier( in_file_name, diff --git a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py index bc3b6bdb5..1a29fba9c 100755 --- a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py +++ b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py @@ -85,9 +85,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger( - verbosity=args.log_level, use_exec_path=True, force_white=False - ) + hparser.init_logger_for_input_output_transform(args) in_file_name, out_file_name = hparser.parse_input_output_args(args) # _extract_headers_from_markdown( diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index d709c8250..156082815 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -564,7 +564,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hparser.init_logger_for_input_output_transform(args) # Get the paths to the input and output files. in_file, out_file = hparser.parse_input_output_args(args) # Verify that the input and output file types are valid and equal. 
diff --git a/dev_scripts_helpers/documentation/run_pandoc.py b/dev_scripts_helpers/documentation/run_pandoc.py index 797b092bb..6d7b5436c 100755 --- a/dev_scripts_helpers/documentation/run_pandoc.py +++ b/dev_scripts_helpers/documentation/run_pandoc.py @@ -46,7 +46,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hparser.init_logger_for_input_output_transform(args) # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) # Read file. diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index db49c4fae..410a49010 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -53,9 +53,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger( - verbosity=logging.ERROR, use_exec_path=True, force_white=False - ) + hparser.init_logger_for_input_output_transform(args) # cmd = args.action max_lev = int(args.max_lev) diff --git a/dev_scripts_helpers/llms/dockerized_llm_transform.py b/dev_scripts_helpers/llms/dockerized_llm_transform.py index 62931dfa5..b27a6d528 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_transform.py +++ b/dev_scripts_helpers/llms/dockerized_llm_transform.py @@ -34,7 +34,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hparser.init_logger_for_input_output_transform(args) # Parse files from command line. in_file_name, out_file_name = hparser.parse_input_output_args(args) # Read file. 
diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 4ec23e5cf..e884db6ca 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -199,9 +199,7 @@ def _convert_file_names(in_file_name: str, out_file_name: str) -> None: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hdbg.init_logger( - verbosity=args.log_level, use_exec_path=True, force_white=False - ) + hparser.init_logger_for_input_output_transform(args) if args.prompt == "list": print("# Available prompt tags:") print("\n".join(dshlllpr.get_prompt_tags())) diff --git a/dev_scripts_helpers/system_tools/extract_cfile.py b/dev_scripts_helpers/system_tools/extract_cfile.py new file mode 100755 index 000000000..2fedfe0b9 --- /dev/null +++ b/dev_scripts_helpers/system_tools/extract_cfile.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +""" +This script extracts the file name from a cfile. + +Example: +> jackmd DataPull | extract_cfile.py -i - -o - +""" + +import argparse +import logging +import re +from typing import List + +import helpers.hdbg as hdbg +import helpers.hparser as hparser + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + hparser.add_input_output_args(parser) + hparser.add_verbosity_arg(parser) + return parser + + +def _parse_input_cfile(txt: List[str]) -> List[str]: + files = [] + for line in txt: + # Extract the file name from the `filename:line:text`. 
+ # E.g., + # ``` + # docs/all.workflow.explanation.md:396:- Add QA for a `DataPull` source + # ``` + pattern = r"^(\S+):\d+:.*$" + match = re.match(pattern, line) + if match: + filename = match.group(1) + files.append(filename) + else: + _LOG.warning("Can't parse line: '%s'", line) + return files + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hparser.init_logger_for_input_output_transform(args) + # Parse files. + in_file_name, out_file_name = hparser.parse_input_output_args(args) + # Read file. + txt = hparser.read_file(in_file_name) + # Transform. + files = _parse_input_cfile(txt) + files = sorted(list(set(files))) + # Write file. + txt_out = "\n".join(files) + hparser.write_file(txt_out, out_file_name) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/dev_scripts_helpers/system_tools/remove_escape_chars.py b/dev_scripts_helpers/system_tools/remove_escape_chars.py index b34c4220a..5634ca0e9 100755 --- a/dev_scripts_helpers/system_tools/remove_escape_chars.py +++ b/dev_scripts_helpers/system_tools/remove_escape_chars.py @@ -30,8 +30,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - print("cmd line: %s" % hdbg.get_command_line()) - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hparser.init_logger_for_input_output_transform(args) # in_file_name, out_file_name = hparser.parse_input_output_args( args, clear_screen=False diff --git a/helpers/hparser.py b/helpers/hparser.py index c9afcc63c..ca0559ff3 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -370,6 +370,19 @@ def parse_input_output_args( return in_file_name, out_file_name + + +def init_logger_for_input_output_transform(args: argparse.Namespace) -> None: + verbosity = args.log_level + # If the input is stdin, we don't want to print the command line or any + # other log messages, unless the user specified a more verbose log level.
+ if args.in_file_name == "-": + if args.log_level == "INFO": + verbosity = "CRITICAL" + else: + print("cmd line: %s" % hdbg.get_command_line()) + hdbg.init_logger(verbosity=verbosity, use_exec_path=True) + # TODO(gp): GFI -> from_file for symmetry for hio. def read_file(file_name: str) -> List[str]: From a6682e81bc843c8633ce0536c454d10dbad1a31f Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 3 May 2025 16:11:24 -0400 Subject: [PATCH 073/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' skipped All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 0cb37e513..29b7750e4 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -579,10 +579,12 @@ def scratch_categorize_topics() -> _PROMPT_OUT: LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, Marketing and Sales, Probabilistic Programming, Code Refactoring, Open Source Only print - - the first 2 words of the title + - the first 3 words of the title - a separator | - the topic and don't print any explanation + + if you don't know the topic, print "unknown" """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} From e86ec7efa330825be6deadeac5b4ea0df15ceb9d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 5 May 2025 08:39:16 -0400 Subject: [PATCH 074/193] Improve MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 143 +++++++++++++++--- .../bounty.onboarding_checklist.reference.md | 24 +++ 2 files changed, 142 insertions(+), 25 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 29b7750e4..7920b61e0 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -83,7 +83,9 @@ def get_prompt_tags() -> List[str]: hdbg.dassert_in(prompt, valid_prompts) -def get_outside_container_post_transforms(transform_name: str) -> Dict[str, List[str]]: +def get_outside_container_post_transforms( + transform_name: str, +) -> Dict[str, List[str]]: hdbg.dassert_in(transform_name, OUTSIDE_CONTAINER_POST_TRANSFORMS.keys()) return OUTSIDE_CONTAINER_POST_TRANSFORMS[transform_name] @@ -117,29 +119,108 @@ def test() -> _PROMPT_OUT: # ############################################################################# -def code_fix_comments() -> _PROMPT_OUT: +def code_fix_existing_comments() -> _PROMPT_OUT: + """ + Fix the already existing comments in the Python code. + """ + system = _CONTEXT + system += r""" + Make sure that comments in the code are: + - in imperative form + - a correct English phrase + - end with a period `.` + - clear + + Comments should be before the code that they refer to + E.g., + ``` + dir_name = self.directory.name # For example, "helpers". + ``` + should become + ``` + # E.g., "helpers". + dir_name = self.directory.name + ``` + + Variables should be enclosed in a back tick, like `bar`. + Functions should be reported as `foo()`. + + Do not change the code. + Do not add any empty line. 
+ """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_fix_improve_comments() -> _PROMPT_OUT: """ Add comments to Python code. """ system = _CONTEXT system += r""" - - Every a chunk of 4 or 5 lines of code add comment explaining the code + - Add comments for the parts of the code that are not properly commented + - E.g., every chunk of 4 or 5 lines of code add comment explaining the + code - Comments should go before the logical chunk of code they describe - Comments should be in imperative form, a full English phrase, and end with a period `.` - Do not comment every single line of code and especially logging statements + - Add examples of the values of variables, when you are sure of the types + and values of variables. If you are not sure, do not add any information. + + Do not change the code. + Do not remove any already existing comment. + Do not add any empty line. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms +def code_fix_logging_statements() -> _PROMPT_OUT: + """ + Add comments to Python code. + """ + system = _CONTEXT + system += r''' + When a variable `foobar` is important for debugging the code in case of + failure, add statements like: + ``` + _LOG.debug(hprint.to_str("foobar")) + ``` + + At the beginning of an important function, after the docstring, add code + like + ``` + def get_text_report(self) -> str: + """ + Generate a text report listing each module's dependencies. + + :return: Text report of dependencies, one per line. + """ + _LOG.debug(hprint.func_signature_to_str()) + ``` + + Do not change the code. + Do not remove any already existing comment. + Do not add any empty line. 
+ ''' + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + def code_fix_docstrings() -> _PROMPT_OUT: """ Add or complete a REST docstring to Python code. Each function should have a docstring that describes the function, its parameters, and its return value. + + Create examples of the values in input and output of each function, only + when you are sure of the types and values of variables. If you are not + sure, do not add any information. """ system = _CONTEXT system += r''' @@ -151,13 +232,13 @@ def code_fix_docstrings() -> _PROMPT_OUT: An example of a correct docstring is: ``` - def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: + def _format_greeting(name: str, *, greeting: str = "Hello") -> str: """ Format a greeting message with the given name. - :param name: the name to include in the greeting - :param greeting: the base greeting message to use - :return: formatted greeting + :param name: the name to include in the greeting (e.g., "John") + :param greeting: the base greeting message to use (e.g., "Ciao") + :return: formatted greeting (e.g., "Hello John") """ ``` ''' @@ -182,7 +263,7 @@ def process_data(data, threshold=0.5): ``` to: ``` - def process_data(data: List[float], threshold: float = 0.5) -> List[float]: + def process_data(data: List[float], *, threshold: float = 0.5) -> List[float]: results: List[float] = [] for item in data: if item > threshold: @@ -259,18 +340,17 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. - - You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. """ system = _CONTEXT system += r""" Use % formatting instead of f-strings (formatted string literals). - - Do not print any comment, but just the converted code. + Do not print any comment, just the converted code. 
For instance, convert: + `f"Hello, {name}. You are {age} years old."` to + `"Hello, %s. You are %d years old." % (name, age)` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -285,10 +365,6 @@ def code_fix_from_imports() -> _PROMPT_OUT: system += r""" Replace any Python "from import" statement like `from X import Y` with the form `import X` and then replace the uses of `Y` with `X.Y` - - For instance, replace: - with: - Then replace the uses of `OpenAIEmbeddings` with: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -304,9 +380,18 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: When you find a Python function with optional parameters, add a star after the mandatory parameters and before the optional parameters, and make sure that the function is called with the correct number of arguments. + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + - For instance, replace: - with the following: +def code_fix_unit_test() -> _PROMPT_OUT: + """ + Fix the unit tests in the code. + """ + system = _CONTEXT + system += r""" + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} @@ -315,7 +400,8 @@ def code_fix_csfy_style() -> _PROMPT_OUT: """ - Apply the csfy style to the code. + Apply all the transformations required to write code according to the + Causify conventions.
""" # > grep "def code_fix" ./dev_scripts_helpers/llms/llm_prompts.py | awk '{print $2 }' function_names = [ @@ -574,15 +660,21 @@ def slide_colorize_points() -> _PROMPT_OUT: def scratch_categorize_topics() -> _PROMPT_OUT: system = r""" - For each of the following title of article, find the best topic among the following ones + For each of the following title of article, find the best topic among the + following ones: - LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, Marketing and Sales, Probabilistic Programming, Code Refactoring, Open Source + LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python + Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, + Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, + Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, + Marketing and Sales, Probabilistic Programming, Code Refactoring, Open + Source Only print - the first 3 words of the title - a separator | - the topic - and don't print any explanation + and don't print any explanation. if you don't know the topic, print "unknown" """ @@ -590,6 +682,7 @@ def scratch_categorize_topics() -> _PROMPT_OUT: post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms + # ############################################################################# # Transforms. # ############################################################################# @@ -730,9 +823,9 @@ def run_prompt( # Add the specific instructions to the system prompt. 
# E.g., # The instructions are: - # 52: in private function `_parse`:D401: First line should be in imperative mood; try rephrasing (found 'Same') [doc_formatter] + # 52: in private function `_parse`:D401: First line should be in # 174: error: Missing return statement [return] [mypy] - # 192: [W1201(logging-not-lazy), _convert_file_names] Use lazy % formatting in logging functions [pylint] + # 192: [W1201(logging-not-lazy), _convert_file_names] Use lazy % system_prompt = hprint.dedent(system_prompt) hdbg.dassert_ne(instructions, "") system_prompt += "\nThe instructions are:\n" + instructions + "\n\n" diff --git a/docs/onboarding/bounty.onboarding_checklist.reference.md b/docs/onboarding/bounty.onboarding_checklist.reference.md index cbe27b21a..2be65afb1 100644 --- a/docs/onboarding/bounty.onboarding_checklist.reference.md +++ b/docs/onboarding/bounty.onboarding_checklist.reference.md @@ -21,6 +21,9 @@ in [`intern.set_up_development_on_laptop.how_to_guide.md`](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/intern.set_up_development_on_laptop.how_to_guide.md) - [ ] **Contributor**: Carefully study all the documents in the must-read list: + - [ ] *Carefully study all the documents in + [the must-read list](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/all.dev_must_read_checklist.reference.md) + - [ ] [General rules of collaboration](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.team_collaboration.how_to_guide.md) - [ ] [Coding style guide](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.coding_style.how_to_guide.md) - [ ] [How to write unit tests](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.write_unit_tests.how_to_guide.md) - [ ] [How to run unit tests](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.run_unit_tests.how_to_guide.md) @@ -31,6 +34,27 @@ - [ ] [GitHub 
organization](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.use_github.how_to_guide.md) - [ ] [Tips for writing documentation](https://github.com/causify-ai/helpers/blob/master/docs/documentation_meta/all.writing_docs.how_to_guide.md) - They will help you get up to speed with our practices and development style + - Read them carefully one by one + - Ask questions + - Memorize / internalize all the information + - Take notes + - Mark the reading as done + - Open a GH issue/PR to propose improvements to the documentation + +### Final checks + +- [ ] **Intern**: Exercise all the important parts of the systems + - [ ] Create a GitHub issue + - [ ] Check out and pull the latest version of the repo code + - [ ] Create a branch + - [ ] Run regressions (`i run_fast_tests`) + - [ ] Run Linter (`i lint --files="..."`) + - [ ] Start a Docker container (`i docker_bash`) + - [ ] Start a Jupyter server (`i docker_jupyter`) + - [ ] Do a PR +- Tip: a good approach to the "final checks" is to perform all the steps + (opening an issue, creating a branch, filing a PR) for something very small + but useful -- like fixing a typo in the docs. 
## Instructions From 1dc83eddcf5e8e03939b17c4a2459d235c393183 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 5 May 2025 08:57:27 -0400 Subject: [PATCH 075/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- helpers/hmarkdown.py | 27 +++++ helpers/hparser.py | 2 +- helpers/test/test_hmarkdown.py | 175 +++++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 1 deletion(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index cea80be22..81ab3b4fe 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -782,3 +782,30 @@ def colorize_first_level_bullets(markdown_text: str) -> str: else: result.append(line) return "\n".join(result) + + +def format_compressed_markdown(markdown_text: str) -> str: + """ + Add an empty line before first level bullets in markdown text. + + First level bullets are those starting with "- " at the beginning of a line + with no indentation. Other level bullets have no empty line before them. 
+ + :param markdown_text: Input markdown text + :return: Formatted markdown text with empty lines before first level bullets + """ + lines = markdown_text.split("\n") + result = [] + for i, line in enumerate(lines): + # Check if current line is a first level bullet (no indentation) + if re.match(r"^- ", line): + # Add empty line before first level bullet if previous line exists and isn't empty + if i > 0 and lines[i-1].strip() != "": + result.append("") + # Check if current line is an indented bullet + elif re.match(r"^\s+- ", line): + # Remove any empty line before indented bullet + if result and result[-1].strip() == "": + result.pop() + result.append(line) + return "\n".join(result) diff --git a/helpers/hparser.py b/helpers/hparser.py index ca0559ff3..929cbaa5a 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -381,7 +381,7 @@ def init_logger_for_input_output_transform(args: argparse.Namespace) -> None: verbosity = "CRITICAL" else: print("cmd line: %s" % hdbg.get_command_line()) - hdbg.init_logger(verbosity=verbosity, use_exec_path=True) + hdbg.init_logger(verbosity=verbosity, use_exec_path=True, force_white=False) # TODO(gp): GFI -> from_file for symmetry for hio. diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index c4786f0fe..968758a9c 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1396,3 +1396,178 @@ def test3(self) -> None: # Call function. hmarkdo.check_header_list(header_list) self.assertTrue(True) + + +class Test_format_compressed_markdown1(hunitest.TestCase): + + def _format_and_compare_markdown(self, text: str, expected: str) -> None: + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # + actual = hmarkdo.format_compressed_markdown(text) + self.assert_equal(actual, expected) + + def test1(self) -> None: + """ + Test basic case with single first level bullet. 
+ """ + text = """ + Some text + - First bullet + More text""" + expected = """ + Some text + + - First bullet + More text""" + self._format_and_compare_markdown(text, expected) + + def test2(self) -> None: + """ + Test multiple first level bullets. + """ + text = """ + - First bullet + - Second bullet + - Third bullet""" + expected = """ + - First bullet + + - Second bullet + + - Third bullet""" + self._format_and_compare_markdown(text, expected) + + def test3(self) -> None: + """ + Test mixed first level and indented bullets. + """ + text = """ + - First level + + - Second level + - Another second + - Back to first""" + expected = """ + - First level + - Second level + - Another second + + - Back to first""" + self._format_and_compare_markdown(text, expected) + + def test4(self) -> None: + """ + Test mixed content with text and bullets. + """ + text = """ + Some initial text + - First bullet + Some text in between + - Second bullet + Final text""" + expected = """ + Some initial text + + - First bullet + Some text in between + + - Second bullet + Final text""" + self._format_and_compare_markdown(text, expected) + + def test5(self) -> None: + """ + Test nested bullets with multiple levels. + """ + text = """ + - Level 1 + - Level 2 + - Level 3 + - Another level 1 + - Level 2 again""" + expected = """ + - Level 1 + - Level 2 + - Level 3 + + - Another level 1 + - Level 2 again""" + self._format_and_compare_markdown(text, expected) + + def test6(self) -> None: + """ + Test empty lines handling. + """ + text = """ + - First bullet + + - Second bullet + + - Third bullet""" + expected = """ + - First bullet + + - Second bullet + + - Third bullet""" + self._format_and_compare_markdown(text, expected) + + def test7(self) -> None: + """ + Test mixed content with bullets and text. 
+ """ + text = """ + Some text here + - First bullet + More text + - Second bullet + - Nested bullet + Final paragraph + - Last bullet""" + expected = """ + Some text here + + - First bullet + More text + + - Second bullet + - Nested bullet + Final paragraph + + - Last bullet""" + self._format_and_compare_markdown(text, expected) + + def test8(self) -> None: + """ + Test bullets with inline formatting. + """ + text = """ + - **Bold bullet** point + - *Italic nested* bullet + - `Code bullet` here + - **_Mixed_** formatting""" + expected = """ + - **Bold bullet** point + - *Italic nested* bullet + + - `Code bullet` here + - **_Mixed_** formatting""" + self._format_and_compare_markdown(text, expected) + + def test9(self) -> None: + """ + Test bullets with special characters. + """ + text = """ + - Bullet with (parentheses) + - Bullet with [brackets] + - Bullet with {braces} + - Bullet with $math$""" + expected = """ + - Bullet with (parentheses) + - Bullet with [brackets] + + - Bullet with {braces} + - Bullet with $math$""" + self._format_and_compare_markdown(text, expected) From 814dc5fb393f56d40f09058eedf40d6ba4676507 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 6 May 2025 07:29:10 -0400 Subject: [PATCH 076/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/transform_notes.py | 73 +++++++++------ helpers/hmarkdown.py | 86 +++++++++++++++++- helpers/test/test_hmarkdown.py | 91 +++++++++++++++++++ 3 files changed, 215 insertions(+), 35 deletions(-) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 410a49010..9f1c0047e 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ 
b/dev_scripts_helpers/documentation/transform_notes.py @@ -36,6 +36,7 @@ import helpers.hlatex as hlatex import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser +import helpers.hprint as hprint _LOG = logging.getLogger(__name__) @@ -56,6 +57,22 @@ def _main(parser: argparse.ArgumentParser) -> None: hparser.init_logger_for_input_output_transform(args) # cmd = args.action + if cmd == "list": + txt = r""" + test: compute the hash of a string to test the flow + format_headers: format the headers of the current file + increase_headers_level: increase the level of the headers of the current file + md_list_to_latex: convert a markdown list to a latex list + md_remove_formatting: remove the formatting of the current file + md_clean_up: clean up the current file + md_format: format the current file + md_format_compressed: format the current file + md_colorize_bold_text: colorize the bold text of the current file + """ + txt = hprint.dedent(txt) + print(txt) + return + max_lev = int(args.max_lev) # in_file_name, out_file_name = hparser.parse_input_output_args( @@ -67,42 +84,38 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = "\n".join(txt) txt = hashlib.sha256(txt.encode("utf-8")).hexdigest() hparser.write_file(txt, out_file_name) - elif cmd == "toc": - txt = hparser.read_file(in_file_name) - max_level = 3 - header_list = hmarkdo.extract_headers_from_markdown( - txt, max_level=max_level - ) - mode = "list" - txt_out = hmarkdo.header_list_to_markdown(header_list, mode) - hparser.write_file(txt_out, out_file_name) - elif cmd == "format": + elif cmd == "format_headers": hmarkdo.format_headers(in_file_name, out_file_name, max_lev) - elif cmd == "increase": + elif cmd == "increase_headers_level": hmarkdo.modify_header_level(in_file_name, out_file_name, mode="increase") - elif cmd == "md_list_to_latex": - txt = hparser.read_file(in_file_name) - txt = "\n".join(txt) - txt = hlatex.markdown_list_to_latex(txt) - hparser.write_file(txt, out_file_name) - elif 
cmd == "md_remove_formatting": - txt = hparser.read_file(in_file_name) - txt = "\n".join(txt) - txt = hmarkdo.remove_formatting(txt) - hparser.write_file(txt, out_file_name) - elif cmd == "md_clean_up": - txt = hparser.read_file(in_file_name) - txt = "\n".join(txt) - txt = hmarkdo.md_clean_up(txt) - txt = dshdlino.prettier_on_str(txt) - hparser.write_file(txt, out_file_name) - elif cmd == "md_format": + else: txt = hparser.read_file(in_file_name) txt = "\n".join(txt) + if cmd == "toc": + max_level = 3 + header_list = hmarkdo.extract_headers_from_markdown( + txt, max_level=max_level + ) + mode = "list" + txt = hmarkdo.header_list_to_markdown(header_list, mode) + elif cmd == "md_list_to_latex": + txt = hlatex.markdown_list_to_latex(txt) + elif cmd == "md_remove_formatting": + txt = hmarkdo.remove_formatting(txt) + elif cmd == "md_clean_up": + txt = hmarkdo.md_clean_up(txt) + elif cmd == "md_format": + #txt = dshdlino.prettier_on_str(txt) + pass + elif cmd == "md_format_compressed": + txt = hmarkdo.format_compressed_markdown(txt) + elif cmd == "md_colorize_bold_text": + txt = hmarkdo.colorize_bold_text(txt) + else: + raise ValueError(f"Invalid cmd='{cmd}'") + # Format the output. txt = dshdlino.prettier_on_str(txt) hparser.write_file(txt, out_file_name) - else: - assert 0, f"Invalid cmd='{cmd}'" if __name__ == "__main__": diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 81ab3b4fe..dc30b96d2 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -755,7 +755,34 @@ def selected_navigation_to_str( # ############################################################################# +all_colors = [ + "red", + "orange", + "yellow", + "lime", + "green", + "teal", + "cyan", + "blue", + "purple", + "violet", + "magenta", + "pink", + "brown", + "olive", + "gray", + "darkgray", + "lightgray", +] + + def colorize_first_level_bullets(markdown_text: str) -> str: + """ + Colorize first-level bullets in markdown text. 
+ + :param markdown_text: Input markdown text + :return: Formatted markdown text with first-level bullets colored + """ # Define the colors to use. colors = ["red", "orange", "green", "teal", "cyan", "blue", "violet", "brown"] # Find all first-level bullet points (lines starting with "- " after any whitespace). @@ -784,6 +811,54 @@ def colorize_first_level_bullets(markdown_text: str) -> str: return "\n".join(result) +def colorize_bold_text(markdown_text: str, *, use_abbreviations: bool = True) -> str: + r""" + Add colors to bold text in markdown using equidistant colors from an array. + + The function finds all bold text (enclosed in ** or __) and adds LaTeX color + commands while preserving the rest of the markdown unchanged. + + :param markdown_text: Input markdown text + :param use_abbreviations: Use LaTeX abbreviations for colors, + `\red{text}` instead of `\textcolor{red}{text}` + :return: Markdown text with colored bold sections + """ + # Find all bold text (both ** and __ formats). + bold_pattern = r'\*\*(.*?)\*\*|__(.*?)__' + # matches will look like: + # - For **text**: group(1)='text', group(2)=None + # - For __text__: group(1)=None, group(2)='text' + matches = list(re.finditer(bold_pattern, markdown_text)) + if not matches: + return markdown_text + + result = markdown_text + # Calculate color spacing to use equidistant colors. 
+ color_step = len(all_colors) / len(matches) + + # Process matches in reverse to not mess up string indices + for i, match in enumerate(reversed(matches)): + # Get the matched bold text (either ** or __ format) + bold_text = match.group(1) or match.group(2) + # Calculate color index using equidistant spacing + color_idx = int((len(matches) - 1 - i) * color_step) % len(all_colors) + color = all_colors[color_idx] + + # Create the colored version + if use_abbreviations: + colored_text = f"\\{color}{{{bold_text}}}" + else: + colored_text = f"**\\textcolor{{{color}}}{{{bold_text}}}**" + + # Replace in the original text + result = ( + result[:match.start()] + + colored_text + + result[match.end():] + ) + return result + + def format_compressed_markdown(markdown_text: str) -> str: """ Add an empty line before first level bullets in markdown text. @@ -792,19 +867,20 @@ def format_compressed_markdown(markdown_text: str) -> str: with no indentation. Other level bullets have no empty line before them. :param markdown_text: Input markdown text - :return: Formatted markdown text with empty lines before first level bullets + :return: Formatted markdown text with """ lines = markdown_text.split("\n") result = [] for i, line in enumerate(lines): - # Check if current line is a first level bullet (no indentation) + # Check if current line is a first level bullet (no indentation). if re.match(r"^- ", line): - # Add empty line before first level bullet if previous line exists and isn't empty + # Add empty line before first level bullet if previous line exists + # and isn't empty. if i > 0 and lines[i-1].strip() != "": result.append("") - # Check if current line is an indented bullet elif re.match(r"^\s+- ", line): - # Remove any empty line before indented bullet + # If current line is an indented bullet, remove any empty line + # before indented bullet. 
if result and result[-1].strip() == "": result.pop() result.append(line) diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 968758a9c..9176a4064 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1398,6 +1398,97 @@ def test3(self) -> None: self.assertTrue(True) +# ############################################################################# +# Test_colorize_bold_text1 +# ############################################################################# + + +class Test_colorize_bold_text1(hunitest.TestCase): + + def test1(self) -> None: + """ + Test basic case with single bold text. + """ + text = "This is **bold** text" + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = "This is \\red{bold} text" + self.assert_equal(actual, expected) + + def test2(self) -> None: + """ + Test multiple bold sections get different colors. + """ + text = "**First** normal **Second** text" + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = "\\red{First} normal \\purple{Second} text" + self.assert_equal(actual, expected) + + def test3(self) -> None: + """ + Test underscore style bold text. + """ + text = "This is __bold__ text" + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = "This is \\red{bold} text" + self.assert_equal(actual, expected) + + def test4(self) -> None: + """ + Test text with no bold sections returns unchanged. + """ + text = "This is plain text" + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = "This is plain text" + self.assert_equal(actual, expected) + + def test5(self) -> None: + """ + Test mixed bold styles in same text. 
+ """ + text = "**First** and __Second__ bold" + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = "\\red{First} and \\purple{Second} bold" + self.assert_equal(actual, expected) + + def test6(self) -> None: + """ + Test with abbreviations=False uses full \textcolor syntax. + """ + text = "This is **bold** text" + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=False) + expected = "This is **\\textcolor{red}{bold}** text" + self.assert_equal(actual, expected) + + def test7(self) -> None: + """ + Test with multiple bullet lists and different colors. + """ + text = """ + **List 1:** + - First item + - Second item + + **List 2:** + - Another item + - Final item + """ + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = """ + \\red{List 1:} + - First item + - Second item + + \\purple{List 2:} + - Another item + - Final item + """ + self.assert_equal(actual, expected) + +# ############################################################################# +# Test_format_compressed_markdown1 +# ############################################################################# + + class Test_format_compressed_markdown1(hunitest.TestCase): def _format_and_compare_markdown(self, text: str, expected: str) -> None: From dc3b906db0300c37b73e1c928258d8c935a5c2d2 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 6 May 2025 07:37:03 -0400 Subject: [PATCH 077/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/transform_notes.py | 9 +- dev_scripts_helpers/llms/llm_prompts.py | 16 +- dev_scripts_helpers/misc/get_url_titles.py | 215 ++---------------- .../bounty.onboarding_checklist.reference.md | 31 +-- helpers/hdocker.py | 2 +- helpers/hmarkdown.py | 34 ++- 
helpers/hparser.py | 1 - helpers/hserver.py | 2 +- helpers/test/test_hmarkdown.py | 21 +- 9 files changed, 78 insertions(+), 253 deletions(-) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 9f1c0047e..aa5efc0b8 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -32,7 +32,6 @@ import logging import dev_scripts_helpers.documentation.lint_notes as dshdlino -import helpers.hdbg as hdbg import helpers.hlatex as hlatex import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser @@ -72,7 +71,6 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = hprint.dedent(txt) print(txt) return - max_lev = int(args.max_lev) # in_file_name, out_file_name = hparser.parse_input_output_args( @@ -89,8 +87,10 @@ def _main(parser: argparse.ArgumentParser) -> None: elif cmd == "increase_headers_level": hmarkdo.modify_header_level(in_file_name, out_file_name, mode="increase") else: + # Read the input. txt = hparser.read_file(in_file_name) txt = "\n".join(txt) + # Process the input. if cmd == "toc": max_level = 3 header_list = hmarkdo.extract_headers_from_markdown( @@ -105,7 +105,7 @@ def _main(parser: argparse.ArgumentParser) -> None: elif cmd == "md_clean_up": txt = hmarkdo.md_clean_up(txt) elif cmd == "md_format": - #txt = dshdlino.prettier_on_str(txt) + # txt = dshdlino.prettier_on_str(txt) pass elif cmd == "md_format_compressed": txt = hmarkdo.format_compressed_markdown(txt) @@ -113,8 +113,9 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = hmarkdo.colorize_bold_text(txt) else: raise ValueError(f"Invalid cmd='{cmd}'") - # Format the output. + # Reflow the output. txt = dshdlino.prettier_on_str(txt) + # Write the output. 
hparser.write_file(txt, out_file_name) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 7920b61e0..39567cdf5 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -56,7 +56,7 @@ def get_prompt_tags() -> List[str]: # Store the prompts that need a certain post-transforms to be applied outside # the container. -OUTSIDE_CONTAINER_POST_TRANSFORMS = {} +OUTSIDE_CONTAINER_POST_TRANSFORMS: Dict[str, List[str]] = {} # TODO(gp): We should embed this outside_container_post_transforms in the @@ -168,7 +168,7 @@ def code_fix_improve_comments() -> _PROMPT_OUT: - Do not comment every single line of code and especially logging statements - Add examples of the values of variables, when you are sure of the types and values of variables. If you are not sure, do not add any information. - + Do not change the code. Do not remove any already existing comment. Do not add any empty line. @@ -218,9 +218,9 @@ def code_fix_docstrings() -> _PROMPT_OUT: Each function should have a docstring that describes the function, its parameters, and its return value. - Create examples of the values in input and output of each function, only - when you are sure of the types and values of variables. If you are not - sure, do not add any information. + Create examples of the values in input and output of each function, + only when you are sure of the types and values of variables. If you + are not sure, do not add any information. 
""" system = _CONTEXT system += r''' @@ -689,7 +689,7 @@ def scratch_categorize_topics() -> _PROMPT_OUT: def _extract_vim_cfile_lines(txt: str) -> List[Tuple[int, str]]: - ret_out = [] + ret_out: List[Tuple[int, str]] = [] for line in txt.split("\n"): _LOG.debug(hprint.to_str("line")) if line.strip() == "": @@ -707,7 +707,7 @@ def _extract_vim_cfile_lines(txt: str) -> List[Tuple[int, str]]: ) match = regex.match(line) if match: - line_number = match.group(1) + line_number = int(match.group(1)) description = match.group(2) else: # ``` @@ -723,7 +723,7 @@ def _extract_vim_cfile_lines(txt: str) -> List[Tuple[int, str]]: ) match = regex.match(line) if match: - line_number = match.group(1) + line_number = int(match.group(1)) description = match.group(2) else: _LOG.warning("Can't parse line: '%s'", line) diff --git a/dev_scripts_helpers/misc/get_url_titles.py b/dev_scripts_helpers/misc/get_url_titles.py index 174bb7806..d01381bac 100644 --- a/dev_scripts_helpers/misc/get_url_titles.py +++ b/dev_scripts_helpers/misc/get_url_titles.py @@ -1,39 +1,39 @@ +import time import requests from bs4 import BeautifulSoup +from typing import Optional +from html.parser import HTMLParser -def get_page_title(url): +def get_page_title(url: str) -> str: try: response = requests.get(url, timeout=10) response.raise_for_status() - soup = BeautifulSoup(response.text, 'html.parser') - title_tag = soup.find('title') + soup = BeautifulSoup(response.text, "html.parser") + title_tag = soup.find("title") return title_tag.string.strip() if title_tag else "No <title> tag found" except requests.RequestException as e: return f"Request failed: {e}" - -import requests -from html.parser import HTMLParser - class TitleParser(HTMLParser): - def __init__(self): + + def __init__(self) -> None: super().__init__() - self.in_title = False - self.title = None + self.in_title: bool = False + self.title: Optional[str] = None - def handle_starttag(self, tag, attrs): - if tag.lower() == 'title': + def 
handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None: + if tag.lower() == "title": self.in_title = True - def handle_data(self, data): + def handle_data(self, data: str) -> None: if self.in_title and self.title is None: self.title = data.strip() - def handle_endtag(self, tag): - if tag.lower() == 'title': + def handle_endtag(self, tag: str) -> None: + if tag.lower() == "title": self.in_title = False -def get_title_streaming(url): +def get_title_streaming(url: str) -> str: try: with requests.get(url, stream=True, timeout=10) as r: r.raise_for_status() @@ -46,11 +46,8 @@ def get_title_streaming(url): except requests.RequestException as e: return f"Request failed: {e}" - - if __name__ == "__main__": - # Example list of URLs - files = """ + files: str = """ https://news.ycombinator.com/item?id=34336386 https://news.ycombinator.com/item?id=29671450 https://news.ycombinator.com/item?id=22778089 @@ -59,181 +56,11 @@ def get_title_streaming(url): https://news.ycombinator.com/item?id=30371723 https://news.ycombinator.com/item?id=26953352 https://news.ycombinator.com/item?id=23209142 -https://news.ycombinator.com/item?id=30228261 -https://news.ycombinator.com/item?id=25950838 -https://news.ycombinator.com/item?id=32799789 -https://news.ycombinator.com/item?id=29315107 -https://news.ycombinator.com/item?id=30984662 -https://news.ycombinator.com/item?id=22168822 -https://news.ycombinator.com/item?id=22652141 -https://news.ycombinator.com/item?id=25279814 -https://news.ycombinator.com/item?id=22106367 -https://news.ycombinator.com/item?id=22446148 -https://news.ycombinator.com/item?id=24487135 -https://news.ycombinator.com/item?id=33696486 -https://news.ycombinator.com/item?id=14265051 -https://news.ycombinator.com/item?id=21534990 -https://news.ycombinator.com/item?id=29347885 -https://news.ycombinator.com/item?id=29876742 -https://news.ycombinator.com/item?id=23550758 -https://news.ycombinator.com/item?id=22504133 
-https://news.ycombinator.com/item?id=23339830 -https://news.ycombinator.com/item?id=23755675 -https://news.ycombinator.com/item?id=26872904 -https://news.ycombinator.com/item?id=27760919 -https://news.ycombinator.com/item?id=21614533 -https://news.ycombinator.com/item?id=26602156 -https://news.ycombinator.com/item?id=22291417 -https://news.ycombinator.com/from?site=a16z.com&next=29816846 -https://news.ycombinator.com/item?id=27855145 -https://news.ycombinator.com/item?id=26930667 -https://news.ycombinator.com/item?id=29711042 -https://news.ycombinator.com/item?id=26580746 -https://news.ycombinator.com/item?id=24601579 -https://news.ycombinator.com/item?id=22161830 -https://news.ycombinator.com/item?id=26612321 -https://news.ycombinator.com/item?id=32081943 -https://news.ycombinator.com/item?id=22962869 -https://news.ycombinator.com/item?id=27350264 -https://news.ycombinator.com/item?id=29677238 -https://news.ycombinator.com/item?id=31441516 -https://news.ycombinator.com/item?id=26164790 -https://news.ycombinator.com/item?id=22291189 -https://news.ycombinator.com/item?id=25575505 -https://news.ycombinator.com/item?id=23549929 -https://news.ycombinator.com/item?id=26524876 -https://news.ycombinator.com/item?id=27593772 -https://news.ycombinator.com/item?id=27768211 -https://news.ycombinator.com/item?id=42405323 -https://news.ycombinator.com/item?id=35506009 -https://news.ycombinator.com/item?id=22033129 -https://news.ycombinator.com/item?id=30970720 -https://news.ycombinator.com/item?id=22278339 -https://news.ycombinator.com/item?id=30247159 -https://news.ycombinator.com/item?id=29367687 -https://news.ycombinator.com/item?id=25107285 -https://news.ycombinator.com/item?id=26225373 -https://news.ycombinator.com/item?id=31212542 -https://news.ycombinator.com/item?id=21505305 -https://news.ycombinator.com/item?id=25874374 -https://news.ycombinator.com/item?id=22827275 -https://news.ycombinator.com/item?id=26058440 -https://news.ycombinator.com/item?id=29899156 
-https://news.ycombinator.com/item?id=34322033 -https://news.ycombinator.com/item?id=36015815 -https://news.ycombinator.com/item?id=22925484 -https://news.ycombinator.com/item?id=32937876 -https://news.ycombinator.com/item?id=34934216 -https://news.ycombinator.com/item?id=25445493 -https://news.ycombinator.com/item?id=21404292 -https://news.ycombinator.com/item?id=34821414 -https://news.ycombinator.com/item?id=33942597 -https://news.ycombinator.com/item?id=27763965 -https://news.ycombinator.com/item?id=23018805 -https://news.ycombinator.com/item?id=23593165 -https://news.ycombinator.com/item?id=31114554 -https://news.ycombinator.com/item?id=26053323 -https://news.ycombinator.com/item?id=25550240 -https://news.ycombinator.com/item?id=24949736 -https://news.ycombinator.com/item?id=29353904 -https://news.ycombinator.com/item?id=22207006 -https://news.ycombinator.com/item?id=22731317 -https://news.ycombinator.com/item?id=27805904 -https://news.ycombinator.com/item?id=28640429 -https://news.ycombinator.com/item?id=31168069 -https://news.ycombinator.com/item?id=31699032 -https://news.ycombinator.com/item?id=31123683 -https://news.ycombinator.com/item?id=23921610 -https://news.ycombinator.com/item?id=35020814 -https://news.ycombinator.com/item?id=21959874 -https://news.ycombinator.com/item?id=22895842 -https://news.ycombinator.com/item?id=33625367 -https://news.ycombinator.com/item?id=22429124 -https://news.ycombinator.com/item?id=26036790 -https://news.ycombinator.com/item?id=37059479 -https://news.ycombinator.com/item?id=30060765 -https://news.ycombinator.com/item?id=21610687 -https://news.ycombinator.com/item?id=25716581 -https://news.ycombinator.com/item?id=30822339 -https://news.ycombinator.com/item?id=22094355 -https://news.ycombinator.com/item?id=26034053 -https://news.ycombinator.com/item?id=27695574 -https://news.ycombinator.com/item?id=31286890 -https://news.ycombinator.com/item?id=36154622 -https://news.ycombinator.com/item?id=28155196 
-https://news.ycombinator.com/item?id=34843094 -https://news.ycombinator.com/item?id=33477056 -https://news.ycombinator.com/item?id=26747743 -https://news.ycombinator.com/item?id=22059601 -https://news.ycombinator.com/item?id=34391045 -https://news.ycombinator.com/item?id=42174181 -https://news.ycombinator.com/item?id=34152100 -https://news.ycombinator.com/item?id=35697627 -https://news.ycombinator.com/item?id=31455919 -https://news.ycombinator.com/item?id=31200989 -https://news.ycombinator.com/item?id=34752489 -https://news.ycombinator.com/item?id=42357273 -https://news.ycombinator.com/item?id=21481461 -https://news.ycombinator.com/item?id=30120731 -https://news.ycombinator.com/item?id=21442330 -https://news.ycombinator.com/item?id=26899531 -https://news.ycombinator.com/item?id=34857287 -https://news.ycombinator.com/item?id=26799702 -https://news.ycombinator.com/item?id=24059441 -https://news.ycombinator.com/item?id=34165789 -https://news.ycombinator.com/item?id=25428621 -https://news.ycombinator.com/item?id=23626908 -https://news.ycombinator.com/item?id=31431224 -https://news.ycombinator.com/item?id=21411893 -https://news.ycombinator.com/item?id=36079115 -https://news.ycombinator.com/item?id=23725829 -https://news.ycombinator.com/item?id=33985969 -https://news.ycombinator.com/item?id=22270464 -https://news.ycombinator.com/item?id=30925223 -https://news.ycombinator.com/item?id=22325975 -https://news.ycombinator.com/item?id=30046272 -https://news.ycombinator.com/item?id=32390730 -https://news.ycombinator.com/item?id=28704164 -https://news.ycombinator.com/item?id=23151144 -https://news.ycombinator.com/item?id=22492381 -https://news.ycombinator.com/item?id=22340720 -https://news.ycombinator.com/item?id=31958536 -https://news.ycombinator.com/item?id=39094343 -https://news.ycombinator.com/item?id=26631467 -https://news.ycombinator.com/item?id=31945564 -https://news.ycombinator.com/item?id=27736304 -https://news.ycombinator.com/item?id=23026750 
-https://news.ycombinator.com/item?id=22544563 -https://news.ycombinator.com/item?id=21564990 -https://news.ycombinator.com/item?id=27099536 -https://news.ycombinator.com/item?id=22082860 -https://news.ycombinator.com/item?id=28006894 -https://news.ycombinator.com/item?id=21706451 -https://news.ycombinator.com/item?id=35343791 -https://news.ycombinator.com/item?id=28045342 -https://news.ycombinator.com/item?id=29583792 -https://news.ycombinator.com/item?id=33001191 -https://news.ycombinator.com/item?id=34032872 -https://news.ycombinator.com/item?id=25304257 -https://news.ycombinator.com/item?id=29361004 -https://news.ycombinator.com/item?id=22627736 -https://news.ycombinator.com/item?id=25789336 -https://news.ycombinator.com/item?id=26762206 -https://news.ycombinator.com/item?id=34906378 -https://news.ycombinator.com/item?id=25789073 -https://news.ycombinator.com/item?id=34261656 -https://news.ycombinator.com/item?id=31335105 -https://news.ycombinator.com/item?id=9638748 -https://news.ycombinator.com/item?id=26247052 -https://news.ycombinator.com/item?id=42902936 -https://news.ycombinator.com/item?id=24958215 -https://news.ycombinator.com/item?id=36092179 -https://news.ycombinator.com/item?id=37202009 """ - url_list = files.split("\n") - import time + url_list: list[str] = files.split("\n") + for url in url_list: - #title = get_page_title(url) - title = get_title_streaming(url) + # title = get_page_title(url) + title: str = get_title_streaming(url) print("%s,%s" % (url, title)) time.sleep(2) diff --git a/docs/onboarding/bounty.onboarding_checklist.reference.md b/docs/onboarding/bounty.onboarding_checklist.reference.md index 2be65afb1..4e02bb2b6 100644 --- a/docs/onboarding/bounty.onboarding_checklist.reference.md +++ b/docs/onboarding/bounty.onboarding_checklist.reference.md @@ -3,8 +3,9 @@ <!-- toc --> - [Checklist](#checklist) + * [Final checks](#final-checks) - [Instructions](#instructions) - * [Org](#org) + * [General organization](#general-organization) * 
[Working on a bounty](#working-on-a-bounty) <!-- tocstop --> @@ -12,27 +13,27 @@ ## Checklist - Source: - [`bounty.onboarding_checklist.reference.md`](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/bounty.onboarding_checklist.reference.md) + [`bounty.onboarding_checklist.reference.md`](/docs/onboarding/bounty.onboarding_checklist.reference.md) - [ ] **Contributor**: Fork the repos - [ ] [helpers](https://github.com/causify-ai/helpers) - [ ] [tutorials](https://github.com/causify-ai/tutorials) - [ ] **Contributor**: Set up the development environment following instructions in - [`intern.set_up_development_on_laptop.how_to_guide.md`](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/intern.set_up_development_on_laptop.how_to_guide.md) + [`intern.set_up_development_on_laptop.how_to_guide.md`](/docs/onboarding/intern.set_up_development_on_laptop.how_to_guide.md) - [ ] **Contributor**: Carefully study all the documents in the must-read list: - - [ ] *Carefully study all the documents in - [the must-read list](https://github.com/causify-ai/helpers/blob/master/docs/onboarding/all.dev_must_read_checklist.reference.md) - - [ ] [General rules of collaboration](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.team_collaboration.how_to_guide.md) - - [ ] [Coding style guide](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.coding_style.how_to_guide.md) - - [ ] [How to write unit tests](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.write_unit_tests.how_to_guide.md) - - [ ] [How to run unit tests](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.run_unit_tests.how_to_guide.md) - - [ ] [Creating a Jupyter Notebook](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md) - - [ ] [What to do before opening a PR](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.submit_code_for_review.how_to_guide.md) - - [ ] 
[Code review process](https://github.com/causify-ai/helpers/blob/master/docs/coding/all.code_review.how_to_guide.md) - - [ ] [Git workflows and best practices](https://github.com/causify-ai/helpers/blob/master/docs/work_tools/git/all.git.how_to_guide.md) - - [ ] [GitHub organization](https://github.com/causify-ai/helpers/blob/master/docs/work_organization/all.use_github.how_to_guide.md) - - [ ] [Tips for writing documentation](https://github.com/causify-ai/helpers/blob/master/docs/documentation_meta/all.writing_docs.how_to_guide.md) + - [ ] **Carefully** study all the documents in + [the must-read list](/docs/onboarding/all.dev_must_read_checklist.reference.md) + - [ ] [General rules of collaboration](/docs/work_organization/all.team_collaboration.how_to_guide.md) + - [ ] [Coding style guide](/docs/coding/all.coding_style.how_to_guide.md) + - [ ] [How to write unit tests](/docs/coding/all.write_unit_tests.how_to_guide.md) + - [ ] [How to run unit tests](/docs/coding/all.run_unit_tests.how_to_guide.md) + - [ ] [Creating a Jupyter Notebook](/docs/coding/all.jupyter_notebook.how_to_guide.md) + - [ ] [What to do before opening a PR](/docs/coding/all.submit_code_for_review.how_to_guide.md) + - [ ] [Code review process](/docs/coding/all.code_review.how_to_guide.md) + - [ ] [Git workflows and best practices](/docs/work_tools/git/all.git.how_to_guide.md) + - [ ] [GitHub organization](/docs/work_organization/all.use_github.how_to_guide.md) + - [ ] [Tips for writing documentation](/docs/documentation_meta/all.writing_docs.how_to_guide.md) - They will help you get up to speed with our practices and development style - Read them carefully one by one - Ask questions diff --git a/helpers/hdocker.py b/helpers/hdocker.py index cfe35b150..101009552 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -12,7 +12,7 @@ import re import shlex import time -from typing import cast, Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, cast import 
helpers.hdbg as hdbg import helpers.hgit as hgit diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index dc30b96d2..642c30f30 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -757,12 +757,12 @@ def selected_navigation_to_str( all_colors = [ "red", - "orange", + "orange", "yellow", "lime", "green", "teal", - "cyan", + "cyan", "blue", "purple", "violet", @@ -772,7 +772,7 @@ def selected_navigation_to_str( "olive", "gray", "darkgray", - "lightgray", + "lightgray", ] @@ -811,12 +811,15 @@ def colorize_first_level_bullets(markdown_text: str) -> str: return "\n".join(result) -def colorize_bold_text(markdown_text: str, *, use_abbreviations: bool = True) -> str: +def colorize_bold_text( + markdown_text: str, *, use_abbreviations: bool = True +) -> str: r""" Add colors to bold text in markdown using equidistant colors from an array. - The function finds all bold text (enclosed in ** or __) and adds LaTeX color - commands while preserving the rest of the markdown unchanged. + The function finds all bold text (enclosed in ** or __) and adds + LaTeX color commands while preserving the rest of the markdown + unchanged. :param markdown_text: Input markdown text :param use_abbreviations: Use LaTeX abbreviations for colors, @@ -824,18 +827,16 @@ def colorize_bold_text(markdown_text: str, *, use_abbreviations: bool = True) -> :return: Markdown text with colored bold sections """ # Find all bold text (both ** and __ formats). - bold_pattern = r'\*\*(.*?)\*\*|__(.*?)__' + bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" # matches will look like: # - For **text**: group(1)='text', group(2)=None # - For __text__: group(1)=None, group(2)='text' matches = list(re.finditer(bold_pattern, markdown_text)) if not matches: return markdown_text - result = markdown_text # Calculate color spacing to use equidistant colors. 
color_step = len(all_colors) / len(matches) - # Process matches in reverse to not mess up string indices for i, match in enumerate(reversed(matches)): # Get the matched bold text (either ** or __ format) @@ -843,19 +844,13 @@ def colorize_bold_text(markdown_text: str, *, use_abbreviations: bool = True) -> # Calculate color index using equidistant spacing color_idx = int((len(matches) - 1 - i) * color_step) % len(all_colors) color = all_colors[color_idx] - # Create the colored version if use_abbreviations: colored_text = f"\\{color}{{{bold_text}}}" else: colored_text = f"**\\textcolor{{{color}}}{{{bold_text}}}**" - # Replace in the original text - result = ( - result[:match.start()] - + colored_text - + result[match.end():] - ) + result = result[: match.start()] + colored_text + result[match.end() :] return result @@ -863,8 +858,9 @@ def format_compressed_markdown(markdown_text: str) -> str: """ Add an empty line before first level bullets in markdown text. - First level bullets are those starting with "- " at the beginning of a line - with no indentation. Other level bullets have no empty line before them. + First level bullets are those starting with "- " at the beginning of + a line with no indentation. Other level bullets have no empty line + before them. :param markdown_text: Input markdown text :return: Formatted markdown text with @@ -876,7 +872,7 @@ def format_compressed_markdown(markdown_text: str) -> str: if re.match(r"^- ", line): # Add empty line before first level bullet if previous line exists # and isn't empty. 
- if i > 0 and lines[i-1].strip() != "": + if i > 0 and lines[i - 1].strip() != "": result.append("") elif re.match(r"^\s+- ", line): # If current line is an indented bullet, remove any empty line diff --git a/helpers/hparser.py b/helpers/hparser.py index 929cbaa5a..603bc31d9 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -370,7 +370,6 @@ def parse_input_output_args( return in_file_name, out_file_name - def init_logger_for_input_output_transform(args: argparse.Namespace) -> None: verbosity = args.log_level diff --git a/helpers/hserver.py b/helpers/hserver.py index 4df1f4858..f5d97a225 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -85,7 +85,7 @@ def get_dev_csfy_host_names() -> Tuple[str]: """ Return the names of the Causify dev servers. """ - host_names = ("dev1", "dev2", "dev3") + host_names = ["dev1", "dev2", "dev3"] return host_names diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 9176a4064..bdea60b72 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -812,7 +812,7 @@ def test1(self) -> None: # ############################################################################# -# Test_fix_chatgpt_output1 +# Test_md_clean_up1 # ############################################################################# @@ -1467,7 +1467,7 @@ def test7(self) -> None: **List 1:** - First item - Second item - + **List 2:** - Another item - Final item @@ -1477,13 +1477,14 @@ def test7(self) -> None: \\red{List 1:} - First item - Second item - + \\purple{List 2:} - Another item - Final item """ self.assert_equal(actual, expected) + # ############################################################################# # Test_format_compressed_markdown1 # ############################################################################# @@ -1491,13 +1492,6 @@ def test7(self) -> None: class Test_format_compressed_markdown1(hunitest.TestCase): - def _format_and_compare_markdown(self, text: str, expected: str) -> 
None: - text = hprint.dedent(text) - expected = hprint.dedent(expected) - # - actual = hmarkdo.format_compressed_markdown(text) - self.assert_equal(actual, expected) - def test1(self) -> None: """ Test basic case with single first level bullet. @@ -1662,3 +1656,10 @@ def test9(self) -> None: - Bullet with {braces} - Bullet with $math$""" self._format_and_compare_markdown(text, expected) + + def _format_and_compare_markdown(self, text: str, expected: str) -> None: + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # + actual = hmarkdo.format_compressed_markdown(text) + self.assert_equal(actual, expected) From 818ad521eb935df9aab0afeda7e03ff02a4131f9 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 6 May 2025 11:25:34 -0400 Subject: [PATCH 078/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/transform_notes.py | 38 +++-- helpers/hmarkdown.py | 81 +++++----- helpers/test/test_hmarkdown.py | 141 +++++++++++++++++- 3 files changed, 198 insertions(+), 62 deletions(-) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index aa5efc0b8..69ccc4418 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -51,6 +51,12 @@ def _parse() -> argparse.ArgumentParser: return parser +def _format_markdown(txt: str) -> str: + txt = dshdlino.prettier_on_str(txt) + txt = hmarkdo.remove_empty_lines_from_markdown(txt) + return txt + + def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() hparser.init_logger_for_input_output_transform(args) @@ -59,14 +65,14 @@ def _main(parser: argparse.ArgumentParser) -> None: if cmd == "list": txt = r""" test: 
compute the hash of a string to test the flow - format_headers: format the headers of the current file - increase_headers_level: increase the level of the headers of the current file + format_headers: format the headers + increase_headers_level: increase the level of the headers md_list_to_latex: convert a markdown list to a latex list - md_remove_formatting: remove the formatting of the current file - md_clean_up: clean up the current file - md_format: format the current file - md_format_compressed: format the current file - md_colorize_bold_text: colorize the bold text of the current file + md_remove_formatting: remove the formatting + md_clean_up: clean up removing all weird characters + md_only_format: reflow the markdown + md_colorize_bold_text: colorize the bold text + md_format: reflow the markdown and colorize the bold text """ txt = hprint.dedent(txt) print(txt) @@ -98,23 +104,27 @@ def _main(parser: argparse.ArgumentParser) -> None: ) mode = "list" txt = hmarkdo.header_list_to_markdown(header_list, mode) + txt = _format_markdown(txt) elif cmd == "md_list_to_latex": txt = hlatex.markdown_list_to_latex(txt) + txt = _format_markdown(txt) elif cmd == "md_remove_formatting": txt = hmarkdo.remove_formatting(txt) + txt = _format_markdown(txt) elif cmd == "md_clean_up": txt = hmarkdo.md_clean_up(txt) - elif cmd == "md_format": - # txt = dshdlino.prettier_on_str(txt) - pass - elif cmd == "md_format_compressed": - txt = hmarkdo.format_compressed_markdown(txt) + txt = _format_markdown(txt) + elif cmd == "md_only_format": + txt = _format_markdown(txt) elif cmd == "md_colorize_bold_text": txt = hmarkdo.colorize_bold_text(txt) + txt = _format_markdown(txt) + elif cmd == "md_format": + txt = hmarkdo.md_clean_up(txt) + txt = hmarkdo.colorize_bold_text(txt) + txt = _format_markdown(txt) else: raise ValueError(f"Invalid cmd='{cmd}'") - # Reflow the output. - txt = dshdlino.prettier_on_str(txt) # Write the output. 
hparser.write_file(txt, out_file_name) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 642c30f30..3e0082eea 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -285,6 +285,9 @@ def md_clean_up(txt: str) -> str: txt = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", txt, flags=re.DOTALL) # Replace `P(.)`` with `\Pr(.)`. txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) + # Replace \left[, \right]. + txt = re.sub(r"\\left\[", r"[", txt) + txt = re.sub(r"\\right\]", r"]", txt) # Replace \mid with `|`. txt = re.sub(r"\\mid", r"|", txt) # E.g.,`` • Description Logics (DLs) are a family`` @@ -755,27 +758,24 @@ def selected_navigation_to_str( # ############################################################################# -all_colors = [ +# These are the colors that are supported by Latex / markdown, are readable on +# white, and form an equidistant color palette. +_ALL_COLORS = [ "red", "orange", - "yellow", - "lime", + "brown", + "olive", "green", "teal", "cyan", "blue", - "purple", "violet", - "magenta", - "pink", - "brown", - "olive", - "gray", "darkgray", - "lightgray", + "gray", ] +# TODO(gp): Just bold the first-level bullets and then apply colorize_bold_text. def colorize_first_level_bullets(markdown_text: str) -> str: """ Colorize first-level bullets in markdown text. @@ -784,7 +784,6 @@ def colorize_first_level_bullets(markdown_text: str) -> str: :return: Formatted markdown text with first-level bullets colored """ # Define the colors to use. - colors = ["red", "orange", "green", "teal", "cyan", "blue", "violet", "brown"] # Find all first-level bullet points (lines starting with "- " after any whitespace). lines = markdown_text.split("\n") color_index = 0 @@ -796,7 +795,7 @@ def colorize_first_level_bullets(markdown_text: str) -> str: indentation = len(line) - len(line.lstrip()) if indentation == 0: # First-level bullet. 
- color = colors[color_index % len(colors)] + color = _ALL_COLORS[color_index % len(_ALL_COLORS)] # Replace the bullet with a colored version. # - \textcolor{red}{Linear models} colored_line = re.sub( @@ -826,58 +825,60 @@ def colorize_bold_text( `\red{text}` instead of `\textcolor{red}{text}` :return: Markdown text with colored bold sections """ + # Remove any existing color formatting. + # Remove \color{text} format + markdown_text = re.sub(r"\\[a-z]+\{([^}]+)\}", r"\1", markdown_text) + # Remove \textcolor{color}{text} format + markdown_text = re.sub(r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text) # Find all bold text (both ** and __ formats). bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" # matches will look like: - # - For **text**: group(1)='text', group(2)=None - # - For __text__: group(1)=None, group(2)='text' + # For **text**: group(1)='text', group(2)=None. + # For __text__: group(1)=None, group(2)='text'. matches = list(re.finditer(bold_pattern, markdown_text)) if not matches: return markdown_text result = markdown_text # Calculate color spacing to use equidistant colors. - color_step = len(all_colors) / len(matches) - # Process matches in reverse to not mess up string indices + color_step = len(_ALL_COLORS) / len(matches) + # Process matches in reverse to not mess up string indices. for i, match in enumerate(reversed(matches)): - # Get the matched bold text (either ** or __ format) + # Get the matched bold text (either ** or __ format). bold_text = match.group(1) or match.group(2) - # Calculate color index using equidistant spacing - color_idx = int((len(matches) - 1 - i) * color_step) % len(all_colors) - color = all_colors[color_idx] - # Create the colored version + # Calculate `color_idx` using equidistant spacing. + color_idx = int((len(matches) - 1 - i) * color_step) % len(_ALL_COLORS) + color = _ALL_COLORS[color_idx] + # Create the colored version. 
if use_abbreviations: + # E.g., \red{text} colored_text = f"\\{color}{{{bold_text}}}" else: - colored_text = f"**\\textcolor{{{color}}}{{{bold_text}}}**" - # Replace in the original text + # E.g., \textcolor{red}{text} + colored_text = f"\\textcolor{{{color}}}{{{bold_text}}}" + # Apply bold. + colored_text = f"**{colored_text}**" + # Replace in the original text. result = result[: match.start()] + colored_text + result[match.end() :] return result -def format_compressed_markdown(markdown_text: str) -> str: +def remove_empty_lines_from_markdown(markdown_text: str) -> str: """ - Add an empty line before first level bullets in markdown text. - - First level bullets are those starting with "- " at the beginning of - a line with no indentation. Other level bullets have no empty line - before them. + Remove all empty lines from markdown text and add empty lines only before + first level bullets. :param markdown_text: Input markdown text - :return: Formatted markdown text with + :return: Formatted markdown text """ - lines = markdown_text.split("\n") + # Split into lines and remove empty ones. + lines = [line for line in markdown_text.split("\n") if line.strip()] + # Remove all empty lines. result = [] for i, line in enumerate(lines): # Check if current line is a first level bullet (no indentation). if re.match(r"^- ", line): - # Add empty line before first level bullet if previous line exists - # and isn't empty. - if i > 0 and lines[i - 1].strip() != "": + # Add empty line before first level bullet if not at start. + if i > 0: result.append("") - elif re.match(r"^\s+- ", line): - # If current line is an indented bullet, remove any empty line - # before indented bullet. 
- if result and result[-1].strip() == "": - result.pop() result.append(line) - return "\n".join(result) + return "\n".join(result) \ No newline at end of file diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index bdea60b72..613b1447f 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1411,7 +1411,7 @@ def test1(self) -> None: """ text = "This is **bold** text" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = "This is \\red{bold} text" + expected = r"This is **\red{bold}** text" self.assert_equal(actual, expected) def test2(self) -> None: @@ -1420,7 +1420,7 @@ def test2(self) -> None: """ text = "**First** normal **Second** text" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = "\\red{First} normal \\purple{Second} text" + expected = r"**\red{First}** normal **\purple{Second}** text" self.assert_equal(actual, expected) def test3(self) -> None: @@ -1429,7 +1429,7 @@ def test3(self) -> None: """ text = "This is __bold__ text" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = "This is \\red{bold} text" + expected = r"This is **\red{bold}** text" self.assert_equal(actual, expected) def test4(self) -> None: @@ -1447,7 +1447,7 @@ def test5(self) -> None: """ text = "**First** and __Second__ bold" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = "\\red{First} and \\purple{Second} bold" + expected = r"**\red{First}** and **\purple{Second}** bold" self.assert_equal(actual, expected) def test6(self) -> None: @@ -1456,7 +1456,7 @@ def test6(self) -> None: """ text = "This is **bold** text" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=False) - expected = "This is **\\textcolor{red}{bold}** text" + expected = r"This is **\textcolor{red}{bold}** text" self.assert_equal(actual, expected) def test7(self) -> None: @@ -1473,17 +1473,92 @@ def test7(self) -> None: - Final item """ actual = 
hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = """ - \\red{List 1:} + expected = r""" + **\red{List 1:}** - First item - Second item - \\purple{List 2:} + **\purple{List 2:}** - Another item - Final item """ self.assert_equal(actual, expected) + def test8(self) -> None: + text = r""" +- **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + +- **\orange{Key Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + +- **\blue{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + +- **\violet{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + +- **\pink{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + +- **\olive{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + +- **\darkgray{Use Case}** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) + expected = r""" +- **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + +- **\yellow{Key Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = 
R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + +- **\green{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + +- **\blue{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + +- **\violet{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + +- **\brown{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + +- **\gray{Use Case}** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + self.assert_equal(actual, expected) + + + + # ############################################################################# # Test_format_compressed_markdown1 @@ -1657,6 +1732,56 @@ def test9(self) -> None: - Bullet with $math$""" self._format_and_compare_markdown(text, expected) + def test10(self) -> None: + text = r""" + - **Objective** + + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **Key Components** + + - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - **Utility update**: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **Learning Process** + + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **Use Case** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + 
expected = r""" + - **Objective** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **Key Components** + - **Model learning**: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - **Utility update**: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **Learning Process** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **Use Case** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """ + self._format_and_compare_markdown(text, expected) + def _format_and_compare_markdown(self, text: str, expected: str) -> None: text = hprint.dedent(text) expected = hprint.dedent(expected) From c6380f1a73cd2d601e5fb2364631b5a2daf55d16 Mon Sep 17 00:00:00 2001 From: neomisule <119680709+neomisule@users.noreply.github.com> Date: Mon, 5 May 2025 14:09:48 -0400 Subject: [PATCH 079/193] HelpersTask520-break-tests-in-Test_fix_links-into-smaller-chunks (#575) * HelpersTask520: Divided test1 and test3 in chunks * HelpersTask520: Adding the separate tests * HelpersTask520: Fixing the indentation * Added new files for tests * Modified all testcases and fixed fast tests * Solving conflicts * Deleting conflicted file * Fixing order of testcases * Added helper function * Reverted unnecessary file * Resolving conflicts * Fixing test17 output * Ran linter * HelpersTask520: Made suggested changes --------- Co-authored-by: Neomi <nsule@umd.edu> Co-authored-by: aangelo9 <153690899+aangelo9@users.noreply.github.com> --- .../Test_fix_links.test1/output/test.txt | 92 +----- .../Test_fix_links.test14/output/test.txt | 9 + .../Test_fix_links.test15/output/test.txt | 19 ++ 
.../Test_fix_links.test16/output/test.txt | 24 ++ .../Test_fix_links.test17/output/test.txt | 21 ++ .../Test_fix_links.test18/output/test.txt | 17 ++ .../Test_fix_links.test19/output/test.txt | 21 ++ .../Test_fix_links.test3/output/test.txt | 16 +- .../Test_fix_links.test5/output/test.txt | 18 +- linters/test/test_amp_fix_md_links.py | 286 ++++++++++-------- 10 files changed, 282 insertions(+), 241 deletions(-) create mode 100644 linters/test/outcomes/Test_fix_links.test14/output/test.txt create mode 100644 linters/test/outcomes/Test_fix_links.test15/output/test.txt create mode 100644 linters/test/outcomes/Test_fix_links.test16/output/test.txt create mode 100644 linters/test/outcomes/Test_fix_links.test17/output/test.txt create mode 100644 linters/test/outcomes/Test_fix_links.test18/output/test.txt create mode 100644 linters/test/outcomes/Test_fix_links.test19/output/test.txt diff --git a/linters/test/outcomes/Test_fix_links.test1/output/test.txt b/linters/test/outcomes/Test_fix_links.test1/output/test.txt index 9fc30cf08..e5311ec56 100644 --- a/linters/test/outcomes/Test_fix_links.test1/output/test.txt +++ b/linters/test/outcomes/Test_fix_links.test1/output/test.txt @@ -1,16 +1,5 @@ # linter warnings -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:38: '/helpersssss/hhhhgit.py' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:77: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:80: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:83: '/import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:86: '/iiimport_check/example/output/basicccc.png' does not follow the format 
'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:86: '/iiimport_check/example/output/basicccc.png' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:89: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:92: 'import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:96: '/import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:99: '../../import_check/example/output/basic.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:102: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test.md/XYZ' -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:102: '/iiimport_check/example/output/basicccc.png' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:25: '/docs/markdown_example.md' does not exist # linted file - Markdown-style link with a text label @@ -34,84 +23,7 @@ $GIT_ROOT/linters/test/outcomes/Test_fix_links.test1/tmp.scratch/test.md:102: '/ - Markdown-style link with a path label in backticks without the slash at the start - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) -- Markdown-style link with the link only in square brackets - - [/helpers/hgit.py](/helpers/hgit.py) - -- Markdown-style link with an http GH company link - - [/helpers/hgit.py](/helpers/hgit.py) - -- Markdown-style link with an http GH company link and a text label - - [Here](/helpers/hgit.py) - -- Markdown-style link with an http external link - - [AirFlow UI](http://172.30.2.44:8090/home). 
- -- Markdown-style link with backticks in the square brackets and external http link - - [`foobar`](https://ap-northeast-1.console.aws.amazon.com/s3/buckets/foobar) - -- Markdown-style link to a file that does not exist - - [File not found](/helpersssss/hhhhgit.py) - - Markdown-style link with a directory beginning with a dot - [`fast_tests.yml`](/.github/workflows/fast_tests.yml) -- File path without the backticks - - /helpers/test/test_hdbg.py - -- File path with the backticks - - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) - -- File path with the backticks and a dot at the start - - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) - -- File path with the backticks and no slash at the start - - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) - -- File path without the dir - - `README.md` - -- File path of a hidden file - - .github/workflows/build_image.yml.DISABLED - -- Non-file path - - ../../../../helpers:$GIT_ROOT/helpers - -- Non-file path text with slashes in it - - Code in Markdown/LaTeX files (e.g., mermaid code). 
- -- File path that does not exist - - `/helpersssss/hhhhgit.py` - -- File path inside triple ticks: -```bash -With backticks: `helpers/hgit.py` -Without backticks: helpers/hgit.py -``` - -- HTML-style figure pointer - - <img src="import_check/example/output/basic.png"> - -- HTML-style figure pointer with an attribute - <img src="import_check/example/output/basic.png" style="" /> - -- HTML-style figure pointer with a slash at the start - - <img src="/import_check/example/output/basic.png"> - -- HTML-style figure pointer that does not exist - - <img src="/iiimport_check/example/output/basicccc.png"> - -- Markdown-style figure pointer - - <img src="import_check/example/output/basic.png"> - -- Markdown-style figure pointer with an attribute - - <img src="import_check/example/output/basic.png">{width="6.854779090113736in" -height="1.2303444881889765in"} - -- Markdown-style figure pointer with a slash at the start - - <img src="/import_check/example/output/basic.png"> - -- Markdown-style figure pointer with a dir changes at the start - - <img src="../../import_check/example/output/basic.png"> - -- Markdown-style figure pointer that does not exist - - <img src="/iiimport_check/example/output/basicccc.png"> \ No newline at end of file +Markdown link: [Valid Markdown Link](/docs/markdown_example.md) diff --git a/linters/test/outcomes/Test_fix_links.test14/output/test.txt b/linters/test/outcomes/Test_fix_links.test14/output/test.txt new file mode 100644 index 000000000..1132c69fe --- /dev/null +++ b/linters/test/outcomes/Test_fix_links.test14/output/test.txt @@ -0,0 +1,9 @@ +# linter warnings + + +# linted file +- File path inside triple ticks: +```bash +With backticks: `helpers/hgit.py` +Without backticks: helpers/hgit.py +``` diff --git a/linters/test/outcomes/Test_fix_links.test15/output/test.txt b/linters/test/outcomes/Test_fix_links.test15/output/test.txt new file mode 100644 index 000000000..2cee949dd --- /dev/null +++ 
b/linters/test/outcomes/Test_fix_links.test15/output/test.txt @@ -0,0 +1,19 @@ +# linter warnings +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test15/tmp.scratch/test_html_img_links.md:2: 'import_check/example/output/basic.png' does not follow the format 'figs/test_html_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test15/tmp.scratch/test_html_img_links.md:5: 'import_check/example/output/basic.png' does not follow the format 'figs/test_html_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test15/tmp.scratch/test_html_img_links.md:8: '/import_check/example/output/basic.png' does not follow the format 'figs/test_html_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test15/tmp.scratch/test_html_img_links.md:11: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test_html_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test15/tmp.scratch/test_html_img_links.md:11: '/iiimport_check/example/output/basicccc.png' does not exist + +# linted file +- HTML-style figure pointer + - <img src="import_check/example/output/basic.png"> + +- HTML-style figure pointer with an attribute + <img src="import_check/example/output/basic.png" style="" /> + +- HTML-style figure pointer with a slash at the start + - <img src="/import_check/example/output/basic.png"> + +- HTML-style figure pointer that does not exist + - <img src="/iiimport_check/example/output/basicccc.png"> diff --git a/linters/test/outcomes/Test_fix_links.test16/output/test.txt b/linters/test/outcomes/Test_fix_links.test16/output/test.txt new file mode 100644 index 000000000..6f424c772 --- /dev/null +++ b/linters/test/outcomes/Test_fix_links.test16/output/test.txt @@ -0,0 +1,24 @@ +# linter warnings +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test16/tmp.scratch/test_md_img_links.md:2: 'import_check/example/output/basic.png' does not follow the format 'figs/test_md_img_links.md/XYZ' 
+$GIT_ROOT/linters/test/outcomes/Test_fix_links.test16/tmp.scratch/test_md_img_links.md:5: 'import_check/example/output/basic.png' does not follow the format 'figs/test_md_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test16/tmp.scratch/test_md_img_links.md:9: '/import_check/example/output/basic.png' does not follow the format 'figs/test_md_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test16/tmp.scratch/test_md_img_links.md:12: '../../import_check/example/output/basic.png' does not follow the format 'figs/test_md_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test16/tmp.scratch/test_md_img_links.md:15: '/iiimport_check/example/output/basicccc.png' does not follow the format 'figs/test_md_img_links.md/XYZ' +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test16/tmp.scratch/test_md_img_links.md:15: '/iiimport_check/example/output/basicccc.png' does not exist + +# linted file +- Markdown-style figure pointer + - <img src="import_check/example/output/basic.png"> + +- Markdown-style figure pointer with an attribute + - <img src="import_check/example/output/basic.png">{width="6.854779090113736in" +height="1.2303444881889765in"} + +- Markdown-style figure pointer with a slash at the start + - <img src="/import_check/example/output/basic.png"> + +- Markdown-style figure pointer with a dir changes at the start + - <img src="../../import_check/example/output/basic.png"> + +- Markdown-style figure pointer that does not exist + - <img src="/iiimport_check/example/output/basicccc.png"> diff --git a/linters/test/outcomes/Test_fix_links.test17/output/test.txt b/linters/test/outcomes/Test_fix_links.test17/output/test.txt new file mode 100644 index 000000000..db92f14f4 --- /dev/null +++ b/linters/test/outcomes/Test_fix_links.test17/output/test.txt @@ -0,0 +1,21 @@ +# linter warnings +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test17/tmp.scratch/test_broken_links.md:5: '/helpersssss/hhhhgit.py' does not exist 
+$GIT_ROOT/linters/test/outcomes/Test_fix_links.test17/tmp.scratch/test_broken_links.md:16: '/missing_markdown.md' does not exist + +# linted file +- Markdown-style link with the link only in square brackets + - [/helpers/hgit.py](/helpers/hgit.py) + +- Markdown-style link to a file that does not exist + - [File not found](/helpersssss/hhhhgit.py) + +- Non-file path + - ../../../../helpers:$GIT_ROOT/helpers + +- Non-file path text with slashes in it + - Code in Markdown/LaTeX files (e.g., mermaid code). + +- File path that does not exist + - `/helpersssss/hhhhgit.py` + +Broken Markdown link: [Broken Markdown Link](/missing_markdown.md) diff --git a/linters/test/outcomes/Test_fix_links.test18/output/test.txt b/linters/test/outcomes/Test_fix_links.test18/output/test.txt new file mode 100644 index 000000000..069ae09fa --- /dev/null +++ b/linters/test/outcomes/Test_fix_links.test18/output/test.txt @@ -0,0 +1,17 @@ +# linter warnings + + +# linted file +- Markdown-style link with an http GH company link + - [/helpers/hgit.py](/helpers/hgit.py) + +- Markdown-style link with an http GH company link and a text label + - [Here](/helpers/hgit.py) + +- Markdown-style link with an http external link + - [AirFlow UI](http://172.30.2.44:8090/home). 
+ +- Markdown-style link with backticks in the square brackets and external http link + - [`foobar`](https://ap-northeast-1.console.aws.amazon.com/s3/buckets/foobar) + +External Markdown link: [External Markdown Link](https://example.com) diff --git a/linters/test/outcomes/Test_fix_links.test19/output/test.txt b/linters/test/outcomes/Test_fix_links.test19/output/test.txt new file mode 100644 index 000000000..d78eda347 --- /dev/null +++ b/linters/test/outcomes/Test_fix_links.test19/output/test.txt @@ -0,0 +1,21 @@ +# linter warnings + + +# linted file +- File path without the backticks + - /helpers/test/test_hdbg.py + +- File path with the backticks + - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) + +- File path with the backticks and a dot at the start + - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) + +- File path with the backticks and no slash at the start + - [`/helpers/test/test_hdbg.py`](/helpers/test/test_hdbg.py) + +- File path without the dir + - `README.md` + +- File path of a hidden file + - .github/workflows/build_image.yml.DISABLED diff --git a/linters/test/outcomes/Test_fix_links.test3/output/test.txt b/linters/test/outcomes/Test_fix_links.test3/output/test.txt index 78ee63a22..94a8dd362 100644 --- a/linters/test/outcomes/Test_fix_links.test3/output/test.txt +++ b/linters/test/outcomes/Test_fix_links.test3/output/test.txt @@ -1,21 +1,13 @@ # linter warnings -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:1: '/docs/markdown_example.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:3: '/docs/html_example.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:5: '/missing_markdown.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:7: '/missing_html.md' does not exist 
-$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:13: '/nested.md)' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:1: '/docs/html_example.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:3: '/missing_html.md' does not exist +$GIT_ROOT/linters/test/outcomes/Test_fix_links.test3/tmp.scratch/test_combined.md:7: '/nested.md)' does not exist # linted file -Markdown link: [Valid Markdown Link](/docs/markdown_example.md) - HTML-style link: [Valid HTML Link](/docs/html_example.md) -Broken Markdown link: [Broken Markdown Link](/missing_markdown.md) - Broken HTML link: [Broken HTML Link](/missing_html.md) -External Markdown link: [External Markdown Link](https://example.com) - External HTML link: [External HTML Link](https://example.com) -Nested HTML link with Markdown: [Invalid Nested]([Example](/nested.md)) \ No newline at end of file +Nested HTML link with Markdown: [Invalid Nested]([Example](/nested.md)) diff --git a/linters/test/outcomes/Test_fix_links.test5/output/test.txt b/linters/test/outcomes/Test_fix_links.test5/output/test.txt index 75fd16dcf..37f8ecc9e 100644 --- a/linters/test/outcomes/Test_fix_links.test5/output/test.txt +++ b/linters/test/outcomes/Test_fix_links.test5/output/test.txt @@ -1,14 +1,16 @@ # linter warnings -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:1: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:3: '/docs/markdown_exam.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:5: '$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist -$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/valid_header_test.md:7: 
'$GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md' does not exist + # linted file -Markdown link: [Valid Markdown and header Link]($GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md#introduction) +# Reference test file + +- [Introduction](#introduction) +- [Hyphen test](#hyphen-test) + +## Introduction -Markdown link: [InValid Markdown Link](/docs/markdown_exam.md#introduction) +A test header with one word in the reference file. -Markdown link: [Invalid header in the Markdown Link]($GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md#introduce) +## Hyphen test -Markdown link: [Valid Markdown and header Link]($GIT_ROOT/linters/test/outcomes/Test_fix_links.test5/tmp.scratch/reference.md#hyphen-test) \ No newline at end of file +A test to check two words header in the reference file. diff --git a/linters/test/test_amp_fix_md_links.py b/linters/test/test_amp_fix_md_links.py index ef60a521e..64de5c7e7 100644 --- a/linters/test/test_amp_fix_md_links.py +++ b/linters/test/test_amp_fix_md_links.py @@ -38,6 +38,20 @@ def _get_output_string(out_warnings: List[str], updated_lines: List[str]) -> str class Test_fix_links(hunitest.TestCase): + def run_test(self, input_content: str, *, file_name: str = "test.md") -> None: + """ + Helper method to run tests with common setup and verification. + + :param input_content: the content to test + :param file_name: the name of the test file + """ + file_path = self.write_input_file(input_content, file_name) + # Run. + _, updated_lines, out_warnings = lafimdli.fix_links(file_path) + # Check. + output = _get_output_string(out_warnings, updated_lines) + self.check_string(output, purify_text=True) + def write_input_file(self, txt: str, file_name: str) -> str: """ Write test content to a file in the scratch space. @@ -55,8 +69,6 @@ def write_input_file(self, txt: str, file_name: str) -> str: hio.to_file(file_path, txt) return file_path - # TODO(gp): To outsource. 
Break into smaller tests. If one of these fails, - # it's hard to debug. def test1(self) -> None: """ Test fixing link formatting in a Markdown file. @@ -84,95 +96,12 @@ def test1(self) -> None: - Markdown-style link with a path label in backticks without the slash at the start - [`helpers/test/test_hdbg.py`](helpers/test/test_hdbg.py) - - Markdown-style link with the link only in square brackets - - [/helpers/hgit.py]() - - - Markdown-style link with an http GH company link - - [helpers/hgit.py](https://github.com/causify-ai/helpers/blob/master/helpers/hgit.py) - - - Markdown-style link with an http GH company link and a text label - - [Here](https://github.com/causify-ai/helpers/blob/master/helpers/hgit.py) - - - Markdown-style link with an http external link - - [AirFlow UI](http://172.30.2.44:8090/home). - - - Markdown-style link with backticks in the square brackets and external http link - - [`foobar`](https://ap-northeast-1.console.aws.amazon.com/s3/buckets/foobar) - - - Markdown-style link to a file that does not exist - - [File not found](/helpersssss/hhhhgit.py) - - Markdown-style link with a directory beginning with a dot - [`fast_tests.yml`](/.github/workflows/fast_tests.yml) - - File path without the backticks - - /helpers/test/test_hdbg.py - - - File path with the backticks - - `/helpers/test/test_hdbg.py` - - - File path with the backticks and a dot at the start - - `./helpers/test/test_hdbg.py` - - - File path with the backticks and no slash at the start - - `helpers/test/test_hdbg.py` - - - File path without the dir - - `README.md` - - - File path of a hidden file - - .github/workflows/build_image.yml.DISABLED - - - Non-file path - - ../../../../amp/helpers:/app/helpers - - - Non-file path text with slashes in it - - Code in Markdown/LaTeX files (e.g., mermaid code). 
- - - File path that does not exist - - `/helpersssss/hhhhgit.py` - - - File path inside triple ticks: - ```bash - With backticks: `helpers/hgit.py` - Without backticks: helpers/hgit.py - ``` - - - HTML-style figure pointer - - <img src="import_check/example/output/basic.png"> - - - HTML-style figure pointer with an attribute - <img src="import_check/example/output/basic.png" style="" /> - - - HTML-style figure pointer with a slash at the start - - <img src="/import_check/example/output/basic.png"> - - - HTML-style figure pointer that does not exist - - <img src="/iiimport_check/example/output/basicccc.png"> - - - Markdown-style figure pointer - - ![](import_check/example/output/basic.png) - - - Markdown-style figure pointer with an attribute - - ![](import_check/example/output/basic.png){width="6.854779090113736in" - height="1.2303444881889765in"} - - - Markdown-style figure pointer with a slash at the start - - ![](/import_check/example/output/basic.png) - - - Markdown-style figure pointer with a dir changes at the start - - ![](../../import_check/example/output/basic.png) - - - Markdown-style figure pointer that does not exist - - ![](/iiimport_check/example/output/basicccc.png) + Markdown link: [Valid Markdown Link](docs/markdown_example.md) """ - file_name = "test.md" - file_path = self.write_input_file(txt_incorrect, file_name) - # Run. - _, updated_lines, out_warnings = lafimdli.fix_links(file_path) - # Check. - output = _get_output_string(out_warnings, updated_lines) - self.check_string(output, purify_text=True) + self.run_test(txt_incorrect, file_name="test.md") def test2(self) -> None: """ @@ -197,43 +126,23 @@ def test2(self) -> None: [Data Availability](#data-availability) """ # - file_name = "test.md" - file_path = self.write_input_file(txt_internal_links, file_name) - # Run. - _, updated_lines, out_warnings = lafimdli.fix_links(file_path) - # Check. 
- output = _get_output_string(out_warnings, updated_lines) - self.check_string(output, purify_text=True) + self.run_test(txt_internal_links, file_name="test.md") - # TODO(gp): To outsource. Break into smaller tests. If one of these fails, - # it's hard to debug. def test3(self) -> None: """ - Test the mix of Markdown and HTML-style links. + Test the mix of Markdown and HTML-style links with anchor tags. """ # Prepare inputs. input_content = r""" - Markdown link: [Valid Markdown Link](docs/markdown_example.md) - HTML-style link: <a href="docs/html_example.md">Valid HTML Link</a> - Broken Markdown link: [Broken Markdown Link](missing_markdown.md) - Broken HTML link: <a href="missing_html.md">Broken HTML Link</a> - External Markdown link: [External Markdown Link](https://example.com) - External HTML link: <a href="https://example.com">External HTML Link</a> Nested HTML link with Markdown: <a href="[Example](nested.md)">Invalid Nested</a> """ - file_name = "test_combined.md" - file_path = self.write_input_file(input_content, file_name) - # Run. - _, updated_lines, out_warnings = lafimdli.fix_links(file_path) - # Check. - output = _get_output_string(out_warnings, updated_lines) - self.check_string(output, purify_text=True) + self.run_test(input_content, file_name="test_combined.md") def test4(self) -> None: """ @@ -294,13 +203,7 @@ def test5(self) -> None: Markdown link: [Valid Markdown and header Link]({reference_file_link}#hyphen-test) """ - test_file_name = "valid_header_test.md" - test_file_link = self.write_input_file(test_md_content, test_file_name) - # Run. - _, updated_lines, out_warnings = lafimdli.fix_links(test_file_link) - # Check. 
- output = _get_output_string(out_warnings, updated_lines) - self.check_string(output, purify_text=True) + self.run_test(reference_file_md_content, file_name="valid_header_test.md") def test6(self) -> None: """ @@ -317,13 +220,7 @@ def test6(self) -> None: Tel: [Call](tel:+1234567890) """ - file_name = "test_links.md" - file_path = self.write_input_file(input_content, file_name) - # Run. - _, updated_lines, out_warnings = lafimdli.fix_links(file_path) - # Check. - output = _get_output_string(out_warnings, updated_lines) - self.check_string(output, purify_text=True) + self.run_test(input_content, file_name="test_links.md") def test7(self) -> None: """ @@ -419,13 +316,7 @@ def test11(self) -> None: response = requests.get("https://api.github.com/users") ``` """ - file_name = "test_fenced_blocks.md" - file_path = self.write_input_file(text, file_name) - # Run. - _, updated_lines, out_warnings = lafimdli.fix_links(file_path) - # Check. - output = _get_output_string(out_warnings, updated_lines) - self.check_string(output, purify_text=True) + self.run_test(text, file_name="test_fenced_blocks.md") def test12(self) -> None: """ @@ -477,6 +368,139 @@ def test13(self) -> None: ] self.assertEqual(expected, actual) + def test14(self) -> None: + """ + Test Markdown files in triple backticks. + """ + txt_incorrect = r""" + - File path inside triple ticks: + ```bash + With backticks: `helpers/hgit.py` + Without backticks: helpers/hgit.py + ``` + """ + self.run_test(txt_incorrect, file_name="test_md_in_triple_backticks.md") + + def test15(self) -> None: + """ + Test HTML style image links. + """ + # Prepare inputs. 
+ txt_incorrect = r""" + + - HTML-style figure pointer + - <img src="import_check/example/output/basic.png"> + + - HTML-style figure pointer with an attribute + <img src="import_check/example/output/basic.png" style="" /> + + - HTML-style figure pointer with a slash at the start + - <img src="/import_check/example/output/basic.png"> + + - HTML-style figure pointer that does not exist + - <img src="/iiimport_check/example/output/basicccc.png"> + """ + self.run_test(txt_incorrect, file_name="test_html_img_links.md") + + def test16(self) -> None: + """ + Test Markdown style image links. + """ + # Prepare inputs. + txt_incorrect = r""" + + - Markdown-style figure pointer + - ![](import_check/example/output/basic.png) + + - Markdown-style figure pointer with an attribute + - ![](import_check/example/output/basic.png){width="6.854779090113736in" + height="1.2303444881889765in"} + + - Markdown-style figure pointer with a slash at the start + - ![](/import_check/example/output/basic.png) + + - Markdown-style figure pointer with a dir changes at the start + - ![](../../import_check/example/output/basic.png) + + - Markdown-style figure pointer that does not exist + - ![](/iiimport_check/example/output/basicccc.png) + """ + self.run_test(txt_incorrect, file_name="test_md_img_links.md") + + def test17(self) -> None: + """ + Test broken files, links and paths. + """ + # Prepare inputs. + txt_incorrect = r""" + - Markdown-style link with the link only in square brackets + - [/helpers/hgit.py]() + + - Markdown-style link to a file that does not exist + - [File not found](/helpersssss/hhhhgit.py) + + - Non-file path + - ../../../../amp/helpers:/app/helpers + + - Non-file path text with slashes in it + - Code in Markdown/LaTeX files (e.g., mermaid code). 
+ + - File path that does not exist + - `/helpersssss/hhhhgit.py` + + Broken Markdown link: [Broken Markdown Link](missing_markdown.md) + """ + self.run_test(txt_incorrect, file_name="test_broken_links.md") + + def test18(self) -> None: + """ + Test Markdown files with external links. + """ + # Prepare inputs. + txt_incorrect = r""" + + - Markdown-style link with an http GH company link + - [helpers/hgit.py](https://github.com/causify-ai/helpers/blob/master/helpers/hgit.py) + + - Markdown-style link with an http GH company link and a text label + - [Here](https://github.com/causify-ai/helpers/blob/master/helpers/hgit.py) + + - Markdown-style link with an http external link + - [AirFlow UI](http://172.30.2.44:8090/home). + + - Markdown-style link with backticks in the square brackets and external http link + - [`foobar`](https://ap-northeast-1.console.aws.amazon.com/s3/buckets/foobar) + + External Markdown link: [External Markdown Link](https://example.com) + """ + self.run_test(txt_incorrect, file_name="test_external_links.md") + + def test19(self) -> None: + """ + Test files without Markdown hyperlinks. + """ + # Prepare inputs. 
+ txt_incorrect = r""" + - File path without the backticks + - /helpers/test/test_hdbg.py + + - File path with the backticks + - `/helpers/test/test_hdbg.py` + + - File path with the backticks and a dot at the start + - `./helpers/test/test_hdbg.py` + + - File path with the backticks and no slash at the start + - `helpers/test/test_hdbg.py` + + - File path without the dir + - `README.md` + + - File path of a hidden file + - .github/workflows/build_image.yml.DISABLED + """ + self.run_test(txt_incorrect, file_name="test_without_md_hyperlinks.md") + # ############################################################################# # Test_make_path_absolute From d0c698eb9ce98d682f609e2377b13dd224bb4c0b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 6 May 2025 20:14:45 -0400 Subject: [PATCH 080/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/transform_notes.py | 21 +- dev_scripts_helpers/llms/llm_prompts.py | 276 ++++++++++-------- dev_scripts_helpers/llms/llm_transform.py | 21 +- dev_scripts_helpers/misc/extract_bounties.py | 63 ++-- helpers/hmarkdown.py | 11 +- 5 files changed, 218 insertions(+), 174 deletions(-) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 69ccc4418..929eee3fa 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -31,7 +31,6 @@ import hashlib import logging -import dev_scripts_helpers.documentation.lint_notes as dshdlino import helpers.hlatex as hlatex import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser @@ -51,12 +50,6 @@ def _parse() -> argparse.ArgumentParser: return parser -def _format_markdown(txt: str) -> 
str: - txt = dshdlino.prettier_on_str(txt) - txt = hmarkdo.remove_empty_lines_from_markdown(txt) - return txt - - def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() hparser.init_logger_for_input_output_transform(args) @@ -104,25 +97,25 @@ def _main(parser: argparse.ArgumentParser) -> None: ) mode = "list" txt = hmarkdo.header_list_to_markdown(header_list, mode) - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_list_to_latex": txt = hlatex.markdown_list_to_latex(txt) - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_remove_formatting": txt = hmarkdo.remove_formatting(txt) - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_clean_up": txt = hmarkdo.md_clean_up(txt) - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_only_format": - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_colorize_bold_text": txt = hmarkdo.colorize_bold_text(txt) - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_format": txt = hmarkdo.md_clean_up(txt) txt = hmarkdo.colorize_bold_text(txt) - txt = _format_markdown(txt) + txt = hmarkdo.format_markdown(txt) else: raise ValueError(f"Invalid cmd='{cmd}'") # Write the output. diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 39567cdf5..50296f448 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -4,8 +4,9 @@ import logging import os import re -from typing import Dict, List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set, Tuple, Union +import helpers.hgit as hgit import helpers.hdbg as hdbg import helpers.hio as hio import helpers.hmarkdown as hmarkdo @@ -56,38 +57,21 @@ def get_prompt_tags() -> List[str]: # Store the prompts that need a certain post-transforms to be applied outside # the container. 
-OUTSIDE_CONTAINER_POST_TRANSFORMS: Dict[str, List[str]] = {} - - -# TODO(gp): We should embed this outside_container_post_transforms in the -# prompts. -if not OUTSIDE_CONTAINER_POST_TRANSFORMS: - OUTSIDE_CONTAINER_POST_TRANSFORMS = { - # These are all the prompts with post_transforms with - # `convert_to_vim_cfile`. - "convert_file_names": [ - "code_review_correctness", - "code_review_refactoring", - ], - # remove_code_delimiters - "prettier_on_str": [ - "md_rewrite", - "md_summarize_short", - "slide_improve", - "slide_colorize", - ], - } - valid_prompts = get_prompt_tags() - for _, prompts in OUTSIDE_CONTAINER_POST_TRANSFORMS.items(): - for prompt in prompts: - hdbg.dassert_in(prompt, valid_prompts) +_POST_CONTAINER_TRANSFORMS: Dict[str, List[str]] = {} -def get_outside_container_post_transforms( +def get_post_container_transforms( transform_name: str, ) -> Dict[str, List[str]]: - hdbg.dassert_in(transform_name, OUTSIDE_CONTAINER_POST_TRANSFORMS.keys()) - return OUTSIDE_CONTAINER_POST_TRANSFORMS[transform_name] + global _POST_CONTAINER_TRANSFORMS + if not _POST_CONTAINER_TRANSFORMS: + valid_prompts = get_prompt_tags() + for prompt in valid_prompts: + _, _, _, post_container_transforms = eval(f"{prompt}()") + hdbg.dassert_not_in(prompt, _POST_CONTAINER_TRANSFORMS) + _POST_CONTAINER_TRANSFORMS[prompt] = list(post_container_transforms) + hdbg.dassert_in(transform_name, _POST_CONTAINER_TRANSFORMS.keys()) + return _POST_CONTAINER_TRANSFORMS[transform_name] # ############################################################################# @@ -95,10 +79,10 @@ def get_outside_container_post_transforms( # ############################################################################# -_PROMPT_OUT = Tuple[str, Set[str], Set[str]] +_PROMPT_OUT = Tuple[str, Set[str], Set[str], List[str]] -_CONTEXT = r""" +_CODING_CONTEXT = r""" You are a proficient Python coder who pays attention to detail. I will pass you a chunk of Python code. 
""" @@ -111,7 +95,8 @@ def test() -> _PROMPT_OUT: system = "" pre_transforms: Set[str] = set() post_transforms: Set[str] = set() - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -123,7 +108,7 @@ def code_fix_existing_comments() -> _PROMPT_OUT: """ Fix the already existing comments in the Python code. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" Make sure that comments in the code are: - in imperative form @@ -150,14 +135,15 @@ def code_fix_existing_comments() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_improve_comments() -> _PROMPT_OUT: """ Add comments to Python code. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" - Add comments for the parts of the code that are not properly commented - E.g., every chunk of 4 or 5 lines of code add comment explaining the @@ -175,14 +161,15 @@ def code_fix_improve_comments() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_logging_statements() -> _PROMPT_OUT: """ Add comments to Python code. 
""" - system = _CONTEXT + system = _CODING_CONTEXT system += r''' When a variable `foobar` is important for debugging the code in case of failure, add statements like: @@ -208,7 +195,8 @@ def get_text_report(self) -> str: ''' pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_docstrings() -> _PROMPT_OUT: @@ -222,7 +210,7 @@ def code_fix_docstrings() -> _PROMPT_OUT: only when you are sure of the types and values of variables. If you are not sure, do not add any information. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r''' Make sure each function as a REST docstring - The first comment should be in imperative mode and fit in a single line of @@ -244,11 +232,12 @@ def _format_greeting(name: str, *, greeting: str = "Hello") -> str: ''' pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_type_hints() -> _PROMPT_OUT: - system = _CONTEXT + system = _CODING_CONTEXT system += r""" Add type hints to the Python code passed. @@ -273,14 +262,15 @@ def process_data(data: List[float], *, threshold: float = 0.5) -> List[float]: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_log_string() -> _PROMPT_OUT: """ Fix the log statements to use % formatting. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" Fix logging statements and dassert statements by using % formatting instead of f-strings (formatted string literals). 
@@ -307,7 +297,8 @@ def code_fix_log_string() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_by_using_f_strings() -> _PROMPT_OUT: @@ -316,7 +307,7 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: You are {age} years old."`. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" Fix statements like: ``` @@ -335,14 +326,15 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" Use % formatting instead of f-strings (formatted string literals). Do not print any comment, just the converted code. @@ -354,28 +346,30 @@ def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_from_imports() -> _PROMPT_OUT: """ Fix code to use imports instead of "from import" statements. 
""" - system = _CONTEXT + system = _CODING_CONTEXT system += r""" Replace any Python "from import" statement like `from X import Y` with the form `import X` and then replace the uses of `Y` with `X.Y` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: """ Fix code missing the star before optional parameters. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" When you find a Python function with optional parameters, add a star after the mandatory parameters and before the optional parameters, and make sure @@ -383,19 +377,21 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_unit_test() -> _PROMPT_OUT: """ Fix code missing the star before optional parameters. 
""" - system = _CONTEXT + system = _CODING_CONTEXT system += r""" """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_fix_csfy_style() -> _PROMPT_OUT: @@ -405,7 +401,7 @@ def code_fix_csfy_style() -> _PROMPT_OUT: """ # > grep "def code_fix" ./dev_scripts_helpers/llms/llm_prompts.py | awk '{print $2 }' function_names = [ - "code_fix_comments", + "code_fix_existing_comments", "code_fix_docstrings", "code_fix_type_hints", "code_fix_log_string", @@ -416,14 +412,17 @@ def code_fix_csfy_style() -> _PROMPT_OUT: ] system_prompts = [] for function_name in function_names: - system, pre_transforms_tmp, post_transforms_tmp = eval(function_name)() + system, pre_transforms_tmp, post_transforms_tmp, post_container_transforms_tmp = eval(function_name)() system_prompts.append(system) hdbg.dassert_eq(pre_transforms_tmp, set()) hdbg.dassert_eq(post_transforms_tmp, {"remove_code_delimiters"}) + hdbg.dassert_eq(post_container_transforms_tmp, []) + # system = "\n\n".join(system_prompts) pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -435,7 +434,7 @@ def code_review_correctness() -> _PROMPT_OUT: """ Review the code for correctness. 
""" - system = _CONTEXT + system = _CODING_CONTEXT system += r""" You will review the code and make sure it is: - correct @@ -450,14 +449,15 @@ def code_review_correctness() -> _PROMPT_OUT: """ pre_transforms = {"add_line_numbers"} post_transforms = {"convert_to_vim_cfile"} - return system, pre_transforms, post_transforms + post_container_transforms = ["convert_file_names"] + return system, pre_transforms, post_transforms, post_container_transforms def code_review_refactoring() -> _PROMPT_OUT: """ Review the code for refactoring opportunities. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, by removing redundancy and copy-paste code. @@ -468,7 +468,8 @@ def code_review_refactoring() -> _PROMPT_OUT: """ pre_transforms = {"add_line_numbers"} post_transforms = {"convert_to_vim_cfile"} - return system, pre_transforms, post_transforms + post_container_transforms = ["convert_file_names"] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -477,7 +478,7 @@ def code_review_refactoring() -> _PROMPT_OUT: def code_transform_remove_redundancy() -> _PROMPT_OUT: - system = _CONTEXT + system = _CODING_CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, by removing redundancy and copy-paste code, and apply refactoring to remove @@ -486,15 +487,17 @@ def code_transform_remove_redundancy() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms: Set[str] = set() - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_transform_apply_csfy_style() -> _PROMPT_OUT: """ Apply the style to the code using template code in `template_code.py`. 
""" - system = _CONTEXT + system = _CODING_CONTEXT file_name = "template_code.py" + file_name = os.path.join(hgit.find_helpers_root(), file_name) file_content = hio.from_file(file_name) system += rf""" Apply the style described below to the Python code @@ -509,14 +512,15 @@ def code_transform_apply_csfy_style() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_transform_apply_linter_instructions() -> _PROMPT_OUT: """ Apply the transforms passed in a cfile to the code. """ - system = _CONTEXT + system = _CODING_CONTEXT system += r""" I will pass you Python code and a list of linting errors in the format <line_number>:<error_code>:<error_message> @@ -529,7 +533,8 @@ def code_transform_apply_linter_instructions() -> _PROMPT_OUT: """ pre_transforms = {"add_line_numbers", "add_instructions"} post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -539,7 +544,7 @@ def code_transform_apply_linter_instructions() -> _PROMPT_OUT: # TODO(gp): Probably obsolete since Cursor can do it. def _get_code_unit_test_prompt(num_tests: int) -> str: - system = _CONTEXT + system = _CODING_CONTEXT system += rf""" - You will write a unit test suite for the function passed. 
@@ -556,23 +561,30 @@ def code_write_unit_test() -> _PROMPT_OUT: system = _get_code_unit_test_prompt(5) pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms def code_write_1_unit_test() -> _PROMPT_OUT: system = _get_code_unit_test_prompt(1) pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# -def md_rewrite() -> _PROMPT_OUT: - system = r""" +_MD_CONTEXT = r""" You are a proficient technical writer. + I will pass you a chunk of markdown code. + """ + +def md_rewrite() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" Rewrite the text passed as if you were writing a technical document to increase clarity and readability. Maintain the structure of the text as much as possible, in terms of bullet @@ -580,28 +592,52 @@ def md_rewrite() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms def md_summarize_short() -> _PROMPT_OUT: - system = r""" - You are a proficient technical writer. - + system = _MD_CONTEXT + system += r""" Summarize the text in less than 30 words. 
""" pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + +def md_clean_up_how_to_guide() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + Format the text passed as a how-to guide. + + An how-to-guide should explain how to solve a specific problem or achieve + a goal. + + Rewrite the markdown passed to make it a how-to guide and contain the + the following sections: + - Goal / Use Case + - Assumptions / Requirements + - Step-by-Step Instructions + - Alternatives or Optional Steps + - Troubleshooting + + Do not lose any information, just rewrite the text to make it a how-to. + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# def slide_improve() -> _PROMPT_OUT: - system = r""" - You are a proficient technical writer and expert of machine learning. - - I will give you markdown text in the next prompt + system = _MD_CONTEXT + system += r""" + I will give you markdown text You will convert the following markdown text into bullet points Make sure that the text is clean and readable """ @@ -611,48 +647,47 @@ def slide_improve() -> _PROMPT_OUT: "remove_end_of_line_periods", "remove_empty_lines", } - return system, pre_transforms, post_transforms - + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms -def slide_colorize() -> _PROMPT_OUT: - system = r""" - You are a proficient technical writer and expert of machine learning. 
- - I will give you markdown text in the next prompt - - Do not change the text or the structure of the text - - You will use multiple colors using pandoc \textcolor{COLOR}{text} to - highlight only the most important phrases in the text—those that are key to - understanding the main points. Keep the highlights minimal and avoid - over-marking. Focus on critical concepts, key data, or essential takeaways - rather than full sentences or excessive details. - - You can use the following colors in the given order: red, orange, green, - teal, cyan, blue, violet, brown - - You can highlight only 4 words or phrases in the text - - Print only the markdown without any explanation. +def slide_reduce() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + You will: + - maintain the structure of the text + - make sure that the text is clean and readable + - remove all the words that are not needed + - minimize the changes to the text """ pre_transforms: Set[str] = set() - post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms -def slide_colorize_points() -> _PROMPT_OUT: - system = r""" - You are a proficient technical writer and expert of machine learning. 
+def slide_bold() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text and you will - I will give you markdown text in the next prompt - Do not change the text or the structure of the text - - You will highlight with \textcolor{COLOR}{text} the bullet point at the - first level, without highlighting the - character - - You can use the following colors in the given order: red, orange, green, - teal, cyan, blue, violet, brown + - Highlight in bold only the most important phrases in the text—those that + are key to understanding the main points. Keep the highlights minimal and + avoid over-marking. Focus on critical concepts, key data, or essential + takeaways rather than full sentences or excessive details. Print only the markdown without any explanation. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -680,7 +715,8 @@ def scratch_categorize_topics() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - return system, pre_transforms, post_transforms + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -774,7 +810,7 @@ def _convert_to_vim_cfile(txt: str, in_file_name: str, out_file_name: str) -> st # ############################################################################# -def _to_run(action: str, transforms: Set[str]) -> bool: +def to_run(action: str, transforms: Union[Set[str], List[str]]) -> bool: """ Return True if the action should be run. 
""" @@ -811,15 +847,17 @@ def run_prompt( prompt_tags = get_prompt_tags() hdbg.dassert_in(prompt_tag, prompt_tags) python_cmd = f"{prompt_tag}()" - system_prompt, pre_transforms, post_transforms = eval(python_cmd) + system_prompt, pre_transforms, post_transforms, post_container_transforms = eval(python_cmd) + # Check return types. hdbg.dassert_isinstance(system_prompt, str) hdbg.dassert_isinstance(pre_transforms, set) hdbg.dassert_isinstance(post_transforms, set) + hdbg.dassert_isinstance(post_container_transforms, list) system_prompt = hprint.dedent(system_prompt) # 1) Run pre-transforms. - if _to_run("add_line_numbers", pre_transforms): + if to_run("add_line_numbers", pre_transforms): txt = hmarkdo.add_line_numbers(txt) - if _to_run("add_instructions", pre_transforms): + if to_run("add_instructions", pre_transforms): # Add the specific instructions to the system prompt. # E.g., # The instructions are: @@ -855,13 +893,13 @@ def run_prompt( txt_out = hopenai.response_to_txt(response) hdbg.dassert_isinstance(txt_out, str) # 3) Run post-transforms. 
- if _to_run("remove_code_delimiters", post_transforms): + if to_run("remove_code_delimiters", post_transforms): txt_out = hmarkdo.remove_code_delimiters(txt_out) - if _to_run("remove_end_of_line_periods", post_transforms): + if to_run("remove_end_of_line_periods", post_transforms): txt_out = hmarkdo.remove_end_of_line_periods(txt_out) - if _to_run("remove_empty_lines", post_transforms): + if to_run("remove_empty_lines", post_transforms): txt_out = hmarkdo.remove_empty_lines(txt_out) - if _to_run("convert_to_vim_cfile", post_transforms): + if to_run("convert_to_vim_cfile", post_transforms): hdbg.dassert_ne(in_file_name, "") hdbg.dassert_ne(out_file_name, "") txt_out = _convert_to_vim_cfile(txt_out, in_file_name, out_file_name) diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index e884db6ca..333ec85f7 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -34,17 +34,18 @@ import re from typing import List, Optional -import dev_scripts_helpers.documentation.lint_notes as dshdlino import dev_scripts_helpers.llms.llm_prompts as dshlllpr import helpers.hdbg as hdbg import helpers.hdocker as hdocker import helpers.hgit as hgit import helpers.hio as hio +import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem + _LOG = logging.getLogger(__name__) @@ -56,7 +57,8 @@ def _parse() -> argparse.ArgumentParser: description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) - hparser.add_input_output_args(parser) + hparser.add_input_output_args(parser, in_default="-", in_required=False, + out_default="-", out_required=False) hparser.add_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) # Use CRITICAL to avoid logging anything. 
@@ -239,17 +241,22 @@ def _main(parser: argparse.ArgumentParser) -> None: ) # Run post-transforms outside the container. # 1) _convert_file_names(). - prompts = dshlllpr.get_outside_container_post_transforms("convert_file_names") - if args.prompt in prompts: + post_container_transforms = dshlllpr.get_post_container_transforms(args.prompt) + if dshlllpr.to_run("convert_file_names", post_container_transforms): _convert_file_names(in_file_name, tmp_out_file_name) # 2) prettier_on_str(). out_txt = hio.from_file(tmp_out_file_name) - prompts = dshlllpr.get_outside_container_post_transforms("prettier_on_str") - if args.prompt in prompts: + if dshlllpr.to_run("format_markdown", post_container_transforms): # Note that we need to run this outside the `llm_transform` container to # avoid to do docker-in-docker in the `llm_transform` container (which # doesn't support that). - out_txt = dshdlino.prettier_on_str(out_txt) + out_txt = hmarkdo.format_markdown(out_txt) + hdbg.dassert_eq( + len(post_container_transforms), + 0, + "Not all post_transforms were run: %s", + post_container_transforms, + ) # Read the output from the container and write it to the output file from # command line (e.g., `-` for stdout). hparser.write_file(out_txt, out_file_name) diff --git a/dev_scripts_helpers/misc/extract_bounties.py b/dev_scripts_helpers/misc/extract_bounties.py index 6296ea3cf..9a6212f18 100755 --- a/dev_scripts_helpers/misc/extract_bounties.py +++ b/dev_scripts_helpers/misc/extract_bounties.py @@ -3,8 +3,8 @@ """ Given the list of potential bounties. 
-> curl -L -o output.md "https://docs.google.com/document/d/1xPgQ2tWXQuVWKkGVONjOGd5j14mXSmGeY_4d1_sGzAE/export?format=markdown" -> grep "## " output.md +> curl -L -o bounties.md "https://docs.google.com/document/d/1xPgQ2tWXQuVWKkGVONjOGd5j14mXSmGeY_4d1_sGzAE/export?format=markdown" +> grep "## " bounties.md ## **Template** ## **LLMs** ### **Capture / replay interactions with OpenAI** @@ -18,11 +18,8 @@ then creates files named with the format: level3_section,level2_section.txt Examples: -# Process a markdown file and create files in default 'output' directory -> extract_markdown_sections.py input.md - -# Process a markdown file and create files in specified directory -> extract_markdown_sections.py input.md --dst_dir my_output +> extract_bounties.py --input_file bounties.md +> extract_bounties.py --input_file bounties.md --output_file output.txt """ import argparse @@ -38,6 +35,32 @@ _LOG = logging.getLogger(__name__) +def _parse() -> argparse.ArgumentParser: + """ + Parse command-line arguments. + + :return: argument parser with all command-line arguments + """ + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--input_file", + action="store", + required=True, + help="Path to the markdown file to process", + ) + parser.add_argument( + "--output_file", + action="store", + required=False, + default=None, + help="Path to the output file to process", + ) + hparser.add_verbosity_arg(parser) + return parser + + def _clean_up(line: str) -> str: # Remove the ** and TO\_FILE line = line.replace("**", "").replace("TO\_FILE:", "") @@ -75,32 +98,6 @@ def _extract_sections(markdown_content: str) -> List[Tuple[str, str]]: return sections -def _parse() -> argparse.ArgumentParser: - """ - Parse command-line arguments. 
- - :return: argument parser with all command-line arguments - """ - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument( - "--input_file", - action="store", - required=True, - help="Path to the markdown file to process", - ) - parser.add_argument( - "--output_file", - action="store", - required=False, - default=None, - help="Path to the output file to process", - ) - hparser.add_verbosity_arg(parser) - return parser - - def _main(parser: argparse.ArgumentParser) -> None: """ Execute the main logic of the script. diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 3e0082eea..d169dfdf9 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -12,6 +12,8 @@ import helpers.hdbg as hdbg import helpers.hparser as hparser import helpers.hprint as hprint +import dev_scripts_helpers.documentation.lint_notes as dshdlino + _LOG = logging.getLogger(__name__) @@ -881,4 +883,11 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: if i > 0: result.append("") result.append(line) - return "\n".join(result) \ No newline at end of file + return "\n".join(result) + + +def format_markdown(txt: str) -> str: + txt = dshdlino.prettier_on_str(txt) + txt = remove_empty_lines_from_markdown(txt) + return txt + From 2e3bb2ff70f7b4433bf6c9e2eb18d7915bc7c186 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Wed, 7 May 2025 17:44:13 -0400 Subject: [PATCH 081/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/documentation/transform_notes.py | 3 ++- dev_scripts_helpers/llms/llm_prompts.py | 2 +- helpers/hmarkdown.py | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 929eee3fa..ddb9fc334 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -43,8 +43,9 @@ def _parse() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) + hparser.add_input_output_args(parser, in_default="-", in_required=False, + out_default="-", out_required=False) parser.add_argument("-a", "--action", required=True) - hparser.add_input_output_args(parser) parser.add_argument("-l", "--max_lev", default=5) hparser.add_verbosity_arg(parser) return parser diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 50296f448..cf7f56460 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -167,7 +167,7 @@ def code_fix_improve_comments() -> _PROMPT_OUT: def code_fix_logging_statements() -> _PROMPT_OUT: """ - Add comments to Python code. + Add logging statements to Python code. """ system = _CODING_CONTEXT system += r''' diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index d169dfdf9..802cb1f5e 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -277,6 +277,10 @@ def remove_formatting(txt: str) -> str: txt = re.sub(r"\*\*(.*?)\*\*", r"\1", txt) # Replace italic markdown syntax with plain text. txt = re.sub(r"\*(.*?)\*", r"\1", txt) + # Remove \textcolor{red}{ ... }. + txt = re.sub(r"\\textcolor\{(.*?)\}\{(.*?)\}", r"\2", txt) + # Remove \red{ ... }. 
+ txt = re.sub(r"\\\S+\{(.*?)\}", r"\1", txt) return txt From 41a3efa66da576032ef53d93e3956758164b7415 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 8 May 2025 10:46:41 -0400 Subject: [PATCH 082/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 58 +++++++--- dev_scripts_helpers/llms/llm_transform.py | 13 +++ helpers/hmarkdown.py | 33 +++--- helpers/test/test_hmarkdown.py | 123 ++++++++++++++++++---- 4 files changed, 172 insertions(+), 55 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index cf7f56460..fd91aadc3 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -383,7 +383,6 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: def code_fix_unit_test() -> _PROMPT_OUT: """ - Fix code missing the star before optional parameters. """ system = _CODING_CONTEXT system += r""" @@ -638,8 +637,36 @@ def slide_improve() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" I will give you markdown text - You will convert the following markdown text into bullet points - Make sure that the text is clean and readable + + You will: + - Convert the following markdown text into bullet points + - Make sure that the text is clean and readable + + Print only the markdown without any explanation. 
+ """ + pre_transforms: Set[str] = set() + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + +def slide_improve2() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + + You will: + - Maintain the structure of the text and keep the content of the existing text + - Remove all the words that are not needed, minimizing the changes to the + text + - Add bullet points to the text that are important or missing + - Add examples to clarify the text and help intuition + + Print only the markdown without any explanation. """ pre_transforms: Set[str] = set() post_transforms = { @@ -655,11 +682,14 @@ def slide_reduce() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" I will give you markdown text + You will: - - maintain the structure of the text - - make sure that the text is clean and readable - - remove all the words that are not needed - - minimize the changes to the text + - Maintain the structure of the text + - Make sure that the text is clean and readable + - Remove all the words that are not needed + - Minimize the changes to the text + + Print only the markdown without any explanation. """ pre_transforms: Set[str] = set() post_transforms = { @@ -674,13 +704,15 @@ def slide_reduce() -> _PROMPT_OUT: def slide_bold() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text and you will - - - Do not change the text or the structure of the text + I will give you markdown text + + You will: + - Not change the text or the structure of the text - Highlight in bold only the most important phrases in the text—those that - are key to understanding the main points. Keep the highlights minimal and - avoid over-marking. Focus on critical concepts, key data, or essential - takeaways rather than full sentences or excessive details. 
+ are key to understanding the main points + - Keep the highlights minimal and avoid over-marking. Focus on critical + concepts, key data, or essential takeaways rather than full sentences or + excessive details. Print only the markdown without any explanation. """ diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 333ec85f7..bcdf6da18 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -61,6 +61,12 @@ def _parse() -> argparse.ArgumentParser: out_default="-", out_required=False) hparser.add_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) + parser.add_argument( + "-c", + "--compare", + action="store_true", + help="Print the original and transformed", + ) # Use CRITICAL to avoid logging anything. hparser.add_verbosity_arg(parser, log_level="CRITICAL") return parser @@ -257,6 +263,13 @@ def _main(parser: argparse.ArgumentParser) -> None: "Not all post_transforms were run: %s", post_container_transforms, ) + if args.compare: + out_txt_tmp = [] + out_txt_tmp.append("#### Original ####") + out_txt_tmp.append(hio.from_file(tmp_in_file_name)) + out_txt_tmp.append("#### Transformed ####") + out_txt_tmp.append(out_txt) + out_txt = "\n\n".join(out_txt_tmp) # Read the output from the container and write it to the output file from # command line (e.g., `-` for stdout). hparser.write_file(out_txt, out_file_name) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 802cb1f5e..af5fff016 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -781,36 +781,26 @@ def selected_navigation_to_str( ] -# TODO(gp): Just bold the first-level bullets and then apply colorize_bold_text. -def colorize_first_level_bullets(markdown_text: str) -> str: +def bold_first_level_bullets(markdown_text: str) -> str: """ - Colorize first-level bullets in markdown text. + Make first-level bullets bold in markdown text. 
:param markdown_text: Input markdown text - :return: Formatted markdown text with first-level bullets colored + :return: Formatted markdown text with first-level bullets in bold """ - # Define the colors to use. - # Find all first-level bullet points (lines starting with "- " after any whitespace). lines = markdown_text.split("\n") - color_index = 0 result = [] for line in lines: # Check if this is a first-level bullet point. if re.match(r"^\s*- ", line): - # Only color first-level bullets (those with minimal indentation). - indentation = len(line) - len(line.lstrip()) - if indentation == 0: - # First-level bullet. - color = _ALL_COLORS[color_index % len(_ALL_COLORS)] - # Replace the bullet with a colored version. - # - \textcolor{red}{Linear models} - colored_line = re.sub( - r"^(\s*-\s+)(.*)", r"\1\\textcolor{" + color + r"}{\2}", line - ) - result.append(colored_line) - color_index += 1 - else: - result.append(line) + # Check if the line has bold text it in it. + if not re.search(r"\*\*", line): + # Bold first-level bullets. + indentation = len(line) - len(line.lstrip()) + if indentation == 0: + # First-level bullet, add bold markers. 
+ line = re.sub(r"^(\s*-\s+)(.*)", r"\1**\2**", line) + result.append(line) else: result.append(line) return "\n".join(result) @@ -891,6 +881,7 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: def format_markdown(txt: str) -> str: + txt = bold_first_level_bullets(txt) txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) return txt diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 613b1447f..2efdc4846 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -782,33 +782,114 @@ def test2(self) -> None: # ############################################################################# -# Test_colorize_first_level_bullets1 +# Test_bold_first_level_bullets1 # ############################################################################# -class Test_colorize_first_level_bullets1(hunitest.TestCase): - +class Test_bold_first_level_bullets1(hunitest.TestCase): def test1(self) -> None: - # Prepare inputs. - content = r""" - - Item 1 - - Subitem 1.1 - - Subitem 1.2 - - Item 2 - - Subitem 2.1 """ - content = hprint.dedent(content) - # Call tested function. - act = hmarkdo.colorize_first_level_bullets(content) - # Check output. - exp = r""" - - \textcolor{red}{Item 1} - - Subitem 1.1 - - Subitem 1.2 - - \textcolor{orange}{Item 2} - - Subitem 2.1 + Test basic first-level bullet bolding. """ - self.assert_equal(act, exp, dedent=True) + text = r""" + - First item + - Sub item + - Second item + """ + expected = r""" + - **First item** + - Sub item + - **Second item** + """ + self._test_bold_first_level_bullets(text, expected) + + def _test_bold_first_level_bullets(self, text: str, expected: str) -> None: + """ + Helper to test bold_first_level_bullets function. + """ + text = hprint.dedent(text) + actual = hmarkdo.bold_first_level_bullets(text) + self.assert_equal(actual, expected, dedent=True) + + def test2(self) -> None: + """ + Test with mixed content including non-bullet text. 
+ """ + text = r""" + Some text here + - First bullet + More text + - Second bullet + - Nested bullet + Final text + """ + expected = r""" + Some text here + - **First bullet** + More text + - **Second bullet** + - Nested bullet + Final text + """ + self._test_bold_first_level_bullets(text, expected) + + def test3(self) -> None: + """ + Test with multiple levels of nesting. + """ + text = r""" + - Top level + - Second level + - Third level + - Back to second + - Another top + """ + expected = r""" + - **Top level** + - Second level + - Third level + - Back to second + - **Another top** + """ + self._test_bold_first_level_bullets(text, expected) + + def test4(self) -> None: + """ + Test with empty lines between bullets. + """ + text = r""" + - First item + + - Second item + - Sub item + + - Third item + """ + expected = r""" + - **First item** + + - **Second item** + - Sub item + + - **Third item** + """ + self._test_bold_first_level_bullets(text, expected) + + def test5(self) -> None: + """ + Test with text that already contains some bold markers. 
+ """ + text = r""" + - First **important** point + - Sub point + - Second point with emphasis + """ + expected = r""" + - First **important** point + - Sub point + - **Second point with emphasis** + """ + self._test_bold_first_level_bullets(text, expected) # ############################################################################# From 82c2e5a7bbeed9397f792b0894a3cddbf3f7bdf7 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 8 May 2025 19:41:03 -0400 Subject: [PATCH 083/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 25 ++++++++- dev_scripts_helpers/llms/llm_transform.py | 67 ++++++++++++++--------- helpers/hdocker.py | 2 + helpers/hmarkdown.py | 1 - 4 files changed, 68 insertions(+), 27 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index fd91aadc3..20fd24712 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -660,7 +660,8 @@ def slide_improve2() -> _PROMPT_OUT: I will give you markdown text You will: - - Maintain the structure of the text and keep the content of the existing text + - Maintain the structure of the text and keep the content of the existing + text - Remove all the words that are not needed, minimizing the changes to the text - Add bullet points to the text that are important or missing @@ -678,6 +679,27 @@ def slide_improve2() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def slide_elaborate() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + + You will: + - Add bullet points to the text that are important or missing + - Add examples to clarify the text and 
help intuition + + Print only the markdown without any explanation. + """ + pre_transforms: Set[str] = set() + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + def slide_reduce() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" @@ -685,6 +707,7 @@ def slide_reduce() -> _PROMPT_OUT: You will: - Maintain the structure of the text + - Keep all the figures - Make sure that the text is clean and readable - Remove all the words that are not needed - Minimize the changes to the text diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index bcdf6da18..f4e07a826 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -67,6 +67,18 @@ def _parse() -> argparse.ArgumentParser: action="store_true", help="Print the original and transformed", ) + parser.add_argument( + "-b", + "--bold_first_level_bullets", + action="store_true", + help="Bold the first level bullets", + ) + parser.add_argument( + "-s", + "--skip-post-transforms", + action="store_true", + help="Skip the post-transforms", + ) # Use CRITICAL to avoid logging anything. hparser.add_verbosity_arg(parser, log_level="CRITICAL") return parser @@ -246,34 +258,39 @@ def _main(parser: argparse.ArgumentParser) -> None: suppress_output=suppress_output, ) # Run post-transforms outside the container. - # 1) _convert_file_names(). - post_container_transforms = dshlllpr.get_post_container_transforms(args.prompt) - if dshlllpr.to_run("convert_file_names", post_container_transforms): - _convert_file_names(in_file_name, tmp_out_file_name) - # 2) prettier_on_str(). 
- out_txt = hio.from_file(tmp_out_file_name) - if dshlllpr.to_run("format_markdown", post_container_transforms): - # Note that we need to run this outside the `llm_transform` container to - # avoid to do docker-in-docker in the `llm_transform` container (which - # doesn't support that). - out_txt = hmarkdo.format_markdown(out_txt) - hdbg.dassert_eq( - len(post_container_transforms), - 0, - "Not all post_transforms were run: %s", - post_container_transforms, - ) - if args.compare: - out_txt_tmp = [] - out_txt_tmp.append("#### Original ####") - out_txt_tmp.append(hio.from_file(tmp_in_file_name)) - out_txt_tmp.append("#### Transformed ####") - out_txt_tmp.append(out_txt) - out_txt = "\n\n".join(out_txt_tmp) + if not args.skip_post_transforms: + post_container_transforms = dshlllpr.get_post_container_transforms(args.prompt) + # + if dshlllpr.to_run("convert_file_names", post_container_transforms): + _convert_file_names(in_file_name, tmp_out_file_name) + # + out_txt = hio.from_file(tmp_out_file_name) + if dshlllpr.to_run("format_markdown", post_container_transforms): + # Note that we need to run this outside the `llm_transform` container to + # avoid to do docker-in-docker in the `llm_transform` container (which + # doesn't support that). + out_txt = hmarkdo.format_markdown(out_txt) + if args.bold_first_level_bullets: + out_txt = hmarkdo.bold_first_level_bullets(out_txt) + hdbg.dassert_eq( + len(post_container_transforms), + 0, + "Not all post_transforms were run: %s", + post_container_transforms, + ) + if args.compare: + out_txt_tmp = [] + out_txt_tmp.append("#### Original ####") + out_txt_tmp.append(hio.from_file(tmp_in_file_name)) + out_txt_tmp.append("#### Transformed ####") + out_txt_tmp.append(out_txt) + out_txt = "\n\n".join(out_txt_tmp) + else: + _LOG.info("Skipping post-transforms") + out_txt = hio.from_file(tmp_out_file_name) # Read the output from the container and write it to the output file from # command line (e.g., `-` for stdout). 
hparser.write_file(out_txt, out_file_name) - # if os.path.basename(out_file_name) == "cfile": print(out_txt) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 101009552..98577af65 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1584,6 +1584,7 @@ def run_dockerized_mermaid( use_sibling_container_for_callee=use_sibling_container_for_callee, ) mermaid_cmd = f" -i {in_file_path} -o {out_file_path}" + mermaid_cmd += " --scale 3" docker_cmd = get_docker_base_cmd(use_sudo) docker_cmd.extend( [ @@ -1593,6 +1594,7 @@ def run_dockerized_mermaid( ] ) docker_cmd = " ".join(docker_cmd) + _LOG.debug(hprint.to_str("docker_cmd")) hsystem.system(docker_cmd) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index af5fff016..ed5b4ea94 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -881,7 +881,6 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: def format_markdown(txt: str) -> str: - txt = bold_first_level_bullets(txt) txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) return txt From 6502a37c464fadb02b72140849debd7181b35a47 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 8 May 2025 20:16:34 -0400 Subject: [PATCH 084/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/transform_notes.py | 9 ++++++-- dev_scripts_helpers/llms/llm_prompts.py | 22 ++++++++++++------- dev_scripts_helpers/llms/llm_transform.py | 14 ++++++++---- dev_scripts_helpers/misc/get_url_titles.py | 18 ++++++++++++--- helpers/hmarkdown.py | 12 +++++----- helpers/hserver.py | 18 ++++++++------- helpers/test/test_hmarkdown.py | 20 ++++++++--------- 7 files changed, 71 insertions(+), 42 deletions(-) diff --git a/dev_scripts_helpers/documentation/transform_notes.py 
b/dev_scripts_helpers/documentation/transform_notes.py index ddb9fc334..55aad8f38 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -43,8 +43,13 @@ def _parse() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) - hparser.add_input_output_args(parser, in_default="-", in_required=False, - out_default="-", out_required=False) + hparser.add_input_output_args( + parser, + in_default="-", + in_required=False, + out_default="-", + out_required=False, + ) parser.add_argument("-a", "--action", required=True) parser.add_argument("-l", "--max_lev", default=5) hparser.add_verbosity_arg(parser) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 20fd24712..5d92f891e 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -6,8 +6,8 @@ import re from typing import Dict, List, Optional, Set, Tuple, Union -import helpers.hgit as hgit import helpers.hdbg as hdbg +import helpers.hgit as hgit import helpers.hio as hio import helpers.hmarkdown as hmarkdo import helpers.hprint as hprint @@ -382,8 +382,7 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: def code_fix_unit_test() -> _PROMPT_OUT: - """ - """ + """ """ system = _CODING_CONTEXT system += r""" """ @@ -411,7 +410,12 @@ def code_fix_csfy_style() -> _PROMPT_OUT: ] system_prompts = [] for function_name in function_names: - system, pre_transforms_tmp, post_transforms_tmp, post_container_transforms_tmp = eval(function_name)() + ( + system, + pre_transforms_tmp, + post_transforms_tmp, + post_container_transforms_tmp, + ) = eval(function_name)() system_prompts.append(system) hdbg.dassert_eq(pre_transforms_tmp, set()) hdbg.dassert_eq(post_transforms_tmp, {"remove_code_delimiters"}) @@ -533,7 +537,7 @@ def code_transform_apply_linter_instructions() -> _PROMPT_OUT: 
pre_transforms = {"add_line_numbers", "add_instructions"} post_transforms = {"remove_code_delimiters"} post_container_transforms: List[str] = [] - return system, pre_transforms, post_transforms, post_container_transforms + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -610,7 +614,7 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" Format the text passed as a how-to guide. - + An how-to-guide should explain how to solve a specific problem or achieve a goal. @@ -728,7 +732,7 @@ def slide_bold() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" I will give you markdown text - + You will: - Not change the text or the structure of the text - Highlight in bold only the most important phrases in the text—those that @@ -902,7 +906,9 @@ def run_prompt( prompt_tags = get_prompt_tags() hdbg.dassert_in(prompt_tag, prompt_tags) python_cmd = f"{prompt_tag}()" - system_prompt, pre_transforms, post_transforms, post_container_transforms = eval(python_cmd) + system_prompt, pre_transforms, post_transforms, post_container_transforms = ( + eval(python_cmd) + ) # Check return types. 
hdbg.dassert_isinstance(system_prompt, str) hdbg.dassert_isinstance(pre_transforms, set) diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index f4e07a826..369ed9b6a 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -45,7 +45,6 @@ import helpers.hserver as hserver import helpers.hsystem as hsystem - _LOG = logging.getLogger(__name__) @@ -57,8 +56,13 @@ def _parse() -> argparse.ArgumentParser: description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) - hparser.add_input_output_args(parser, in_default="-", in_required=False, - out_default="-", out_required=False) + hparser.add_input_output_args( + parser, + in_default="-", + in_required=False, + out_default="-", + out_required=False, + ) hparser.add_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) parser.add_argument( @@ -259,7 +263,9 @@ def _main(parser: argparse.ArgumentParser) -> None: ) # Run post-transforms outside the container. 
if not args.skip_post_transforms: - post_container_transforms = dshlllpr.get_post_container_transforms(args.prompt) + post_container_transforms = dshlllpr.get_post_container_transforms( + args.prompt + ) # if dshlllpr.to_run("convert_file_names", post_container_transforms): _convert_file_names(in_file_name, tmp_out_file_name) diff --git a/dev_scripts_helpers/misc/get_url_titles.py b/dev_scripts_helpers/misc/get_url_titles.py index d01381bac..f58b8cec9 100644 --- a/dev_scripts_helpers/misc/get_url_titles.py +++ b/dev_scripts_helpers/misc/get_url_titles.py @@ -1,8 +1,10 @@ import time +from html.parser import HTMLParser +from typing import Optional + import requests from bs4 import BeautifulSoup -from typing import Optional -from html.parser import HTMLParser + def get_page_title(url: str) -> str: try: @@ -14,6 +16,12 @@ def get_page_title(url: str) -> str: except requests.RequestException as e: return f"Request failed: {e}" + +# ############################################################################# +# TitleParser +# ############################################################################# + + class TitleParser(HTMLParser): def __init__(self) -> None: @@ -21,7 +29,9 @@ def __init__(self) -> None: self.in_title: bool = False self.title: Optional[str] = None - def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None: + def handle_starttag( + self, tag: str, attrs: list[tuple[str, Optional[str]]] + ) -> None: if tag.lower() == "title": self.in_title = True @@ -33,6 +43,7 @@ def handle_endtag(self, tag: str) -> None: if tag.lower() == "title": self.in_title = False + def get_title_streaming(url: str) -> str: try: with requests.get(url, stream=True, timeout=10) as r: @@ -46,6 +57,7 @@ def get_title_streaming(url: str) -> str: except requests.RequestException as e: return f"Request failed: {e}" + if __name__ == "__main__": files: str = """ https://news.ycombinator.com/item?id=34336386 diff --git a/helpers/hmarkdown.py 
b/helpers/hmarkdown.py index ed5b4ea94..302ba4138 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -9,11 +9,10 @@ import re from typing import Generator, List, Optional, Tuple, cast +import dev_scripts_helpers.documentation.lint_notes as dshdlino import helpers.hdbg as hdbg import helpers.hparser as hparser import helpers.hprint as hprint -import dev_scripts_helpers.documentation.lint_notes as dshdlino - _LOG = logging.getLogger(__name__) @@ -822,10 +821,12 @@ def colorize_bold_text( :return: Markdown text with colored bold sections """ # Remove any existing color formatting. - # Remove \color{text} format + # Remove \color{text} format. markdown_text = re.sub(r"\\[a-z]+\{([^}]+)\}", r"\1", markdown_text) - # Remove \textcolor{color}{text} format - markdown_text = re.sub(r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text) + # Remove \textcolor{color}{text} format. + markdown_text = re.sub( + r"\\textcolor\{[^}]+\}\{([^}]+)\}", r"\1", markdown_text + ) # Find all bold text (both ** and __ formats). bold_pattern = r"\*\*(.*?)\*\*|__(.*?)__" # matches will look like: @@ -884,4 +885,3 @@ def format_markdown(txt: str) -> str: txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) return txt - diff --git a/helpers/hserver.py b/helpers/hserver.py index 577c5b0a3..b0b3a894c 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -857,14 +857,16 @@ def enable_privileged_mode() -> bool: ret = True elif is_external_linux(): ret = True - elif is_host_mac(version="Catalina"): - # Docker for macOS Catalina supports dind. - ret = True - elif ( - is_host_mac(version="Monterey") - or is_host_mac(version="Ventura") - or is_host_mac(version="Sequoia") - ): + elif is_host_mac(): + mac_version = get_host_mac_version() + if mac_version == "Catalina": + # Docker for macOS Catalina supports dind. + ret = True + elif mac_version in ("Monterey", "Ventura", "Sequoia"): + # Docker doesn't seem to support dind for these versions of macOS. 
+ ret = False + else: + raise ValueError(f"Invalid version='{version}'") # Docker doesn't seem to support dind for these versions of macOS. ret = False elif is_prod_csfy(): diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 2efdc4846..ee141074b 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -787,6 +787,7 @@ def test2(self) -> None: class Test_bold_first_level_bullets1(hunitest.TestCase): + def test1(self) -> None: """ Test basic first-level bullet bolding. @@ -803,14 +804,6 @@ def test1(self) -> None: """ self._test_bold_first_level_bullets(text, expected) - def _test_bold_first_level_bullets(self, text: str, expected: str) -> None: - """ - Helper to test bold_first_level_bullets function. - """ - text = hprint.dedent(text) - actual = hmarkdo.bold_first_level_bullets(text) - self.assert_equal(actual, expected, dedent=True) - def test2(self) -> None: """ Test with mixed content including non-bullet text. @@ -891,6 +884,14 @@ def test5(self) -> None: """ self._test_bold_first_level_bullets(text, expected) + def _test_bold_first_level_bullets(self, text: str, expected: str) -> None: + """ + Helper to test bold_first_level_bullets function. 
+ """ + text = hprint.dedent(text) + actual = hmarkdo.bold_first_level_bullets(text) + self.assert_equal(actual, expected, dedent=True) + # ############################################################################# # Test_md_clean_up1 @@ -1638,9 +1639,6 @@ def test8(self) -> None: self.assert_equal(actual, expected) - - - # ############################################################################# # Test_format_compressed_markdown1 # ############################################################################# From 5a677fad44310c5e357d1244d07544bee670d34b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 9 May 2025 18:06:00 -0400 Subject: [PATCH 085/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/dockerized_prettier.py | 6 +- dev_scripts_helpers/llms/llm_prompts.py | 39 ++++++++- dev_scripts_helpers/llms/llm_transform.py | 6 +- helpers/hdocker.py | 11 +-- helpers/henv.py | 20 +++++ helpers/hmarkdown.py | 5 ++ helpers/hopenai.py | 35 +++++++- helpers/lib_tasks_docker.py | 85 ++++++++++--------- 8 files changed, 146 insertions(+), 61 deletions(-) diff --git a/dev_scripts_helpers/documentation/dockerized_prettier.py b/dev_scripts_helpers/documentation/dockerized_prettier.py index 650ccffd3..7c732fe1e 100755 --- a/dev_scripts_helpers/documentation/dockerized_prettier.py +++ b/dev_scripts_helpers/documentation/dockerized_prettier.py @@ -55,9 +55,7 @@ def _main(parser: argparse.ArgumentParser) -> None: # Parse everything that can be parsed and returns the rest. 
args, cmd_opts = parser.parse_known_args() hparser.init_logger_for_input_output_transform(args) - in_file_name, out_file_name = hparser.parse_input_output_args( - args, clear_screen=True - ) + in_file_name, out_file_name = hparser.parse_input_output_args(args) if not cmd_opts: cmd_opts = [] _LOG.debug("cmd_opts: %s", cmd_opts) @@ -68,7 +66,7 @@ def _main(parser: argparse.ArgumentParser) -> None: force_rebuild=args.dockerized_force_rebuild, use_sudo=args.dockerized_use_sudo, ) - _LOG.info("Output written to '%s'", args.output) + _LOG.info("Output written to '%s'", out_file_name) if __name__ == "__main__": diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 5d92f891e..4ebbc0701 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -610,7 +610,7 @@ def md_summarize_short() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def md_clean_up_how_to_guide() -> _PROMPT_OUT: +def md_clean_up_how_to_guide_doc() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" Format the text passed as a how-to guide. @@ -630,7 +630,42 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["format_markdown"] + post_container_transforms = ["prettier_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + +def md_clean_up_explanation_doc() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + Rewrite the provided markdown to transform it into an explanation document + that clearly explains a concept or idea. Follow this structure: + + - Abstract + Provide a clear and concise summary of the document in approximately 200 words. + - Introduction + Briefly introduce the topic and its relevance or context. + + - Core Concepts + List and explain the key ideas necessary to understand the topic. 
+ + - How It Works + Describe the mechanics or process in a step-by-step or logical manner. + + - Design Rationale + Explain the reasoning behind the approach, design, or structure. + + - (Optional) Trade-offs and Alternatives + Discuss other possible approaches, including their pros and cons. + + You will: + - Maintain clarity and conciseness throughout. + - Use bullet points and indentation to enhance readability everywhere + - Preserve all information from the original content — do not omit or + summarize unless it improves clarity. + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = ["prettier_markdown"] return system, pre_transforms, post_transforms, post_container_transforms diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 369ed9b6a..6e4c37cec 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -60,8 +60,6 @@ def _parse() -> argparse.ArgumentParser: parser, in_default="-", in_required=False, - out_default="-", - out_required=False, ) hparser.add_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) @@ -119,7 +117,7 @@ def _run_dockerized_llm_transform( # Install pip packages. 
RUN pip install --upgrade pip - RUN pip install --no-cache-dir PyYAML + RUN pip install --no-cache-dir PyYAML requests pandas RUN pip install --no-cache-dir openai """ @@ -271,6 +269,8 @@ def _main(parser: argparse.ArgumentParser) -> None: _convert_file_names(in_file_name, tmp_out_file_name) # out_txt = hio.from_file(tmp_out_file_name) + if dshlllpr.to_run("prettier_markdown", post_container_transforms): + out_txt = hmarkdo.prettier_markdown(out_txt) if dshlllpr.to_run("format_markdown", post_container_transforms): # Note that we need to run this outside the `llm_transform` container to # avoid to do docker-in-docker in the `llm_transform` container (which diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 98577af65..43c6b2516 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -15,6 +15,7 @@ from typing import Any, Dict, List, Optional, Tuple, cast import helpers.hdbg as hdbg +import helpers.henv as henv import helpers.hgit as hgit import helpers.hio as hio import helpers.hprint as hprint @@ -311,15 +312,7 @@ def get_docker_base_cmd(use_sudo: bool) -> List[str]: :return: The base command for running a Docker container. """ docker_executable = get_docker_executable(use_sudo) - # Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. - vars_to_pass = [ - v - for v in os.environ.keys() - if - # TODO(gp): We should only pass the `CSFY_` vars. - v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") - ] - vars_to_pass.append("OPENAI_API_KEY") + vars_to_pass = henv.get_csfy_env_vars() + henv.get_api_key_env_vars() vars_to_pass = sorted(vars_to_pass) vars_to_pass_as_str = " ".join(f"-e {v}" for v in vars_to_pass) # Build the command as a list. 
diff --git a/helpers/henv.py b/helpers/henv.py index 02e83ea19..0be1c14a6 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -131,6 +131,15 @@ def get_env_var( value = True return value + +def get_csfy_env_vars() -> List[str]: + """ + Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. + """ + # TODO(gp): We should only pass the `CSFY_` vars. + env_var_names = [ v for v in os.environ.keys() if v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") ] + return env_var_names + # TODO(gp): Extract all the env vars that start with AM_, CK_, CSFY_ and make # sure they have a description here. @@ -216,6 +225,17 @@ def get_secret_env_vars() -> List[str]: return secret_env_var_names +def get_api_key_env_vars() -> List[str]: + """ + Return the list of env vars that are API keys. + """ + # Find all the env vars that end with "_API_KEY". + env_var_names = [ + env_var for env_var in os.environ.keys() if env_var.endswith("_API_KEY") + ] + return env_var_names + + def check_env_vars() -> None: """ Make sure all the expected env vars are defined. 
diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 302ba4138..668855f67 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -881,6 +881,11 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: return "\n".join(result) +def prettier_markdown(txt: str) -> str: + txt = dshdlino.prettier_on_str(txt) + return txt + + def format_markdown(txt: str) -> str: txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) diff --git a/helpers/hopenai.py b/helpers/hopenai.py index 3894d24fb..134215c9f 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -7,7 +7,9 @@ import datetime import functools import logging +import os import re +import requests from typing import Any, Dict, List, Optional import openai @@ -139,6 +141,17 @@ def _calculate_cost( return cost +def get_models_stats(): + url = "https://openrouter.ai/api/v1/models" + response = requests.get(url) + import pprint + pprint.pprint(response.json()) + # + import pandas as pd + df = pd.read_json(response.json()) + print(df) + + @functools.lru_cache(maxsize=1024) def get_completion( user_prompt: str, @@ -160,9 +173,23 @@ def get_completion( call :return: completion text """ - model = _MODEL if model is None else model - client = OpenAI() + get_models_stats() + assert 0 + #model = _MODEL if model is None else model + #model = "anthropic/claude-3-5-sonnet" + #model = "openai/gpt-4o" + #model="meta-llama/llama-3-70b-instruct" + model="deepseek/deepseek-r1-distill-qwen-1.5b" print("OpenAI API call ... 
") + #client = OpenAI() + # print(openai.api_base) + # assert 0 + # openai.api_base ="https://openrouter.ai/api/v1" + # openai.api_key = os.environ.get("OPENROUTER_API_KEY") + client = OpenAI( + base_url="https://openrouter.ai/api/v1", # Important: Use OpenRouter's base URL + api_key=os.environ.get("OPENROUTER_API_KEY") + ) memento = htimer.dtimer_start(logging.DEBUG, "OpenAI API call") if not report_progress: completion = client.chat.completions.create( @@ -201,9 +228,9 @@ def get_completion( msg, _ = htimer.dtimer_stop(memento) print(msg) # Calculate and accumulate the cost - cost = _calculate_cost(completion, model, print_cost) + #cost = _calculate_cost(completion, model, print_cost) # Accumulate the cost. - _accumulate_cost_if_needed(cost) + #_accumulate_cost_if_needed(cost) return response diff --git a/helpers/lib_tasks_docker.py b/helpers/lib_tasks_docker.py index 0b9d2b4d7..3742697e7 100644 --- a/helpers/lib_tasks_docker.py +++ b/helpers/lib_tasks_docker.py @@ -18,6 +18,7 @@ # We want to minimize the dependencies from non-standard Python packages since # this code needs to run with minimal dependencies and without Docker. import helpers.hdbg as hdbg +import helpers.henv as henv import helpers.hdict as hdict import helpers.hdocker as hdocker import helpers.hgit as hgit @@ -556,47 +557,53 @@ def _generate_docker_compose_file( # We could do the same also with IMAGE for symmetry. # Keep the env vars in sync with what we print in `henv.get_env_vars()`. # Configure `base_app` service. + # TODO(gp): Use henv.get_env_vars() to get the env vars. + environment = [ + f"CSFY_ENABLE_DIND={CSFY_ENABLE_DIND}", + f"CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", + f"CSFY_HOST_NAME={csfy_host_name}", + f"CSFY_HOST_OS_NAME={csfy_host_os_name}", + f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", + f"CSFY_HOST_USER_NAME={csfy_host_user_name}", + "CSFY_REPO_CONFIG_CHECK=True", + # Use inferred path for `repo_config.py`. 
+ "CSFY_REPO_CONFIG_PATH=", + "CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID", + "CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION", + "CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE", + "CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET", + "CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY", + "CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN", + "CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH", + # The path of the outermost Git root on the host. + f"CSFY_HOST_GIT_ROOT_PATH={git_host_root_path}", + # The path of the outermost Git root in the Docker container. + f"CSFY_GIT_ROOT_PATH={git_root_path}", + # The path of the helpers dir in the Docker container (e.g., + # `/app`, `/app/helpers_root`) + f"CSFY_HELPERS_ROOT_PATH={helper_root_path}", + f"CSFY_USE_HELPERS_AS_NESTED_MODULE={use_helpers_as_nested_module}", + "CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN", + # This env var is used by GH Action to signal that we are inside the + # CI. It's set up by default by the GH Action runner. See: + # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables + "CSFY_CI=$CSFY_CI", + # TODO(Vlad): consider removing, locally we use our personal tokens + # from files and inside GitHub actions we use the `GH_TOKEN` + # environment variable. + ] + environment.extend([ + "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", + # Inside GitHub Actions we use `GH_TOKEN` environment variable, + # see https://cli.github.com/manual/gh_auth_login. 
+ "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", + ]) + api_key_env_vars = henv.get_api_key_env_vars() + environment.extend([f"{env_var}=${env_var}" for env_var in api_key_env_vars]) + # base_app_spec = { "cap_add": ["SYS_ADMIN"], - "environment": [ - f"CSFY_ENABLE_DIND={CSFY_ENABLE_DIND}", - f"CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", - f"CSFY_HOST_NAME={csfy_host_name}", - f"CSFY_HOST_OS_NAME={csfy_host_os_name}", - f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", - f"CSFY_HOST_USER_NAME={csfy_host_user_name}", - "CSFY_REPO_CONFIG_CHECK=True", - # Use inferred path for `repo_config.py`. - "CSFY_REPO_CONFIG_PATH=", - "CSFY_AWS_ACCESS_KEY_ID=$CSFY_AWS_ACCESS_KEY_ID", - "CSFY_AWS_DEFAULT_REGION=$CSFY_AWS_DEFAULT_REGION", - "CSFY_AWS_PROFILE=$CSFY_AWS_PROFILE", - "CSFY_AWS_S3_BUCKET=$CSFY_AWS_S3_BUCKET", - "CSFY_AWS_SECRET_ACCESS_KEY=$CSFY_AWS_SECRET_ACCESS_KEY", - "CSFY_AWS_SESSION_TOKEN=$CSFY_AWS_SESSION_TOKEN", - "CSFY_ECR_BASE_PATH=$CSFY_ECR_BASE_PATH", - # The path of the outermost Git root on the host. - f"CSFY_HOST_GIT_ROOT_PATH={git_host_root_path}", - # The path of the outermost Git root in the Docker container. - f"CSFY_GIT_ROOT_PATH={git_root_path}", - # The path of the helpers dir in the Docker container (e.g., - # `/app`, `/app/helpers_root`) - f"CSFY_HELPERS_ROOT_PATH={helper_root_path}", - f"CSFY_USE_HELPERS_AS_NESTED_MODULE={use_helpers_as_nested_module}", - "CSFY_TELEGRAM_TOKEN=$CSFY_TELEGRAM_TOKEN", - # This env var is used by GH Action to signal that we are inside the - # CI. It's set up by default by the GH Action runner. See: - # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables - "CSFY_CI=$CSFY_CI", - "OPENAI_API_KEY=$OPENAI_API_KEY", - # TODO(Vlad): consider removing, locally we use our personal tokens - # from files and inside GitHub actions we use the `GH_TOKEN` - # environment variable. 
- "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", - # Inside GitHub Actions we use `GH_TOKEN` environment variable, - # see https://cli.github.com/manual/gh_auth_login. - "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", - ], + "environment": environment, "image": "${IMAGE}", "restart": "no", "volumes": [ From 99cbbf2245a4774ee0a907305009e62e136ce00d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 9 May 2025 18:08:07 -0400 Subject: [PATCH 086/193] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/dockerized_prettier.py | 1 - dev_scripts_helpers/llms/llm_prompts.py | 35 ++++++------------- helpers/henv.py | 8 +++-- helpers/hopenai.py | 24 +++++++------ helpers/hsql_test.py | 1 + helpers/lib_tasks_docker.py | 16 +++++---- 6 files changed, 40 insertions(+), 45 deletions(-) diff --git a/dev_scripts_helpers/documentation/dockerized_prettier.py b/dev_scripts_helpers/documentation/dockerized_prettier.py index 7c732fe1e..613945179 100755 --- a/dev_scripts_helpers/documentation/dockerized_prettier.py +++ b/dev_scripts_helpers/documentation/dockerized_prettier.py @@ -31,7 +31,6 @@ import argparse import logging -import helpers.hdbg as hdbg import helpers.hdocker as hdocker import helpers.hparser as hparser diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 4ebbc0701..f56aaabbe 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -287,13 +287,7 @@ def code_fix_log_string() -> _PROMPT_OUT: ``` For instance, convert: - ``` - hdbg.dassert_in(env_var, os.environ, f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''") - ``` to - ``` - hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", 
env_var, str(os.environ.keys())) - ``` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -310,18 +304,12 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: system = _CODING_CONTEXT system += r""" Fix statements like: - ``` - raise ValueError(f"Unsupported data_source='{data_source}'") - ``` by using f-strings (formatted string literals) instead of % formatting and format strings. Do not print any comment, but just the converted code. For instance, convert: - ``` - "Hello, %s. You are %d years old." % (name, age) - ``` to """ pre_transforms: Set[str] = set() @@ -332,7 +320,9 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. + + You are %d years old." % (name, age)`. """ system = _CODING_CONTEXT system += r""" @@ -504,11 +494,8 @@ def code_transform_apply_csfy_style() -> _PROMPT_OUT: file_content = hio.from_file(file_name) system += rf""" Apply the style described below to the Python code - - ``` - {file_content} - ``` - + + Do not remove any code, just format the existing code using the style. Do not change the behavior of the code. Do not report any explanation of what you did, but just the converted code. @@ -640,21 +627,21 @@ def md_clean_up_explanation_doc() -> _PROMPT_OUT: Rewrite the provided markdown to transform it into an explanation document that clearly explains a concept or idea. Follow this structure: - - Abstract + - Abstract Provide a clear and concise summary of the document in approximately 200 words. - - Introduction + - Introduction Briefly introduce the topic and its relevance or context. - - Core Concepts + - Core Concepts List and explain the key ideas necessary to understand the topic. - - How It Works + - How It Works Describe the mechanics or process in a step-by-step or logical manner. 
- - Design Rationale + - Design Rationale Explain the reasoning behind the approach, design, or structure. - - (Optional) Trade-offs and Alternatives + - (Optional) Trade-offs and Alternatives Discuss other possible approaches, including their pros and cons. You will: diff --git a/helpers/henv.py b/helpers/henv.py index 0be1c14a6..b816ac06e 100644 --- a/helpers/henv.py +++ b/helpers/henv.py @@ -131,13 +131,17 @@ def get_env_var( value = True return value - + def get_csfy_env_vars() -> List[str]: """ Get all the environment variables that start with `AM_`, `CK_`, `CSFY_`. """ # TODO(gp): We should only pass the `CSFY_` vars. - env_var_names = [ v for v in os.environ.keys() if v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") ] + env_var_names = [ + v + for v in os.environ.keys() + if v.startswith("AM_") or v.startswith("CK_") or v.startswith("CSFY_") + ] return env_var_names diff --git a/helpers/hopenai.py b/helpers/hopenai.py index 134215c9f..0b1668062 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -9,10 +9,10 @@ import logging import os import re -import requests from typing import Any, Dict, List, Optional import openai +import requests import tqdm from openai import OpenAI from openai.types.beta.assistant import Assistant @@ -145,9 +145,11 @@ def get_models_stats(): url = "https://openrouter.ai/api/v1/models" response = requests.get(url) import pprint + pprint.pprint(response.json()) # import pandas as pd + df = pd.read_json(response.json()) print(df) @@ -175,21 +177,21 @@ def get_completion( """ get_models_stats() assert 0 - #model = _MODEL if model is None else model - #model = "anthropic/claude-3-5-sonnet" - #model = "openai/gpt-4o" - #model="meta-llama/llama-3-70b-instruct" - model="deepseek/deepseek-r1-distill-qwen-1.5b" + # model = _MODEL if model is None else model + # model = "anthropic/claude-3-5-sonnet" + # model = "openai/gpt-4o" + # model="meta-llama/llama-3-70b-instruct" + model = 
"deepseek/deepseek-r1-distill-qwen-1.5b" print("OpenAI API call ... ") - #client = OpenAI() + # client = OpenAI() # print(openai.api_base) # assert 0 # openai.api_base ="https://openrouter.ai/api/v1" # openai.api_key = os.environ.get("OPENROUTER_API_KEY") client = OpenAI( base_url="https://openrouter.ai/api/v1", # Important: Use OpenRouter's base URL - api_key=os.environ.get("OPENROUTER_API_KEY") - ) + api_key=os.environ.get("OPENROUTER_API_KEY"), + ) memento = htimer.dtimer_start(logging.DEBUG, "OpenAI API call") if not report_progress: completion = client.chat.completions.create( @@ -228,9 +230,9 @@ def get_completion( msg, _ = htimer.dtimer_stop(memento) print(msg) # Calculate and accumulate the cost - #cost = _calculate_cost(completion, model, print_cost) + # cost = _calculate_cost(completion, model, print_cost) # Accumulate the cost. - #_accumulate_cost_if_needed(cost) + # _accumulate_cost_if_needed(cost) return response diff --git a/helpers/hsql_test.py b/helpers/hsql_test.py index 0d28262e3..462543efc 100644 --- a/helpers/hsql_test.py +++ b/helpers/hsql_test.py @@ -212,6 +212,7 @@ def _create_docker_files(cls) -> None: host_port = 5432 + idx txt = f"""version: '3.5' + services: # Docker container running Postgres DB. {service_name}: diff --git a/helpers/lib_tasks_docker.py b/helpers/lib_tasks_docker.py index 3742697e7..5fdbbdbee 100644 --- a/helpers/lib_tasks_docker.py +++ b/helpers/lib_tasks_docker.py @@ -18,9 +18,9 @@ # We want to minimize the dependencies from non-standard Python packages since # this code needs to run with minimal dependencies and without Docker. import helpers.hdbg as hdbg -import helpers.henv as henv import helpers.hdict as hdict import helpers.hdocker as hdocker +import helpers.henv as henv import helpers.hgit as hgit import helpers.hio as hio import helpers.hprint as hprint @@ -592,12 +592,14 @@ def _generate_docker_compose_file( # from files and inside GitHub actions we use the `GH_TOKEN` # environment variable. 
] - environment.extend([ - "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", - # Inside GitHub Actions we use `GH_TOKEN` environment variable, - # see https://cli.github.com/manual/gh_auth_login. - "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", - ]) + environment.extend( + [ + "GH_ACTION_ACCESS_TOKEN=$GH_ACTION_ACCESS_TOKEN", + # Inside GitHub Actions we use `GH_TOKEN` environment variable, + # see https://cli.github.com/manual/gh_auth_login. + "GH_TOKEN=$GH_ACTION_ACCESS_TOKEN", + ] + ) api_key_env_vars = henv.get_api_key_env_vars() environment.extend([f"{env_var}=${env_var}" for env_var in api_key_env_vars]) # From 65b312b3a28322b5e5b1fd994bb2e8bb913f4222 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 9 May 2025 20:14:58 -0400 Subject: [PATCH 087/193] Improve --- helpers/hopenai.py | 107 +- helpers/notebooks/hopenai_tutorial.ipynb | 13045 ++++++++++++++++ helpers/notebooks/hopenai_tutorial.py | 120 + ..._template.ipynb => notebook_template.ipynb | 58 +- ...tebook_template.py => notebook_template.py | 6 +- 5 files changed, 13291 insertions(+), 45 deletions(-) create mode 100644 helpers/notebooks/hopenai_tutorial.ipynb create mode 100644 helpers/notebooks/hopenai_tutorial.py rename helpers/notebooks/notebook_template.ipynb => notebook_template.ipynb (61%) rename helpers/notebooks/notebook_template.py => notebook_template.py (86%) diff --git a/helpers/hopenai.py b/helpers/hopenai.py index 0b1668062..2a9be2562 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -141,17 +141,116 @@ def _calculate_cost( return cost -def get_models_stats(): +import pandas as pd + + +def convert_to_type(col, type_): + if type_ == "is_bool": + return col.map(lambda x: isinstance(x, bool) or + x in ["True", "False", "true", "false"] or + x in [1, 0, "1", "0"]) + elif type_ == "is_int": + return pd.to_numeric(col, errors='coerce') + elif type_ == "is_numeric": + return pd.to_numeric(col, errors='coerce') + elif type_ == "is_string": + return col.map(lambda x: 
isinstance(x, str)) + else: + raise ValueError(f"Unknown column type: {type_}") + + +def infer_column_types(col): + vals = { + 'is_numeric': pd.to_numeric(col, errors='coerce').notna(), + #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), + 'is_bool': col.map(lambda x: isinstance(x, bool)), + 'is_string': col.map(lambda x: isinstance(x, str)), + } + vals = {k: float(v.mean()) for k, v in vals.items()} + # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", + # (vals["is_numeric"] >= vals["is_string"], "is_numeric", + # "is_string")) + if vals["is_bool"] >= vals["is_numeric"]: + type_ = "is_bool" + elif vals["is_numeric"] >= vals["is_string"]: + type_ = "is_numeric" + else: + type_ = "is_string" + vals["type"] = type_ + return vals + + +def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: + return df.apply(lambda x: pd.Series(infer_column_types(x))).T + + +def convert_df(df: pd.DataFrame, *, print_invalid_values: bool = False) -> pd.DataFrame: + types = df.apply(lambda x: pd.Series(infer_column_types(x))).T + df_out = [] + for col in df.columns: + if types[col]["type"] == "is_bool": + df_out[col] = df[col].astype(bool) + elif types[col]["type"] == "is_numeric": + df_out[col] = df[col].astype(float) + elif types[col]["type"] == "is_string": + df_out[col] = df[col] + else: + raise ValueError(f"Unknown column type: {types[col]['type']}") + return df_out + + + +def get_model_stats() -> Dict[str, Any]: url = "https://openrouter.ai/api/v1/models" response = requests.get(url) + # {'architecture': {'input_modalities': ['text', 'image'], + # 'instruct_type': None, + # 'modality': 'text+image->text', + # 'output_modalities': ['text'], + # 'tokenizer': 'Mistral'}, + # 'context_length': 131072, + # 'created': 1746627341, + # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' + # 'language model designed to deliver frontier-level ' + # ... 
+ # 'broad compatibility across cloud environments.', + # 'id': 'mistralai/mistral-medium-3', + # 'name': 'Mistral: Mistral Medium 3', + # 'per_request_limits': None, + # 'pricing': {'completion': '0.000002', + # 'image': '0', + # 'internal_reasoning': '0', + # 'prompt': '0.0000004', + # 'request': '0', + # 'web_search': '0'}, + # 'supported_parameters': ['tools', + # 'tool_choice', + # 'max_tokens', + # 'temperature', + # 'top_p', + # 'stop', + # 'frequency_penalty', + # 'presence_penalty', + # 'response_format', + # 'structured_outputs', + # 'seed'], + # 'top_provider': {'context_length': 131072, + # 'is_moderated': False, + # 'max_completion_tokens': None}} + response_json = response.json() + hdbg.dassert_eq(list(response_json.keys()), ["data"]) + response_json = response_json["data"] + return response_json + + import pprint pprint.pprint(response.json()) # - import pandas as pd + #import pandas as pd - df = pd.read_json(response.json()) - print(df) + #df = pd.read_json(response.json()) + #print(df) @functools.lru_cache(maxsize=1024) diff --git a/helpers/notebooks/hopenai_tutorial.ipynb b/helpers/notebooks/hopenai_tutorial.ipynb new file mode 100644 index 000000000..4306bd1fc --- /dev/null +++ b/helpers/notebooks/hopenai_tutorial.ipynb @@ -0,0 +1,13045 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "CONTENTS:\n", + "- [Description](#description)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<a name='description'></a>\n", + "# Description\n", + "\n", + "This notebook examines ..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet jupyterlab-vim)\"\n", + "#!jupyter labextension enable" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:14.828251Z", + "start_time": "2021-04-02T18:11:14.514771Z" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import logging\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.henv as henv\n", + "import helpers.hprint as hprint" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:24.635995Z", + "start_time": "2021-04-02T18:11:18.239237Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# System signature\n", + " # Container version\n", + " container_version='1.2.0'\n", + " changelog_version='2.0.0'\n", + " # Git info\n", + " branch_name='CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI'\n", + " hash='0ca93d8c'\n", + " # Last commits:\n", + " * 0ca93d8c GP Saggese Merge ( 5 minutes ago) Fri May 9 22:09:03 2025 (HEAD -> CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI, origin/CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI)\n", + " |\\ \n", + " * | 99cbbf22 GP Saggese Lint ( 6 minutes ago) Fri May 9 22:08:07 2025 \n", + " | * 27b38c48 GP Saggese CmampTask12067_Read_docs_about_DataPull_4 (#698) ( 8 minutes ago) Fri May 9 22:06:25 2025 (origin/master, origin/HEAD, master)\n", + " # Platform info\n", + " system=Linux\n", + " node name=0f79e8b845ee\n", + " release=6.10.14-linuxkit\n", + " version=#1 SMP Thu Mar 20 16:32:56 UTC 2025\n", + " machine=aarch64\n", + " processor=aarch64\n", + " # psutils info\n", + " cpu count=8\n", + " cpu freq=None\n", + " memory=svmem(total=16749285376, 
available=14575529984, percent=13.0, used=1910644736, free=9673363456, active=2843516928, inactive=3252117504, buffers=490647552, cached=4674629632, shared=1093632, slab=694362112)\n", + " disk usage=sdiskusage(total=270233210880, used=102272610304, free=154199986176, percent=39.9)\n", + " # Docker info\n", + " has_docker=True\n", + " docker_version='28.0.4'\n", + " docker_needs_sudo=False\n", + " has_privileged_mode=True\n", + " is_inside_docker=True\n", + " has_docker_sibling_containers_support=True\n", + " has_docker_children_containers_support=True\n", + " # Packages\n", + " python: 3.12.3\n", + " cvxopt: ?\n", + " cvxpy: ?\n", + " gluonnlp: ?\n", + " gluonts: ?\n", + " joblib: 1.4.2\n", + " mxnet: ?\n", + " numpy: 2.2.3\n", + " pandas: 2.2.3\n", + " pyarrow: 19.0.1\n", + " scipy: 1.15.2\n", + " seaborn: 0.13.2\n", + " sklearn: 1.6.1\n", + " statsmodels: 0.14.4\n" + ] + } + ], + "source": [ + "print(henv.get_system_signature()[0])\n", + "\n", + "hprint.config_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2021-04-02T18:11:24.668793Z", + "start_time": "2021-04-02T18:11:24.638503Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-0f2f4a10-7f18-4858-af02-b60808101345.json'\n" + ] + } + ], + "source": [ + "# hdbg.init_logger(verbosity=logging.DEBUG)\n", + "hdbg.init_logger(verbosity=logging.INFO)\n", + "# hdbg.test_logger()\n", + "_LOG = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet openai requests)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import helpers.hopenai 
as hopenai" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "val = hopenai.get_model_stats()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'architecture': {'input_modalities': ['text', 'image'],\n", + " 'instruct_type': None,\n", + " 'modality': 'text+image->text',\n", + " 'output_modalities': ['text'],\n", + " 'tokenizer': 'Mistral'},\n", + " 'context_length': 131072,\n", + " 'created': 1746627341,\n", + " 'description': 'Mistral Medium 3 is a high-performance enterprise-grade '\n", + " 'language model designed to deliver frontier-level '\n", + " 'capabilities at significantly reduced operational cost. It '\n", + " 'balances state-of-the-art reasoning and multimodal '\n", + " 'performance with 8× lower cost compared to traditional large '\n", + " 'models, making it suitable for scalable deployments across '\n", + " 'professional and industrial use cases.\\n'\n", + " '\\n'\n", + " 'The model excels in domains such as coding, STEM reasoning, '\n", + " 'and enterprise adaptation. It supports hybrid, on-prem, and '\n", + " 'in-VPC deployments and is optimized for integration into '\n", + " 'custom workflows. 
Mistral Medium 3 offers competitive '\n", + " 'accuracy relative to larger models like Claude Sonnet '\n", + " '3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining '\n", + " 'broad compatibility across cloud environments.',\n", + " 'id': 'mistralai/mistral-medium-3',\n", + " 'name': 'Mistral: Mistral Medium 3',\n", + " 'per_request_limits': None,\n", + " 'pricing': {'completion': '0.000002',\n", + " 'image': '0',\n", + " 'internal_reasoning': '0',\n", + " 'prompt': '0.0000004',\n", + " 'request': '0',\n", + " 'web_search': '0'},\n", + " 'supported_parameters': ['tools',\n", + " 'tool_choice',\n", + " 'max_tokens',\n", + " 'temperature',\n", + " 'top_p',\n", + " 'stop',\n", + " 'frequency_penalty',\n", + " 'presence_penalty',\n", + " 'response_format',\n", + " 'structured_outputs',\n", + " 'seed'],\n", + " 'top_provider': {'context_length': 131072,\n", + " 'is_moderated': False,\n", + " 'max_completion_tokens': None}}\n" + ] + } + ], + "source": [ + "import pprint\n", + "pprint.pprint(val[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id</th>\n", + " <th>name</th>\n", + " <th>created</th>\n", + " <th>description</th>\n", + " <th>context_length</th>\n", + " <th>per_request_limits</th>\n", + " <th>supported_parameters</th>\n", + " <th>architecture_modality</th>\n", + " 
<th>architecture_input_modalities</th>\n", + " <th>architecture_output_modalities</th>\n", + " <th>architecture_tokenizer</th>\n", + " <th>architecture_instruct_type</th>\n", + " <th>pricing_prompt</th>\n", + " <th>pricing_completion</th>\n", + " <th>pricing_request</th>\n", + " <th>pricing_image</th>\n", + " <th>pricing_web_search</th>\n", + " <th>pricing_internal_reasoning</th>\n", + " <th>top_provider_context_length</th>\n", + " <th>top_provider_max_completion_tokens</th>\n", + " <th>top_provider_is_moderated</th>\n", + " <th>pricing_input_cache_read</th>\n", + " <th>pricing_input_cache_write</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>mistralai/mistral-medium-3</td>\n", + " <td>Mistral: Mistral Medium 3</td>\n", + " <td>1746627341</td>\n", + " <td>Mistral Medium 3 is a high-performance enterpr...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000004</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>google/gemini-2.5-pro-preview</td>\n", + " <td>Google: Gemini 2.5 Pro Preview</td>\n", + " <td>1746578513</td>\n", + " <td>Gemini 2.5 Pro is Google’s state-of-the-art AI...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, tools, tool_c...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.00000125</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0.00516</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>1048576.0</td>\n", + " <td>65535.0</td>\n", + " <td>False</td>\n", + " <td>0.00000031</td>\n", + " <td>0.000001625</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>arcee-ai/caller-large</td>\n", + " <td>Arcee AI: Caller Large</td>\n", + " <td>1746487869</td>\n", + " <td>Caller Large is Arcee's specialist \"function‑c...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000055</td>\n", + " <td>0.00000085</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>arcee-ai/spotlight</td>\n", + " <td>Arcee AI: Spotlight</td>\n", + " <td>1746481552</td>\n", + " <td>Spotlight is a 7‑billion‑parameter vision‑lang...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000018</td>\n", + " <td>0.00000018</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>65537.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>arcee-ai/maestro-reasoning</td>\n", + " <td>Arcee AI: Maestro Reasoning</td>\n", + " <td>1746481269</td>\n", + " <td>Maestro Reasoning is Arcee's flagship analysis...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + 
" <td>0.0000009</td>\n", + " <td>0.0000033</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>32000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>arcee-ai/virtuoso-large</td>\n", + " <td>Arcee AI: Virtuoso Large</td>\n", + " <td>1746478885</td>\n", + " <td>Virtuoso‑Large is Arcee's top‑tier general‑pur...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000075</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>64000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>arcee-ai/coder-large</td>\n", + " <td>Arcee AI: Coder Large</td>\n", + " <td>1746478663</td>\n", + " <td>Coder‑Large is a 32 B‑parameter offspring of Q...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>arcee-ai/virtuoso-medium-v2</td>\n", + " <td>Arcee AI: Virtuoso Medium V2</td>\n", + " <td>1746478434</td>\n", + " <td>Virtuoso‑Medium‑v2 is a 32 B model distilled f...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " 
<td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>arcee-ai/arcee-blitz</td>\n", + " <td>Arcee AI: Arcee Blitz</td>\n", + " <td>1746470100</td>\n", + " <td>Arcee Blitz is a 24 B‑parameter dense model di...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000045</td>\n", + " <td>0.00000075</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>microsoft/phi-4-reasoning-plus:free</td>\n", + " <td>Microsoft: Phi 4 Reasoning Plus (free)</td>\n", + " <td>1746130961</td>\n", + " <td>Phi-4-reasoning-plus is an enhanced 14B parame...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>microsoft/phi-4-reasoning-plus</td>\n", + " <td>Microsoft: Phi 4 Reasoning Plus</td>\n", + " <td>1746130961</td>\n", + " <td>Phi-4-reasoning-plus is an enhanced 14B 
parame...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000007</td>\n", + " <td>0.00000035</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>microsoft/phi-4-reasoning:free</td>\n", + " <td>Microsoft: Phi 4 Reasoning (free)</td>\n", + " <td>1746121275</td>\n", + " <td>Phi-4-reasoning is a 14B parameter dense decod...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>qwen/qwen3-0.6b-04-28:free</td>\n", + " <td>Qwen: Qwen3 0.6B (free)</td>\n", + " <td>1746043526</td>\n", + " <td>Qwen3-0.6B is a lightweight, 0.6 billion param...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>inception/mercury-coder-small-beta</td>\n", + " <td>Inception: 
Mercury Coder Small Beta</td>\n", + " <td>1746033880</td>\n", + " <td>Mercury Coder Small is the first diffusion lar...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, frequency_penalty, presence_penal...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000025</td>\n", + " <td>0.000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>qwen/qwen3-1.7b:free</td>\n", + " <td>Qwen: Qwen3 1.7B (free)</td>\n", + " <td>1746031388</td>\n", + " <td>Qwen3-1.7B is a compact, 1.7 billion parameter...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>qwen/qwen3-4b:free</td>\n", + " <td>Qwen: Qwen3 4B (free)</td>\n", + " <td>1746031104</td>\n", + " <td>Qwen3-4B is a 4 billion parameter dense langua...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>16</th>\n", + " <td>opengvlab/internvl3-14b:free</td>\n", + " <td>OpenGVLab: InternVL3 14B (free)</td>\n", + " <td>1746021355</td>\n", + " <td>The 14b version of the InternVL3 series. An ad...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p]</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>opengvlab/internvl3-2b:free</td>\n", + " <td>OpenGVLab: InternVL3 2B (free)</td>\n", + " <td>1746019807</td>\n", + " <td>The 2b version of the InternVL3 series, for an...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p]</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>deepseek/deepseek-prover-v2:free</td>\n", + " <td>DeepSeek: DeepSeek Prover V2 (free)</td>\n", + " <td>1746013094</td>\n", + " <td>DeepSeek Prover V2 is a 671B parameter model, ...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " 
<td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>deepseek/deepseek-prover-v2</td>\n", + " <td>DeepSeek: DeepSeek Prover V2</td>\n", + " <td>1746013094</td>\n", + " <td>DeepSeek Prover V2 is a 671B parameter model, ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.00000218</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>meta-llama/llama-guard-4-12b</td>\n", + " <td>Meta: Llama Guard 4 12B</td>\n", + " <td>1745975193</td>\n", + " <td>Llama Guard 4 is a Llama 4 Scout-derived multi...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000005</td>\n", + " <td>0.00000005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>qwen/qwen3-30b-a3b:free</td>\n", + " <td>Qwen: Qwen3 30B A3B (free)</td>\n", + " <td>1745878604</td>\n", + " <td>Qwen3, the latest generation in the Qwen large...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>qwen/qwen3-30b-a3b</td>\n", + " <td>Qwen: Qwen3 30B A3B</td>\n", + " <td>1745878604</td>\n", + " <td>Qwen3, the latest generation in the Qwen large...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>40960.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>qwen/qwen3-8b:free</td>\n", + " <td>Qwen: Qwen3 8B (free)</td>\n", + " <td>1745876632</td>\n", + " <td>Qwen3-8B is a dense 8.2B parameter causal lang...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>40960.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>qwen/qwen3-8b</td>\n", + " <td>Qwen: Qwen3 8B</td>\n", + " <td>1745876632</td>\n", + " <td>Qwen3-8B is a dense 8.2B parameter causal lang...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " 
<td>0.000000035</td>\n", + " <td>0.000000138</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>qwen/qwen3-14b:free</td>\n", + " <td>Qwen: Qwen3 14B (free)</td>\n", + " <td>1745876478</td>\n", + " <td>Qwen3-14B is a dense 14.8B parameter causal la...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>qwen/qwen3-14b</td>\n", + " <td>Qwen: Qwen3 14B</td>\n", + " <td>1745876478</td>\n", + " <td>Qwen3-14B is a dense 14.8B parameter causal la...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0.00000007</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>40960.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>qwen/qwen3-32b:free</td>\n", + " <td>Qwen: Qwen3 32B (free)</td>\n", + " <td>1745875945</td>\n", + " <td>Qwen3-32B is a dense 32.8B parameter causal la...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " 
<td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>qwen/qwen3-32b</td>\n", + " <td>Qwen: Qwen3 32B</td>\n", + " <td>1745875945</td>\n", + " <td>Qwen3-32B is a dense 32.8B parameter causal la...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>qwen/qwen3-235b-a22b:free</td>\n", + " <td>Qwen: Qwen3 235B A22B (free)</td>\n", + " <td>1745875757</td>\n", + " <td>Qwen3-235B-A22B is a 235B parameter mixture-of...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>qwen/qwen3-235b-a22b</td>\n", + " <td>Qwen: Qwen3 235B A22B</td>\n", + " <td>1745875757</td>\n", + " <td>Qwen3-235B-A22B is a 235B parameter mixture-of...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " 
<td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen3</td>\n", + " <td>None</td>\n", + " <td>0.00000014</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>40960.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>tngtech/deepseek-r1t-chimera:free</td>\n", + " <td>TNG: DeepSeek R1T Chimera (free)</td>\n", + " <td>1745760875</td>\n", + " <td>DeepSeek-R1T-Chimera is created by merging Dee...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>thudm/glm-z1-rumination-32b</td>\n", + " <td>THUDM: GLM Z1 Rumination 32B</td>\n", + " <td>1745601495</td>\n", + " <td>THUDM: GLM Z1 Rumination 32B is a 32B-paramete...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000024</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>thudm/glm-z1-9b:free</td>\n", + " <td>THUDM: GLM Z1 9B (free)</td>\n", + " <td>1745601140</td>\n", + " <td>GLM-Z1-9B-0414 is a 9B-parameter language 
mode...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>thudm/glm-4-9b:free</td>\n", + " <td>THUDM: GLM 4 9B (free)</td>\n", + " <td>1745601023</td>\n", + " <td>GLM-4-9B-0414 is a 9 billion parameter languag...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>microsoft/mai-ds-r1:free</td>\n", + " <td>Microsoft: MAI DS R1 (free)</td>\n", + " <td>1745194100</td>\n", + " <td>MAI-DS-R1 is a post-trained variant of DeepSee...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>thudm/glm-z1-32b:free</td>\n", + " <td>THUDM: GLM Z1 32B (free)</td>\n", + " 
<td>1744924148</td>\n", + " <td>GLM-Z1-32B-0414 is an enhanced reasoning varia...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>thudm/glm-z1-32b</td>\n", + " <td>THUDM: GLM Z1 32B</td>\n", + " <td>1744924148</td>\n", + " <td>GLM-Z1-32B-0414 is an enhanced reasoning varia...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000024</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>thudm/glm-4-32b:free</td>\n", + " <td>THUDM: GLM 4 32B (free)</td>\n", + " <td>1744920915</td>\n", + " <td>GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " 
<td>thudm/glm-4-32b</td>\n", + " <td>THUDM: GLM 4 32B</td>\n", + " <td>1744920915</td>\n", + " <td>GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000024</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>google/gemini-2.5-flash-preview</td>\n", + " <td>Google: Gemini 2.5 Flash Preview</td>\n", + " <td>1744914667</td>\n", + " <td>Gemini 2.5 Flash is Google's state-of-the-art ...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, tools, tool_c...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text, file]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0.0006192</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1048576.0</td>\n", + " <td>65535.0</td>\n", + " <td>False</td>\n", + " <td>0.0000000375</td>\n", + " <td>0.0000002333</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>google/gemini-2.5-flash-preview:thinking</td>\n", + " <td>Google: Gemini 2.5 Flash Preview (thinking)</td>\n", + " <td>1744914667</td>\n", + " <td>Gemini 2.5 Flash is Google's state-of-the-art ...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, tools, tool_c...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text, file]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000035</td>\n", + " <td>0</td>\n", + " 
<td>0.0006192</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1048576.0</td>\n", + " <td>65535.0</td>\n", + " <td>False</td>\n", + " <td>0.0000000375</td>\n", + " <td>0.0000002333</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>openai/o4-mini-high</td>\n", + " <td>OpenAI: o4 Mini High</td>\n", + " <td>1744824212</td>\n", + " <td>OpenAI o4-mini-high is the same model as [o4-m...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, seed, max_tokens, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text, file]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " <td>0</td>\n", + " <td>0.0008415</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>0.000000275</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>openai/o3</td>\n", + " <td>OpenAI: o3</td>\n", + " <td>1744823457</td>\n", + " <td>o3 is a well-rounded and powerful model across...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, seed, max_tokens, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text, file]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00001</td>\n", + " <td>0.00004</td>\n", + " <td>0</td>\n", + " <td>0.00765</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>0.0000025</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>openai/o4-mini</td>\n", + " <td>OpenAI: o4 Mini</td>\n", + " <td>1744820942</td>\n", + " <td>OpenAI o4-mini is a compact reasoning model in...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, seed, max_tokens, respons...</td>\n", + " <td>text+image->text</td>\n", + " 
<td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " <td>0</td>\n", + " <td>0.0008415</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>0.000000275</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>shisa-ai/shisa-v2-llama3.3-70b:free</td>\n", + " <td>Shisa AI: Shisa V2 Llama 3.3 70B (free)</td>\n", + " <td>1744754858</td>\n", + " <td>Shisa V2 Llama 3.3 70B is a bilingual Japanese...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>qwen/qwen2.5-coder-7b-instruct</td>\n", + " <td>Qwen: Qwen2.5 Coder 7B Instruct</td>\n", + " <td>1744734887</td>\n", + " <td>Qwen2.5-Coder-7B-Instruct is a 7B parameter in...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.00000001</td>\n", + " <td>0.00000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>openai/gpt-4.1</td>\n", + " <td>OpenAI: GPT-4.1</td>\n", + " <td>1744651385</td>\n", + " <td>GPT-4.1 is a flagship large language model opt...</td>\n", + " 
<td>1047576</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1047576.0</td>\n", + " <td>32768.0</td>\n", + " <td>True</td>\n", + " <td>0.0000005</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>openai/gpt-4.1-mini</td>\n", + " <td>OpenAI: GPT-4.1 Mini</td>\n", + " <td>1744651381</td>\n", + " <td>GPT-4.1 Mini is a mid-sized model delivering p...</td>\n", + " <td>1047576</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000004</td>\n", + " <td>0.0000016</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1047576.0</td>\n", + " <td>32768.0</td>\n", + " <td>True</td>\n", + " <td>0.0000001</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>openai/gpt-4.1-nano</td>\n", + " <td>OpenAI: GPT-4.1 Nano</td>\n", + " <td>1744651369</td>\n", + " <td>For tasks that demand low latency, GPT‑4.1 nan...</td>\n", + " <td>1047576</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1047576.0</td>\n", + " <td>32768.0</td>\n", + " <td>True</td>\n", + " <td>0.000000025</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50</th>\n", + " 
<td>eleutherai/llemma_7b</td>\n", + " <td>EleutherAI: Llemma 7b</td>\n", + " <td>1744643225</td>\n", + " <td>Llemma 7B is a language model for mathematics....</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>code-llama</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>51</th>\n", + " <td>alfredpros/codellama-7b-instruct-solidity</td>\n", + " <td>AlfredPros: CodeLLaMa 7B Instruct Solidity</td>\n", + " <td>1744641874</td>\n", + " <td>A finetuned 7 billion parameters Code LLaMA - ...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>alpaca</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>52</th>\n", + " <td>arliai/qwq-32b-arliai-rpr-v1:free</td>\n", + " <td>ArliAI: QwQ 32B RpR v1 (free)</td>\n", + " <td>1744555982</td>\n", + " <td>QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>53</th>\n", + " <td>agentica-org/deepcoder-14b-preview:free</td>\n", + " <td>Agentica: Deepcoder 14B Preview (free)</td>\n", + " <td>1744555395</td>\n", + " <td>DeepCoder-14B-Preview is a 14B parameter code ...</td>\n", + " <td>96000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>96000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>moonshotai/kimi-vl-a3b-thinking:free</td>\n", + " <td>Moonshot AI: Kimi VL A3B Thinking (free)</td>\n", + " <td>1744304841</td>\n", + " <td>Kimi-VL is a lightweight Mixture-of-Experts vi...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text+image->text</td>\n", + " <td>[image, text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>55</th>\n", + " <td>x-ai/grok-3-mini-beta</td>\n", + " <td>xAI: Grok 3 Mini Beta</td>\n", + " <td>1744240195</td>\n", + " <td>Grok 3 Mini is a lightweight, smaller thinking...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Grok</td>\n", + " 
<td>None</td>\n", + " <td>0.0000003</td>\n", + " <td>0.0000005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>56</th>\n", + " <td>x-ai/grok-3-beta</td>\n", + " <td>xAI: Grok 3 Beta</td>\n", + " <td>1744240068</td>\n", + " <td>Grok 3 is the latest model from xAI. It's thei...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Grok</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57</th>\n", + " <td>nvidia/llama-3.3-nemotron-super-49b-v1:free</td>\n", + " <td>NVIDIA: Llama 3.3 Nemotron Super 49B v1 (free)</td>\n", + " <td>1744119494</td>\n", + " <td>Llama-3.3-Nemotron-Super-49B-v1 is a large lan...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>58</th>\n", + " <td>nvidia/llama-3.3-nemotron-super-49b-v1</td>\n", + " <td>NVIDIA: Llama 3.3 Nemotron Super 49B v1</td>\n", + " <td>1744119494</td>\n", + " <td>Llama-3.3-Nemotron-Super-49B-v1 is a large lan...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, 
temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000013</td>\n", + " <td>0.0000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>nvidia/llama-3.1-nemotron-ultra-253b-v1:free</td>\n", + " <td>NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free)</td>\n", + " <td>1744115059</td>\n", + " <td>Llama-3.1-Nemotron-Ultra-253B-v1 is a large la...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>60</th>\n", + " <td>meta-llama/llama-4-maverick:free</td>\n", + " <td>Meta: Llama 4 Maverick (free)</td>\n", + " <td>1743881822</td>\n", + " <td>Llama 4 Maverick 17B Instruct (128E) is a high...</td>\n", + " <td>256000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, structured_ou...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>256000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>61</th>\n", + " <td>meta-llama/llama-4-maverick</td>\n", + " <td>Meta: Llama 4 Maverick</td>\n", + " 
<td>1743881822</td>\n", + " <td>Llama 4 Maverick 17B Instruct (128E) is a high...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000017</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0.0006684</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1048576.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>62</th>\n", + " <td>meta-llama/llama-4-scout:free</td>\n", + " <td>Meta: Llama 4 Scout (free)</td>\n", + " <td>1743881519</td>\n", + " <td>Llama 4 Scout 17B Instruct (16E) is a mixture-...</td>\n", + " <td>512000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, structured_ou...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>512000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>63</th>\n", + " <td>meta-llama/llama-4-scout</td>\n", + " <td>Meta: Llama 4 Scout</td>\n", + " <td>1743881519</td>\n", + " <td>Llama 4 Scout 17B Instruct (16E) is a mixture-...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, presence_pena...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000008</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1048576.0</td>\n", + " <td>1048576.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", 
+ " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>64</th>\n", + " <td>all-hands/openhands-lm-32b-v0.1</td>\n", + " <td>OpenHands LM 32B V0.1</td>\n", + " <td>1743613013</td>\n", + " <td>OpenHands LM v0.1 is a 32B open-source coding ...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000026</td>\n", + " <td>0.0000034</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>65</th>\n", + " <td>mistral/ministral-8b</td>\n", + " <td>Mistral: Ministral 8B</td>\n", + " <td>1743430021</td>\n", + " <td>Ministral 8B is a state-of-the-art language mo...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>66</th>\n", + " <td>deepseek/deepseek-v3-base:free</td>\n", + " <td>DeepSeek: DeepSeek V3 Base (free)</td>\n", + " <td>1743272023</td>\n", + " <td>Note that this is a base model mostly meant fo...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + 
" <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>scb10x/llama3.1-typhoon2-8b-instruct</td>\n", + " <td>Typhoon2 8B Instruct</td>\n", + " <td>1743196511</td>\n", + " <td>Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000018</td>\n", + " <td>0.00000018</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>68</th>\n", + " <td>scb10x/llama3.1-typhoon2-70b-instruct</td>\n", + " <td>Typhoon2 70B Instruct</td>\n", + " <td>1743196170</td>\n", + " <td>Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000088</td>\n", + " <td>0.00000088</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>69</th>\n", + " <td>allenai/molmo-7b-d:free</td>\n", + " <td>AllenAI: Molmo 7B D (free)</td>\n", + " <td>1743023247</td>\n", + " <td>Molmo is a family of open vision-language mode...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " 
<td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>70</th>\n", + " <td>bytedance-research/ui-tars-72b:free</td>\n", + " <td>Bytedance: UI-TARS 72B (free)</td>\n", + " <td>1743020065</td>\n", + " <td>UI-TARS 72B is an open-source multimodal AI mo...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>71</th>\n", + " <td>qwen/qwen2.5-vl-3b-instruct:free</td>\n", + " <td>Qwen: Qwen2.5 VL 3B Instruct (free)</td>\n", + " <td>1743014573</td>\n", + " <td>Qwen2.5 VL 3B is a multimodal LLM from the Qwe...</td>\n", + " <td>64000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>64000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>72</th>\n", + " <td>google/gemini-2.5-pro-exp-03-25</td>\n", + " <td>Google: Gemini 2.5 Pro Experimental</td>\n", + " <td>1742922099</td>\n", + " <td>Gemini 2.5 Pro is Google’s state-of-the-art AI...</td>\n", + " <td>1000000</td>\n", + " <td>None</td>\n", + " 
<td>[max_tokens, temperature, top_p, tools, tool_c...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000000.0</td>\n", + " <td>65535.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73</th>\n", + " <td>qwen/qwen2.5-vl-32b-instruct:free</td>\n", + " <td>Qwen: Qwen2.5 VL 32B Instruct (free)</td>\n", + " <td>1742839838</td>\n", + " <td>Qwen2.5-VL-32B is a multimodal vision-language...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, seed, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74</th>\n", + " <td>qwen/qwen2.5-vl-32b-instruct</td>\n", + " <td>Qwen: Qwen2.5 VL 32B Instruct</td>\n", + " <td>1742839838</td>\n", + " <td>Qwen2.5-VL-32B is a multimodal vision-language...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75</th>\n", + " <td>deepseek/deepseek-chat-v3-0324:free</td>\n", + " <td>DeepSeek: DeepSeek 
V3 0324 (free)</td>\n", + " <td>1742824755</td>\n", + " <td>DeepSeek V3, a 685B-parameter, mixture-of-expe...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>76</th>\n", + " <td>deepseek/deepseek-chat-v3-0324</td>\n", + " <td>DeepSeek: DeepSeek V3 0324</td>\n", + " <td>1742824755</td>\n", + " <td>DeepSeek V3, a 685B-parameter, mixture-of-expe...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, presence_pena...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000088</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>77</th>\n", + " <td>featherless/qwerky-72b:free</td>\n", + " <td>Qwerky 72B (free)</td>\n", + " <td>1742481597</td>\n", + " <td>Qwerky-72B is a linear-attention RWKV variant ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>78</th>\n", + " <td>openai/o1-pro</td>\n", + " <td>OpenAI: o1-pro</td>\n", + " <td>1742423211</td>\n", + " <td>The o1 series of models are trained with reinf...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00015</td>\n", + " <td>0.0006</td>\n", + " <td>0</td>\n", + " <td>0.21675</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>79</th>\n", + " <td>mistralai/mistral-small-3.1-24b-instruct:free</td>\n", + " <td>Mistral: Mistral Small 3.1 24B (free)</td>\n", + " <td>1742238937</td>\n", + " <td>Mistral Small 3.1 24B Instruct is an upgraded ...</td>\n", + " <td>96000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>96000.0</td>\n", + " <td>96000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>80</th>\n", + " <td>mistralai/mistral-small-3.1-24b-instruct</td>\n", + " <td>Mistral: Mistral Small 3.1 24B</td>\n", + " <td>1742238937</td>\n", + " <td>Mistral Small 3.1 24B Instruct is an upgraded ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, presence_pena...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.00000005</td>\n", + " <td>0.00000015</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>81</th>\n", + " <td>open-r1/olympiccoder-32b:free</td>\n", + " <td>OlympicCoder 32B (free)</td>\n", + " <td>1742077228</td>\n", + " <td>OlympicCoder-32B is a high-performing open-sou...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>82</th>\n", + " <td>google/gemma-3-1b-it:free</td>\n", + " <td>Google: Gemma 3 1B (free)</td>\n", + " <td>1741963556</td>\n", + " <td>Gemma 3 1B is the smallest of the new Gemma 3 ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>83</th>\n", + " <td>google/gemma-3-4b-it:free</td>\n", + " <td>Google: Gemma 3 4B (free)</td>\n", + " <td>1741905510</td>\n", + " <td>Gemma 3 introduces multimodality, supporting v...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " 
<td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>84</th>\n", + " <td>google/gemma-3-4b-it</td>\n", + " <td>Google: Gemma 3 4B</td>\n", + " <td>1741905510</td>\n", + " <td>Gemma 3 introduces multimodality, supporting v...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>85</th>\n", + " <td>ai21/jamba-1.6-large</td>\n", + " <td>AI21: Jamba 1.6 Large</td>\n", + " <td>1741905173</td>\n", + " <td>AI21 Jamba Large 1.6 is a high-performance hyb...</td>\n", + " <td>256000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>256000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>86</th>\n", + " <td>ai21/jamba-1.6-mini</td>\n", + " <td>AI21: Jamba Mini 1.6</td>\n", + " <td>1741905171</td>\n", + " <td>AI21 Jamba Mini 1.6 is a hybrid foundation mod...</td>\n", + " <td>256000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, 
...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>256000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>87</th>\n", + " <td>google/gemma-3-12b-it:free</td>\n", + " <td>Google: Gemma 3 12B (free)</td>\n", + " <td>1741902625</td>\n", + " <td>Gemma 3 introduces multimodality, supporting v...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>88</th>\n", + " <td>google/gemma-3-12b-it</td>\n", + " <td>Google: Gemma 3 12B</td>\n", + " <td>1741902625</td>\n", + " <td>Gemma 3 introduces multimodality, supporting v...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>89</th>\n", + " <td>cohere/command-a</td>\n", + " <td>Cohere: Command A</td>\n", + " <td>1741894342</td>\n", + " <td>Command A is an open-weights 111B parameter 
mo...</td>\n", + " <td>256000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>256000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>90</th>\n", + " <td>openai/gpt-4o-mini-search-preview</td>\n", + " <td>OpenAI: GPT-4o-mini Search Preview</td>\n", + " <td>1741818122</td>\n", + " <td>GPT-4o mini Search Preview is a specialized mo...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[web_search_options, max_tokens, response_form...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " <td>0.0275</td>\n", + " <td>0.000217</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>91</th>\n", + " <td>openai/gpt-4o-search-preview</td>\n", + " <td>OpenAI: GPT-4o Search Preview</td>\n", + " <td>1741817949</td>\n", + " <td>GPT-4o Search Previewis a specialized model fo...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[web_search_options, max_tokens, response_form...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0.035</td>\n", + " <td>0.003613</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>92</th>\n", + " 
<td>rekaai/reka-flash-3:free</td>\n", + " <td>Reka: Flash 3 (free)</td>\n", + " <td>1741812813</td>\n", + " <td>Reka Flash 3 is a general-purpose, instruction...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>93</th>\n", + " <td>google/gemma-3-27b-it:free</td>\n", + " <td>Google: Gemma 3 27B (free)</td>\n", + " <td>1741756359</td>\n", + " <td>Gemma 3 introduces multimodality, supporting v...</td>\n", + " <td>96000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>96000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94</th>\n", + " <td>google/gemma-3-27b-it</td>\n", + " <td>Google: Gemma 3 27B</td>\n", + " <td>1741756359</td>\n", + " <td>Gemma 3 introduces multimodality, supporting v...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0.0000256</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " 
<td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>95</th>\n", + " <td>thedrummer/anubis-pro-105b-v1</td>\n", + " <td>TheDrummer: Anubis Pro 105B V1</td>\n", + " <td>1741642290</td>\n", + " <td>Anubis Pro 105B v1 is an expanded and refined ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, presence_pena...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>131072.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>96</th>\n", + " <td>thedrummer/skyfall-36b-v2</td>\n", + " <td>TheDrummer: Skyfall 36B V2</td>\n", + " <td>1741636566</td>\n", + " <td>Skyfall 36B v2 is an enhanced iteration of Mis...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, presence_pena...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>97</th>\n", + " <td>microsoft/phi-4-multimodal-instruct</td>\n", + " <td>Microsoft: Phi 4 Multimodal Instruct</td>\n", + " <td>1741396284</td>\n", + " <td>Phi-4 Multimodal Instruct is a versatile 5.6B ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " 
<td>0.00000005</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0.00017685</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>98</th>\n", + " <td>perplexity/sonar-reasoning-pro</td>\n", + " <td>Perplexity: Sonar Reasoning Pro</td>\n", + " <td>1741313308</td>\n", + " <td>Note: Sonar Pro pricing includes Perplexity se...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.005</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>99</th>\n", + " <td>perplexity/sonar-pro</td>\n", + " <td>Perplexity: Sonar Pro</td>\n", + " <td>1741312423</td>\n", + " <td>Note: Sonar Pro pricing includes Perplexity se...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, web_search_op...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.005</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>100</th>\n", + " <td>perplexity/sonar-deep-research</td>\n", + " <td>Perplexity: Sonar Deep Research</td>\n", + " <td>1741311246</td>\n", + " <td>Sonar Deep Research is a research-focused mode...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " 
<td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.005</td>\n", + " <td>0.000003</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>101</th>\n", + " <td>deepseek/deepseek-r1-zero:free</td>\n", + " <td>DeepSeek: DeepSeek R1 Zero (free)</td>\n", + " <td>1741297434</td>\n", + " <td>DeepSeek-R1-Zero is a model trained via large-...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>102</th>\n", + " <td>qwen/qwq-32b:free</td>\n", + " <td>Qwen: QwQ 32B (free)</td>\n", + " <td>1741208814</td>\n", + " <td>QwQ is the reasoning model of the Qwen series....</td>\n", + " <td>40000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>qwq</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40000.0</td>\n", + " <td>40000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>103</th>\n", + " <td>qwen/qwq-32b</td>\n", + " <td>Qwen: QwQ 32B</td>\n", + " <td>1741208814</td>\n", + " <td>QwQ is the reasoning model 
of the Qwen series....</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>qwq</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>104</th>\n", + " <td>moonshotai/moonlight-16b-a3b-instruct:free</td>\n", + " <td>Moonshot AI: Moonlight 16B A3B Instruct (free)</td>\n", + " <td>1740719801</td>\n", + " <td>Moonlight-16B-A3B-Instruct is a 16B-parameter ...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>105</th>\n", + " <td>nousresearch/deephermes-3-llama-3-8b-preview:free</td>\n", + " <td>Nous: DeepHermes 3 Llama 3 8B Preview (free)</td>\n", + " <td>1740719372</td>\n", + " <td>DeepHermes 3 Preview is the latest version of ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>106</th>\n", + " <td>openai/gpt-4.5-preview</td>\n", + " <td>OpenAI: GPT-4.5 (Preview)</td>\n", + " <td>1740687810</td>\n", + " <td>GPT-4.5 (Preview) is a research preview of Ope...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000075</td>\n", + " <td>0.00015</td>\n", + " <td>0</td>\n", + " <td>0.108375</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>0.0000375</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>107</th>\n", + " <td>google/gemini-2.0-flash-lite-001</td>\n", + " <td>Google: Gemini 2.0 Flash Lite</td>\n", + " <td>1740506212</td>\n", + " <td>Gemini 2.0 Flash Lite offers a significantly f...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.000000075</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1048576.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>108</th>\n", + " <td>anthropic/claude-3.7-sonnet</td>\n", + " <td>Anthropic: Claude 3.7 Sonnet</td>\n", + " <td>1740422110</td>\n", + " <td>Claude 3.7 Sonnet is an advanced large languag...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " 
<td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>64000.0</td>\n", + " <td>False</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>109</th>\n", + " <td>anthropic/claude-3.7-sonnet:thinking</td>\n", + " <td>Anthropic: Claude 3.7 Sonnet (thinking)</td>\n", + " <td>1740422110</td>\n", + " <td>Claude 3.7 Sonnet is an advanced large languag...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>64000.0</td>\n", + " <td>False</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>110</th>\n", + " <td>anthropic/claude-3.7-sonnet:beta</td>\n", + " <td>Anthropic: Claude 3.7 Sonnet (self-moderated)</td>\n", + " <td>1740422110</td>\n", + " <td>Claude 3.7 Sonnet is an advanced large languag...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, stop, reasoning, inc...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>128000.0</td>\n", + " <td>False</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>111</th>\n", + " <td>perplexity/r1-1776</td>\n", + " <td>Perplexity: R1 1776</td>\n", + " <td>1740004929</td>\n", + " <td>R1 1776 is a version of DeepSeek-R1 that has b...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " 
<td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>112</th>\n", + " <td>mistralai/mistral-saba</td>\n", + " <td>Mistral: Saba</td>\n", + " <td>1739803239</td>\n", + " <td>Mistral Saba is a 24B-parameter language model...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>113</th>\n", + " <td>cognitivecomputations/dolphin3.0-r1-mistral-24...</td>\n", + " <td>Dolphin3.0 R1 Mistral 24B (free)</td>\n", + " <td>1739462498</td>\n", + " <td>Dolphin 3.0 R1 is the next generation of the D...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>114</th>\n", + " <td>cognitivecomputations/dolphin3.0-mistral-24b:free</td>\n", + " <td>Dolphin3.0 Mistral 24B 
(free)</td>\n", + " <td>1739462019</td>\n", + " <td>Dolphin 3.0 is the next generation of the Dolp...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>115</th>\n", + " <td>meta-llama/llama-guard-3-8b</td>\n", + " <td>Llama Guard 3 8B</td>\n", + " <td>1739401318</td>\n", + " <td>Llama Guard 3 is a Llama-3.1-8B pretrained mod...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>none</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>116</th>\n", + " <td>openai/o3-mini-high</td>\n", + " <td>OpenAI: o3 Mini High</td>\n", + " <td>1739372611</td>\n", + " <td>OpenAI o3-mini-high is the same model as [o3-m...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, seed, max_tokens, respons...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>0.00000055</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>117</th>\n", + " <td>deepseek/deepseek-r1-distill-llama-8b</td>\n", + " <td>DeepSeek: R1 Distill Llama 8B</td>\n", + " <td>1738937718</td>\n", + " <td>DeepSeek R1 Distill Llama 8B is a distilled la...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000004</td>\n", + " <td>0.00000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>32000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>118</th>\n", + " <td>google/gemini-2.0-flash-001</td>\n", + " <td>Google: Gemini 2.0 Flash</td>\n", + " <td>1738769413</td>\n", + " <td>Gemini Flash 2.0 offers a significantly faster...</td>\n", + " <td>1000000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000004</td>\n", + " <td>0</td>\n", + " <td>0.0000258</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.000000025</td>\n", + " <td>0.0000001833</td>\n", + " </tr>\n", + " <tr>\n", + " <th>119</th>\n", + " <td>qwen/qwen-vl-plus</td>\n", + " <td>Qwen: Qwen VL Plus</td>\n", + " <td>1738731255</td>\n", + " <td>Qwen's Enhanced Large Visual Language Model. 
S...</td>\n", + " <td>7500</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, seed, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.00000021</td>\n", + " <td>0.00000063</td>\n", + " <td>0</td>\n", + " <td>0.0002688</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>7500.0</td>\n", + " <td>1500.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>120</th>\n", + " <td>aion-labs/aion-1.0</td>\n", + " <td>AionLabs: Aion-1.0</td>\n", + " <td>1738697557</td>\n", + " <td>Aion-1.0 is a multi-model system designed for ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.000004</td>\n", + " <td>0.000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>121</th>\n", + " <td>aion-labs/aion-1.0-mini</td>\n", + " <td>AionLabs: Aion-1.0-Mini</td>\n", + " <td>1738697107</td>\n", + " <td>Aion-1.0-Mini 32B parameter model is a distill...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000007</td>\n", + " <td>0.0000014</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>122</th>\n", + " 
<td>aion-labs/aion-rp-llama-3.1-8b</td>\n", + " <td>AionLabs: Aion-RP 1.0 (8B)</td>\n", + " <td>1738696718</td>\n", + " <td>Aion-RP-Llama-3.1-8B ranks the highest in the ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>123</th>\n", + " <td>qwen/qwen-vl-max</td>\n", + " <td>Qwen: Qwen VL Max</td>\n", + " <td>1738434304</td>\n", + " <td>Qwen VL Max is a visual understanding model wi...</td>\n", + " <td>7500</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, seed, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000032</td>\n", + " <td>0</td>\n", + " <td>0.001024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>7500.0</td>\n", + " <td>1500.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>124</th>\n", + " <td>qwen/qwen-turbo</td>\n", + " <td>Qwen: Qwen-Turbo</td>\n", + " <td>1738410974</td>\n", + " <td>Qwen-Turbo, based on Qwen2.5, is a 1M context ...</td>\n", + " <td>1000000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", 
+ " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>125</th>\n", + " <td>qwen/qwen2.5-vl-72b-instruct:free</td>\n", + " <td>Qwen: Qwen2.5 VL 72B Instruct (free)</td>\n", + " <td>1738410311</td>\n", + " <td>Qwen2.5-VL is proficient in recognizing common...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, seed, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>126</th>\n", + " <td>qwen/qwen2.5-vl-72b-instruct</td>\n", + " <td>Qwen: Qwen2.5 VL 72B Instruct</td>\n", + " <td>1738410311</td>\n", + " <td>Qwen2.5-VL is proficient in recognizing common...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000075</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>127</th>\n", + " <td>qwen/qwen-plus</td>\n", + " <td>Qwen: Qwen-Plus</td>\n", + " <td>1738409840</td>\n", + " <td>Qwen-Plus, based on the Qwen2.5 foundation mod...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.0000004</td>\n", + " <td>0.0000012</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>128</th>\n", + " <td>qwen/qwen-max</td>\n", + " <td>Qwen: Qwen-Max</td>\n", + " <td>1738402289</td>\n", + " <td>Qwen-Max, based on Qwen2.5, provides the best ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.0000016</td>\n", + " <td>0.0000064</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>129</th>\n", + " <td>openai/o3-mini</td>\n", + " <td>OpenAI: o3 Mini</td>\n", + " <td>1738351721</td>\n", + " <td>OpenAI o3-mini is a cost-efficient language mo...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, seed, max_tokens, respons...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>0.00000055</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>130</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-1.5b</td>\n", + " <td>DeepSeek: R1 Distill Qwen 1.5B</td>\n", + " <td>1738328067</td>\n", + " <td>DeepSeek R1 Distill Qwen 1.5B is a distilled l...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", 
+ " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000018</td>\n", + " <td>0.00000018</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>131</th>\n", + " <td>mistralai/mistral-small-24b-instruct-2501:free</td>\n", + " <td>Mistral: Mistral Small 3 (free)</td>\n", + " <td>1738255409</td>\n", + " <td>Mistral Small 3 is a 24B-parameter language mo...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>132</th>\n", + " <td>mistralai/mistral-small-24b-instruct-2501</td>\n", + " <td>Mistral: Mistral Small 3</td>\n", + " <td>1738255409</td>\n", + " <td>Mistral Small 3 is a 24B-parameter language mo...</td>\n", + " <td>28000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.00000006</td>\n", + " <td>0.00000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>28000.0</td>\n", + " <td>14000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>133</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-32b:free</td>\n", + " <td>DeepSeek: R1 Distill Qwen 32B (free)</td>\n", + " <td>1738194830</td>\n", + " <td>DeepSeek R1 Distill Qwen 32B is a distilled la...</td>\n", 
+ " <td>16000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16000.0</td>\n", + " <td>16000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-32b</td>\n", + " <td>DeepSeek: R1 Distill Qwen 32B</td>\n", + " <td>1738194830</td>\n", + " <td>DeepSeek R1 Distill Qwen 32B is a distilled la...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000012</td>\n", + " <td>0.00000018</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>135</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-14b:free</td>\n", + " <td>DeepSeek: R1 Distill Qwen 14B (free)</td>\n", + " <td>1738193940</td>\n", + " <td>DeepSeek R1 Distill Qwen 14B is a distilled la...</td>\n", + " <td>64000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>64000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>136</th>\n", + " 
<td>deepseek/deepseek-r1-distill-qwen-14b</td>\n", + " <td>DeepSeek: R1 Distill Qwen 14B</td>\n", + " <td>1738193940</td>\n", + " <td>DeepSeek R1 Distill Qwen 14B is a distilled la...</td>\n", + " <td>64000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000015</td>\n", + " <td>0.00000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>64000.0</td>\n", + " <td>64000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>137</th>\n", + " <td>perplexity/sonar-reasoning</td>\n", + " <td>Perplexity: Sonar Reasoning</td>\n", + " <td>1738131107</td>\n", + " <td>Sonar Reasoning is a reasoning model provided ...</td>\n", + " <td>127000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.000001</td>\n", + " <td>0.000005</td>\n", + " <td>0.005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>127000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>138</th>\n", + " <td>perplexity/sonar</td>\n", + " <td>Perplexity: Sonar</td>\n", + " <td>1738013808</td>\n", + " <td>Sonar is lightweight, affordable, fast, and si...</td>\n", + " <td>127072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, web_search_op...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.000001</td>\n", + " <td>0.000001</td>\n", + " <td>0.005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>127072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>139</th>\n", + " <td>liquid/lfm-7b</td>\n", + " <td>Liquid: LFM 7B</td>\n", + " <td>1737806883</td>\n", + " <td>LFM-7B, a new best-in-class language model. LF...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>chatml</td>\n", + " <td>0.00000001</td>\n", + " <td>0.00000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>140</th>\n", + " <td>liquid/lfm-3b</td>\n", + " <td>Liquid: LFM 3B</td>\n", + " <td>1737806501</td>\n", + " <td>Liquid's LFM 3B delivers incredible performanc...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>chatml</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>141</th>\n", + " <td>deepseek/deepseek-r1-distill-llama-70b:free</td>\n", + " <td>DeepSeek: R1 Distill Llama 70B (free)</td>\n", + " <td>1737663169</td>\n", + " <td>DeepSeek R1 Distill Llama 70B is a distilled l...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>142</th>\n", + " <td>deepseek/deepseek-r1-distill-llama-70b</td>\n", + " <td>DeepSeek: R1 Distill Llama 70B</td>\n", + " <td>1737663169</td>\n", + " <td>DeepSeek R1 Distill Llama 70B is a distilled l...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>143</th>\n", + " <td>deepseek/deepseek-r1:free</td>\n", + " <td>DeepSeek: R1 (free)</td>\n", + " <td>1737381095</td>\n", + " <td>DeepSeek R1 is here: Performance on par with [...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, reasoning, include_reasoning, tem...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>144</th>\n", + " <td>deepseek/deepseek-r1</td>\n", + " <td>DeepSeek: R1</td>\n", + " <td>1737381095</td>\n", + " <td>DeepSeek R1 is here: Performance on par with [...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, reasoning, in...</td>\n", + " <td>text->text</td>\n", + " 
<td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.0000005</td>\n", + " <td>0.00000218</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>163840.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>145</th>\n", + " <td>minimax/minimax-01</td>\n", + " <td>MiniMax: MiniMax-01</td>\n", + " <td>1736915462</td>\n", + " <td>MiniMax-01 is a combines MiniMax-Text-01 for t...</td>\n", + " <td>1000192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p]</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000011</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000192.0</td>\n", + " <td>1000192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>146</th>\n", + " <td>mistralai/codestral-2501</td>\n", + " <td>Mistral: Codestral 2501</td>\n", + " <td>1736895522</td>\n", + " <td>[Mistral](/mistralai)'s cutting-edge language ...</td>\n", + " <td>262144</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000003</td>\n", + " <td>0.0000009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>262144.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>147</th>\n", + " <td>microsoft/phi-4</td>\n", + " <td>Microsoft: Phi 4</td>\n", + " <td>1736489872</td>\n", + " <td>[Microsoft Research](/microsoft) Phi-4 is desi...</td>\n", + " <td>16384</td>\n", + " 
<td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000007</td>\n", + " <td>0.00000014</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>148</th>\n", + " <td>deepseek/deepseek-chat:free</td>\n", + " <td>DeepSeek: DeepSeek V3 (free)</td>\n", + " <td>1735241320</td>\n", + " <td>DeepSeek-V3 is the latest model from the DeepS...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>149</th>\n", + " <td>deepseek/deepseek-chat</td>\n", + " <td>DeepSeek: DeepSeek V3</td>\n", + " <td>1735241320</td>\n", + " <td>DeepSeek-V3 is the latest model from the DeepS...</td>\n", + " <td>163840</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>DeepSeek</td>\n", + " <td>None</td>\n", + " <td>0.00000038</td>\n", + " <td>0.00000089</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>163840.0</td>\n", + " <td>163840.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>150</th>\n", + " <td>sao10k/l3.3-euryale-70b</td>\n", + " <td>Sao10K: Llama 3.3 Euryale 70B</td>\n", + " 
<td>1734535928</td>\n", + " <td>Euryale L3.3 70B is a model focused on creativ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000007</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>151</th>\n", + " <td>openai/o1</td>\n", + " <td>OpenAI: o1</td>\n", + " <td>1734459999</td>\n", + " <td>The latest and strongest model family from Ope...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, seed, max_tokens, respons...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000015</td>\n", + " <td>0.00006</td>\n", + " <td>0</td>\n", + " <td>0.021675</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>100000.0</td>\n", + " <td>True</td>\n", + " <td>0.0000075</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>152</th>\n", + " <td>eva-unit-01/eva-llama-3.33-70b</td>\n", + " <td>EVA Llama 3.33 70B</td>\n", + " <td>1734377303</td>\n", + " <td>EVA Llama 3.33 70b is a roleplay and storywrit...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>153</th>\n", + " <td>x-ai/grok-2-vision-1212</td>\n", + " <td>xAI: Grok 2 Vision 1212</td>\n", + " <td>1734237338</td>\n", + " <td>Grok 2 Vision 1212 advances image-based AI wit...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Grok</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0.0036</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>154</th>\n", + " <td>x-ai/grok-2-1212</td>\n", + " <td>xAI: Grok 2 1212</td>\n", + " <td>1734232814</td>\n", + " <td>Grok 2 1212 introduces significant enhancement...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Grok</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>155</th>\n", + " <td>cohere/command-r7b-12-2024</td>\n", + " <td>Cohere: Command R7B (12-2024)</td>\n", + " <td>1734158152</td>\n", + " <td>Command R7B (12-2024) is a small, fast update ...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.0000000375</td>\n", + " <td>0.00000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>156</th>\n", + " <td>google/gemini-2.0-flash-exp:free</td>\n", + " <td>Google: Gemini 2.0 Flash Experimental (free)</td>\n", + " <td>1733937523</td>\n", + " <td>Gemini Flash 2.0 offers a significantly faster...</td>\n", + " <td>1048576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop]</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1048576.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>157</th>\n", + " <td>meta-llama/llama-3.3-70b-instruct:free</td>\n", + " <td>Meta: Llama 3.3 70B Instruct (free)</td>\n", + " <td>1733506137</td>\n", + " <td>The Meta Llama 3.3 multilingual large language...</td>\n", + " <td>8000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8000.0</td>\n", + " <td>8000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>158</th>\n", + " <td>meta-llama/llama-3.3-70b-instruct</td>\n", + " <td>Meta: Llama 3.3 70B Instruct</td>\n", + " <td>1733506137</td>\n", + " <td>The Meta Llama 3.3 multilingual large language...</td>\n", + " <td>131000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " 
<td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000009</td>\n", + " <td>0.00000035</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131000.0</td>\n", + " <td>131000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>159</th>\n", + " <td>amazon/nova-lite-v1</td>\n", + " <td>Amazon: Nova Lite 1.0</td>\n", + " <td>1733437363</td>\n", + " <td>Amazon Nova Lite 1.0 is a very low-cost multim...</td>\n", + " <td>300000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, top_p, top_k,...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Nova</td>\n", + " <td>None</td>\n", + " <td>0.00000006</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0.00009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>300000.0</td>\n", + " <td>5120.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>160</th>\n", + " <td>amazon/nova-micro-v1</td>\n", + " <td>Amazon: Nova Micro 1.0</td>\n", + " <td>1733437237</td>\n", + " <td>Amazon Nova Micro 1.0 is a text-only model tha...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, top_p, top_k,...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Nova</td>\n", + " <td>None</td>\n", + " <td>0.000000035</td>\n", + " <td>0.00000014</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>5120.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>161</th>\n", + " <td>amazon/nova-pro-v1</td>\n", + " <td>Amazon: Nova Pro 1.0</td>\n", + " <td>1733436303</td>\n", + " <td>Amazon Nova Pro 1.0 is a capable multimodal mo...</td>\n", + " <td>300000</td>\n", + " <td>None</td>\n", + " <td>[tools, 
max_tokens, temperature, top_p, top_k,...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Nova</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000032</td>\n", + " <td>0</td>\n", + " <td>0.0012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>300000.0</td>\n", + " <td>5120.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>162</th>\n", + " <td>qwen/qwq-32b-preview:free</td>\n", + " <td>Qwen: QwQ 32B Preview (free)</td>\n", + " <td>1732754541</td>\n", + " <td>QwQ-32B-Preview is an experimental research mo...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>163</th>\n", + " <td>qwen/qwq-32b-preview</td>\n", + " <td>Qwen: QwQ 32B Preview</td>\n", + " <td>1732754541</td>\n", + " <td>QwQ-32B-Preview is an experimental research mo...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>deepseek-r1</td>\n", + " <td>0.00000009</td>\n", + " <td>0.00000027</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>164</th>\n", + " <td>google/learnlm-1.5-pro-experimental:free</td>\n", + " <td>Google: LearnLM 1.5 Pro Experimental (free)</td>\n", + 
" <td>1732216551</td>\n", + " <td>An experimental version of [Gemini 1.5 Pro](/g...</td>\n", + " <td>40960</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>40960.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>165</th>\n", + " <td>eva-unit-01/eva-qwen-2.5-72b</td>\n", + " <td>EVA Qwen2.5 72B</td>\n", + " <td>1732210606</td>\n", + " <td>EVA Qwen2.5 72B is a roleplay and storywriting...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>166</th>\n", + " <td>openai/gpt-4o-2024-11-20</td>\n", + " <td>OpenAI: GPT-4o (2024-11-20)</td>\n", + " <td>1732127594</td>\n", + " <td>The 2024-11-20 version of GPT-4o offers a leve...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0.003613</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>0.00000125</td>\n", + " 
<td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>167</th>\n", + " <td>mistralai/mistral-large-2411</td>\n", + " <td>Mistral Large 2411</td>\n", + " <td>1731978685</td>\n", + " <td>Mistral Large 2 2411 is an update of [Mistral ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>168</th>\n", + " <td>mistralai/mistral-large-2407</td>\n", + " <td>Mistral Large 2407</td>\n", + " <td>1731978415</td>\n", + " <td>This is Mistral AI's flagship model, Mistral L...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>169</th>\n", + " <td>mistralai/pixtral-large-2411</td>\n", + " <td>Mistral: Pixtral Large 2411</td>\n", + " <td>1731977388</td>\n", + " <td>Pixtral Large is a 124B parameter, open-weight...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0.002888</td>\n", 
+ " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>170</th>\n", + " <td>x-ai/grok-vision-beta</td>\n", + " <td>xAI: Grok Vision Beta</td>\n", + " <td>1731976624</td>\n", + " <td>Grok Vision Beta is xAI's experimental languag...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Grok</td>\n", + " <td>None</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>infermatic/mn-inferor-12b</td>\n", + " <td>Infermatic: Mistral Nemo Inferor 12B</td>\n", + " <td>1731464428</td>\n", + " <td>Inferor 12B is a merge of top roleplay models,...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>172</th>\n", + " <td>qwen/qwen-2.5-coder-32b-instruct:free</td>\n", + " <td>Qwen2.5 Coder 32B Instruct (free)</td>\n", + " <td>1731368400</td>\n", + " <td>Qwen2.5-Coder is the latest series of Code-Spe...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " 
<td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>qwen/qwen-2.5-coder-32b-instruct</td>\n", + " <td>Qwen2.5 Coder 32B Instruct</td>\n", + " <td>1731368400</td>\n", + " <td>Qwen2.5-Coder is the latest series of Code-Spe...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.00000006</td>\n", + " <td>0.00000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>174</th>\n", + " <td>raifle/sorcererlm-8x22b</td>\n", + " <td>SorcererLM 8x22B</td>\n", + " <td>1731105083</td>\n", + " <td>SorcererLM is an advanced RP and storytelling ...</td>\n", + " <td>16000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>vicuna</td>\n", + " <td>0.0000045</td>\n", + " <td>0.0000045</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>175</th>\n", + " <td>eva-unit-01/eva-qwen-2.5-32b</td>\n", + " <td>EVA Qwen2.5 32B</td>\n", + " <td>1731104847</td>\n", + " <td>EVA Qwen2.5 32B is a roleplaying/storywriting ...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, 
temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.0000026</td>\n", + " <td>0.0000034</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>176</th>\n", + " <td>thedrummer/unslopnemo-12b</td>\n", + " <td>Unslopnemo 12B</td>\n", + " <td>1731103448</td>\n", + " <td>UnslopNemo v4.1 is the latest addition from th...</td>\n", + " <td>32000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.00000045</td>\n", + " <td>0.00000045</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32000.0</td>\n", + " <td>16000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>177</th>\n", + " <td>anthropic/claude-3.5-haiku:beta</td>\n", + " <td>Anthropic: Claude 3.5 Haiku (self-moderated)</td>\n", + " <td>1730678400</td>\n", + " <td>Claude 3.5 Haiku features offers enhanced capa...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.00000008</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>178</th>\n", + " <td>anthropic/claude-3.5-haiku</td>\n", + " <td>Anthropic: Claude 3.5 
Haiku</td>\n", + " <td>1730678400</td>\n", + " <td>Claude 3.5 Haiku features offers enhanced capa...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>True</td>\n", + " <td>0.00000008</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>179</th>\n", + " <td>anthropic/claude-3.5-haiku-20241022:beta</td>\n", + " <td>Anthropic: Claude 3.5 Haiku (2024-10-22) (self...</td>\n", + " <td>1730678400</td>\n", + " <td>Claude 3.5 Haiku features enhancements across ...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.00000008</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>180</th>\n", + " <td>anthropic/claude-3.5-haiku-20241022</td>\n", + " <td>Anthropic: Claude 3.5 Haiku (2024-10-22)</td>\n", + " <td>1730678400</td>\n", + " <td>Claude 3.5 Haiku features enhancements across ...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + 
" <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>True</td>\n", + " <td>0.00000008</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>181</th>\n", + " <td>neversleep/llama-3.1-lumimaid-70b</td>\n", + " <td>NeverSleep: Lumimaid v0.2 70B</td>\n", + " <td>1729555200</td>\n", + " <td>Lumimaid v0.2 70B is a finetune of [Llama 3.1 ...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000015</td>\n", + " <td>0.00000225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>182</th>\n", + " <td>anthracite-org/magnum-v4-72b</td>\n", + " <td>Magnum v4 72B</td>\n", + " <td>1729555200</td>\n", + " <td>This is a series of models designed to replica...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.0000015</td>\n", + " <td>0.00000225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>1024.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>183</th>\n", + " <td>anthropic/claude-3.5-sonnet:beta</td>\n", + " <td>Anthropic: Claude 3.5 Sonnet (self-moderated)</td>\n", + " <td>1729555200</td>\n", + " <td>New Claude 3.5 Sonnet delivers better-than-Opu...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + 
" <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>184</th>\n", + " <td>anthropic/claude-3.5-sonnet</td>\n", + " <td>Anthropic: Claude 3.5 Sonnet</td>\n", + " <td>1729555200</td>\n", + " <td>New Claude 3.5 Sonnet delivers better-than-Opu...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>True</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>185</th>\n", + " <td>x-ai/grok-beta</td>\n", + " <td>xAI: Grok Beta</td>\n", + " <td>1729382400</td>\n", + " <td>Grok Beta is xAI's experimental language model...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Grok</td>\n", + " <td>None</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>186</th>\n", + " <td>mistralai/ministral-8b</td>\n", + " <td>Mistral: Ministral 8B</td>\n", + " <td>1729123200</td>\n", + " <td>Ministral 8B is an 8B parameter model featurin...</td>\n", + " <td>128000</td>\n", + 
" <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>187</th>\n", + " <td>mistralai/ministral-3b</td>\n", + " <td>Mistral: Ministral 3B</td>\n", + " <td>1729123200</td>\n", + " <td>Ministral 3B is a 3B parameter model optimized...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.00000004</td>\n", + " <td>0.00000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>188</th>\n", + " <td>qwen/qwen-2.5-7b-instruct:free</td>\n", + " <td>Qwen2.5 7B Instruct (free)</td>\n", + " <td>1729036800</td>\n", + " <td>Qwen2.5 7B is the latest series of Qwen large ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>32768.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>189</th>\n", + " <td>qwen/qwen-2.5-7b-instruct</td>\n", + " <td>Qwen2.5 7B Instruct</td>\n", + " 
<td>1729036800</td>\n", + " <td>Qwen2.5 7B is the latest series of Qwen large ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>190</th>\n", + " <td>nvidia/llama-3.1-nemotron-70b-instruct</td>\n", + " <td>NVIDIA: Llama 3.1 Nemotron 70B Instruct</td>\n", + " <td>1728950400</td>\n", + " <td>NVIDIA's Llama 3.1 Nemotron 70B is a language ...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000012</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>131072.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>191</th>\n", + " <td>inflection/inflection-3-productivity</td>\n", + " <td>Inflection: Inflection 3 Productivity</td>\n", + " <td>1728604800</td>\n", + " <td>Inflection 3 Productivity is optimized for fol...</td>\n", + " <td>8000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8000.0</td>\n", + " <td>1024.0</td>\n", + " <td>False</td>\n", + " 
<td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>192</th>\n", + " <td>inflection/inflection-3-pi</td>\n", + " <td>Inflection: Inflection 3 Pi</td>\n", + " <td>1728604800</td>\n", + " <td>Inflection 3 Pi powers Inflection's [Pi](https...</td>\n", + " <td>8000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8000.0</td>\n", + " <td>1024.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>193</th>\n", + " <td>google/gemini-flash-1.5-8b</td>\n", + " <td>Google: Gemini 1.5 Flash 8B</td>\n", + " <td>1727913600</td>\n", + " <td>Gemini Flash 1.5 8B is optimized for speed and...</td>\n", + " <td>1000000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.0000000375</td>\n", + " <td>0.00000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.00000001</td>\n", + " <td>0.0000000583</td>\n", + " </tr>\n", + " <tr>\n", + " <th>194</th>\n", + " <td>thedrummer/rocinante-12b</td>\n", + " <td>Rocinante 12B</td>\n", + " <td>1727654400</td>\n", + " <td>Rocinante 12B is designed for engaging storyte...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.00000025</td>\n", + " <td>0.0000005</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>195</th>\n", + " <td>anthracite-org/magnum-v2-72b</td>\n", + " <td>Magnum v2 72B</td>\n", + " <td>1727654400</td>\n", + " <td>From the maker of [Goliath](https://openrouter...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.000003</td>\n", + " <td>0.000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>196</th>\n", + " <td>liquid/lfm-40b</td>\n", + " <td>Liquid: LFM 40B MoE</td>\n", + " <td>1727654400</td>\n", + " <td>Liquid's 40.3B Mixture of Experts (MoE) model....</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>chatml</td>\n", + " <td>0.00000015</td>\n", + " <td>0.00000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>197</th>\n", + " <td>meta-llama/llama-3.2-3b-instruct:free</td>\n", + " <td>Meta: Llama 3.2 3B Instruct (free)</td>\n", + " <td>1727222400</td>\n", + " <td>Llama 3.2 3B is a 3-billion-parameter multilin...</td>\n", + " <td>20000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " 
<td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>20000.0</td>\n", + " <td>20000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>198</th>\n", + " <td>meta-llama/llama-3.2-3b-instruct</td>\n", + " <td>Meta: Llama 3.2 3B Instruct</td>\n", + " <td>1727222400</td>\n", + " <td>Llama 3.2 3B is a 3-billion-parameter multilin...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000001</td>\n", + " <td>0.00000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>199</th>\n", + " <td>meta-llama/llama-3.2-1b-instruct:free</td>\n", + " <td>Meta: Llama 3.2 1B Instruct (free)</td>\n", + " <td>1727222400</td>\n", + " <td>Llama 3.2 1B is a 1-billion-parameter language...</td>\n", + " <td>131000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>200</th>\n", + " <td>meta-llama/llama-3.2-1b-instruct</td>\n", + " <td>Meta: Llama 3.2 1B Instruct</td>\n", + " <td>1727222400</td>\n", + " <td>Llama 3.2 1B is a 1-billion-parameter language...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " 
<td>[max_tokens, temperature, top_p, top_k, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.000000005</td>\n", + " <td>0.00000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>201</th>\n", + " <td>meta-llama/llama-3.2-90b-vision-instruct</td>\n", + " <td>Meta: Llama 3.2 90B Vision Instruct</td>\n", + " <td>1727222400</td>\n", + " <td>The Llama 90B Vision model is a top-tier, 90-b...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000012</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0.001734</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>202</th>\n", + " <td>meta-llama/llama-3.2-11b-vision-instruct:free</td>\n", + " <td>Meta: Llama 3.2 11B Vision Instruct (free)</td>\n", + " <td>1727222400</td>\n", + " <td>Llama 3.2 11B Vision is a multimodal model wit...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>203</th>\n", + " 
<td>meta-llama/llama-3.2-11b-vision-instruct</td>\n", + " <td>Meta: Llama 3.2 11B Vision Instruct</td>\n", + " <td>1727222400</td>\n", + " <td>Llama 3.2 11B Vision is a multimodal model wit...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.000000049</td>\n", + " <td>0.000000049</td>\n", + " <td>0</td>\n", + " <td>0.00007948</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>204</th>\n", + " <td>qwen/qwen-2.5-72b-instruct:free</td>\n", + " <td>Qwen2.5 72B Instruct (free)</td>\n", + " <td>1726704000</td>\n", + " <td>Qwen2.5 72B is the latest series of Qwen large...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>205</th>\n", + " <td>qwen/qwen-2.5-72b-instruct</td>\n", + " <td>Qwen2.5 72B Instruct</td>\n", + " <td>1726704000</td>\n", + " <td>Qwen2.5 72B is the latest series of Qwen large...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.00000012</td>\n", + " <td>0.00000039</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + 
" <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>206</th>\n", + " <td>qwen/qwen-2.5-vl-72b-instruct</td>\n", + " <td>Qwen: Qwen2.5-VL 72B Instruct</td>\n", + " <td>1726617600</td>\n", + " <td>Qwen2.5 VL 72B is a multimodal LLM from the Qw...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.0000006</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0.000578</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>207</th>\n", + " <td>neversleep/llama-3.1-lumimaid-8b</td>\n", + " <td>NeverSleep: Lumimaid v0.2 8B</td>\n", + " <td>1726358400</td>\n", + " <td>Lumimaid v0.2 8B is a finetune of [Llama 3.1 8...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000009375</td>\n", + " <td>0.00000075</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>208</th>\n", + " <td>openai/o1-preview</td>\n", + " <td>OpenAI: o1-preview</td>\n", + " <td>1726099200</td>\n", + " <td>The latest and strongest model family from Ope...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[seed, max_tokens]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " 
<td>0.000015</td>\n", + " <td>0.00006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>32768.0</td>\n", + " <td>True</td>\n", + " <td>0.0000075</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>209</th>\n", + " <td>openai/o1-preview-2024-09-12</td>\n", + " <td>OpenAI: o1-preview (2024-09-12)</td>\n", + " <td>1726099200</td>\n", + " <td>The latest and strongest model family from Ope...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[seed, max_tokens]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000015</td>\n", + " <td>0.00006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>32768.0</td>\n", + " <td>True</td>\n", + " <td>0.0000075</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>210</th>\n", + " <td>openai/o1-mini</td>\n", + " <td>OpenAI: o1-mini</td>\n", + " <td>1726099200</td>\n", + " <td>The latest and strongest model family from Ope...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[seed, max_tokens]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>65536.0</td>\n", + " <td>True</td>\n", + " <td>0.00000055</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>211</th>\n", + " <td>openai/o1-mini-2024-09-12</td>\n", + " <td>OpenAI: o1-mini (2024-09-12)</td>\n", + " <td>1726099200</td>\n", + " <td>The latest and strongest model family from Ope...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[seed, max_tokens]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " 
<td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>65536.0</td>\n", + " <td>True</td>\n", + " <td>0.00000055</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>212</th>\n", + " <td>mistralai/pixtral-12b</td>\n", + " <td>Mistral: Pixtral 12B</td>\n", + " <td>1725926400</td>\n", + " <td>The first multi-modal, text+image-to-text mode...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0.0001445</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>213</th>\n", + " <td>cohere/command-r-plus-08-2024</td>\n", + " <td>Cohere: Command R+ (08-2024)</td>\n", + " <td>1724976000</td>\n", + " <td>command-r-plus-08-2024 is an update of the [Co...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, top_p, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>214</th>\n", + " <td>cohere/command-r-08-2024</td>\n", + " <td>Cohere: Command R (08-2024)</td>\n", + " <td>1724976000</td>\n", + " <td>command-r-08-2024 is an update of the [Command...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + 
" <td>[tools, max_tokens, temperature, top_p, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>215</th>\n", + " <td>qwen/qwen-2.5-vl-7b-instruct:free</td>\n", + " <td>Qwen: Qwen2.5-VL 7B Instruct (free)</td>\n", + " <td>1724803200</td>\n", + " <td>Qwen2.5 VL 7B is a multimodal LLM from the Qwe...</td>\n", + " <td>64000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>64000.0</td>\n", + " <td>64000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>216</th>\n", + " <td>qwen/qwen-2.5-vl-7b-instruct</td>\n", + " <td>Qwen: Qwen2.5-VL 7B Instruct</td>\n", + " <td>1724803200</td>\n", + " <td>Qwen2.5 VL 7B is a multimodal LLM from the Qwe...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0.0001445</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>217</th>\n", + " <td>sao10k/l3.1-euryale-70b</td>\n", + " <td>Sao10K: Llama 3.1 
Euryale 70B v2.2</td>\n", + " <td>1724803200</td>\n", + " <td>Euryale L3.1 70B v2.2 is a model focused on cr...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000007</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>218</th>\n", + " <td>google/gemini-flash-1.5-8b-exp</td>\n", + " <td>Google: Gemini 1.5 Flash 8B Experimental</td>\n", + " <td>1724803200</td>\n", + " <td>Gemini Flash 1.5 8B Experimental is an experim...</td>\n", + " <td>1000000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>219</th>\n", + " <td>microsoft/phi-3.5-mini-128k-instruct</td>\n", + " <td>Microsoft: Phi-3.5 Mini 128K Instruct</td>\n", + " <td>1724198400</td>\n", + " <td>Phi-3.5 models are lightweight, state-of-the-a...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>phi3</td>\n", + " <td>0.00000003</td>\n", + " <td>0.00000009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + 
" <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>220</th>\n", + " <td>nousresearch/hermes-3-llama-3.1-70b</td>\n", + " <td>Nous: Hermes 3 70B Instruct</td>\n", + " <td>1723939200</td>\n", + " <td>Hermes 3 is a generalist language model with m...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>chatml</td>\n", + " <td>0.00000012</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>131072.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>221</th>\n", + " <td>nousresearch/hermes-3-llama-3.1-405b</td>\n", + " <td>Nous: Hermes 3 405B Instruct</td>\n", + " <td>1723766400</td>\n", + " <td>Hermes 3 is a generalist language model with m...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>chatml</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>131072.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>222</th>\n", + " <td>openai/chatgpt-4o-latest</td>\n", + " <td>OpenAI: ChatGPT-4o</td>\n", + " <td>1723593600</td>\n", + " <td>OpenAI ChatGPT 4o is continually updated by Op...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " 
<td>0.000005</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.007225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>223</th>\n", + " <td>sao10k/l3-lunaris-8b</td>\n", + " <td>Sao10K: Llama 3 8B Lunaris</td>\n", + " <td>1723507200</td>\n", + " <td>Lunaris 8B is a versatile generalist and rolep...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>224</th>\n", + " <td>aetherwiing/mn-starcannon-12b</td>\n", + " <td>Aetherwiing: Starcannon 12B</td>\n", + " <td>1723507200</td>\n", + " <td>Starcannon 12B v2 is a creative roleplay and s...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>chatml</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>225</th>\n", + " <td>openai/gpt-4o-2024-08-06</td>\n", + " <td>OpenAI: GPT-4o (2024-08-06)</td>\n", + " <td>1722902400</td>\n", + " <td>The 2024-08-06 version of GPT-4o offers improv...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, 
...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0.003613</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>0.00000125</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>226</th>\n", + " <td>meta-llama/llama-3.1-405b:free</td>\n", + " <td>Meta: Llama 3.1 405B (base) (free)</td>\n", + " <td>1722556800</td>\n", + " <td>Meta's latest class of model (Llama 3.1) launc...</td>\n", + " <td>64000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>none</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>64000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>227</th>\n", + " <td>meta-llama/llama-3.1-405b</td>\n", + " <td>Meta: Llama 3.1 405B (base)</td>\n", + " <td>1722556800</td>\n", + " <td>Meta's latest class of model (Llama 3.1) launc...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>none</td>\n", + " <td>0.000002</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>228</th>\n", + " <td>nothingiisreal/mn-celeste-12b</td>\n", + " <td>Mistral Nemo 12B Celeste</td>\n", + " <td>1722556800</td>\n", + " <td>A 
specialized story writing and roleplaying mo...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>chatml</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>229</th>\n", + " <td>perplexity/llama-3.1-sonar-small-128k-online</td>\n", + " <td>Perplexity: Llama 3.1 Sonar 8B Online</td>\n", + " <td>1722470400</td>\n", + " <td>Llama 3.1 Sonar is Perplexity's latest model f...</td>\n", + " <td>127072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, freque...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " <td>0.005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>127072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>230</th>\n", + " <td>perplexity/llama-3.1-sonar-large-128k-online</td>\n", + " <td>Perplexity: Llama 3.1 Sonar 70B Online</td>\n", + " <td>1722470400</td>\n", + " <td>Llama 3.1 Sonar is Perplexity's latest model f...</td>\n", + " <td>127072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, freque...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>None</td>\n", + " <td>0.000001</td>\n", + " <td>0.000001</td>\n", + " <td>0.005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>127072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + 
" <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>231</th>\n", + " <td>meta-llama/llama-3.1-8b-instruct:free</td>\n", + " <td>Meta: Llama 3.1 8B Instruct (free)</td>\n", + " <td>1721692800</td>\n", + " <td>Meta's latest class of model (Llama 3.1) launc...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>232</th>\n", + " <td>meta-llama/llama-3.1-8b-instruct</td>\n", + " <td>Meta: Llama 3.1 8B Instruct</td>\n", + " <td>1721692800</td>\n", + " <td>Meta's latest class of model (Llama 3.1) launc...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>233</th>\n", + " <td>meta-llama/llama-3.1-405b-instruct</td>\n", + " <td>Meta: Llama 3.1 405B Instruct</td>\n", + " <td>1721692800</td>\n", + " <td>The highly anticipated 400B class of Llama3 is...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000008</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>234</th>\n", + " <td>meta-llama/llama-3.1-70b-instruct</td>\n", + " <td>Meta: Llama 3.1 70B Instruct</td>\n", + " <td>1721692800</td>\n", + " <td>Meta's latest class of model (Llama 3.1) launc...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000001</td>\n", + " <td>0.00000028</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>235</th>\n", + " <td>mistralai/codestral-mamba</td>\n", + " <td>Mistral: Codestral Mamba</td>\n", + " <td>1721347200</td>\n", + " <td>A 7.3B parameter Mamba-based model designed fo...</td>\n", + " <td>262144</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000025</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>262144.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>236</th>\n", + " <td>mistralai/mistral-nemo:free</td>\n", + " <td>Mistral: Mistral Nemo (free)</td>\n", + " <td>1721347200</td>\n", + " <td>A 12B parameter model with a 128k token contex...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " 
<td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>128000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>237</th>\n", + " <td>mistralai/mistral-nemo</td>\n", + " <td>Mistral: Mistral Nemo</td>\n", + " <td>1721347200</td>\n", + " <td>A 12B parameter model with a 128k token contex...</td>\n", + " <td>98304</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.00000003</td>\n", + " <td>0.00000007</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>98304.0</td>\n", + " <td>49152.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>238</th>\n", + " <td>openai/gpt-4o-mini</td>\n", + " <td>OpenAI: GPT-4o-mini</td>\n", + " <td>1721260800</td>\n", + " <td>GPT-4o mini is OpenAI's newest model after [GP...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0.000217</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>0.000000075</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>239</th>\n", + " <td>openai/gpt-4o-mini-2024-07-18</td>\n", + " <td>OpenAI: GPT-4o-mini (2024-07-18)</td>\n", + " <td>1721260800</td>\n", + " <td>GPT-4o mini is OpenAI's newest model after [GP...</td>\n", 
+ " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0.007225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>0.000000075</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>240</th>\n", + " <td>google/gemma-2-27b-it</td>\n", + " <td>Google: Gemma 2 27B</td>\n", + " <td>1720828800</td>\n", + " <td>Gemma 2 27B by Google is an open model built f...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>241</th>\n", + " <td>alpindale/magnum-72b</td>\n", + " <td>Magnum 72B</td>\n", + " <td>1720656000</td>\n", + " <td>From the maker of [Goliath](https://openrouter...</td>\n", + " <td>16384</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16384.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>242</th>\n", + " <td>google/gemma-2-9b-it:free</td>\n", + " <td>Google: 
Gemma 2 9B (free)</td>\n", + " <td>1719532800</td>\n", + " <td>Gemma 2 9B by Google is an advanced, open-sour...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>243</th>\n", + " <td>google/gemma-2-9b-it</td>\n", + " <td>Google: Gemma 2 9B</td>\n", + " <td>1719532800</td>\n", + " <td>Gemma 2 9B by Google is an advanced, open-sour...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>gemma</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>244</th>\n", + " <td>01-ai/yi-large</td>\n", + " <td>01.AI: Yi Large</td>\n", + " <td>1719273600</td>\n", + " <td>The Yi Large model was designed by 01.AI with ...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Yi</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>245</th>\n", + " <td>ai21/jamba-instruct</td>\n", + " <td>AI21: Jamba Instruct</td>\n", + " <td>1719273600</td>\n", + " <td>The Jamba-Instruct model, introduced by AI21 L...</td>\n", + " <td>256000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000007</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>256000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>246</th>\n", + " <td>anthropic/claude-3.5-sonnet-20240620:beta</td>\n", + " <td>Anthropic: Claude 3.5 Sonnet (2024-06-20) (sel...</td>\n", + " <td>1718841600</td>\n", + " <td>Claude 3.5 Sonnet delivers better-than-Opus ca...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>247</th>\n", + " <td>anthropic/claude-3.5-sonnet-20240620</td>\n", + " <td>Anthropic: Claude 3.5 Sonnet (2024-06-20)</td>\n", + " <td>1718841600</td>\n", + " <td>Claude 3.5 Sonnet delivers better-than-Opus ca...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " 
<td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>8192.0</td>\n", + " <td>True</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>248</th>\n", + " <td>sao10k/l3-euryale-70b</td>\n", + " <td>Sao10k: Llama 3 Euryale 70B v2.1</td>\n", + " <td>1718668800</td>\n", + " <td>Euryale 70B v2.1 is a model focused on creativ...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000148</td>\n", + " <td>0.00000148</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>249</th>\n", + " <td>cognitivecomputations/dolphin-mixtral-8x22b</td>\n", + " <td>Dolphin 2.9.2 Mixtral 8x22B 🐬</td>\n", + " <td>1717804800</td>\n", + " <td>Dolphin 2.9 is designed for instruction follow...</td>\n", + " <td>16000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>chatml</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>250</th>\n", + " <td>qwen/qwen-2-72b-instruct</td>\n", + " <td>Qwen 2 72B Instruct</td>\n", + " <td>1717718400</td>\n", + " <td>Qwen2 72B is a transformer-based model that ex...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " 
<td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Qwen</td>\n", + " <td>chatml</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>251</th>\n", + " <td>mistralai/mistral-7b-instruct:free</td>\n", + " <td>Mistral: Mistral 7B Instruct (free)</td>\n", + " <td>1716768000</td>\n", + " <td>A high-performing, industry-standard 7.3B para...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>252</th>\n", + " <td>mistralai/mistral-7b-instruct</td>\n", + " <td>Mistral: Mistral 7B Instruct</td>\n", + " <td>1716768000</td>\n", + " <td>A high-performing, industry-standard 7.3B para...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.000000028</td>\n", + " <td>0.000000054</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>253</th>\n", + " <td>nousresearch/hermes-2-pro-llama-3-8b</td>\n", + " <td>NousResearch: Hermes 2 Pro - Llama-3 8B</td>\n", + " <td>1716768000</td>\n", + " 
<td>Hermes 2 Pro is an upgraded, retrained version...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>chatml</td>\n", + " <td>0.000000025</td>\n", + " <td>0.00000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>131072.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>254</th>\n", + " <td>mistralai/mistral-7b-instruct-v0.3</td>\n", + " <td>Mistral: Mistral 7B Instruct v0.3</td>\n", + " <td>1716768000</td>\n", + " <td>A high-performing, industry-standard 7.3B para...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.000000028</td>\n", + " <td>0.000000054</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>255</th>\n", + " <td>microsoft/phi-3-mini-128k-instruct</td>\n", + " <td>Microsoft: Phi-3 Mini 128K Instruct</td>\n", + " <td>1716681600</td>\n", + " <td>Phi-3 Mini is a powerful 3.8B parameter model ...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>phi3</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>256</th>\n", + " <td>microsoft/phi-3-medium-128k-instruct</td>\n", + " <td>Microsoft: Phi-3 Medium 128K Instruct</td>\n", + " <td>1716508800</td>\n", + " <td>Phi-3 128K Medium is a powerful 14-billion par...</td>\n", + " <td>131072</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>phi3</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>131072.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>257</th>\n", + " <td>neversleep/llama-3-lumimaid-70b</td>\n", + " <td>NeverSleep: Llama 3 Lumimaid 70B</td>\n", + " <td>1715817600</td>\n", + " <td>The NeverSleep team is back, with a Llama 3 70...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>258</th>\n", + " <td>deepseek/deepseek-coder</td>\n", + " <td>DeepSeek-Coder-V2</td>\n", + " <td>1715644800</td>\n", + " <td>DeepSeek-Coder-V2, an open-source Mixture-of-E...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>None</td>\n", + " <td>0.00000004</td>\n", + " <td>0.00000012</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>259</th>\n", + " <td>google/gemini-flash-1.5</td>\n", + " <td>Google: Gemini 1.5 Flash</td>\n", + " <td>1715644800</td>\n", + " <td>Gemini 1.5 Flash is a foundation model that pe...</td>\n", + " <td>1000000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " <td>None</td>\n", + " <td>0.000000075</td>\n", + " <td>0.0000003</td>\n", + " <td>0</td>\n", + " <td>0.00004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1000000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>0.00000001875</td>\n", + " <td>0.0000001583</td>\n", + " </tr>\n", + " <tr>\n", + " <th>260</th>\n", + " <td>openai/gpt-4o</td>\n", + " <td>OpenAI: GPT-4o</td>\n", + " <td>1715558400</td>\n", + " <td>GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " <td>0</td>\n", + " <td>0.003613</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>16384.0</td>\n", + " <td>True</td>\n", + " <td>0.00000125</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>261</th>\n", + " <td>openai/gpt-4o:extended</td>\n", + " <td>OpenAI: GPT-4o (extended)</td>\n", + " <td>1715558400</td>\n", + " <td>GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " 
<td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000006</td>\n", + " <td>0.000018</td>\n", + " <td>0</td>\n", + " <td>0.007225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>64000.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>262</th>\n", + " <td>meta-llama/llama-guard-2-8b</td>\n", + " <td>Meta: LlamaGuard 2 8B</td>\n", + " <td>1715558400</td>\n", + " <td>This safeguard model has 8B parameters and is ...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>none</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>263</th>\n", + " <td>openai/gpt-4o-2024-05-13</td>\n", + " <td>OpenAI: GPT-4o (2024-05-13)</td>\n", + " <td>1715558400</td>\n", + " <td>GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image, file]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.007225</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>264</th>\n", + " <td>allenai/olmo-7b-instruct</td>\n", + " <td>OLMo 7B Instruct</td>\n", + " <td>1715299200</td>\n", + " <td>OLMo 7B Instruct by 
the Allen Institute for AI...</td>\n", + " <td>2048</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Other</td>\n", + " <td>zephyr</td>\n", + " <td>0.00000008</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2048.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>265</th>\n", + " <td>neversleep/llama-3-lumimaid-8b:extended</td>\n", + " <td>NeverSleep: Llama 3 Lumimaid 8B (extended)</td>\n", + " <td>1714780800</td>\n", + " <td>The NeverSleep team is back, with a Llama 3 8B...</td>\n", + " <td>24576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000009375</td>\n", + " <td>0.00000075</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>24576.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>266</th>\n", + " <td>neversleep/llama-3-lumimaid-8b</td>\n", + " <td>NeverSleep: Llama 3 Lumimaid 8B</td>\n", + " <td>1714780800</td>\n", + " <td>The NeverSleep team is back, with a Llama 3 8B...</td>\n", + " <td>24576</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000009375</td>\n", + " <td>0.00000075</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>24576.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", 
+ " <tr>\n", + " <th>267</th>\n", + " <td>sao10k/fimbulvetr-11b-v2</td>\n", + " <td>Fimbulvetr 11B v2</td>\n", + " <td>1713657600</td>\n", + " <td>Creative writing model, routed with permission...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>alpaca</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>268</th>\n", + " <td>meta-llama/llama-3-8b-instruct</td>\n", + " <td>Meta: Llama 3 8B Instruct</td>\n", + " <td>1713398400</td>\n", + " <td>Meta's latest class of model (Llama 3) launche...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, seed, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.00000003</td>\n", + " <td>0.00000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>269</th>\n", + " <td>meta-llama/llama-3-70b-instruct</td>\n", + " <td>Meta: Llama 3 70B Instruct</td>\n", + " <td>1713398400</td>\n", + " <td>Meta's latest class of model (Llama 3) launche...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama3</td>\n", + " <td>llama3</td>\n", + " <td>0.0000003</td>\n", + " <td>0.0000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", 
+ " <td>8192.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>270</th>\n", + " <td>mistralai/mixtral-8x22b-instruct</td>\n", + " <td>Mistral: Mixtral 8x22B Instruct</td>\n", + " <td>1713312000</td>\n", + " <td>Mistral's official instruct fine-tuned version...</td>\n", + " <td>65536</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.0000004</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>65536.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>271</th>\n", + " <td>microsoft/wizardlm-2-8x22b</td>\n", + " <td>WizardLM-2 8x22B</td>\n", + " <td>1713225600</td>\n", + " <td>WizardLM-2 8x22B is Microsoft AI's most advanc...</td>\n", + " <td>65536</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, presence_pena...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>vicuna</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>65536.0</td>\n", + " <td>16384.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>272</th>\n", + " <td>google/gemini-pro-1.5</td>\n", + " <td>Google: Gemini 1.5 Pro</td>\n", + " <td>1712620800</td>\n", + " <td>Google's latest multimodal model, supports ima...</td>\n", + " <td>2000000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Gemini</td>\n", + " 
<td>None</td>\n", + " <td>0.00000125</td>\n", + " <td>0.000005</td>\n", + " <td>0</td>\n", + " <td>0.0006575</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2000000.0</td>\n", + " <td>8192.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>273</th>\n", + " <td>openai/gpt-4-turbo</td>\n", + " <td>OpenAI: GPT-4 Turbo</td>\n", + " <td>1712620800</td>\n", + " <td>The latest GPT-4 Turbo model with vision capab...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00001</td>\n", + " <td>0.00003</td>\n", + " <td>0</td>\n", + " <td>0.01445</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>274</th>\n", + " <td>cohere/command-r-plus</td>\n", + " <td>Cohere: Command R+</td>\n", + " <td>1712188800</td>\n", + " <td>Command R+ is a new, 104B-parameter LLM from C...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, top_p, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>275</th>\n", + " <td>cohere/command-r-plus-04-2024</td>\n", + " <td>Cohere: Command R+ (04-2024)</td>\n", + " <td>1712016000</td>\n", + " <td>Command R+ is a new, 104B-parameter LLM from C...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, 
top_p, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>276</th>\n", + " <td>sophosympatheia/midnight-rose-70b</td>\n", + " <td>Midnight Rose 70B</td>\n", + " <td>1711065600</td>\n", + " <td>A merge with a complex family tree, this model...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>airoboros</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000008</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>277</th>\n", + " <td>cohere/command</td>\n", + " <td>Cohere: Command</td>\n", + " <td>1710374400</td>\n", + " <td>Command is an instruction-following conversati...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.000001</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>278</th>\n", + " <td>cohere/command-r</td>\n", + " <td>Cohere: Command R</td>\n", + " <td>1710374400</td>\n", + " <td>Command-R is a 35B parameter model that perfor...</td>\n", + " 
<td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, top_p, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>279</th>\n", + " <td>anthropic/claude-3-haiku:beta</td>\n", + " <td>Anthropic: Claude 3 Haiku (self-moderated)</td>\n", + " <td>1710288000</td>\n", + " <td>Claude 3 Haiku is Anthropic's fastest and most...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000125</td>\n", + " <td>0</td>\n", + " <td>0.0004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>0.00000003</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>280</th>\n", + " <td>anthropic/claude-3-haiku</td>\n", + " <td>Anthropic: Claude 3 Haiku</td>\n", + " <td>1710288000</td>\n", + " <td>Claude 3 Haiku is Anthropic's fastest and most...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000125</td>\n", + " <td>0</td>\n", + " <td>0.0004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>0.00000003</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>281</th>\n", + " <td>anthropic/claude-3-opus:beta</td>\n", + " <td>Anthropic: Claude 3 Opus (self-moderated)</td>\n", + " <td>1709596800</td>\n", + " <td>Claude 3 Opus is Anthropic's most powerful mod...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000015</td>\n", + " <td>0.000075</td>\n", + " <td>0</td>\n", + " <td>0.024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>0.0000015</td>\n", + " <td>0.00001875</td>\n", + " </tr>\n", + " <tr>\n", + " <th>282</th>\n", + " <td>anthropic/claude-3-opus</td>\n", + " <td>Anthropic: Claude 3 Opus</td>\n", + " <td>1709596800</td>\n", + " <td>Claude 3 Opus is Anthropic's most powerful mod...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000015</td>\n", + " <td>0.000075</td>\n", + " <td>0</td>\n", + " <td>0.024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>0.0000015</td>\n", + " <td>0.00001875</td>\n", + " </tr>\n", + " <tr>\n", + " <th>283</th>\n", + " <td>anthropic/claude-3-sonnet:beta</td>\n", + " <td>Anthropic: Claude 3 Sonnet (self-moderated)</td>\n", + " <td>1709596800</td>\n", + " <td>Claude 3 Sonnet is an ideal balance of intelli...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + 
" <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>284</th>\n", + " <td>anthropic/claude-3-sonnet</td>\n", + " <td>Anthropic: Claude 3 Sonnet</td>\n", + " <td>1709596800</td>\n", + " <td>Claude 3 Sonnet is an ideal balance of intelli...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text+image->text</td>\n", + " <td>[text, image]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " <td>0</td>\n", + " <td>0.0048</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>285</th>\n", + " <td>cohere/command-r-03-2024</td>\n", + " <td>Cohere: Command R (03-2024)</td>\n", + " <td>1709341200</td>\n", + " <td>Command-R is a 35B parameter model that perfor...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, max_tokens, temperature, top_p, stop, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Cohere</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>286</th>\n", + " <td>mistralai/mistral-large</td>\n", + " <td>Mistral Large</td>\n", + " <td>1708905600</td>\n", + " <td>This is Mistral AI's flagship model, Mistral L...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, 
...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>287</th>\n", + " <td>openai/gpt-3.5-turbo-0613</td>\n", + " <td>OpenAI: GPT-3.5 Turbo (older v0613)</td>\n", + " <td>1706140800</td>\n", + " <td>GPT-3.5 Turbo is OpenAI's fastest model. It ca...</td>\n", + " <td>4095</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000001</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4095.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>288</th>\n", + " <td>openai/gpt-4-turbo-preview</td>\n", + " <td>OpenAI: GPT-4 Turbo Preview</td>\n", + " <td>1706140800</td>\n", + " <td>The preview GPT-4 model with improved instruct...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00001</td>\n", + " <td>0.00003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>289</th>\n", + " <td>nousresearch/nous-hermes-2-mixtral-8x7b-dpo</td>\n", + " <td>Nous: Hermes 2 Mixtral 8x7B DPO</td>\n", + " <td>1705363200</td>\n", + " <td>Nous Hermes 2 
Mixtral 8x7B DPO is the new flag...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>chatml</td>\n", + " <td>0.0000006</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>290</th>\n", + " <td>mistralai/mistral-medium</td>\n", + " <td>Mistral Medium</td>\n", + " <td>1704844800</td>\n", + " <td>This is Mistral AI's closed-source, medium-sid...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.00000275</td>\n", + " <td>0.0000081</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>291</th>\n", + " <td>mistralai/mistral-small</td>\n", + " <td>Mistral Small</td>\n", + " <td>1704844800</td>\n", + " <td>With 22 billion parameters, Mistral Small v24....</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>292</th>\n", + " <td>mistralai/mistral-tiny</td>\n", 
+ " <td>Mistral Tiny</td>\n", + " <td>1704844800</td>\n", + " <td>Note: This model is being deprecated. Recommen...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>None</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000025</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>293</th>\n", + " <td>mistralai/mistral-7b-instruct-v0.2</td>\n", + " <td>Mistral: Mistral 7B Instruct v0.2</td>\n", + " <td>1703721600</td>\n", + " <td>A high-performing, industry-standard 7.3B para...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>294</th>\n", + " <td>mistralai/mixtral-8x7b-instruct</td>\n", + " <td>Mistral: Mixtral 8x7B Instruct</td>\n", + " <td>1702166400</td>\n", + " <td>Mixtral 8x7B Instruct is a pretrained generati...</td>\n", + " <td>32768</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.00000008</td>\n", + " <td>0.00000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32768.0</td>\n", + " <td>NaN</td>\n", + " 
<td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>295</th>\n", + " <td>neversleep/noromaid-20b</td>\n", + " <td>Noromaid 20B</td>\n", + " <td>1700956800</td>\n", + " <td>A collab between IkariDev and Undi. This merge...</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>alpaca</td>\n", + " <td>0.00000075</td>\n", + " <td>0.0000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>2048.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>296</th>\n", + " <td>anthropic/claude-2.1:beta</td>\n", + " <td>Anthropic: Claude v2.1 (self-moderated)</td>\n", + " <td>1700611200</td>\n", + " <td>Claude 2 delivers advancements in key capabili...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>297</th>\n", + " <td>anthropic/claude-2.1</td>\n", + " <td>Anthropic: Claude v2.1</td>\n", + " <td>1700611200</td>\n", + " <td>Claude 2 delivers advancements in key capabili...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>298</th>\n", + " <td>anthropic/claude-2:beta</td>\n", + " <td>Anthropic: Claude v2 (self-moderated)</td>\n", + " <td>1700611200</td>\n", + " <td>Claude 2 delivers advancements in key capabili...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>299</th>\n", + " <td>anthropic/claude-2</td>\n", + " <td>Anthropic: Claude v2</td>\n", + " <td>1700611200</td>\n", + " <td>Claude 2 delivers advancements in key capabili...</td>\n", + " <td>200000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>200000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>300</th>\n", + " <td>undi95/toppy-m-7b</td>\n", + " <td>Toppy M 7B</td>\n", + " <td>1699574400</td>\n", + " <td>A wild 7B parameter model that merges several ...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " 
<td>alpaca</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>301</th>\n", + " <td>alpindale/goliath-120b</td>\n", + " <td>Goliath 120B</td>\n", + " <td>1699574400</td>\n", + " <td>A large LLM created by combining two fine-tune...</td>\n", + " <td>6144</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>airoboros</td>\n", + " <td>0.0000065625</td>\n", + " <td>0.000009375</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>6144.0</td>\n", + " <td>512.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>302</th>\n", + " <td>openrouter/auto</td>\n", + " <td>Auto Router</td>\n", + " <td>1699401600</td>\n", + " <td>Your prompt will be processed by a meta-model ...</td>\n", + " <td>2000000</td>\n", + " <td>None</td>\n", + " <td>[]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Router</td>\n", + " <td>None</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>303</th>\n", + " <td>openai/gpt-3.5-turbo-1106</td>\n", + " <td>OpenAI: GPT-3.5 Turbo 16k (older v1106)</td>\n", + " <td>1699228800</td>\n", + " <td>An older GPT-3.5 Turbo model with improved ins...</td>\n", + " <td>16385</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " 
<td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000001</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16385.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>304</th>\n", + " <td>openai/gpt-4-1106-preview</td>\n", + " <td>OpenAI: GPT-4 Turbo (older v1106)</td>\n", + " <td>1699228800</td>\n", + " <td>The latest GPT-4 Turbo model with vision capab...</td>\n", + " <td>128000</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00001</td>\n", + " <td>0.00003</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>128000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>305</th>\n", + " <td>jondurbin/airoboros-l2-70b</td>\n", + " <td>Airoboros 70B</td>\n", + " <td>1698537600</td>\n", + " <td>A Llama 2 70B fine-tune using synthetic data (...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>airoboros</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000005</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>306</th>\n", + " <td>openai/gpt-3.5-turbo-instruct</td>\n", + " <td>OpenAI: GPT-3.5 Turbo Instruct</td>\n", + " <td>1695859200</td>\n", + " <td>This model is a variant of GPT-3.5 Turbo tuned...</td>\n", + " <td>4095</td>\n", + " <td>None</td>\n", + " 
<td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>chatml</td>\n", + " <td>0.0000015</td>\n", + " <td>0.000002</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4095.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>307</th>\n", + " <td>mistralai/mistral-7b-instruct-v0.1</td>\n", + " <td>Mistral: Mistral 7B Instruct v0.1</td>\n", + " <td>1695859200</td>\n", + " <td>A 7.3B parameter model that outperforms Llama ...</td>\n", + " <td>2824</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Mistral</td>\n", + " <td>mistral</td>\n", + " <td>0.00000011</td>\n", + " <td>0.00000019</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2824.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>308</th>\n", + " <td>pygmalionai/mythalion-13b</td>\n", + " <td>Pygmalion: Mythalion 13B</td>\n", + " <td>1693612800</td>\n", + " <td>A blend of the new Pygmalion-13b and MythoMax....</td>\n", + " <td>8192</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>alpaca</td>\n", + " <td>0.0000005625</td>\n", + " <td>0.000001125</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8192.0</td>\n", + " <td>1024.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>309</th>\n", + " <td>openai/gpt-3.5-turbo-16k</td>\n", + " <td>OpenAI: GPT-3.5 Turbo 16k</td>\n", + " 
<td>1693180800</td>\n", + " <td>This model offers four times the context lengt...</td>\n", + " <td>16385</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.000003</td>\n", + " <td>0.000004</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16385.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>310</th>\n", + " <td>openai/gpt-4-32k</td>\n", + " <td>OpenAI: GPT-4 32k</td>\n", + " <td>1693180800</td>\n", + " <td>GPT-4-32k is an extended version of GPT-4, wit...</td>\n", + " <td>32767</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00006</td>\n", + " <td>0.00012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32767.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>311</th>\n", + " <td>openai/gpt-4-32k-0314</td>\n", + " <td>OpenAI: GPT-4 32k (older v0314)</td>\n", + " <td>1693180800</td>\n", + " <td>GPT-4-32k is an extended version of GPT-4, wit...</td>\n", + " <td>32767</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00006</td>\n", + " <td>0.00012</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>32767.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>312</th>\n", + " 
<td>mancer/weaver</td>\n", + " <td>Mancer: Weaver (alpha)</td>\n", + " <td>1690934400</td>\n", + " <td>An attempt to recreate Claude-style verbosity,...</td>\n", + " <td>8000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>alpaca</td>\n", + " <td>0.000001125</td>\n", + " <td>0.000001125</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8000.0</td>\n", + " <td>1000.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>313</th>\n", + " <td>anthropic/claude-2.0:beta</td>\n", + " <td>Anthropic: Claude v2.0 (self-moderated)</td>\n", + " <td>1690502400</td>\n", + " <td>Anthropic's flagship model. Superior performan...</td>\n", + " <td>100000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>100000.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>314</th>\n", + " <td>anthropic/claude-2.0</td>\n", + " <td>Anthropic: Claude v2.0</td>\n", + " <td>1690502400</td>\n", + " <td>Anthropic's flagship model. 
Superior performan...</td>\n", + " <td>100000</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, top_k, stop]</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Claude</td>\n", + " <td>None</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>100000.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>315</th>\n", + " <td>undi95/remm-slerp-l2-13b</td>\n", + " <td>ReMM SLERP 13B</td>\n", + " <td>1689984000</td>\n", + " <td>A recreation trial of the original MythoMax-L2...</td>\n", + " <td>6144</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>alpaca</td>\n", + " <td>0.0000005625</td>\n", + " <td>0.000001125</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>6144.0</td>\n", + " <td>1024.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>316</th>\n", + " <td>gryphe/mythomax-l2-13b</td>\n", + " <td>MythoMax 13B</td>\n", + " <td>1688256000</td>\n", + " <td>One of the highest performing and most popular...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>alpaca</td>\n", + " <td>0.000000065</td>\n", + " <td>0.000000065</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>4096.0</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>317</th>\n", + " <td>meta-llama/llama-2-70b-chat</td>\n", + " 
<td>Meta: Llama 2 70B Chat</td>\n", + " <td>1687219200</td>\n", + " <td>The flagship, 70 billion parameter language mo...</td>\n", + " <td>4096</td>\n", + " <td>None</td>\n", + " <td>[max_tokens, temperature, top_p, stop, frequen...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>Llama2</td>\n", + " <td>llama2</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4096.0</td>\n", + " <td>NaN</td>\n", + " <td>False</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>318</th>\n", + " <td>openai/gpt-3.5-turbo</td>\n", + " <td>OpenAI: GPT-3.5 Turbo</td>\n", + " <td>1685232000</td>\n", + " <td>GPT-3.5 Turbo is OpenAI's fastest model. It ca...</td>\n", + " <td>16385</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16385.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>319</th>\n", + " <td>openai/gpt-3.5-turbo-0125</td>\n", + " <td>OpenAI: GPT-3.5 Turbo 16k</td>\n", + " <td>1685232000</td>\n", + " <td>The latest GPT-3.5 Turbo model with improved i...</td>\n", + " <td>16385</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>16385.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>320</th>\n", + " <td>openai/gpt-4</td>\n", + " <td>OpenAI: GPT-4</td>\n", + " <td>1685232000</td>\n", + " <td>OpenAI's flagship model, GPT-4 is a large-scal...</td>\n", + " <td>8191</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00003</td>\n", + " <td>0.00006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8191.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>321</th>\n", + " <td>openai/gpt-4-0314</td>\n", + " <td>OpenAI: GPT-4 (older v0314)</td>\n", + " <td>1685232000</td>\n", + " <td>GPT-4-0314 is the first version of GPT-4 relea...</td>\n", + " <td>8191</td>\n", + " <td>None</td>\n", + " <td>[tools, tool_choice, max_tokens, temperature, ...</td>\n", + " <td>text->text</td>\n", + " <td>[text]</td>\n", + " <td>[text]</td>\n", + " <td>GPT</td>\n", + " <td>None</td>\n", + " <td>0.00003</td>\n", + " <td>0.00006</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8191.0</td>\n", + " <td>4096.0</td>\n", + " <td>True</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " id name created description context_length per_request_limits supported_parameters architecture_modality architecture_input_modalities architecture_output_modalities architecture_tokenizer architecture_instruct_type pricing_prompt pricing_completion pricing_request pricing_image pricing_web_search pricing_internal_reasoning top_provider_context_length top_provider_max_completion_tokens top_provider_is_moderated pricing_input_cache_read pricing_input_cache_write\n", + "0 mistralai/mistral-medium-3 Mistral: Mistral Medium 3 
1746627341 Mistral Medium 3 is a high-performance enterpr... 131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.0000004 0.000002 0 0 0 0 131072.0 NaN False NaN NaN\n", + "1 google/gemini-2.5-pro-preview Google: Gemini 2.5 Pro Preview 1746578513 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0.00000125 0.00001 0 0.00516 0 0 1048576.0 65535.0 False 0.00000031 0.000001625\n", + "2 arcee-ai/caller-large Arcee AI: Caller Large 1746487869 Caller Large is Arcee's specialist \"function‑c... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.00000055 0.00000085 0 0 0 0 32768.0 NaN False NaN NaN\n", + "3 arcee-ai/spotlight Arcee AI: Spotlight 1746481552 Spotlight is a 7‑billion‑parameter vision‑lang... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [image, text] [text] Other None 0.00000018 0.00000018 0 0 0 0 131072.0 65537.0 False NaN NaN\n", + "4 arcee-ai/maestro-reasoning Arcee AI: Maestro Reasoning 1746481269 Maestro Reasoning is Arcee's flagship analysis... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000009 0.0000033 0 0 0 0 131072.0 32000.0 False NaN NaN\n", + "5 arcee-ai/virtuoso-large Arcee AI: Virtuoso Large 1746478885 Virtuoso‑Large is Arcee's top‑tier general‑pur... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000075 0.0000012 0 0 0 0 131072.0 64000.0 False NaN NaN\n", + "6 arcee-ai/coder-large Arcee AI: Coder Large 1746478663 Coder‑Large is a 32 B‑parameter offspring of Q... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 NaN False NaN NaN\n", + "7 arcee-ai/virtuoso-medium-v2 Arcee AI: Virtuoso Medium V2 1746478434 Virtuoso‑Medium‑v2 is a 32 B model distilled f... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "8 arcee-ai/arcee-blitz Arcee AI: Arcee Blitz 1746470100 Arcee Blitz is a 24 B‑parameter dense model di... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000045 0.00000075 0 0 0 0 32768.0 NaN False NaN NaN\n", + "9 microsoft/phi-4-reasoning-plus:free Microsoft: Phi 4 Reasoning Plus (free) 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "10 microsoft/phi-4-reasoning-plus Microsoft: Phi 4 Reasoning Plus 1746130961 Phi-4-reasoning-plus is an enhanced 14B parame... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.00000007 0.00000035 0 0 0 0 32768.0 NaN False NaN NaN\n", + "11 microsoft/phi-4-reasoning:free Microsoft: Phi 4 Reasoning (free) 1746121275 Phi-4-reasoning is a 14B parameter dense decod... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "12 qwen/qwen3-0.6b-04-28:free Qwen: Qwen3 0.6B (free) 1746043526 Qwen3-0.6B is a lightweight, 0.6 billion param... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "13 inception/mercury-coder-small-beta Inception: Mercury Coder Small Beta 1746033880 Mercury Coder Small is the first diffusion lar... 32000 None [max_tokens, frequency_penalty, presence_penal... 
text->text [text] [text] Other None 0.00000025 0.000001 0 0 0 0 32000.0 NaN False NaN NaN\n", + "14 qwen/qwen3-1.7b:free Qwen: Qwen3 1.7B (free) 1746031388 Qwen3-1.7B is a compact, 1.7 billion parameter... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "15 qwen/qwen3-4b:free Qwen: Qwen3 4B (free) 1746031104 Qwen3-4B is a 4 billion parameter dense langua... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 128000.0 NaN False NaN NaN\n", + "16 opengvlab/internvl3-14b:free OpenGVLab: InternVL3 14B (free) 1746021355 The 14b version of the InternVL3 series. An ad... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "17 opengvlab/internvl3-2b:free OpenGVLab: InternVL3 2B (free) 1746019807 The 2b version of the InternVL3 series, for an... 32000 None [max_tokens, temperature, top_p] text+image->text [image, text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "18 deepseek/deepseek-prover-v2:free DeepSeek: DeepSeek Prover V2 (free) 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "19 deepseek/deepseek-prover-v2 DeepSeek: DeepSeek Prover V2 1746013094 DeepSeek Prover V2 is a 671B parameter model, ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.0000005 0.00000218 0 0 0 0 131072.0 NaN False NaN NaN\n", + "20 meta-llama/llama-guard-4-12b Meta: Llama Guard 4 12B 1745975193 Llama Guard 4 is a Llama 4 Scout-derived multi... 163840 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [image, text] [text] Other None 0.00000005 0.00000005 0 0 0 0 163840.0 NaN False NaN NaN\n", + "21 qwen/qwen3-30b-a3b:free Qwen: Qwen3 30B A3B (free) 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "22 qwen/qwen3-30b-a3b Qwen: Qwen3 30B A3B 1745878604 Qwen3, the latest generation in the Qwen large... 40960 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "23 qwen/qwen3-8b:free Qwen: Qwen3 8B (free) 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "24 qwen/qwen3-8b Qwen: Qwen3 8B 1745876632 Qwen3-8B is a dense 8.2B parameter causal lang... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.000000035 0.000000138 0 0 0 0 128000.0 NaN False NaN NaN\n", + "25 qwen/qwen3-14b:free Qwen: Qwen3 14B (free) 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "26 qwen/qwen3-14b Qwen: Qwen3 14B 1745876478 Qwen3-14B is a dense 14.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000007 0.00000024 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "27 qwen/qwen3-32b:free Qwen: Qwen3 32B (free) 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "28 qwen/qwen3-32b Qwen: Qwen3 32B 1745875945 Qwen3-32B is a dense 32.8B parameter causal la... 
40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.0000001 0.0000003 0 0 0 0 40960.0 NaN False NaN NaN\n", + "29 qwen/qwen3-235b-a22b:free Qwen: Qwen3 235B A22B (free) 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0 0 0 0 0 0 40960.0 NaN False NaN NaN\n", + "30 qwen/qwen3-235b-a22b Qwen: Qwen3 235B A22B 1745875757 Qwen3-235B-A22B is a 235B parameter mixture-of... 40960 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen3 None 0.00000014 0.000002 0 0 0 0 40960.0 40960.0 False NaN NaN\n", + "31 tngtech/deepseek-r1t-chimera:free TNG: DeepSeek R1T Chimera (free) 1745760875 DeepSeek-R1T-Chimera is created by merging Dee... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "32 thudm/glm-z1-rumination-32b THUDM: GLM Z1 Rumination 32B 1745601495 THUDM: GLM Z1 Rumination 32B is a 32B-paramete... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "33 thudm/glm-z1-9b:free THUDM: GLM Z1 9B (free) 1745601140 GLM-Z1-9B-0414 is a 9B-parameter language mode... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "34 thudm/glm-4-9b:free THUDM: GLM 4 9B (free) 1745601023 GLM-4-9B-0414 is a 9 billion parameter languag... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32000.0 NaN False NaN NaN\n", + "35 microsoft/mai-ds-r1:free Microsoft: MAI DS R1 (free) 1745194100 MAI-DS-R1 is a post-trained variant of DeepSee... 163840 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "36 thudm/glm-z1-32b:free THUDM: GLM Z1 32B (free) 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "37 thudm/glm-z1-32b THUDM: GLM Z1 32B 1744924148 GLM-Z1-32B-0414 is an enhanced reasoning varia... 32000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "38 thudm/glm-4-32b:free THUDM: GLM 4 32B (free) 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "39 thudm/glm-4-32b THUDM: GLM 4 32B 1744920915 GLM-4-32B-0414 is a 32B bilingual (Chinese-Eng... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000024 0.00000024 0 0 0 0 32000.0 NaN False NaN NaN\n", + "40 google/gemini-2.5-flash-preview Google: Gemini 2.5 Flash Preview 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000006 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", + "41 google/gemini-2.5-flash-preview:thinking Google: Gemini 2.5 Flash Preview (thinking) 1744914667 Gemini 2.5 Flash is Google's state-of-the-art ... 1048576 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [image, text, file] [text] Gemini None 0.00000015 0.0000035 0 0.0006192 0 0 1048576.0 65535.0 False 0.0000000375 0.0000002333\n", + "42 openai/o4-mini-high OpenAI: o4 Mini High 1744824212 OpenAI o4-mini-high is the same model as [o4-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... 
text+image->text [image, text, file] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", + "43 openai/o3 OpenAI: o3 1744823457 o3 is a well-rounded and powerful model across... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text, file] [text] Other None 0.00001 0.00004 0 0.00765 0 0 200000.0 100000.0 True 0.0000025 NaN\n", + "44 openai/o4-mini OpenAI: o4 Mini 1744820942 OpenAI o4-mini is a compact reasoning model in... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [image, text] [text] Other None 0.0000011 0.0000044 0 0.0008415 0 0 200000.0 100000.0 True 0.000000275 NaN\n", + "45 shisa-ai/shisa-v2-llama3.3-70b:free Shisa AI: Shisa V2 Llama 3.3 70B (free) 1744754858 Shisa V2 Llama 3.3 70B is a bilingual Japanese... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "46 qwen/qwen2.5-coder-7b-instruct Qwen: Qwen2.5 Coder 7B Instruct 1744734887 Qwen2.5-Coder-7B-Instruct is a 7B parameter in... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen None 0.00000001 0.00000003 0 0 0 0 32768.0 NaN False NaN NaN\n", + "47 openai/gpt-4.1 OpenAI: GPT-4.1 1744651385 GPT-4.1 is a flagship large language model opt... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.000002 0.000008 0 0 0 0 1047576.0 32768.0 True 0.0000005 NaN\n", + "48 openai/gpt-4.1-mini OpenAI: GPT-4.1 Mini 1744651381 GPT-4.1 Mini is a mid-sized model delivering p... 1047576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [image, text] [text] GPT None 0.0000004 0.0000016 0 0 0 0 1047576.0 32768.0 True 0.0000001 NaN\n", + "49 openai/gpt-4.1-nano OpenAI: GPT-4.1 Nano 1744651369 For tasks that demand low latency, GPT‑4.1 nan... 1047576 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [image, text] [text] GPT None 0.0000001 0.0000004 0 0 0 0 1047576.0 32768.0 True 0.000000025 NaN\n", + "50 eleutherai/llemma_7b EleutherAI: Llemma 7b 1744643225 Llemma 7B is a language model for mathematics.... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other code-llama 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "51 alfredpros/codellama-7b-instruct-solidity AlfredPros: CodeLLaMa 7B Instruct Solidity 1744641874 A finetuned 7 billion parameters Code LLaMA - ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "52 arliai/qwq-32b-arliai-rpr-v1:free ArliAI: QwQ 32B RpR v1 (free) 1744555982 QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "53 agentica-org/deepcoder-14b-preview:free Agentica: Deepcoder 14B Preview (free) 1744555395 DeepCoder-14B-Preview is a 14B parameter code ... 96000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 96000.0 NaN False NaN NaN\n", + "54 moonshotai/kimi-vl-a3b-thinking:free Moonshot AI: Kimi VL A3B Thinking (free) 1744304841 Kimi-VL is a lightweight Mixture-of-Experts vi... 131072 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [image, text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "55 x-ai/grok-3-mini-beta xAI: Grok 3 Mini Beta 1744240195 Grok 3 Mini is a lightweight, smaller thinking... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.0000003 0.0000005 0 0 0 0 131072.0 NaN False NaN NaN\n", + "56 x-ai/grok-3-beta xAI: Grok 3 Beta 1744240068 Grok 3 is the latest model from xAI. It's thei... 131072 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Grok None 0.000003 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "57 nvidia/llama-3.3-nemotron-super-49b-v1:free NVIDIA: Llama 3.3 Nemotron Super 49B v1 (free) 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "58 nvidia/llama-3.3-nemotron-super-49b-v1 NVIDIA: Llama 3.3 Nemotron Super 49B v1 1744119494 Llama-3.3-Nemotron-Super-49B-v1 is a large lan... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000013 0.0000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free NVIDIA: Llama 3.1 Nemotron Ultra 253B v1 (free) 1744115059 Llama-3.1-Nemotron-Ultra-253B-v1 is a large la... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "60 meta-llama/llama-4-maverick:free Meta: Llama 4 Maverick (free) 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 256000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 256000.0 NaN False NaN NaN\n", + "61 meta-llama/llama-4-maverick Meta: Llama 4 Maverick 1743881822 Llama 4 Maverick 17B Instruct (128E) is a high... 1048576 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0.00000017 0.0000006 0 0.0006684 0 0 1048576.0 16384.0 False NaN NaN\n", + "62 meta-llama/llama-4-scout:free Meta: Llama 4 Scout (free) 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 512000 None [max_tokens, temperature, top_p, structured_ou... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 512000.0 NaN False NaN NaN\n", + "63 meta-llama/llama-4-scout Meta: Llama 4 Scout 1743881519 Llama 4 Scout 17B Instruct (16E) is a mixture-... 
1048576 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Other None 0.00000008 0.0000003 0 0 0 0 1048576.0 1048576.0 False NaN NaN\n", + "64 all-hands/openhands-lm-32b-v0.1 OpenHands LM 32B V0.1 1743613013 OpenHands LM v0.1 is a 32B open-source coding ... 16384 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "65 mistral/ministral-8b Mistral: Ministral 8B 1743430021 Ministral 8B is a state-of-the-art language mo... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "66 deepseek/deepseek-v3-base:free DeepSeek: DeepSeek V3 Base (free) 1743272023 Note that this is a base model mostly meant fo... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "67 scb10x/llama3.1-typhoon2-8b-instruct Typhoon2 8B Instruct 1743196511 Llama3.1-Typhoon2-8B-Instruct is a Thai-Englis... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000018 0.00000018 0 0 0 0 8192.0 NaN False NaN NaN\n", + "68 scb10x/llama3.1-typhoon2-70b-instruct Typhoon2 70B Instruct 1743196170 Llama3.1-Typhoon2-70B-Instruct is a Thai-Engli... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000088 0.00000088 0 0 0 0 8192.0 NaN False NaN NaN\n", + "69 allenai/molmo-7b-d:free AllenAI: Molmo 7B D (free) 1743023247 Molmo is a family of open vision-language mode... 4096 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 4096.0 NaN False NaN NaN\n", + "70 bytedance-research/ui-tars-72b:free Bytedance: UI-TARS 72B (free) 1743020065 UI-TARS 72B is an open-source multimodal AI mo... 
32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "71 qwen/qwen2.5-vl-3b-instruct:free Qwen: Qwen2.5 VL 3B Instruct (free) 1743014573 Qwen2.5 VL 3B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "72 google/gemini-2.5-pro-exp-03-25 Google: Gemini 2.5 Pro Experimental 1742922099 Gemini 2.5 Pro is Google’s state-of-the-art AI... 1000000 None [max_tokens, temperature, top_p, tools, tool_c... text+image->text [text, image, file] [text] Gemini None 0 0 0 0 0 0 1000000.0 65535.0 False NaN NaN\n", + "73 qwen/qwen2.5-vl-32b-instruct:free Qwen: Qwen2.5 VL 32B Instruct (free) 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 8192 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", + "74 qwen/qwen2.5-vl-32b-instruct Qwen: Qwen2.5 VL 32B Instruct 1742839838 Qwen2.5-VL-32B is a multimodal vision-language... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000009 0.0000009 0 0 0 0 128000.0 NaN False NaN NaN\n", + "75 deepseek/deepseek-chat-v3-0324:free DeepSeek: DeepSeek V3 0324 (free) 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "76 deepseek/deepseek-chat-v3-0324 DeepSeek: DeepSeek V3 0324 1742824755 DeepSeek V3, a 685B-parameter, mixture-of-expe... 163840 None [max_tokens, temperature, top_p, presence_pena... 
text->text [text] [text] DeepSeek None 0.0000003 0.00000088 0 0 0 0 163840.0 NaN False NaN NaN\n", + "77 featherless/qwerky-72b:free Qwerky 72B (free) 1742481597 Qwerky-72B is a linear-attention RWKV variant ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "78 openai/o1-pro OpenAI: o1-pro 1742423211 The o1 series of models are trained with reinf... 200000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] GPT None 0.00015 0.0006 0 0.21675 0 0 200000.0 100000.0 True NaN NaN\n", + "79 mistralai/mistral-small-3.1-24b-instruct:free Mistral: Mistral Small 3.1 24B (free) 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 96000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0 0 0 0 0 0 96000.0 96000.0 False NaN NaN\n", + "80 mistralai/mistral-small-3.1-24b-instruct Mistral: Mistral Small 3.1 24B 1742238937 Mistral Small 3.1 24B Instruct is an upgraded ... 131072 None [max_tokens, temperature, top_p, presence_pena... text+image->text [text, image] [text] Mistral None 0.00000005 0.00000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "81 open-r1/olympiccoder-32b:free OlympicCoder 32B (free) 1742077228 OlympicCoder-32B is a high-performing open-sou... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "82 google/gemma-3-1b-it:free Google: Gemma 3 1B (free) 1741963556 Gemma 3 1B is the smallest of the new Gemma 3 ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 32768.0 8192.0 False NaN NaN\n", + "83 google/gemma-3-4b-it:free Google: Gemma 3 4B (free) 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "84 google/gemma-3-4b-it Google: Gemma 3 4B 1741905510 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000002 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "85 ai21/jamba-1.6-large AI21: Jamba 1.6 Large 1741905173 AI21 Jamba Large 1.6 is a high-performance hyb... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.000002 0.000008 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "86 ai21/jamba-1.6-mini AI21: Jamba Mini 1.6 1741905171 AI21 Jamba Mini 1.6 is a hybrid foundation mod... 256000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other None 0.0000002 0.0000004 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "87 google/gemma-3-12b-it:free Google: Gemma 3 12B (free) 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "88 google/gemma-3-12b-it Google: Gemma 3 12B 1741902625 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.00000005 0.0000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "89 cohere/command-a Cohere: Command A 1741894342 Command A is an open-weights 111B parameter mo... 256000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 256000.0 8192.0 False NaN NaN\n", + "90 openai/gpt-4o-mini-search-preview OpenAI: GPT-4o-mini Search Preview 1741818122 GPT-4o mini Search Preview is a specialized mo... 128000 None [web_search_options, max_tokens, response_form... 
text->text [text] [text] GPT None 0.00000015 0.0000006 0.0275 0.000217 0 0 128000.0 16384.0 True NaN NaN\n", + "91 openai/gpt-4o-search-preview OpenAI: GPT-4o Search Preview 1741817949 GPT-4o Search Previewis a specialized model fo... 128000 None [web_search_options, max_tokens, response_form... text->text [text] [text] GPT None 0.0000025 0.00001 0.035 0.003613 0 0 128000.0 16384.0 True NaN NaN\n", + "92 rekaai/reka-flash-3:free Reka: Flash 3 (free) 1741812813 Reka Flash 3 is a general-purpose, instruction... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "93 google/gemma-3-27b-it:free Google: Gemma 3 27B (free) 1741756359 Gemma 3 introduces multimodality, supporting v... 96000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0 0 0 0 0 0 96000.0 8192.0 False NaN NaN\n", + "94 google/gemma-3-27b-it Google: Gemma 3 27B 1741756359 Gemma 3 introduces multimodality, supporting v... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini gemma 0.0000001 0.0000002 0 0.0000256 0 0 131072.0 16384.0 False NaN NaN\n", + "95 thedrummer/anubis-pro-105b-v1 TheDrummer: Anubis Pro 105B V1 1741642290 Anubis Pro 105B v1 is an expanded and refined ... 131072 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000008 0.000001 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "96 thedrummer/skyfall-36b-v2 TheDrummer: Skyfall 36B V2 1741636566 Skyfall 36B v2 is an enhanced iteration of Mis... 32768 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Other None 0.0000005 0.0000008 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "97 microsoft/phi-4-multimodal-instruct Microsoft: Phi 4 Multimodal Instruct 1741396284 Phi-4 Multimodal Instruct is a versatile 5.6B ... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Other None 0.00000005 0.0000001 0 0.00017685 0 0 131072.0 NaN False NaN NaN\n", + "98 perplexity/sonar-reasoning-pro Perplexity: Sonar Reasoning Pro 1741313308 Note: Sonar Pro pricing includes Perplexity se... 128000 None [max_tokens, temperature, top_p, reasoning, in... text+image->text [text, image] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0 128000.0 NaN False NaN NaN\n", + "99 perplexity/sonar-pro Perplexity: Sonar Pro 1741312423 Note: Sonar Pro pricing includes Perplexity se... 200000 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000003 0.000015 0 0 0.005 0 200000.0 8000.0 False NaN NaN\n", + "100 perplexity/sonar-deep-research Perplexity: Sonar Deep Research 1741311246 Sonar Deep Research is a research-focused mode... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.000002 0.000008 0 0 0.005 0.000003 128000.0 NaN False NaN NaN\n", + "101 deepseek/deepseek-r1-zero:free DeepSeek: DeepSeek R1 Zero (free) 1741297434 DeepSeek-R1-Zero is a model trained via large-... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "102 qwen/qwq-32b:free Qwen: QwQ 32B (free) 1741208814 QwQ is the reasoning model of the Qwen series.... 40000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0 0 0 0 0 0 40000.0 40000.0 False NaN NaN\n", + "103 qwen/qwq-32b Qwen: QwQ 32B 1741208814 QwQ is the reasoning model of the Qwen series.... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen qwq 0.00000015 0.0000002 0 0 0 0 131072.0 NaN False NaN NaN\n", + "104 moonshotai/moonlight-16b-a3b-instruct:free Moonshot AI: Moonlight 16B A3B Instruct (free) 1740719801 Moonlight-16B-A3B-Instruct is a 16B-parameter ... 
8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 8192.0 NaN False NaN NaN\n", + "105 nousresearch/deephermes-3-llama-3-8b-preview:free Nous: DeepHermes 3 Llama 3 8B Preview (free) 1740719372 DeepHermes 3 Preview is the latest version of ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 131072.0 NaN False NaN NaN\n", + "106 openai/gpt-4.5-preview OpenAI: GPT-4.5 (Preview) 1740687810 GPT-4.5 (Preview) is a research preview of Ope... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.000075 0.00015 0 0.108375 0 0 128000.0 16384.0 True 0.0000375 NaN\n", + "107 google/gemini-2.0-flash-lite-001 Google: Gemini 2.0 Flash Lite 1740506212 Gemini 2.0 Flash Lite offers a significantly f... 1048576 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] Gemini None 0.000000075 0.0000003 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", + "108 anthropic/claude-3.7-sonnet Anthropic: Claude 3.7 Sonnet 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", + "109 anthropic/claude-3.7-sonnet:thinking Anthropic: Claude 3.7 Sonnet (thinking) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 64000.0 False 0.0000003 0.00000375\n", + "110 anthropic/claude-3.7-sonnet:beta Anthropic: Claude 3.7 Sonnet (self-moderated) 1740422110 Claude 3.7 Sonnet is an advanced large languag... 200000 None [max_tokens, temperature, stop, reasoning, inc... 
text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 128000.0 False 0.0000003 0.00000375\n", + "111 perplexity/r1-1776 Perplexity: R1 1776 1740004929 R1 1776 is a version of DeepSeek-R1 that has b... 128000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.000002 0.000008 0 0 0 0 128000.0 NaN False NaN NaN\n", + "112 mistralai/mistral-saba Mistral: Saba 1739803239 Mistral Saba is a 24B-parameter language model... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", + "113 cognitivecomputations/dolphin3.0-r1-mistral-24... Dolphin3.0 R1 Mistral 24B (free) 1739462498 Dolphin 3.0 R1 is the next generation of the D... 32768 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "114 cognitivecomputations/dolphin3.0-mistral-24b:free Dolphin3.0 Mistral 24B (free) 1739462019 Dolphin 3.0 is the next generation of the Dolp... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "115 meta-llama/llama-guard-3-8b Llama Guard 3 8B 1739401318 Llama Guard 3 is a Llama-3.1-8B pretrained mod... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.00000002 0.00000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "116 openai/o3-mini-high OpenAI: o3 Mini High 1739372611 OpenAI o3-mini-high is the same model as [o3-m... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", + "117 deepseek/deepseek-r1-distill-llama-8b DeepSeek: R1 Distill Llama 8B 1738937718 DeepSeek R1 Distill Llama 8B is a distilled la... 32000 None [max_tokens, temperature, top_p, reasoning, in... 
text->text [text] [text] Llama3 deepseek-r1 0.00000004 0.00000004 0 0 0 0 32000.0 32000.0 False NaN NaN\n", + "118 google/gemini-2.0-flash-001 Google: Gemini 2.0 Flash 1738769413 Gemini Flash 2.0 offers a significantly faster... 1000000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] Gemini None 0.0000001 0.0000004 0 0.0000258 0 0 1000000.0 8192.0 False 0.000000025 0.0000001833\n", + "119 qwen/qwen-vl-plus Qwen: Qwen VL Plus 1738731255 Qwen's Enhanced Large Visual Language Model. S... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.00000021 0.00000063 0 0.0002688 0 0 7500.0 1500.0 False NaN NaN\n", + "120 aion-labs/aion-1.0 AionLabs: Aion-1.0 1738697557 Aion-1.0 is a multi-model system designed for ... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.000004 0.000008 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "121 aion-labs/aion-1.0-mini AionLabs: Aion-1.0-Mini 1738697107 Aion-1.0-Mini 32B parameter model is a distill... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other None 0.0000007 0.0000014 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "122 aion-labs/aion-rp-llama-3.1-8b AionLabs: Aion-RP 1.0 (8B) 1738696718 Aion-RP-Llama-3.1-8B ranks the highest in the ... 32768 None [max_tokens, temperature, top_p] text->text [text] [text] Other None 0.0000002 0.0000002 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "123 qwen/qwen-vl-max Qwen: Qwen VL Max 1738434304 Qwen VL Max is a visual understanding model wi... 7500 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0.0000008 0.0000032 0 0.001024 0 0 7500.0 1500.0 False NaN NaN\n", + "124 qwen/qwen-turbo Qwen: Qwen-Turbo 1738410974 Qwen-Turbo, based on Qwen2.5, is a 1M context ... 1000000 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Qwen None 0.00000005 0.0000002 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", + "125 qwen/qwen2.5-vl-72b-instruct:free Qwen: Qwen2.5 VL 72B Instruct (free) 1738410311 Qwen2.5-VL is proficient in recognizing common... 131072 None [max_tokens, temperature, top_p, seed, respons... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", + "126 qwen/qwen2.5-vl-72b-instruct Qwen: Qwen2.5 VL 72B Instruct 1738410311 Qwen2.5-VL is proficient in recognizing common... 32000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.00000025 0.00000075 0 0 0 0 32000.0 NaN False NaN NaN\n", + "127 qwen/qwen-plus Qwen: Qwen-Plus 1738409840 Qwen-Plus, based on the Qwen2.5 foundation mod... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.0000004 0.0000012 0 0 0 0 131072.0 8192.0 False NaN NaN\n", + "128 qwen/qwen-max Qwen: Qwen-Max 1738402289 Qwen-Max, based on Qwen2.5, provides the best ... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Qwen None 0.0000016 0.0000064 0 0 0 0 32768.0 8192.0 False NaN NaN\n", + "129 openai/o3-mini OpenAI: o3 Mini 1738351721 OpenAI o3-mini is a cost-efficient language mo... 200000 None [tools, tool_choice, seed, max_tokens, respons... text->text [text] [text] Other None 0.0000011 0.0000044 0 0 0 0 200000.0 100000.0 True 0.00000055 NaN\n", + "130 deepseek/deepseek-r1-distill-qwen-1.5b DeepSeek: R1 Distill Qwen 1.5B 1738328067 DeepSeek R1 Distill Qwen 1.5B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.00000018 0.00000018 0 0 0 0 131072.0 32768.0 False NaN NaN\n", + "131 mistralai/mistral-small-24b-instruct-2501:free Mistral: Mistral Small 3 (free) 1738255409 Mistral Small 3 is a 24B-parameter language mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Mistral None 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "132 mistralai/mistral-small-24b-instruct-2501 Mistral: Mistral Small 3 1738255409 Mistral Small 3 is a 24B-parameter language mo... 28000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral None 0.00000006 0.00000012 0 0 0 0 28000.0 14000.0 False NaN NaN\n", + "133 deepseek/deepseek-r1-distill-qwen-32b:free DeepSeek: R1 Distill Qwen 32B (free) 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 16000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16000.0 16000.0 False NaN NaN\n", + "134 deepseek/deepseek-r1-distill-qwen-32b DeepSeek: R1 Distill Qwen 32B 1738194830 DeepSeek R1 Distill Qwen 32B is a distilled la... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000012 0.00000018 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "135 deepseek/deepseek-r1-distill-qwen-14b:free DeepSeek: R1 Distill Qwen 14B (free) 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "136 deepseek/deepseek-r1-distill-qwen-14b DeepSeek: R1 Distill Qwen 14B 1738193940 DeepSeek R1 Distill Qwen 14B is a distilled la... 64000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Qwen deepseek-r1 0.00000015 0.00000015 0 0 0 0 64000.0 64000.0 False NaN NaN\n", + "137 perplexity/sonar-reasoning Perplexity: Sonar Reasoning 1738131107 Sonar Reasoning is a reasoning model provided ... 127000 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Other deepseek-r1 0.000001 0.000005 0.005 0 0 0 127000.0 NaN False NaN NaN\n", + "138 perplexity/sonar Perplexity: Sonar 1738013808 Sonar is lightweight, affordable, fast, and si... 
127072 None [max_tokens, temperature, top_p, web_search_op... text+image->text [text, image] [text] Other None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "139 liquid/lfm-7b Liquid: LFM 7B 1737806883 LFM-7B, a new best-in-class language model. LF... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000001 0.00000001 0 0 0 0 32768.0 NaN False NaN NaN\n", + "140 liquid/lfm-3b Liquid: LFM 3B 1737806501 Liquid's LFM 3B delivers incredible performanc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000002 0.00000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "141 deepseek/deepseek-r1-distill-llama-70b:free DeepSeek: R1 Distill Llama 70B (free) 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 8192 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0 0 0 0 0 0 8192.0 4096.0 False NaN NaN\n", + "142 deepseek/deepseek-r1-distill-llama-70b DeepSeek: R1 Distill Llama 70B 1737663169 DeepSeek R1 Distill Llama 70B is a distilled l... 131072 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] Llama3 deepseek-r1 0.0000001 0.0000004 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "143 deepseek/deepseek-r1:free DeepSeek: R1 (free) 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, reasoning, include_reasoning, tem... text->text [text] [text] DeepSeek deepseek-r1 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "144 deepseek/deepseek-r1 DeepSeek: R1 1737381095 DeepSeek R1 is here: Performance on par with [... 163840 None [max_tokens, temperature, top_p, reasoning, in... text->text [text] [text] DeepSeek deepseek-r1 0.0000005 0.00000218 0 0 0 0 163840.0 163840.0 False NaN NaN\n", + "145 minimax/minimax-01 MiniMax: MiniMax-01 1736915462 MiniMax-01 is a combines MiniMax-Text-01 for t... 
1000192 None [max_tokens, temperature, top_p] text+image->text [text, image] [text] Other None 0.0000002 0.0000011 0 0 0 0 1000192.0 1000192.0 False NaN NaN\n", + "146 mistralai/codestral-2501 Mistral: Codestral 2501 1736895522 [Mistral](/mistralai)'s cutting-edge language ... 262144 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000003 0.0000009 0 0 0 0 262144.0 NaN False NaN NaN\n", + "147 microsoft/phi-4 Microsoft: Phi 4 1736489872 [Microsoft Research](/microsoft) Phi-4 is desi... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000007 0.00000014 0 0 0 0 16384.0 16384.0 False NaN NaN\n", + "148 deepseek/deepseek-chat:free DeepSeek: DeepSeek V3 (free) 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0 0 0 0 0 0 163840.0 NaN False NaN NaN\n", + "149 deepseek/deepseek-chat DeepSeek: DeepSeek V3 1735241320 DeepSeek-V3 is the latest model from the DeepS... 163840 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] DeepSeek None 0.00000038 0.00000089 0 0 0 0 163840.0 163840.0 False NaN NaN\n", + "150 sao10k/l3.3-euryale-70b Sao10K: Llama 3.3 Euryale 70B 1734535928 Euryale L3.3 70B is a model focused on creativ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "151 openai/o1 OpenAI: o1 1734459999 The latest and strongest model family from Ope... 200000 None [tools, tool_choice, seed, max_tokens, respons... text+image->text [text, image] [text] GPT None 0.000015 0.00006 0 0.021675 0 0 200000.0 100000.0 True 0.0000075 NaN\n", + "152 eva-unit-01/eva-llama-3.33-70b EVA Llama 3.33 70B 1734377303 EVA Llama 3.33 70b is a roleplay and storywrit... 16384 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "153 x-ai/grok-2-vision-1212 xAI: Grok 2 Vision 1212 1734237338 Grok 2 Vision 1212 advances image-based AI wit... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000002 0.00001 0 0.0036 0 0 32768.0 NaN False NaN NaN\n", + "154 x-ai/grok-2-1212 xAI: Grok 2 1212 1734232814 Grok 2 1212 introduces significant enhancement... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Grok None 0.000002 0.00001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "155 cohere/command-r7b-12-2024 Cohere: Command R7B (12-2024) 1734158152 Command R7B (12-2024) is a small, fast update ... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Cohere None 0.0000000375 0.00000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "156 google/gemini-2.0-flash-exp:free Google: Gemini 2.0 Flash Experimental (free) 1733937523 Gemini Flash 2.0 offers a significantly faster... 1048576 None [max_tokens, temperature, top_p, stop] text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1048576.0 8192.0 False NaN NaN\n", + "157 meta-llama/llama-3.3-70b-instruct:free Meta: Llama 3.3 70B Instruct (free) 1733506137 The Meta Llama 3.3 multilingual large language... 8000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 8000.0 8000.0 False NaN NaN\n", + "158 meta-llama/llama-3.3-70b-instruct Meta: Llama 3.3 70B Instruct 1733506137 The Meta Llama 3.3 multilingual large language... 131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009 0.00000035 0 0 0 0 131000.0 131000.0 False NaN NaN\n", + "159 amazon/nova-lite-v1 Amazon: Nova Lite 1.0 1733437363 Amazon Nova Lite 1.0 is a very low-cost multim... 300000 None [tools, max_tokens, temperature, top_p, top_k,... 
text+image->text [text, image] [text] Nova None 0.00000006 0.00000024 0 0.00009 0 0 300000.0 5120.0 True NaN NaN\n", + "160 amazon/nova-micro-v1 Amazon: Nova Micro 1.0 1733437237 Amazon Nova Micro 1.0 is a text-only model tha... 128000 None [tools, max_tokens, temperature, top_p, top_k,... text->text [text] [text] Nova None 0.000000035 0.00000014 0 0 0 0 128000.0 5120.0 True NaN NaN\n", + "161 amazon/nova-pro-v1 Amazon: Nova Pro 1.0 1733436303 Amazon Nova Pro 1.0 is a capable multimodal mo... 300000 None [tools, max_tokens, temperature, top_p, top_k,... text+image->text [text, image] [text] Nova None 0.0000008 0.0000032 0 0.0012 0 0 300000.0 5120.0 True NaN NaN\n", + "162 qwen/qwq-32b-preview:free Qwen: QwQ 32B Preview (free) 1732754541 QwQ-32B-Preview is an experimental research mo... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0 0 0 0 0 0 16384.0 NaN False NaN NaN\n", + "163 qwen/qwq-32b-preview Qwen: QwQ 32B Preview 1732754541 QwQ-32B-Preview is an experimental research mo... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen deepseek-r1 0.00000009 0.00000027 0 0 0 0 32768.0 NaN False NaN NaN\n", + "164 google/learnlm-1.5-pro-experimental:free Google: LearnLM 1.5 Pro Experimental (free) 1732216551 An experimental version of [Gemini 1.5 Pro](/g... 40960 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 40960.0 8192.0 False NaN NaN\n", + "165 eva-unit-01/eva-qwen-2.5-72b EVA Qwen2.5 72B 1732210606 EVA Qwen2.5 72B is a roleplay and storywriting... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "166 openai/gpt-4o-2024-11-20 OpenAI: GPT-4o (2024-11-20) 1732127594 The 2024-11-20 version of GPT-4o offers a leve... 128000 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "167 mistralai/mistral-large-2411 Mistral Large 2411 1731978685 Mistral Large 2 2411 is an update of [Mistral ... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "168 mistralai/mistral-large-2407 Mistral Large 2407 1731978415 This is Mistral AI's flagship model, Mistral L... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 131072.0 NaN False NaN NaN\n", + "169 mistralai/pixtral-large-2411 Mistral: Pixtral Large 2411 1731977388 Pixtral Large is a 124B parameter, open-weight... 131072 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Mistral None 0.000002 0.000006 0 0.002888 0 0 131072.0 NaN False NaN NaN\n", + "170 x-ai/grok-vision-beta xAI: Grok Vision Beta 1731976624 Grok Vision Beta is xAI's experimental languag... 8192 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Grok None 0.000005 0.000015 0 0.009 0 0 8192.0 NaN False NaN NaN\n", + "171 infermatic/mn-inferor-12b Infermatic: Mistral Nemo Inferor 12B 1731464428 Inferor 12B is a merge of top roleplay models,... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "172 qwen/qwen-2.5-coder-32b-instruct:free Qwen2.5 Coder 32B Instruct (free) 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "173 qwen/qwen-2.5-coder-32b-instruct Qwen2.5 Coder 32B Instruct 1731368400 Qwen2.5-Coder is the latest series of Code-Spe... 
32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000006 0.00000015 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "174 raifle/sorcererlm-8x22b SorcererLM 8x22B 1731105083 SorcererLM is an advanced RP and storytelling ... 16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral vicuna 0.0000045 0.0000045 0 0 0 0 16000.0 NaN False NaN NaN\n", + "175 eva-unit-01/eva-qwen-2.5-32b EVA Qwen2.5 32B 1731104847 EVA Qwen2.5 32B is a roleplaying/storywriting ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000026 0.0000034 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "176 thedrummer/unslopnemo-12b Unslopnemo 12B 1731103448 UnslopNemo v4.1 is the latest addition from th... 32000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000045 0.00000045 0 0 0 0 32000.0 16000.0 False NaN NaN\n", + "177 anthropic/claude-3.5-haiku:beta Anthropic: Claude 3.5 Haiku (self-moderated) 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", + "178 anthropic/claude-3.5-haiku Anthropic: Claude 3.5 Haiku 1730678400 Claude 3.5 Haiku features offers enhanced capa... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", + "179 anthropic/claude-3.5-haiku-20241022:beta Anthropic: Claude 3.5 Haiku (2024-10-22) (self... 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 False 0.00000008 0.000001\n", + "180 anthropic/claude-3.5-haiku-20241022 Anthropic: Claude 3.5 Haiku (2024-10-22) 1730678400 Claude 3.5 Haiku features enhancements across ... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.0000008 0.000004 0 0 0 0 200000.0 8192.0 True 0.00000008 0.000001\n", + "181 neversleep/llama-3.1-lumimaid-70b NeverSleep: Lumimaid v0.2 70B 1729555200 Lumimaid v0.2 70B is a finetune of [Llama 3.1 ... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000015 0.00000225 0 0 0 0 16384.0 2048.0 False NaN NaN\n", + "182 anthracite-org/magnum-v4-72b Magnum v4 72B 1729555200 This is a series of models designed to replica... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000015 0.00000225 0 0 0 0 16384.0 1024.0 False NaN NaN\n", + "183 anthropic/claude-3.5-sonnet:beta Anthropic: Claude 3.5 Sonnet (self-moderated) 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", + "184 anthropic/claude-3.5-sonnet Anthropic: Claude 3.5 Sonnet 1729555200 New Claude 3.5 Sonnet delivers better-than-Opu... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", + "185 x-ai/grok-beta xAI: Grok Beta 1729382400 Grok Beta is xAI's experimental language model... 131072 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Grok None 0.000005 0.000015 0 0 0 0 131072.0 NaN False NaN NaN\n", + "186 mistralai/ministral-8b Mistral: Ministral 8B 1729123200 Ministral 8B is an 8B parameter model featurin... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", + "187 mistralai/ministral-3b Mistral: Ministral 3B 1729123200 Ministral 3B is a 3B parameter model optimized... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000004 0.00000004 0 0 0 0 131072.0 NaN False NaN NaN\n", + "188 qwen/qwen-2.5-7b-instruct:free Qwen2.5 7B Instruct (free) 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 32768.0 False NaN NaN\n", + "189 qwen/qwen-2.5-7b-instruct Qwen2.5 7B Instruct 1729036800 Qwen2.5 7B is the latest series of Qwen large ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000005 0.0000001 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "190 nvidia/llama-3.1-nemotron-70b-instruct NVIDIA: Llama 3.1 Nemotron 70B Instruct 1728950400 NVIDIA's Llama 3.1 Nemotron 70B is a language ... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "191 inflection/inflection-3-productivity Inflection: Inflection 3 Productivity 1728604800 Inflection 3 Productivity is optimized for fol... 8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", + "192 inflection/inflection-3-pi Inflection: Inflection 3 Pi 1728604800 Inflection 3 Pi powers Inflection's [Pi](https... 
8000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000025 0.00001 0 0 0 0 8000.0 1024.0 False NaN NaN\n", + "193 google/gemini-flash-1.5-8b Google: Gemini 1.5 Flash 8B 1727913600 Gemini Flash 1.5 8B is optimized for speed and... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.0000000375 0.00000015 0 0 0 0 1000000.0 8192.0 False 0.00000001 0.0000000583\n", + "194 thedrummer/rocinante-12b Rocinante 12B 1727654400 Rocinante 12B is designed for engaging storyte... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.00000025 0.0000005 0 0 0 0 32768.0 NaN False NaN NaN\n", + "195 anthracite-org/magnum-v2-72b Magnum v2 72B 1727654400 From the maker of [Goliath](https://openrouter... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000003 0.000003 0 0 0 0 32768.0 NaN False NaN NaN\n", + "196 liquid/lfm-40b Liquid: LFM 40B MoE 1727654400 Liquid's 40.3B Mixture of Experts (MoE) model.... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other chatml 0.00000015 0.00000015 0 0 0 0 32768.0 NaN False NaN NaN\n", + "197 meta-llama/llama-3.2-3b-instruct:free Meta: Llama 3.2 3B Instruct (free) 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 20000 None [max_tokens, temperature, top_p] text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 20000.0 20000.0 False NaN NaN\n", + "198 meta-llama/llama-3.2-3b-instruct Meta: Llama 3.2 3B Instruct 1727222400 Llama 3.2 3B is a 3-billion-parameter multilin... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000001 0.00000002 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "199 meta-llama/llama-3.2-1b-instruct:free Meta: Llama 3.2 1B Instruct (free) 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 
131000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131000.0 NaN False NaN NaN\n", + "200 meta-llama/llama-3.2-1b-instruct Meta: Llama 3.2 1B Instruct 1727222400 Llama 3.2 1B is a 1-billion-parameter language... 131072 None [max_tokens, temperature, top_p, top_k, stop, ... text->text [text] [text] Llama3 llama3 0.000000005 0.00000001 0 0 0 0 131072.0 NaN False NaN NaN\n", + "201 meta-llama/llama-3.2-90b-vision-instruct Meta: Llama 3.2 90B Vision Instruct 1727222400 The Llama 90B Vision model is a top-tier, 90-b... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.0000012 0.0000012 0 0.001734 0 0 131072.0 2048.0 False NaN NaN\n", + "202 meta-llama/llama-3.2-11b-vision-instruct:free Meta: Llama 3.2 11B Vision Instruct (free) 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 2048.0 False NaN NaN\n", + "203 meta-llama/llama-3.2-11b-vision-instruct Meta: Llama 3.2 11B Vision Instruct 1727222400 Llama 3.2 11B Vision is a multimodal model wit... 131072 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Llama3 llama3 0.000000049 0.000000049 0 0.00007948 0 0 131072.0 16384.0 False NaN NaN\n", + "204 qwen/qwen-2.5-72b-instruct:free Qwen2.5 72B Instruct (free) 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0 0 0 0 0 0 32768.0 NaN False NaN NaN\n", + "205 qwen/qwen-2.5-72b-instruct Qwen2.5 72B Instruct 1726704000 Qwen2.5 72B is the latest series of Qwen large... 32768 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Qwen chatml 0.00000012 0.00000039 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "206 qwen/qwen-2.5-vl-72b-instruct Qwen: Qwen2.5-VL 72B Instruct 1726617600 Qwen2.5 VL 72B is a multimodal LLM from the Qw... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000006 0.0000006 0 0.000578 0 0 32768.0 NaN False NaN NaN\n", + "207 neversleep/llama-3.1-lumimaid-8b NeverSleep: Lumimaid v0.2 8B 1726358400 Lumimaid v0.2 8B is a finetune of [Llama 3.1 8... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 32768.0 2048.0 False NaN NaN\n", + "208 openai/o1-preview OpenAI: o1-preview 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", + "209 openai/o1-preview-2024-09-12 OpenAI: o1-preview (2024-09-12) 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.000015 0.00006 0 0 0 0 128000.0 32768.0 True 0.0000075 NaN\n", + "210 openai/o1-mini OpenAI: o1-mini 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", + "211 openai/o1-mini-2024-09-12 OpenAI: o1-mini (2024-09-12) 1726099200 The latest and strongest model family from Ope... 128000 None [seed, max_tokens] text->text [text] [text] GPT None 0.0000011 0.0000044 0 0 0 0 128000.0 65536.0 True 0.00000055 NaN\n", + "212 mistralai/pixtral-12b Mistral: Pixtral 12B 1725926400 The first multi-modal, text+image-to-text mode... 32768 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image] [text] Mistral None 0.0000001 0.0000001 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", + "213 cohere/command-r-plus-08-2024 Cohere: Command R+ (08-2024) 1724976000 command-r-plus-08-2024 is an update of the [Co... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000025 0.00001 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "214 cohere/command-r-08-2024 Cohere: Command R (08-2024) 1724976000 command-r-08-2024 is an update of the [Command... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.00000015 0.0000006 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "215 qwen/qwen-2.5-vl-7b-instruct:free Qwen: Qwen2.5-VL 7B Instruct (free) 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 64000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0 0 0 0 0 0 64000.0 64000.0 False NaN NaN\n", + "216 qwen/qwen-2.5-vl-7b-instruct Qwen: Qwen2.5-VL 7B Instruct 1724803200 Qwen2.5 VL 7B is a multimodal LLM from the Qwe... 32768 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Qwen None 0.0000002 0.0000002 0 0.0001445 0 0 32768.0 NaN False NaN NaN\n", + "217 sao10k/l3.1-euryale-70b Sao10K: Llama 3.1 Euryale 70B v2.2 1724803200 Euryale L3.1 70B v2.2 is a model focused on cr... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000007 0.0000008 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "218 google/gemini-flash-1.5-8b-exp Google: Gemini 1.5 Flash 8B Experimental 1724803200 Gemini Flash 1.5 8B Experimental is an experim... 1000000 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image] [text] Gemini None 0 0 0 0 0 0 1000000.0 8192.0 False NaN NaN\n", + "219 microsoft/phi-3.5-mini-128k-instruct Microsoft: Phi-3.5 Mini 128K Instruct 1724198400 Phi-3.5 models are lightweight, state-of-the-a... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.00000003 0.00000009 0 0 0 0 131072.0 NaN False NaN NaN\n", + "220 nousresearch/hermes-3-llama-3.1-70b Nous: Hermes 3 70B Instruct 1723939200 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.00000012 0.0000003 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "221 nousresearch/hermes-3-llama-3.1-405b Nous: Hermes 3 405B Instruct 1723766400 Hermes 3 is a generalist language model with m... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.0000008 0.0000008 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "222 openai/chatgpt-4o-latest OpenAI: ChatGPT-4o 1723593600 OpenAI ChatGPT 4o is continually updated by Op... 128000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 16384.0 True NaN NaN\n", + "223 sao10k/l3-lunaris-8b Sao10K: Llama 3 8B Lunaris 1723507200 Lunaris 8B is a versatile generalist and rolep... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000005 0 0 0 0 8192.0 NaN False NaN NaN\n", + "224 aetherwiing/mn-starcannon-12b Aetherwiing: Starcannon 12B 1723507200 Starcannon 12B v2 is a creative roleplay and s... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "225 openai/gpt-4o-2024-08-06 OpenAI: GPT-4o (2024-08-06) 1722902400 The 2024-08-06 version of GPT-4o offers improv... 
128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "226 meta-llama/llama-3.1-405b:free Meta: Llama 3.1 405B (base) (free) 1722556800 Meta's latest class of model (Llama 3.1) launc... 64000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0 0 0 0 0 0 64000.0 NaN False NaN NaN\n", + "227 meta-llama/llama-3.1-405b Meta: Llama 3.1 405B (base) 1722556800 Meta's latest class of model (Llama 3.1) launc... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.000002 0.000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "228 nothingiisreal/mn-celeste-12b Mistral Nemo 12B Celeste 1722556800 A specialized story writing and roleplaying mo... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000008 0.0000012 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "229 perplexity/llama-3.1-sonar-small-128k-online Perplexity: Llama 3.1 Sonar 8B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.0000002 0.0000002 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "230 perplexity/llama-3.1-sonar-large-128k-online Perplexity: Llama 3.1 Sonar 70B Online 1722470400 Llama 3.1 Sonar is Perplexity's latest model f... 127072 None [max_tokens, temperature, top_p, top_k, freque... text->text [text] [text] Llama3 None 0.000001 0.000001 0.005 0 0 0 127072.0 NaN False NaN NaN\n", + "231 meta-llama/llama-3.1-8b-instruct:free Meta: Llama 3.1 8B Instruct (free) 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama3 llama3 0 0 0 0 0 0 131072.0 4096.0 False NaN NaN\n", + "232 meta-llama/llama-3.1-8b-instruct Meta: Llama 3.1 8B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000002 0.00000003 0 0 0 0 16384.0 16384.0 False NaN NaN\n", + "233 meta-llama/llama-3.1-405b-instruct Meta: Llama 3.1 405B Instruct 1721692800 The highly anticipated 400B class of Llama3 is... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000008 0.0000008 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "234 meta-llama/llama-3.1-70b-instruct Meta: Llama 3.1 70B Instruct 1721692800 Meta's latest class of model (Llama 3.1) launc... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Llama3 llama3 0.0000001 0.00000028 0 0 0 0 131072.0 16384.0 False NaN NaN\n", + "235 mistralai/codestral-mamba Mistral: Codestral Mamba 1721347200 A 7.3B parameter Mamba-based model designed fo... 262144 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 262144.0 NaN False NaN NaN\n", + "236 mistralai/mistral-nemo:free Mistral: Mistral Nemo (free) 1721347200 A 12B parameter model with a 128k token contex... 128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0 0 0 0 0 0 128000.0 128000.0 False NaN NaN\n", + "237 mistralai/mistral-nemo Mistral: Mistral Nemo 1721347200 A 12B parameter model with a 128k token contex... 98304 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.00000003 0.00000007 0 0 0 0 98304.0 49152.0 False NaN NaN\n", + "238 openai/gpt-4o-mini OpenAI: GPT-4o-mini 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [max_tokens, temperature, top_p, stop, frequen... 
text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.000217 0 0 128000.0 16384.0 True 0.000000075 NaN\n", + "239 openai/gpt-4o-mini-2024-07-18 OpenAI: GPT-4o-mini (2024-07-18) 1721260800 GPT-4o mini is OpenAI's newest model after [GP... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.00000015 0.0000006 0 0.007225 0 0 128000.0 16384.0 True 0.000000075 NaN\n", + "240 google/gemma-2-27b-it Google: Gemma 2 27B 1720828800 Gemma 2 27B by Google is an open model built f... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.0000001 0.0000003 0 0 0 0 8192.0 NaN False NaN NaN\n", + "241 alpindale/magnum-72b Magnum 72B 1720656000 From the maker of [Goliath](https://openrouter... 16384 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.000004 0.000006 0 0 0 0 16384.0 4096.0 False NaN NaN\n", + "242 google/gemma-2-9b-it:free Google: Gemma 2 9B (free) 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0 0 0 0 0 0 8192.0 8192.0 False NaN NaN\n", + "243 google/gemma-2-9b-it Google: Gemma 2 9B 1719532800 Gemma 2 9B by Google is an advanced, open-sour... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Gemini gemma 0.00000002 0.00000006 0 0 0 0 8192.0 NaN False NaN NaN\n", + "244 01-ai/yi-large 01.AI: Yi Large 1719273600 The Yi Large model was designed by 01.AI with ... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Yi None 0.000003 0.000003 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "245 ai21/jamba-instruct AI21: Jamba Instruct 1719273600 The Jamba-Instruct model, introduced by AI21 L... 
256000 None [max_tokens, temperature, top_p, stop] text->text [text] [text] Other None 0.0000005 0.0000007 0 0 0 0 256000.0 4096.0 False NaN NaN\n", + "246 anthropic/claude-3.5-sonnet-20240620:beta Anthropic: Claude 3.5 Sonnet (2024-06-20) (sel... 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 False 0.0000003 0.00000375\n", + "247 anthropic/claude-3.5-sonnet-20240620 Anthropic: Claude 3.5 Sonnet (2024-06-20) 1718841600 Claude 3.5 Sonnet delivers better-than-Opus ca... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 8192.0 True 0.0000003 0.00000375\n", + "248 sao10k/l3-euryale-70b Sao10k: Llama 3 Euryale 70B v2.1 1718668800 Euryale 70B v2.1 is a model focused on creativ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000148 0.00000148 0 0 0 0 8192.0 8192.0 False NaN NaN\n", + "249 cognitivecomputations/dolphin-mixtral-8x22b Dolphin 2.9.2 Mixtral 8x22B 🐬 1717804800 Dolphin 2.9 is designed for instruction follow... 16000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000009 0.0000009 0 0 0 0 16000.0 NaN False NaN NaN\n", + "250 qwen/qwen-2-72b-instruct Qwen 2 72B Instruct 1717718400 Qwen2 72B is a transformer-based model that ex... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Qwen chatml 0.0000009 0.0000009 0 0 0 0 32768.0 4096.0 False NaN NaN\n", + "251 mistralai/mistral-7b-instruct:free Mistral: Mistral 7B Instruct (free) 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Mistral mistral 0 0 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "252 mistralai/mistral-7b-instruct Mistral: Mistral 7B Instruct 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "253 nousresearch/hermes-2-pro-llama-3-8b NousResearch: Hermes 2 Pro - Llama-3 8B 1716768000 Hermes 2 Pro is an upgraded, retrained version... 131072 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 chatml 0.000000025 0.00000004 0 0 0 0 131072.0 131072.0 False NaN NaN\n", + "254 mistralai/mistral-7b-instruct-v0.3 Mistral: Mistral 7B Instruct v0.3 1716768000 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.000000028 0.000000054 0 0 0 0 32768.0 16384.0 False NaN NaN\n", + "255 microsoft/phi-3-mini-128k-instruct Microsoft: Phi-3 Mini 128K Instruct 1716681600 Phi-3 Mini is a powerful 3.8B parameter model ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.0000001 0.0000001 0 0 0 0 128000.0 NaN False NaN NaN\n", + "256 microsoft/phi-3-medium-128k-instruct Microsoft: Phi-3 Medium 128K Instruct 1716508800 Phi-3 128K Medium is a powerful 14-billion par... 131072 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Other phi3 0.0000001 0.0000003 0 0 0 0 131072.0 NaN False NaN NaN\n", + "257 neversleep/llama-3-lumimaid-70b NeverSleep: Llama 3 Lumimaid 70B 1715817600 The NeverSleep team is back, with a Llama 3 70... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.000004 0.000006 0 0 0 0 8192.0 4096.0 False NaN NaN\n", + "258 deepseek/deepseek-coder DeepSeek-Coder-V2 1715644800 DeepSeek-Coder-V2, an open-source Mixture-of-E... 
128000 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Other None 0.00000004 0.00000012 0 0 0 0 128000.0 NaN False NaN NaN\n", + "259 google/gemini-flash-1.5 Google: Gemini 1.5 Flash 1715644800 Gemini 1.5 Flash is a foundation model that pe... 1000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.000000075 0.0000003 0 0.00004 0 0 1000000.0 8192.0 False 0.00000001875 0.0000001583\n", + "260 openai/gpt-4o OpenAI: GPT-4o 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.0000025 0.00001 0 0.003613 0 0 128000.0 16384.0 True 0.00000125 NaN\n", + "261 openai/gpt-4o:extended OpenAI: GPT-4o (extended) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000006 0.000018 0 0.007225 0 0 128000.0 64000.0 True NaN NaN\n", + "262 meta-llama/llama-guard-2-8b Meta: LlamaGuard 2 8B 1715558400 This safeguard model has 8B parameters and is ... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 none 0.0000002 0.0000002 0 0 0 0 8192.0 NaN False NaN NaN\n", + "263 openai/gpt-4o-2024-05-13 OpenAI: GPT-4o (2024-05-13) 1715558400 GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI ... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image, file] [text] GPT None 0.000005 0.000015 0 0.007225 0 0 128000.0 4096.0 True NaN NaN\n", + "264 allenai/olmo-7b-instruct OLMo 7B Instruct 1715299200 OLMo 7B Instruct by the Allen Institute for AI... 2048 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Other zephyr 0.00000008 0.00000024 0 0 0 0 2048.0 NaN False NaN NaN\n", + "265 neversleep/llama-3-lumimaid-8b:extended NeverSleep: Llama 3 Lumimaid 8B (extended) 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", + "266 neversleep/llama-3-lumimaid-8b NeverSleep: Llama 3 Lumimaid 8B 1714780800 The NeverSleep team is back, with a Llama 3 8B... 24576 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.00000009375 0.00000075 0 0 0 0 24576.0 2048.0 False NaN NaN\n", + "267 sao10k/fimbulvetr-11b-v2 Fimbulvetr 11B v2 1713657600 Creative writing model, routed with permission... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "268 meta-llama/llama-3-8b-instruct Meta: Llama 3 8B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, top_k, seed, ... text->text [text] [text] Llama3 llama3 0.00000003 0.00000006 0 0 0 0 8192.0 16384.0 False NaN NaN\n", + "269 meta-llama/llama-3-70b-instruct Meta: Llama 3 70B Instruct 1713398400 Meta's latest class of model (Llama 3) launche... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama3 llama3 0.0000003 0.0000004 0 0 0 0 8192.0 16384.0 False NaN NaN\n", + "270 mistralai/mixtral-8x22b-instruct Mistral: Mixtral 8x22B Instruct 1713312000 Mistral's official instruct fine-tuned version... 65536 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.0000004 0.0000012 0 0 0 0 65536.0 NaN False NaN NaN\n", + "271 microsoft/wizardlm-2-8x22b WizardLM-2 8x22B 1713225600 WizardLM-2 8x22B is Microsoft AI's most advanc... 
65536 None [max_tokens, temperature, top_p, presence_pena... text->text [text] [text] Mistral vicuna 0.0000005 0.0000005 0 0 0 0 65536.0 16384.0 False NaN NaN\n", + "272 google/gemini-pro-1.5 Google: Gemini 1.5 Pro 1712620800 Google's latest multimodal model, supports ima... 2000000 None [max_tokens, temperature, top_p, stop, frequen... text+image->text [text, image] [text] Gemini None 0.00000125 0.000005 0 0.0006575 0 0 2000000.0 8192.0 False NaN NaN\n", + "273 openai/gpt-4-turbo OpenAI: GPT-4 Turbo 1712620800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] GPT None 0.00001 0.00003 0 0.01445 0 0 128000.0 4096.0 True NaN NaN\n", + "274 cohere/command-r-plus Cohere: Command R+ 1712188800 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "275 cohere/command-r-plus-04-2024 Cohere: Command R+ (04-2024) 1712016000 Command R+ is a new, 104B-parameter LLM from C... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.000003 0.000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "276 sophosympatheia/midnight-rose-70b Midnight Rose 70B 1711065600 A merge with a complex family tree, this model... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000008 0.0000008 0 0 0 0 4096.0 NaN False NaN NaN\n", + "277 cohere/command Cohere: Command 1710374400 Command is an instruction-following conversati... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Cohere None 0.000001 0.000002 0 0 0 0 4096.0 4000.0 False NaN NaN\n", + "278 cohere/command-r Cohere: Command R 1710374400 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... 
text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "279 anthropic/claude-3-haiku:beta Anthropic: Claude 3 Haiku (self-moderated) 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 False 0.00000003 0.0000003\n", + "280 anthropic/claude-3-haiku Anthropic: Claude 3 Haiku 1710288000 Claude 3 Haiku is Anthropic's fastest and most... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.00000025 0.00000125 0 0.0004 0 0 200000.0 4096.0 True 0.00000003 0.0000003\n", + "281 anthropic/claude-3-opus:beta Anthropic: Claude 3 Opus (self-moderated) 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 False 0.0000015 0.00001875\n", + "282 anthropic/claude-3-opus Anthropic: Claude 3 Opus 1709596800 Claude 3 Opus is Anthropic's most powerful mod... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000015 0.000075 0 0.024 0 0 200000.0 4096.0 True 0.0000015 0.00001875\n", + "283 anthropic/claude-3-sonnet:beta Anthropic: Claude 3 Sonnet (self-moderated) 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 False 0.0000003 0.00000375\n", + "284 anthropic/claude-3-sonnet Anthropic: Claude 3 Sonnet 1709596800 Claude 3 Sonnet is an ideal balance of intelli... 200000 None [tools, tool_choice, max_tokens, temperature, ... 
text+image->text [text, image] [text] Claude None 0.000003 0.000015 0 0.0048 0 0 200000.0 4096.0 True 0.0000003 0.00000375\n", + "285 cohere/command-r-03-2024 Cohere: Command R (03-2024) 1709341200 Command-R is a 35B parameter model that perfor... 128000 None [tools, max_tokens, temperature, top_p, stop, ... text->text [text] [text] Cohere None 0.0000005 0.0000015 0 0 0 0 128000.0 4000.0 False NaN NaN\n", + "286 mistralai/mistral-large Mistral Large 1708905600 This is Mistral AI's flagship model, Mistral L... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.000002 0.000006 0 0 0 0 128000.0 NaN False NaN NaN\n", + "287 openai/gpt-3.5-turbo-0613 OpenAI: GPT-3.5 Turbo (older v0613) 1706140800 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 4095 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 4095.0 4096.0 False NaN NaN\n", + "288 openai/gpt-4-turbo-preview OpenAI: GPT-4 Turbo Preview 1706140800 The preview GPT-4 model with improved instruct... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", + "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo Nous: Hermes 2 Mixtral 8x7B DPO 1705363200 Nous Hermes 2 Mixtral 8x7B DPO is the new flag... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral chatml 0.0000006 0.0000006 0 0 0 0 32768.0 2048.0 False NaN NaN\n", + "290 mistralai/mistral-medium Mistral Medium 1704844800 This is Mistral AI's closed-source, medium-sid... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000275 0.0000081 0 0 0 0 32768.0 NaN False NaN NaN\n", + "291 mistralai/mistral-small Mistral Small 1704844800 With 22 billion parameters, Mistral Small v24.... 32768 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] Mistral None 0.0000002 0.0000006 0 0 0 0 32768.0 NaN False NaN NaN\n", + "292 mistralai/mistral-tiny Mistral Tiny 1704844800 Note: This model is being deprecated. Recommen... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral None 0.00000025 0.00000025 0 0 0 0 32768.0 NaN False NaN NaN\n", + "293 mistralai/mistral-7b-instruct-v0.2 Mistral: Mistral 7B Instruct v0.2 1703721600 A high-performing, industry-standard 7.3B para... 32768 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral mistral 0.0000002 0.0000002 0 0 0 0 32768.0 NaN False NaN NaN\n", + "294 mistralai/mixtral-8x7b-instruct Mistral: Mixtral 8x7B Instruct 1702166400 Mixtral 8x7B Instruct is a pretrained generati... 32768 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000008 0.00000024 0 0 0 0 32768.0 NaN False NaN NaN\n", + "295 neversleep/noromaid-20b Noromaid 20B 1700956800 A collab between IkariDev and Undi. This merge... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.00000075 0.0000015 0 0 0 0 8192.0 2048.0 False NaN NaN\n", + "296 anthropic/claude-2.1:beta Anthropic: Claude v2.1 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", + "297 anthropic/claude-2.1 Anthropic: Claude v2.1 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", + "298 anthropic/claude-2:beta Anthropic: Claude v2 (self-moderated) 1700611200 Claude 2 delivers advancements in key capabili... 
200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 False NaN NaN\n", + "299 anthropic/claude-2 Anthropic: Claude v2 1700611200 Claude 2 delivers advancements in key capabili... 200000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 200000.0 4096.0 True NaN NaN\n", + "300 undi95/toppy-m-7b Toppy M 7B 1699574400 A wild 7B parameter model that merges several ... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Mistral alpaca 0.0000008 0.0000012 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "301 alpindale/goliath-120b Goliath 120B 1699574400 A large LLM created by combining two fine-tune... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 airoboros 0.0000065625 0.000009375 0 0 0 0 6144.0 512.0 False NaN NaN\n", + "302 openrouter/auto Auto Router 1699401600 Your prompt will be processed by a meta-model ... 2000000 None [] text->text [text] [text] Router None -1 -1 NaN NaN NaN NaN NaN NaN False NaN NaN\n", + "303 openai/gpt-3.5-turbo-1106 OpenAI: GPT-3.5 Turbo 16k (older v1106) 1699228800 An older GPT-3.5 Turbo model with improved ins... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000001 0.000002 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "304 openai/gpt-4-1106-preview OpenAI: GPT-4 Turbo (older v1106) 1699228800 The latest GPT-4 Turbo model with vision capab... 128000 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00001 0.00003 0 0 0 0 128000.0 4096.0 True NaN NaN\n", + "305 jondurbin/airoboros-l2-70b Airoboros 70B 1698537600 A Llama 2 70B fine-tune using synthetic data (... 4096 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 airoboros 0.0000005 0.0000005 0 0 0 0 4096.0 NaN False NaN NaN\n", + "306 openai/gpt-3.5-turbo-instruct OpenAI: GPT-3.5 Turbo Instruct 1695859200 This model is a variant of GPT-3.5 Turbo tuned... 4095 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] GPT chatml 0.0000015 0.000002 0 0 0 0 4095.0 4096.0 True NaN NaN\n", + "307 mistralai/mistral-7b-instruct-v0.1 Mistral: Mistral 7B Instruct v0.1 1695859200 A 7.3B parameter model that outperforms Llama ... 2824 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] Mistral mistral 0.00000011 0.00000019 0 0 0 0 2824.0 NaN False NaN NaN\n", + "308 pygmalionai/mythalion-13b Pygmalion: Mythalion 13B 1693612800 A blend of the new Pygmalion-13b and MythoMax.... 8192 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 8192.0 1024.0 False NaN NaN\n", + "309 openai/gpt-3.5-turbo-16k OpenAI: GPT-3.5 Turbo 16k 1693180800 This model offers four times the context lengt... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.000003 0.000004 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "310 openai/gpt-4-32k OpenAI: GPT-4 32k 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", + "311 openai/gpt-4-32k-0314 OpenAI: GPT-4 32k (older v0314) 1693180800 GPT-4-32k is an extended version of GPT-4, wit... 32767 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00006 0.00012 0 0 0 0 32767.0 4096.0 True NaN NaN\n", + "312 mancer/weaver Mancer: Weaver (alpha) 1690934400 An attempt to recreate Claude-style verbosity,... 8000 None [max_tokens, temperature, top_p, stop, frequen... 
text->text [text] [text] Llama2 alpaca 0.000001125 0.000001125 0 0 0 0 8000.0 1000.0 False NaN NaN\n", + "313 anthropic/claude-2.0:beta Anthropic: Claude v2.0 (self-moderated) 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 False NaN NaN\n", + "314 anthropic/claude-2.0 Anthropic: Claude v2.0 1690502400 Anthropic's flagship model. Superior performan... 100000 None [max_tokens, temperature, top_p, top_k, stop] text->text [text] [text] Claude None 0.000008 0.000024 0 0 0 0 100000.0 4096.0 True NaN NaN\n", + "315 undi95/remm-slerp-l2-13b ReMM SLERP 13B 1689984000 A recreation trial of the original MythoMax-L2... 6144 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.0000005625 0.000001125 0 0 0 0 6144.0 1024.0 False NaN NaN\n", + "316 gryphe/mythomax-l2-13b MythoMax 13B 1688256000 One of the highest performing and most popular... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 alpaca 0.000000065 0.000000065 0 0 0 0 4096.0 4096.0 False NaN NaN\n", + "317 meta-llama/llama-2-70b-chat Meta: Llama 2 70B Chat 1687219200 The flagship, 70 billion parameter language mo... 4096 None [max_tokens, temperature, top_p, stop, frequen... text->text [text] [text] Llama2 llama2 0.0000009 0.0000009 0 0 0 0 4096.0 NaN False NaN NaN\n", + "318 openai/gpt-3.5-turbo OpenAI: GPT-3.5 Turbo 1685232000 GPT-3.5 Turbo is OpenAI's fastest model. It ca... 16385 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "319 openai/gpt-3.5-turbo-0125 OpenAI: GPT-3.5 Turbo 16k 1685232000 The latest GPT-3.5 Turbo model with improved i... 16385 None [tools, tool_choice, max_tokens, temperature, ... 
text->text [text] [text] GPT None 0.0000005 0.0000015 0 0 0 0 16385.0 4096.0 True NaN NaN\n", + "320 openai/gpt-4 OpenAI: GPT-4 1685232000 OpenAI's flagship model, GPT-4 is a large-scal... 8191 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN\n", + "321 openai/gpt-4-0314 OpenAI: GPT-4 (older v0314) 1685232000 GPT-4-0314 is the first version of GPT-4 relea... 8191 None [tools, tool_choice, max_tokens, temperature, ... text->text [text] [text] GPT None 0.00003 0.00006 0 0 0 0 8191.0 4096.0 True NaN NaN" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalize the nested JSON\n", + "df = pd.json_normalize(val, sep='_')\n", + "df\n", + "# View the resulting DataFrame\n", + "#print(df.T) # Transpose just for readable vertical inspection" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id mistralai/mistral-medium-3\n", + "name Mistral: Mistral Medium 3\n", + "created 1746627341\n", + "description Mistral Medium 3 is a high-performance enterpr...\n", + "context_length 131072\n", + "per_request_limits None\n", + "supported_parameters [tools, tool_choice, max_tokens, temperature, ...\n", + "architecture_modality text+image->text\n", + "architecture_input_modalities [text, image]\n", + "architecture_output_modalities [text]\n", + "architecture_tokenizer Mistral\n", + "architecture_instruct_type None\n", + "pricing_prompt 0.0000004\n", + "pricing_completion 0.000002\n", + "pricing_request 0\n", + "pricing_image 0\n", + "pricing_web_search 0\n", + "pricing_internal_reasoning 0\n", + "top_provider_context_length 131072.0\n", + "top_provider_max_completion_tokens NaN\n", + "top_provider_is_moderated False\n", + "pricing_input_cache_read NaN\n", + "pricing_input_cache_write NaN\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 34, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0].T" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "col_names = [\"id\", \"context_length\", \"pricing_prompt\", \"pricing_completion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id object\n", + "name object\n", + "created int64\n", + "description object\n", + "context_length int64\n", + "per_request_limits object\n", + "supported_parameters object\n", + "architecture_modality object\n", + "architecture_input_modalities object\n", + "architecture_output_modalities object\n", + "architecture_tokenizer object\n", + "architecture_instruct_type object\n", + "pricing_prompt object\n", + "pricing_completion object\n", + "pricing_request object\n", + "pricing_image object\n", + "pricing_web_search object\n", + "pricing_internal_reasoning object\n", + "top_provider_context_length float64\n", + "top_provider_max_completion_tokens float64\n", + "top_provider_is_moderated bool\n", + "pricing_input_cache_read object\n", + "pricing_input_cache_write object\n", + "dtype: object" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + 
"{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.0, 'is_bool': 0.0, 'is_string': 0.38819875776397517, 'type': 'is_bool'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 1.0, 'is_bool': 0.0, 'is_string': 1.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.9968944099378882, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.9968944099378882, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.5962732919254659, 'is_bool': 0.0, 'is_string': 0.0, 'type': 'is_numeric'}\n", + "{'is_numeric': 1.0, 'is_bool': 1.0, 'is_string': 0.0, 'type': 'is_bool'}\n", + "{'is_numeric': 0.13043478260869565, 'is_bool': 0.0, 'is_string': 0.13043478260869565, 'type': 'is_numeric'}\n", + "{'is_numeric': 0.07142857142857142, 'is_bool': 0.0, 'is_string': 0.07142857142857142, 'type': 'is_numeric'}\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " print(hopenai.infer_column_types(df[col]))" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr 
th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>is_numeric</th>\n", + " <th>is_bool</th>\n", + " <th>is_string</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>id</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>name</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>created</th>\n", + " <td>1.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>description</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>context_length</th>\n", + " <td>1.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>per_request_limits</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>supported_parameters</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_modality</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_input_modalities</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_output_modalities</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_tokenizer</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>architecture_instruct_type</th>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.388199</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_prompt</th>\n", + " <td>1.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_completion</th>\n", + " <td>1.000000</td>\n", + " <td>0.0</td>\n", + " <td>1.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_request</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_image</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_web_search</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_internal_reasoning</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top_provider_context_length</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top_provider_max_completion_tokens</th>\n", + " <td>0.596273</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top_provider_is_moderated</th>\n", + " <td>1.000000</td>\n", + " <td>1.0</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_input_cache_read</th>\n", + " <td>0.130435</td>\n", + " <td>0.0</td>\n", + " <td>0.130435</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_input_cache_write</th>\n", + " <td>0.071429</td>\n", + " <td>0.0</td>\n", + " <td>0.071429</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " is_numeric is_bool is_string\n", + "id 0.000000 0.0 1.000000\n", + "name 0.000000 0.0 1.000000\n", + "created 1.000000 0.0 0.000000\n", + "description 0.000000 0.0 1.000000\n", + "context_length 1.000000 0.0 0.000000\n", + "per_request_limits 0.000000 0.0 
0.000000\n", + "supported_parameters 0.000000 0.0 0.000000\n", + "architecture_modality 0.000000 0.0 1.000000\n", + "architecture_input_modalities 0.000000 0.0 0.000000\n", + "architecture_output_modalities 0.000000 0.0 0.000000\n", + "architecture_tokenizer 0.000000 0.0 1.000000\n", + "architecture_instruct_type 0.000000 0.0 0.388199\n", + "pricing_prompt 1.000000 0.0 1.000000\n", + "pricing_completion 1.000000 0.0 1.000000\n", + "pricing_request 0.996894 0.0 0.996894\n", + "pricing_image 0.996894 0.0 0.996894\n", + "pricing_web_search 0.996894 0.0 0.996894\n", + "pricing_internal_reasoning 0.996894 0.0 0.996894\n", + "top_provider_context_length 0.996894 0.0 0.000000\n", + "top_provider_max_completion_tokens 0.596273 0.0 0.000000\n", + "top_provider_is_moderated 1.000000 1.0 0.000000\n", + "pricing_input_cache_read 0.130435 0.0 0.130435\n", + "pricing_input_cache_write 0.071429 0.0 0.071429" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.apply(lambda x: pd.Series(infer_column_types(x))).T" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>is_numeric</th>\n", + " <th>is_bool</th>\n", + " <th>is_string</th>\n", + " <th>type</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>id</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>name</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " 
<td>1.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>created</th>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>description</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>context_length</th>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>per_request_limits</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>supported_parameters</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_modality</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_input_modalities</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_output_modalities</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_tokenizer</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>architecture_instruct_type</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.388199</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_prompt</th>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_completion</th>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_request</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " 
<td>0.996894</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_image</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_web_search</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_internal_reasoning</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.996894</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top_provider_context_length</th>\n", + " <td>0.996894</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top_provider_max_completion_tokens</th>\n", + " <td>0.596273</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top_provider_is_moderated</th>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>is_bool</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_input_cache_read</th>\n", + " <td>0.130435</td>\n", + " <td>0.0</td>\n", + " <td>0.130435</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " <tr>\n", + " <th>pricing_input_cache_write</th>\n", + " <td>0.071429</td>\n", + " <td>0.0</td>\n", + " <td>0.071429</td>\n", + " <td>is_numeric</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " is_numeric is_bool is_string type\n", + "id 0.0 0.0 1.0 is_bool\n", + "name 0.0 0.0 1.0 is_bool\n", + "created 1.0 0.0 0.0 is_numeric\n", + "description 0.0 0.0 1.0 is_bool\n", + "context_length 1.0 0.0 0.0 is_numeric\n", + "per_request_limits 0.0 0.0 0.0 is_bool\n", + "supported_parameters 0.0 0.0 0.0 is_bool\n", + "architecture_modality 0.0 0.0 1.0 is_bool\n", + "architecture_input_modalities 0.0 0.0 0.0 is_bool\n", + "architecture_output_modalities 0.0 0.0 0.0 is_bool\n", + "architecture_tokenizer 0.0 0.0 1.0 
is_bool\n", + "architecture_instruct_type 0.0 0.0 0.388199 is_bool\n", + "pricing_prompt 1.0 0.0 1.0 is_numeric\n", + "pricing_completion 1.0 0.0 1.0 is_numeric\n", + "pricing_request 0.996894 0.0 0.996894 is_numeric\n", + "pricing_image 0.996894 0.0 0.996894 is_numeric\n", + "pricing_web_search 0.996894 0.0 0.996894 is_numeric\n", + "pricing_internal_reasoning 0.996894 0.0 0.996894 is_numeric\n", + "top_provider_context_length 0.996894 0.0 0.0 is_numeric\n", + "top_provider_max_completion_tokens 0.596273 0.0 0.0 is_numeric\n", + "top_provider_is_moderated 1.0 1.0 0.0 is_bool\n", + "pricing_input_cache_read 0.130435 0.0 0.130435 is_numeric\n", + "pricing_input_cache_write 0.071429 0.0 0.071429 is_numeric" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hopenai.infer_column_types_df(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def find_" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 True\n", + "2 True\n", + "3 True\n", + "4 True\n", + "5 True\n", + "6 True\n", + "7 True\n", + "8 True\n", + "9 True\n", + "10 True\n", + "11 True\n", + "12 True\n", + "13 True\n", + "14 True\n", + "15 True\n", + "16 True\n", + "17 True\n", + "18 True\n", + "19 True\n", + "20 True\n", + "21 True\n", + "22 True\n", + "23 True\n", + "24 True\n", + "25 True\n", + "26 True\n", + "27 True\n", + "28 True\n", + "29 True\n", + "30 True\n", + "31 True\n", + "32 True\n", + "33 True\n", + "34 True\n", + "35 True\n", + "36 True\n", + "37 True\n", + "38 True\n", + "39 True\n", + "40 True\n", + "41 True\n", + "42 True\n", + "43 True\n", + "44 True\n", + "45 True\n", + "46 True\n", + "47 True\n", + "48 True\n", + "49 True\n", + "50 True\n", + "51 True\n", + "52 True\n", + "53 True\n", + "54 True\n", + "55 True\n", + "56 True\n", + "57 True\n", + 
"58 True\n", + "59 True\n", + "60 True\n", + "61 True\n", + "62 True\n", + "63 True\n", + "64 True\n", + "65 True\n", + "66 True\n", + "67 True\n", + "68 True\n", + "69 True\n", + "70 True\n", + "71 True\n", + "72 True\n", + "73 True\n", + "74 True\n", + "75 True\n", + "76 True\n", + "77 True\n", + "78 True\n", + "79 True\n", + "80 True\n", + "81 True\n", + "82 True\n", + "83 True\n", + "84 True\n", + "85 True\n", + "86 True\n", + "87 True\n", + "88 True\n", + "89 True\n", + "90 True\n", + "91 True\n", + "92 True\n", + "93 True\n", + "94 True\n", + "95 True\n", + "96 True\n", + "97 True\n", + "98 True\n", + "99 True\n", + "100 True\n", + "101 True\n", + "102 True\n", + "103 True\n", + "104 True\n", + "105 True\n", + "106 True\n", + "107 True\n", + "108 True\n", + "109 True\n", + "110 True\n", + "111 True\n", + "112 True\n", + "113 True\n", + "114 True\n", + "115 True\n", + "116 True\n", + "117 True\n", + "118 True\n", + "119 True\n", + "120 True\n", + "121 True\n", + "122 True\n", + "123 True\n", + "124 True\n", + "125 True\n", + "126 True\n", + "127 True\n", + "128 True\n", + "129 True\n", + "130 True\n", + "131 True\n", + "132 True\n", + "133 True\n", + "134 True\n", + "135 True\n", + "136 True\n", + "137 True\n", + "138 True\n", + "139 True\n", + "140 True\n", + "141 True\n", + "142 True\n", + "143 True\n", + "144 True\n", + "145 True\n", + "146 True\n", + "147 True\n", + "148 True\n", + "149 True\n", + "150 True\n", + "151 True\n", + "152 True\n", + "153 True\n", + "154 True\n", + "155 True\n", + "156 True\n", + "157 True\n", + "158 True\n", + "159 True\n", + "160 True\n", + "161 True\n", + "162 True\n", + "163 True\n", + "164 True\n", + "165 True\n", + "166 True\n", + "167 True\n", + "168 True\n", + "169 True\n", + "170 True\n", + "171 True\n", + "172 True\n", + "173 True\n", + "174 True\n", + "175 True\n", + "176 True\n", + "177 True\n", + "178 True\n", + "179 True\n", + "180 True\n", + "181 True\n", + "182 True\n", + "183 True\n", + "184 True\n", + "185 
True\n", + "186 True\n", + "187 True\n", + "188 True\n", + "189 True\n", + "190 True\n", + "191 True\n", + "192 True\n", + "193 True\n", + "194 True\n", + "195 True\n", + "196 True\n", + "197 True\n", + "198 True\n", + "199 True\n", + "200 True\n", + "201 True\n", + "202 True\n", + "203 True\n", + "204 True\n", + "205 True\n", + "206 True\n", + "207 True\n", + "208 True\n", + "209 True\n", + "210 True\n", + "211 True\n", + "212 True\n", + "213 True\n", + "214 True\n", + "215 True\n", + "216 True\n", + "217 True\n", + "218 True\n", + "219 True\n", + "220 True\n", + "221 True\n", + "222 True\n", + "223 True\n", + "224 True\n", + "225 True\n", + "226 True\n", + "227 True\n", + "228 True\n", + "229 True\n", + "230 True\n", + "231 True\n", + "232 True\n", + "233 True\n", + "234 True\n", + "235 True\n", + "236 True\n", + "237 True\n", + "238 True\n", + "239 True\n", + "240 True\n", + "241 True\n", + "242 True\n", + "243 True\n", + "244 True\n", + "245 True\n", + "246 True\n", + "247 True\n", + "248 True\n", + "249 True\n", + "250 True\n", + "251 True\n", + "252 True\n", + "253 True\n", + "254 True\n", + "255 True\n", + "256 True\n", + "257 True\n", + "258 True\n", + "259 True\n", + "260 True\n", + "261 True\n", + "262 True\n", + "263 True\n", + "264 True\n", + "265 True\n", + "266 True\n", + "267 True\n", + "268 True\n", + "269 True\n", + "270 True\n", + "271 True\n", + "272 True\n", + "273 True\n", + "274 True\n", + "275 True\n", + "276 True\n", + "277 True\n", + "278 True\n", + "279 True\n", + "280 True\n", + "281 True\n", + "282 True\n", + "283 True\n", + "284 True\n", + "285 True\n", + "286 True\n", + "287 True\n", + "288 True\n", + "289 True\n", + "290 True\n", + "291 True\n", + "292 True\n", + "293 True\n", + "294 True\n", + "295 True\n", + "296 True\n", + "297 True\n", + "298 True\n", + "299 True\n", + "300 True\n", + "301 True\n", + "302 False\n", + "303 True\n", + "304 True\n", + "305 True\n", + "306 True\n", + "307 True\n", + "308 True\n", + "309 True\n", + 
"310 True\n", + "311 True\n", + "312 True\n", + "313 True\n", + "314 True\n", + "315 True\n", + "316 True\n", + "317 True\n", + "318 True\n", + "319 True\n", + "320 True\n", + "321 True\n", + "Name: pricing_request, dtype: bool" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(df[\"pricing_request\"], errors='coerce').notna()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.000002\n", + "1 0.00001\n", + "2 0.00000085\n", + "3 0.00000018\n", + "4 0.0000033\n", + "5 0.0000012\n", + "6 0.0000008\n", + "7 0.0000008\n", + "8 0.00000075\n", + "9 0\n", + "10 0.00000035\n", + "11 0\n", + "12 0\n", + "13 0.000001\n", + "14 0\n", + "15 0\n", + "16 0\n", + "17 0\n", + "18 0\n", + "19 0.00000218\n", + "20 0.00000005\n", + "21 0\n", + "22 0.0000003\n", + "23 0\n", + "24 0.000000138\n", + "25 0\n", + "26 0.00000024\n", + "27 0\n", + "28 0.0000003\n", + "29 0\n", + "30 0.000002\n", + "31 0\n", + "32 0.00000024\n", + "33 0\n", + "34 0\n", + "35 0\n", + "36 0\n", + "37 0.00000024\n", + "38 0\n", + "39 0.00000024\n", + "40 0.0000006\n", + "41 0.0000035\n", + "42 0.0000044\n", + "43 0.00004\n", + "44 0.0000044\n", + "45 0\n", + "46 0.00000003\n", + "47 0.000008\n", + "48 0.0000016\n", + "49 0.0000004\n", + "50 0.0000012\n", + "51 0.0000012\n", + "52 0\n", + "53 0\n", + "54 0\n", + "55 0.0000005\n", + "56 0.000015\n", + "57 0\n", + "58 0.0000004\n", + "59 0\n", + "60 0\n", + "61 0.0000006\n", + "62 0\n", + "63 0.0000003\n", + "64 0.0000034\n", + "65 0.0000001\n", + "66 0\n", + "67 0.00000018\n", + "68 0.00000088\n", + "69 0\n", + "70 0\n", + "71 0\n", + "72 0\n", + "73 0\n", + "74 0.0000009\n", + "75 0\n", + "76 0.00000088\n", + "77 0\n", + "78 0.0006\n", + "79 0\n", + "80 0.00000015\n", + "81 0\n", + "82 0\n", + "83 0\n", + "84 0.00000004\n", + "85 0.000008\n", + "86 0.0000004\n", + "87 0\n", + "88 
0.0000001\n", + "89 0.00001\n", + "90 0.0000006\n", + "91 0.00001\n", + "92 0\n", + "93 0\n", + "94 0.0000002\n", + "95 0.000001\n", + "96 0.0000008\n", + "97 0.0000001\n", + "98 0.000008\n", + "99 0.000015\n", + "100 0.000008\n", + "101 0\n", + "102 0\n", + "103 0.0000002\n", + "104 0\n", + "105 0\n", + "106 0.00015\n", + "107 0.0000003\n", + "108 0.000015\n", + "109 0.000015\n", + "110 0.000015\n", + "111 0.000008\n", + "112 0.0000006\n", + "113 0\n", + "114 0\n", + "115 0.00000006\n", + "116 0.0000044\n", + "117 0.00000004\n", + "118 0.0000004\n", + "119 0.00000063\n", + "120 0.000008\n", + "121 0.0000014\n", + "122 0.0000002\n", + "123 0.0000032\n", + "124 0.0000002\n", + "125 0\n", + "126 0.00000075\n", + "127 0.0000012\n", + "128 0.0000064\n", + "129 0.0000044\n", + "130 0.00000018\n", + "131 0\n", + "132 0.00000012\n", + "133 0\n", + "134 0.00000018\n", + "135 0\n", + "136 0.00000015\n", + "137 0.000005\n", + "138 0.000001\n", + "139 0.00000001\n", + "140 0.00000002\n", + "141 0\n", + "142 0.0000004\n", + "143 0\n", + "144 0.00000218\n", + "145 0.0000011\n", + "146 0.0000009\n", + "147 0.00000014\n", + "148 0\n", + "149 0.00000089\n", + "150 0.0000008\n", + "151 0.00006\n", + "152 0.000006\n", + "153 0.00001\n", + "154 0.00001\n", + "155 0.00000015\n", + "156 0\n", + "157 0\n", + "158 0.00000035\n", + "159 0.00000024\n", + "160 0.00000014\n", + "161 0.0000032\n", + "162 0\n", + "163 0.00000027\n", + "164 0\n", + "165 0.000006\n", + "166 0.00001\n", + "167 0.000006\n", + "168 0.000006\n", + "169 0.000006\n", + "170 0.000015\n", + "171 0.0000012\n", + "172 0\n", + "173 0.00000015\n", + "174 0.0000045\n", + "175 0.0000034\n", + "176 0.00000045\n", + "177 0.000004\n", + "178 0.000004\n", + "179 0.000004\n", + "180 0.000004\n", + "181 0.00000225\n", + "182 0.00000225\n", + "183 0.000015\n", + "184 0.000015\n", + "185 0.000015\n", + "186 0.0000001\n", + "187 0.00000004\n", + "188 0\n", + "189 0.0000001\n", + "190 0.0000003\n", + "191 0.00001\n", + "192 0.00001\n", 
+ "193 0.00000015\n", + "194 0.0000005\n", + "195 0.000003\n", + "196 0.00000015\n", + "197 0\n", + "198 0.00000002\n", + "199 0\n", + "200 0.00000001\n", + "201 0.0000012\n", + "202 0\n", + "203 0.000000049\n", + "204 0\n", + "205 0.00000039\n", + "206 0.0000006\n", + "207 0.00000075\n", + "208 0.00006\n", + "209 0.00006\n", + "210 0.0000044\n", + "211 0.0000044\n", + "212 0.0000001\n", + "213 0.00001\n", + "214 0.0000006\n", + "215 0\n", + "216 0.0000002\n", + "217 0.0000008\n", + "218 0\n", + "219 0.00000009\n", + "220 0.0000003\n", + "221 0.0000008\n", + "222 0.000015\n", + "223 0.00000005\n", + "224 0.0000012\n", + "225 0.00001\n", + "226 0\n", + "227 0.000002\n", + "228 0.0000012\n", + "229 0.0000002\n", + "230 0.000001\n", + "231 0\n", + "232 0.00000003\n", + "233 0.0000008\n", + "234 0.00000028\n", + "235 0.00000025\n", + "236 0\n", + "237 0.00000007\n", + "238 0.0000006\n", + "239 0.0000006\n", + "240 0.0000003\n", + "241 0.000006\n", + "242 0\n", + "243 0.00000006\n", + "244 0.000003\n", + "245 0.0000007\n", + "246 0.000015\n", + "247 0.000015\n", + "248 0.00000148\n", + "249 0.0000009\n", + "250 0.0000009\n", + "251 0\n", + "252 0.000000054\n", + "253 0.00000004\n", + "254 0.000000054\n", + "255 0.0000001\n", + "256 0.0000003\n", + "257 0.000006\n", + "258 0.00000012\n", + "259 0.0000003\n", + "260 0.00001\n", + "261 0.000018\n", + "262 0.0000002\n", + "263 0.000015\n", + "264 0.00000024\n", + "265 0.00000075\n", + "266 0.00000075\n", + "267 0.0000012\n", + "268 0.00000006\n", + "269 0.0000004\n", + "270 0.0000012\n", + "271 0.0000005\n", + "272 0.000005\n", + "273 0.00003\n", + "274 0.000015\n", + "275 0.000015\n", + "276 0.0000008\n", + "277 0.000002\n", + "278 0.0000015\n", + "279 0.00000125\n", + "280 0.00000125\n", + "281 0.000075\n", + "282 0.000075\n", + "283 0.000015\n", + "284 0.000015\n", + "285 0.0000015\n", + "286 0.000006\n", + "287 0.000002\n", + "288 0.00003\n", + "289 0.0000006\n", + "290 0.0000081\n", + "291 0.0000006\n", + "292 
0.00000025\n", + "293 0.0000002\n", + "294 0.00000024\n", + "295 0.0000015\n", + "296 0.000024\n", + "297 0.000024\n", + "298 0.000024\n", + "299 0.000024\n", + "300 0.0000012\n", + "301 0.000009375\n", + "302 -1\n", + "303 0.000002\n", + "304 0.00003\n", + "305 0.0000005\n", + "306 0.000002\n", + "307 0.00000019\n", + "308 0.000001125\n", + "309 0.000004\n", + "310 0.00012\n", + "311 0.00012\n", + "312 0.000001125\n", + "313 0.000024\n", + "314 0.000024\n", + "315 0.000001125\n", + "316 0.000000065\n", + "317 0.0000009\n", + "318 0.0000015\n", + "319 0.0000015\n", + "320 0.00006\n", + "321 0.00006\n", + "Name: pricing_completion, dtype: object" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"pricing_completion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id</th>\n", + " <th>context_length</th>\n", + " <th>pricing_prompt</th>\n", + " <th>pricing_completion</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>302</th>\n", + " <td>openrouter/auto</td>\n", + " <td>2000000</td>\n", + " <td>-1</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>133</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-32b:free</td>\n", + " <td>16000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>nvidia/llama-3.1-nemotron-ultra-253b-v1:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", 
+ " </tr>\n", + " <tr>\n", + " <th>113</th>\n", + " <td>cognitivecomputations/dolphin3.0-r1-mistral-24...</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57</th>\n", + " <td>nvidia/llama-3.3-nemotron-super-49b-v1:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>114</th>\n", + " <td>cognitivecomputations/dolphin3.0-mistral-24b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>moonshotai/kimi-vl-a3b-thinking:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>53</th>\n", + " <td>agentica-org/deepcoder-14b-preview:free</td>\n", + " <td>96000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>52</th>\n", + " <td>arliai/qwq-32b-arliai-rpr-v1:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>231</th>\n", + " <td>meta-llama/llama-3.1-8b-instruct:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>226</th>\n", + " <td>meta-llama/llama-3.1-405b:free</td>\n", + " <td>64000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>125</th>\n", + " <td>qwen/qwen2.5-vl-72b-instruct:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>shisa-ai/shisa-v2-llama3.3-70b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>87</th>\n", + " <td>google/gemma-3-12b-it:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>92</th>\n", + " <td>rekaai/reka-flash-3:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>131</th>\n", + " <td>mistralai/mistral-small-24b-instruct-2501:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>81</th>\n", + " <td>open-r1/olympiccoder-32b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>60</th>\n", + " <td>meta-llama/llama-4-maverick:free</td>\n", + " <td>256000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>236</th>\n", + " <td>mistralai/mistral-nemo:free</td>\n", + " <td>128000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>62</th>\n", + " <td>meta-llama/llama-4-scout:free</td>\n", + " <td>512000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>83</th>\n", + " <td>google/gemma-3-4b-it:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>93</th>\n", + " <td>google/gemma-3-27b-it:free</td>\n", + " <td>96000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>79</th>\n", + " <td>mistralai/mistral-small-3.1-24b-instruct:free</td>\n", + " <td>96000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>251</th>\n", + " <td>mistralai/mistral-7b-instruct:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>77</th>\n", + " <td>featherless/qwerky-72b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75</th>\n", + " <td>deepseek/deepseek-chat-v3-0324:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>242</th>\n", + " <td>google/gemma-2-9b-it:free</td>\n", + " <td>8192</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73</th>\n", + " <td>qwen/qwen2.5-vl-32b-instruct:free</td>\n", + " <td>8192</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>135</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-14b:free</td>\n", + " <td>64000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>72</th>\n", + " <td>google/gemini-2.5-pro-exp-03-25</td>\n", + " <td>1000000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>70</th>\n", + " <td>bytedance-research/ui-tars-72b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>69</th>\n", + " <td>allenai/molmo-7b-d:free</td>\n", + " <td>4096</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>101</th>\n", + " <td>deepseek/deepseek-r1-zero:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>102</th>\n", + " <td>qwen/qwq-32b:free</td>\n", + " <td>40000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>66</th>\n", + " <td>deepseek/deepseek-v3-base:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>104</th>\n", + " <td>moonshotai/moonlight-16b-a3b-instruct:free</td>\n", + " <td>8192</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>105</th>\n", + " <td>nousresearch/deephermes-3-llama-3-8b-preview:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>71</th>\n", + " <td>qwen/qwen2.5-vl-3b-instruct:free</td>\n", + " <td>64000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>218</th>\n", + " <td>google/gemini-flash-1.5-8b-exp</td>\n", + " <td>1000000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>82</th>\n", + " <td>google/gemma-3-1b-it:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>156</th>\n", + " 
<td>google/gemini-2.0-flash-exp:free</td>\n", + " <td>1048576</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>204</th>\n", + " <td>qwen/qwen-2.5-72b-instruct:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>qwen/qwen3-30b-a3b:free</td>\n", + " <td>40960</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>thudm/glm-4-32b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>157</th>\n", + " <td>meta-llama/llama-3.3-70b-instruct:free</td>\n", + " <td>8000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>deepseek/deepseek-prover-v2:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>opengvlab/internvl3-2b:free</td>\n", + " <td>32000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>qwen/qwen3-8b:free</td>\n", + " <td>40960</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>opengvlab/internvl3-14b:free</td>\n", + " <td>32000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>qwen/qwen3-1.7b:free</td>\n", + " <td>32000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>202</th>\n", + " <td>meta-llama/llama-3.2-11b-vision-instruct:free</td>\n", + " <td>131072</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>qwen/qwen3-0.6b-04-28:free</td>\n", + " <td>32000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>microsoft/phi-4-reasoning:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>162</th>\n", + " <td>qwen/qwq-32b-preview:free</td>\n", + " <td>16384</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>microsoft/phi-4-reasoning-plus:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>qwen/qwen3-4b:free</td>\n", + " <td>128000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>164</th>\n", + " <td>google/learnlm-1.5-pro-experimental:free</td>\n", + " <td>40960</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>148</th>\n", + " <td>deepseek/deepseek-chat:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>199</th>\n", + " <td>meta-llama/llama-3.2-1b-instruct:free</td>\n", + " <td>131000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>thudm/glm-z1-32b:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>microsoft/mai-ds-r1:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>thudm/glm-4-9b:free</td>\n", + " <td>32000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>thudm/glm-z1-9b:free</td>\n", + " <td>32000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>188</th>\n", + " <td>qwen/qwen-2.5-7b-instruct:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>172</th>\n", + " <td>qwen/qwen-2.5-coder-32b-instruct:free</td>\n", + " <td>32768</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>qwen/qwen3-14b:free</td>\n", + " <td>40960</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>197</th>\n", + " <td>meta-llama/llama-3.2-3b-instruct:free</td>\n", + " <td>20000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>141</th>\n", + " <td>deepseek/deepseek-r1-distill-llama-70b:free</td>\n", + " <td>8192</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>qwen/qwen3-235b-a22b:free</td>\n", + " <td>40960</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>143</th>\n", + " <td>deepseek/deepseek-r1:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>qwen/qwen3-32b:free</td>\n", + " <td>40960</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>215</th>\n", + " <td>qwen/qwen-2.5-vl-7b-instruct:free</td>\n", + " <td>64000</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>tngtech/deepseek-r1t-chimera:free</td>\n", + " <td>163840</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>200</th>\n", + " <td>meta-llama/llama-3.2-1b-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.000000005</td>\n", + " <td>0.00000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>198</th>\n", + " <td>meta-llama/llama-3.2-3b-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.00000001</td>\n", + " <td>0.00000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>139</th>\n", + " <td>liquid/lfm-7b</td>\n", + " <td>32768</td>\n", + " <td>0.00000001</td>\n", + " <td>0.00000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>qwen/qwen2.5-coder-7b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.00000001</td>\n", + " <td>0.00000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>243</th>\n", + " <td>google/gemma-2-9b-it</td>\n", + " <td>8192</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>232</th>\n", + " 
<td>meta-llama/llama-3.1-8b-instruct</td>\n", + " <td>16384</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>84</th>\n", + " <td>google/gemma-3-4b-it</td>\n", + " <td>131072</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>140</th>\n", + " <td>liquid/lfm-3b</td>\n", + " <td>32768</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>115</th>\n", + " <td>meta-llama/llama-guard-3-8b</td>\n", + " <td>131072</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>223</th>\n", + " <td>sao10k/l3-lunaris-8b</td>\n", + " <td>8192</td>\n", + " <td>0.00000002</td>\n", + " <td>0.00000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>253</th>\n", + " <td>nousresearch/hermes-2-pro-llama-3-8b</td>\n", + " <td>131072</td>\n", + " <td>0.000000025</td>\n", + " <td>0.00000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>254</th>\n", + " <td>mistralai/mistral-7b-instruct-v0.3</td>\n", + " <td>32768</td>\n", + " <td>0.000000028</td>\n", + " <td>0.000000054</td>\n", + " </tr>\n", + " <tr>\n", + " <th>252</th>\n", + " <td>mistralai/mistral-7b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.000000028</td>\n", + " <td>0.000000054</td>\n", + " </tr>\n", + " <tr>\n", + " <th>268</th>\n", + " <td>meta-llama/llama-3-8b-instruct</td>\n", + " <td>8192</td>\n", + " <td>0.00000003</td>\n", + " <td>0.00000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>219</th>\n", + " <td>microsoft/phi-3.5-mini-128k-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.00000003</td>\n", + " <td>0.00000009</td>\n", + " </tr>\n", + " <tr>\n", + " <th>237</th>\n", + " <td>mistralai/mistral-nemo</td>\n", + " <td>98304</td>\n", + " <td>0.00000003</td>\n", + " <td>0.00000007</td>\n", + " </tr>\n", + " <tr>\n", + " <th>160</th>\n", + " <td>amazon/nova-micro-v1</td>\n", + " <td>128000</td>\n", + " <td>0.000000035</td>\n", + " 
<td>0.00000014</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>qwen/qwen3-8b</td>\n", + " <td>128000</td>\n", + " <td>0.000000035</td>\n", + " <td>0.000000138</td>\n", + " </tr>\n", + " <tr>\n", + " <th>193</th>\n", + " <td>google/gemini-flash-1.5-8b</td>\n", + " <td>1000000</td>\n", + " <td>0.0000000375</td>\n", + " <td>0.00000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>155</th>\n", + " <td>cohere/command-r7b-12-2024</td>\n", + " <td>128000</td>\n", + " <td>0.0000000375</td>\n", + " <td>0.00000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>187</th>\n", + " <td>mistralai/ministral-3b</td>\n", + " <td>131072</td>\n", + " <td>0.00000004</td>\n", + " <td>0.00000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>117</th>\n", + " <td>deepseek/deepseek-r1-distill-llama-8b</td>\n", + " <td>32000</td>\n", + " <td>0.00000004</td>\n", + " <td>0.00000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>258</th>\n", + " <td>deepseek/deepseek-coder</td>\n", + " <td>128000</td>\n", + " <td>0.00000004</td>\n", + " <td>0.00000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>203</th>\n", + " <td>meta-llama/llama-3.2-11b-vision-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.000000049</td>\n", + " <td>0.000000049</td>\n", + " </tr>\n", + " <tr>\n", + " <th>80</th>\n", + " <td>mistralai/mistral-small-3.1-24b-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.00000005</td>\n", + " <td>0.00000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>meta-llama/llama-guard-4-12b</td>\n", + " <td>163840</td>\n", + " <td>0.00000005</td>\n", + " <td>0.00000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>189</th>\n", + " <td>qwen/qwen-2.5-7b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>97</th>\n", + " <td>microsoft/phi-4-multimodal-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>88</th>\n", + " 
<td>google/gemma-3-12b-it</td>\n", + " <td>131072</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>124</th>\n", + " <td>qwen/qwen-turbo</td>\n", + " <td>1000000</td>\n", + " <td>0.00000005</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>132</th>\n", + " <td>mistralai/mistral-small-24b-instruct-2501</td>\n", + " <td>28000</td>\n", + " <td>0.00000006</td>\n", + " <td>0.00000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>173</th>\n", + " <td>qwen/qwen-2.5-coder-32b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.00000006</td>\n", + " <td>0.00000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>159</th>\n", + " <td>amazon/nova-lite-v1</td>\n", + " <td>300000</td>\n", + " <td>0.00000006</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>316</th>\n", + " <td>gryphe/mythomax-l2-13b</td>\n", + " <td>4096</td>\n", + " <td>0.000000065</td>\n", + " <td>0.000000065</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>microsoft/phi-4-reasoning-plus</td>\n", + " <td>32768</td>\n", + " <td>0.00000007</td>\n", + " <td>0.00000035</td>\n", + " </tr>\n", + " <tr>\n", + " <th>147</th>\n", + " <td>microsoft/phi-4</td>\n", + " <td>16384</td>\n", + " <td>0.00000007</td>\n", + " <td>0.00000014</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>qwen/qwen3-14b</td>\n", + " <td>40960</td>\n", + " <td>0.00000007</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>259</th>\n", + " <td>google/gemini-flash-1.5</td>\n", + " <td>1000000</td>\n", + " <td>0.000000075</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>107</th>\n", + " <td>google/gemini-2.0-flash-lite-001</td>\n", + " <td>1048576</td>\n", + " <td>0.000000075</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>63</th>\n", + " <td>meta-llama/llama-4-scout</td>\n", + " <td>1048576</td>\n", + " <td>0.00000008</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>294</th>\n", + " <td>mistralai/mixtral-8x7b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.00000008</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>264</th>\n", + " <td>allenai/olmo-7b-instruct</td>\n", + " <td>2048</td>\n", + " <td>0.00000008</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>163</th>\n", + " <td>qwen/qwq-32b-preview</td>\n", + " <td>32768</td>\n", + " <td>0.00000009</td>\n", + " <td>0.00000027</td>\n", + " </tr>\n", + " <tr>\n", + " <th>158</th>\n", + " <td>meta-llama/llama-3.3-70b-instruct</td>\n", + " <td>131000</td>\n", + " <td>0.00000009</td>\n", + " <td>0.00000035</td>\n", + " </tr>\n", + " <tr>\n", + " <th>266</th>\n", + " <td>neversleep/llama-3-lumimaid-8b</td>\n", + " <td>24576</td>\n", + " <td>0.00000009375</td>\n", + " <td>0.00000075</td>\n", + " </tr>\n", + " <tr>\n", + " <th>207</th>\n", + " <td>neversleep/llama-3.1-lumimaid-8b</td>\n", + " <td>32768</td>\n", + " <td>0.00000009375</td>\n", + " <td>0.00000075</td>\n", + " </tr>\n", + " <tr>\n", + " <th>265</th>\n", + " <td>neversleep/llama-3-lumimaid-8b:extended</td>\n", + " <td>24576</td>\n", + " <td>0.00000009375</td>\n", + " <td>0.00000075</td>\n", + " </tr>\n", + " <tr>\n", + " <th>212</th>\n", + " <td>mistralai/pixtral-12b</td>\n", + " <td>32768</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>qwen/qwen3-32b</td>\n", + " <td>40960</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>openai/gpt-4.1-nano</td>\n", + " <td>1047576</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>186</th>\n", + " <td>mistralai/ministral-8b</td>\n", + " <td>128000</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>118</th>\n", + " <td>google/gemini-2.0-flash-001</td>\n", + " <td>1000000</td>\n", + " 
<td>0.0000001</td>\n", + " <td>0.0000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>234</th>\n", + " <td>meta-llama/llama-3.1-70b-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.0000001</td>\n", + " <td>0.00000028</td>\n", + " </tr>\n", + " <tr>\n", + " <th>65</th>\n", + " <td>mistral/ministral-8b</td>\n", + " <td>131072</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>240</th>\n", + " <td>google/gemma-2-27b-it</td>\n", + " <td>8192</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>256</th>\n", + " <td>microsoft/phi-3-medium-128k-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>255</th>\n", + " <td>microsoft/phi-3-mini-128k-instruct</td>\n", + " <td>128000</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94</th>\n", + " <td>google/gemma-3-27b-it</td>\n", + " <td>131072</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>qwen/qwen3-30b-a3b</td>\n", + " <td>40960</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>142</th>\n", + " <td>deepseek/deepseek-r1-distill-llama-70b</td>\n", + " <td>131072</td>\n", + " <td>0.0000001</td>\n", + " <td>0.0000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>307</th>\n", + " <td>mistralai/mistral-7b-instruct-v0.1</td>\n", + " <td>2824</td>\n", + " <td>0.00000011</td>\n", + " <td>0.00000019</td>\n", + " </tr>\n", + " <tr>\n", + " <th>205</th>\n", + " <td>qwen/qwen-2.5-72b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.00000012</td>\n", + " <td>0.00000039</td>\n", + " </tr>\n", + " <tr>\n", + " <th>190</th>\n", + " <td>nvidia/llama-3.1-nemotron-70b-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.00000012</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134</th>\n", + 
" <td>deepseek/deepseek-r1-distill-qwen-32b</td>\n", + " <td>131072</td>\n", + " <td>0.00000012</td>\n", + " <td>0.00000018</td>\n", + " </tr>\n", + " <tr>\n", + " <th>220</th>\n", + " <td>nousresearch/hermes-3-llama-3.1-70b</td>\n", + " <td>131072</td>\n", + " <td>0.00000012</td>\n", + " <td>0.0000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>58</th>\n", + " <td>nvidia/llama-3.3-nemotron-super-49b-v1</td>\n", + " <td>131072</td>\n", + " <td>0.00000013</td>\n", + " <td>0.0000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>qwen/qwen3-235b-a22b</td>\n", + " <td>40960</td>\n", + " <td>0.00000014</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>90</th>\n", + " <td>openai/gpt-4o-mini-search-preview</td>\n", + " <td>128000</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>136</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-14b</td>\n", + " <td>64000</td>\n", + " <td>0.00000015</td>\n", + " <td>0.00000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>214</th>\n", + " <td>cohere/command-r-08-2024</td>\n", + " <td>128000</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>238</th>\n", + " <td>openai/gpt-4o-mini</td>\n", + " <td>128000</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>103</th>\n", + " <td>qwen/qwq-32b</td>\n", + " <td>131072</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>196</th>\n", + " <td>liquid/lfm-40b</td>\n", + " <td>32768</td>\n", + " <td>0.00000015</td>\n", + " <td>0.00000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>google/gemini-2.5-flash-preview:thinking</td>\n", + " <td>1048576</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000035</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>google/gemini-2.5-flash-preview</td>\n", + " <td>1048576</td>\n", + " <td>0.00000015</td>\n", + 
" <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>239</th>\n", + " <td>openai/gpt-4o-mini-2024-07-18</td>\n", + " <td>128000</td>\n", + " <td>0.00000015</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>61</th>\n", + " <td>meta-llama/llama-4-maverick</td>\n", + " <td>1048576</td>\n", + " <td>0.00000017</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>scb10x/llama3.1-typhoon2-8b-instruct</td>\n", + " <td>8192</td>\n", + " <td>0.00000018</td>\n", + " <td>0.00000018</td>\n", + " </tr>\n", + " <tr>\n", + " <th>130</th>\n", + " <td>deepseek/deepseek-r1-distill-qwen-1.5b</td>\n", + " <td>131072</td>\n", + " <td>0.00000018</td>\n", + " <td>0.00000018</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>arcee-ai/spotlight</td>\n", + " <td>131072</td>\n", + " <td>0.00000018</td>\n", + " <td>0.00000018</td>\n", + " </tr>\n", + " <tr>\n", + " <th>216</th>\n", + " <td>qwen/qwen-2.5-vl-7b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>262</th>\n", + " <td>meta-llama/llama-guard-2-8b</td>\n", + " <td>8192</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>293</th>\n", + " <td>mistralai/mistral-7b-instruct-v0.2</td>\n", + " <td>32768</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>86</th>\n", + " <td>ai21/jamba-1.6-mini</td>\n", + " <td>256000</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>122</th>\n", + " <td>aion-labs/aion-rp-llama-3.1-8b</td>\n", + " <td>32768</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>229</th>\n", + " <td>perplexity/llama-3.1-sonar-small-128k-online</td>\n", + " <td>127072</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>145</th>\n", + " 
<td>minimax/minimax-01</td>\n", + " <td>1000192</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000011</td>\n", + " </tr>\n", + " <tr>\n", + " <th>291</th>\n", + " <td>mistralai/mistral-small</td>\n", + " <td>32768</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>112</th>\n", + " <td>mistralai/mistral-saba</td>\n", + " <td>32768</td>\n", + " <td>0.0000002</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>119</th>\n", + " <td>qwen/qwen-vl-plus</td>\n", + " <td>7500</td>\n", + " <td>0.00000021</td>\n", + " <td>0.00000063</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>thudm/glm-4-32b</td>\n", + " <td>32000</td>\n", + " <td>0.00000024</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>thudm/glm-z1-rumination-32b</td>\n", + " <td>32000</td>\n", + " <td>0.00000024</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>thudm/glm-z1-32b</td>\n", + " <td>32000</td>\n", + " <td>0.00000024</td>\n", + " <td>0.00000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>inception/mercury-coder-small-beta</td>\n", + " <td>32000</td>\n", + " <td>0.00000025</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>292</th>\n", + " <td>mistralai/mistral-tiny</td>\n", + " <td>32768</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000025</td>\n", + " </tr>\n", + " <tr>\n", + " <th>126</th>\n", + " <td>qwen/qwen2.5-vl-72b-instruct</td>\n", + " <td>32000</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000075</td>\n", + " </tr>\n", + " <tr>\n", + " <th>194</th>\n", + " <td>thedrummer/rocinante-12b</td>\n", + " <td>32768</td>\n", + " <td>0.00000025</td>\n", + " <td>0.0000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>279</th>\n", + " <td>anthropic/claude-3-haiku:beta</td>\n", + " <td>200000</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000125</td>\n", + " </tr>\n", + " <tr>\n", + " <th>235</th>\n", 
+ " <td>mistralai/codestral-mamba</td>\n", + " <td>262144</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000025</td>\n", + " </tr>\n", + " <tr>\n", + " <th>280</th>\n", + " <td>anthropic/claude-3-haiku</td>\n", + " <td>200000</td>\n", + " <td>0.00000025</td>\n", + " <td>0.00000125</td>\n", + " </tr>\n", + " <tr>\n", + " <th>55</th>\n", + " <td>x-ai/grok-3-mini-beta</td>\n", + " <td>131072</td>\n", + " <td>0.0000003</td>\n", + " <td>0.0000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>269</th>\n", + " <td>meta-llama/llama-3-70b-instruct</td>\n", + " <td>8192</td>\n", + " <td>0.0000003</td>\n", + " <td>0.0000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>146</th>\n", + " <td>mistralai/codestral-2501</td>\n", + " <td>262144</td>\n", + " <td>0.0000003</td>\n", + " <td>0.0000009</td>\n", + " </tr>\n", + " <tr>\n", + " <th>76</th>\n", + " <td>deepseek/deepseek-chat-v3-0324</td>\n", + " <td>163840</td>\n", + " <td>0.0000003</td>\n", + " <td>0.00000088</td>\n", + " </tr>\n", + " <tr>\n", + " <th>149</th>\n", + " <td>deepseek/deepseek-chat</td>\n", + " <td>163840</td>\n", + " <td>0.00000038</td>\n", + " <td>0.00000089</td>\n", + " </tr>\n", + " <tr>\n", + " <th>270</th>\n", + " <td>mistralai/mixtral-8x22b-instruct</td>\n", + " <td>65536</td>\n", + " <td>0.0000004</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>mistralai/mistral-medium-3</td>\n", + " <td>131072</td>\n", + " <td>0.0000004</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>openai/gpt-4.1-mini</td>\n", + " <td>1047576</td>\n", + " <td>0.0000004</td>\n", + " <td>0.0000016</td>\n", + " </tr>\n", + " <tr>\n", + " <th>127</th>\n", + " <td>qwen/qwen-plus</td>\n", + " <td>131072</td>\n", + " <td>0.0000004</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>arcee-ai/arcee-blitz</td>\n", + " <td>32768</td>\n", + " <td>0.00000045</td>\n", + " <td>0.00000075</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>176</th>\n", + " <td>thedrummer/unslopnemo-12b</td>\n", + " <td>32000</td>\n", + " <td>0.00000045</td>\n", + " <td>0.00000045</td>\n", + " </tr>\n", + " <tr>\n", + " <th>278</th>\n", + " <td>cohere/command-r</td>\n", + " <td>128000</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>deepseek/deepseek-prover-v2</td>\n", + " <td>131072</td>\n", + " <td>0.0000005</td>\n", + " <td>0.00000218</td>\n", + " </tr>\n", + " <tr>\n", + " <th>285</th>\n", + " <td>cohere/command-r-03-2024</td>\n", + " <td>128000</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>arcee-ai/virtuoso-medium-v2</td>\n", + " <td>131072</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>arcee-ai/coder-large</td>\n", + " <td>32768</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>271</th>\n", + " <td>microsoft/wizardlm-2-8x22b</td>\n", + " <td>65536</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>96</th>\n", + " <td>thedrummer/skyfall-36b-v2</td>\n", + " <td>32768</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>144</th>\n", + " <td>deepseek/deepseek-r1</td>\n", + " <td>163840</td>\n", + " <td>0.0000005</td>\n", + " <td>0.00000218</td>\n", + " </tr>\n", + " <tr>\n", + " <th>305</th>\n", + " <td>jondurbin/airoboros-l2-70b</td>\n", + " <td>4096</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>318</th>\n", + " <td>openai/gpt-3.5-turbo</td>\n", + " <td>16385</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>319</th>\n", + " <td>openai/gpt-3.5-turbo-0125</td>\n", + " <td>16385</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000015</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>245</th>\n", + " <td>ai21/jamba-instruct</td>\n", + " <td>256000</td>\n", + " <td>0.0000005</td>\n", + " <td>0.0000007</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>arcee-ai/caller-large</td>\n", + " <td>32768</td>\n", + " <td>0.00000055</td>\n", + " <td>0.00000085</td>\n", + " </tr>\n", + " <tr>\n", + " <th>308</th>\n", + " <td>pygmalionai/mythalion-13b</td>\n", + " <td>8192</td>\n", + " <td>0.0000005625</td>\n", + " <td>0.000001125</td>\n", + " </tr>\n", + " <tr>\n", + " <th>315</th>\n", + " <td>undi95/remm-slerp-l2-13b</td>\n", + " <td>6144</td>\n", + " <td>0.0000005625</td>\n", + " <td>0.000001125</td>\n", + " </tr>\n", + " <tr>\n", + " <th>206</th>\n", + " <td>qwen/qwen-2.5-vl-72b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.0000006</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>289</th>\n", + " <td>nousresearch/nous-hermes-2-mixtral-8x7b-dpo</td>\n", + " <td>32768</td>\n", + " <td>0.0000006</td>\n", + " <td>0.0000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>150</th>\n", + " <td>sao10k/l3.3-euryale-70b</td>\n", + " <td>131072</td>\n", + " <td>0.0000007</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>121</th>\n", + " <td>aion-labs/aion-1.0-mini</td>\n", + " <td>131072</td>\n", + " <td>0.0000007</td>\n", + " <td>0.0000014</td>\n", + " </tr>\n", + " <tr>\n", + " <th>217</th>\n", + " <td>sao10k/l3.1-euryale-70b</td>\n", + " <td>131072</td>\n", + " <td>0.0000007</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>arcee-ai/virtuoso-large</td>\n", + " <td>131072</td>\n", + " <td>0.00000075</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>295</th>\n", + " <td>neversleep/noromaid-20b</td>\n", + " <td>8192</td>\n", + " <td>0.00000075</td>\n", + " <td>0.0000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>221</th>\n", + " <td>nousresearch/hermes-3-llama-3.1-405b</td>\n", + " <td>131072</td>\n", + " <td>0.0000008</td>\n", + " 
<td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>233</th>\n", + " <td>meta-llama/llama-3.1-405b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>224</th>\n", + " <td>aetherwiing/mn-starcannon-12b</td>\n", + " <td>16384</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>179</th>\n", + " <td>anthropic/claude-3.5-haiku-20241022:beta</td>\n", + " <td>200000</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>95</th>\n", + " <td>thedrummer/anubis-pro-105b-v1</td>\n", + " <td>131072</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>180</th>\n", + " <td>anthropic/claude-3.5-haiku-20241022</td>\n", + " <td>200000</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>51</th>\n", + " <td>alfredpros/codellama-7b-instruct-solidity</td>\n", + " <td>4096</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50</th>\n", + " <td>eleutherai/llemma_7b</td>\n", + " <td>4096</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>267</th>\n", + " <td>sao10k/fimbulvetr-11b-v2</td>\n", + " <td>4096</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>276</th>\n", + " <td>sophosympatheia/midnight-rose-70b</td>\n", + " <td>4096</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>123</th>\n", + " <td>qwen/qwen-vl-max</td>\n", + " <td>7500</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000032</td>\n", + " </tr>\n", + " <tr>\n", + " <th>161</th>\n", + " <td>amazon/nova-pro-v1</td>\n", + " <td>300000</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000032</td>\n", + " </tr>\n", + " <tr>\n", + " <th>171</th>\n", + " <td>infermatic/mn-inferor-12b</td>\n", 
+ " <td>16384</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>300</th>\n", + " <td>undi95/toppy-m-7b</td>\n", + " <td>4096</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>177</th>\n", + " <td>anthropic/claude-3.5-haiku:beta</td>\n", + " <td>200000</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>178</th>\n", + " <td>anthropic/claude-3.5-haiku</td>\n", + " <td>200000</td>\n", + " <td>0.0000008</td>\n", + " <td>0.000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>228</th>\n", + " <td>nothingiisreal/mn-celeste-12b</td>\n", + " <td>16384</td>\n", + " <td>0.0000008</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>68</th>\n", + " <td>scb10x/llama3.1-typhoon2-70b-instruct</td>\n", + " <td>8192</td>\n", + " <td>0.00000088</td>\n", + " <td>0.00000088</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74</th>\n", + " <td>qwen/qwen2.5-vl-32b-instruct</td>\n", + " <td>128000</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " </tr>\n", + " <tr>\n", + " <th>249</th>\n", + " <td>cognitivecomputations/dolphin-mixtral-8x22b</td>\n", + " <td>16000</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>arcee-ai/maestro-reasoning</td>\n", + " <td>131072</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000033</td>\n", + " </tr>\n", + " <tr>\n", + " <th>317</th>\n", + " <td>meta-llama/llama-2-70b-chat</td>\n", + " <td>4096</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " </tr>\n", + " <tr>\n", + " <th>250</th>\n", + " <td>qwen/qwen-2-72b-instruct</td>\n", + " <td>32768</td>\n", + " <td>0.0000009</td>\n", + " <td>0.0000009</td>\n", + " </tr>\n", + " <tr>\n", + " <th>230</th>\n", + " <td>perplexity/llama-3.1-sonar-large-128k-online</td>\n", + " <td>127072</td>\n", + " <td>0.000001</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>287</th>\n", + " <td>openai/gpt-3.5-turbo-0613</td>\n", + " <td>4095</td>\n", + " <td>0.000001</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>277</th>\n", + " <td>cohere/command</td>\n", + " <td>4096</td>\n", + " <td>0.000001</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>138</th>\n", + " <td>perplexity/sonar</td>\n", + " <td>127072</td>\n", + " <td>0.000001</td>\n", + " <td>0.000001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>137</th>\n", + " <td>perplexity/sonar-reasoning</td>\n", + " <td>127000</td>\n", + " <td>0.000001</td>\n", + " <td>0.000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>303</th>\n", + " <td>openai/gpt-3.5-turbo-1106</td>\n", + " <td>16385</td>\n", + " <td>0.000001</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>openai/o4-mini-high</td>\n", + " <td>200000</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " </tr>\n", + " <tr>\n", + " <th>210</th>\n", + " <td>openai/o1-mini</td>\n", + " <td>128000</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " </tr>\n", + " <tr>\n", + " <th>211</th>\n", + " <td>openai/o1-mini-2024-09-12</td>\n", + " <td>128000</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>openai/o4-mini</td>\n", + " <td>200000</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " </tr>\n", + " <tr>\n", + " <th>129</th>\n", + " <td>openai/o3-mini</td>\n", + " <td>200000</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " </tr>\n", + " <tr>\n", + " <th>116</th>\n", + " <td>openai/o3-mini-high</td>\n", + " <td>200000</td>\n", + " <td>0.0000011</td>\n", + " <td>0.0000044</td>\n", + " </tr>\n", + " <tr>\n", + " <th>312</th>\n", + " <td>mancer/weaver</td>\n", + " <td>8000</td>\n", + " <td>0.000001125</td>\n", + " <td>0.000001125</td>\n", + " </tr>\n", + " <tr>\n", + " <th>201</th>\n", + " 
<td>meta-llama/llama-3.2-90b-vision-instruct</td>\n", + " <td>131072</td>\n", + " <td>0.0000012</td>\n", + " <td>0.0000012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>272</th>\n", + " <td>google/gemini-pro-1.5</td>\n", + " <td>2000000</td>\n", + " <td>0.00000125</td>\n", + " <td>0.000005</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>google/gemini-2.5-pro-preview</td>\n", + " <td>1048576</td>\n", + " <td>0.00000125</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>248</th>\n", + " <td>sao10k/l3-euryale-70b</td>\n", + " <td>8192</td>\n", + " <td>0.00000148</td>\n", + " <td>0.00000148</td>\n", + " </tr>\n", + " <tr>\n", + " <th>181</th>\n", + " <td>neversleep/llama-3.1-lumimaid-70b</td>\n", + " <td>16384</td>\n", + " <td>0.0000015</td>\n", + " <td>0.00000225</td>\n", + " </tr>\n", + " <tr>\n", + " <th>306</th>\n", + " <td>openai/gpt-3.5-turbo-instruct</td>\n", + " <td>4095</td>\n", + " <td>0.0000015</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>182</th>\n", + " <td>anthracite-org/magnum-v4-72b</td>\n", + " <td>16384</td>\n", + " <td>0.0000015</td>\n", + " <td>0.00000225</td>\n", + " </tr>\n", + " <tr>\n", + " <th>128</th>\n", + " <td>qwen/qwen-max</td>\n", + " <td>32768</td>\n", + " <td>0.0000016</td>\n", + " <td>0.0000064</td>\n", + " </tr>\n", + " <tr>\n", + " <th>169</th>\n", + " <td>mistralai/pixtral-large-2411</td>\n", + " <td>131072</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>286</th>\n", + " <td>mistralai/mistral-large</td>\n", + " <td>128000</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>85</th>\n", + " <td>ai21/jamba-1.6-large</td>\n", + " <td>256000</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>154</th>\n", + " <td>x-ai/grok-2-1212</td>\n", + " <td>131072</td>\n", + " <td>0.000002</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>47</th>\n", + " <td>openai/gpt-4.1</td>\n", + " <td>1047576</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>100</th>\n", + " <td>perplexity/sonar-deep-research</td>\n", + " <td>128000</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>227</th>\n", + " <td>meta-llama/llama-3.1-405b</td>\n", + " <td>32768</td>\n", + " <td>0.000002</td>\n", + " <td>0.000002</td>\n", + " </tr>\n", + " <tr>\n", + " <th>153</th>\n", + " <td>x-ai/grok-2-vision-1212</td>\n", + " <td>32768</td>\n", + " <td>0.000002</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>168</th>\n", + " <td>mistralai/mistral-large-2407</td>\n", + " <td>131072</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>98</th>\n", + " <td>perplexity/sonar-reasoning-pro</td>\n", + " <td>128000</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>111</th>\n", + " <td>perplexity/r1-1776</td>\n", + " <td>128000</td>\n", + " <td>0.000002</td>\n", + " <td>0.000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>167</th>\n", + " <td>mistralai/mistral-large-2411</td>\n", + " <td>131072</td>\n", + " <td>0.000002</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>166</th>\n", + " <td>openai/gpt-4o-2024-11-20</td>\n", + " <td>128000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>225</th>\n", + " <td>openai/gpt-4o-2024-08-06</td>\n", + " <td>128000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>260</th>\n", + " <td>openai/gpt-4o</td>\n", + " <td>128000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>192</th>\n", + " <td>inflection/inflection-3-pi</td>\n", + " <td>8000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>91</th>\n", + 
" <td>openai/gpt-4o-search-preview</td>\n", + " <td>128000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>213</th>\n", + " <td>cohere/command-r-plus-08-2024</td>\n", + " <td>128000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>191</th>\n", + " <td>inflection/inflection-3-productivity</td>\n", + " <td>8000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>89</th>\n", + " <td>cohere/command-a</td>\n", + " <td>256000</td>\n", + " <td>0.0000025</td>\n", + " <td>0.00001</td>\n", + " </tr>\n", + " <tr>\n", + " <th>64</th>\n", + " <td>all-hands/openhands-lm-32b-v0.1</td>\n", + " <td>16384</td>\n", + " <td>0.0000026</td>\n", + " <td>0.0000034</td>\n", + " </tr>\n", + " <tr>\n", + " <th>175</th>\n", + " <td>eva-unit-01/eva-qwen-2.5-32b</td>\n", + " <td>16384</td>\n", + " <td>0.0000026</td>\n", + " <td>0.0000034</td>\n", + " </tr>\n", + " <tr>\n", + " <th>290</th>\n", + " <td>mistralai/mistral-medium</td>\n", + " <td>32768</td>\n", + " <td>0.00000275</td>\n", + " <td>0.0000081</td>\n", + " </tr>\n", + " <tr>\n", + " <th>195</th>\n", + " <td>anthracite-org/magnum-v2-72b</td>\n", + " <td>32768</td>\n", + " <td>0.000003</td>\n", + " <td>0.000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>284</th>\n", + " <td>anthropic/claude-3-sonnet</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>283</th>\n", + " <td>anthropic/claude-3-sonnet:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>309</th>\n", + " <td>openai/gpt-3.5-turbo-16k</td>\n", + " <td>16385</td>\n", + " <td>0.000003</td>\n", + " <td>0.000004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>184</th>\n", + " <td>anthropic/claude-3.5-sonnet</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>183</th>\n", + " <td>anthropic/claude-3.5-sonnet:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>275</th>\n", + " <td>cohere/command-r-plus-04-2024</td>\n", + " <td>128000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>274</th>\n", + " <td>cohere/command-r-plus</td>\n", + " <td>128000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>109</th>\n", + " <td>anthropic/claude-3.7-sonnet:thinking</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>110</th>\n", + " <td>anthropic/claude-3.7-sonnet:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>99</th>\n", + " <td>perplexity/sonar-pro</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>244</th>\n", + " <td>01-ai/yi-large</td>\n", + " <td>32768</td>\n", + " <td>0.000003</td>\n", + " <td>0.000003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>246</th>\n", + " <td>anthropic/claude-3.5-sonnet-20240620:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>247</th>\n", + " <td>anthropic/claude-3.5-sonnet-20240620</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>56</th>\n", + " <td>x-ai/grok-3-beta</td>\n", + " <td>131072</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>108</th>\n", + " <td>anthropic/claude-3.7-sonnet</td>\n", + " <td>200000</td>\n", + " <td>0.000003</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>152</th>\n", + " <td>eva-unit-01/eva-llama-3.33-70b</td>\n", + " <td>16384</td>\n", + " <td>0.000004</td>\n", + " 
<td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>257</th>\n", + " <td>neversleep/llama-3-lumimaid-70b</td>\n", + " <td>8192</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>241</th>\n", + " <td>alpindale/magnum-72b</td>\n", + " <td>16384</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>165</th>\n", + " <td>eva-unit-01/eva-qwen-2.5-72b</td>\n", + " <td>16384</td>\n", + " <td>0.000004</td>\n", + " <td>0.000006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>120</th>\n", + " <td>aion-labs/aion-1.0</td>\n", + " <td>131072</td>\n", + " <td>0.000004</td>\n", + " <td>0.000008</td>\n", + " </tr>\n", + " <tr>\n", + " <th>174</th>\n", + " <td>raifle/sorcererlm-8x22b</td>\n", + " <td>16000</td>\n", + " <td>0.0000045</td>\n", + " <td>0.0000045</td>\n", + " </tr>\n", + " <tr>\n", + " <th>263</th>\n", + " <td>openai/gpt-4o-2024-05-13</td>\n", + " <td>128000</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>222</th>\n", + " <td>openai/chatgpt-4o-latest</td>\n", + " <td>128000</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>170</th>\n", + " <td>x-ai/grok-vision-beta</td>\n", + " <td>8192</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>185</th>\n", + " <td>x-ai/grok-beta</td>\n", + " <td>131072</td>\n", + " <td>0.000005</td>\n", + " <td>0.000015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>261</th>\n", + " <td>openai/gpt-4o:extended</td>\n", + " <td>128000</td>\n", + " <td>0.000006</td>\n", + " <td>0.000018</td>\n", + " </tr>\n", + " <tr>\n", + " <th>301</th>\n", + " <td>alpindale/goliath-120b</td>\n", + " <td>6144</td>\n", + " <td>0.0000065625</td>\n", + " <td>0.000009375</td>\n", + " </tr>\n", + " <tr>\n", + " <th>313</th>\n", + " <td>anthropic/claude-2.0:beta</td>\n", + " <td>100000</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + 
" </tr>\n", + " <tr>\n", + " <th>297</th>\n", + " <td>anthropic/claude-2.1</td>\n", + " <td>200000</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>299</th>\n", + " <td>anthropic/claude-2</td>\n", + " <td>200000</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>298</th>\n", + " <td>anthropic/claude-2:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>314</th>\n", + " <td>anthropic/claude-2.0</td>\n", + " <td>100000</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>296</th>\n", + " <td>anthropic/claude-2.1:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000008</td>\n", + " <td>0.000024</td>\n", + " </tr>\n", + " <tr>\n", + " <th>304</th>\n", + " <td>openai/gpt-4-1106-preview</td>\n", + " <td>128000</td>\n", + " <td>0.00001</td>\n", + " <td>0.00003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>openai/o3</td>\n", + " <td>200000</td>\n", + " <td>0.00001</td>\n", + " <td>0.00004</td>\n", + " </tr>\n", + " <tr>\n", + " <th>273</th>\n", + " <td>openai/gpt-4-turbo</td>\n", + " <td>128000</td>\n", + " <td>0.00001</td>\n", + " <td>0.00003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>288</th>\n", + " <td>openai/gpt-4-turbo-preview</td>\n", + " <td>128000</td>\n", + " <td>0.00001</td>\n", + " <td>0.00003</td>\n", + " </tr>\n", + " <tr>\n", + " <th>151</th>\n", + " <td>openai/o1</td>\n", + " <td>200000</td>\n", + " <td>0.000015</td>\n", + " <td>0.00006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>282</th>\n", + " <td>anthropic/claude-3-opus</td>\n", + " <td>200000</td>\n", + " <td>0.000015</td>\n", + " <td>0.000075</td>\n", + " </tr>\n", + " <tr>\n", + " <th>281</th>\n", + " <td>anthropic/claude-3-opus:beta</td>\n", + " <td>200000</td>\n", + " <td>0.000015</td>\n", + " <td>0.000075</td>\n", + " </tr>\n", + " <tr>\n", + " <th>208</th>\n", + " 
<td>openai/o1-preview</td>\n", + " <td>128000</td>\n", + " <td>0.000015</td>\n", + " <td>0.00006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>209</th>\n", + " <td>openai/o1-preview-2024-09-12</td>\n", + " <td>128000</td>\n", + " <td>0.000015</td>\n", + " <td>0.00006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>321</th>\n", + " <td>openai/gpt-4-0314</td>\n", + " <td>8191</td>\n", + " <td>0.00003</td>\n", + " <td>0.00006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>320</th>\n", + " <td>openai/gpt-4</td>\n", + " <td>8191</td>\n", + " <td>0.00003</td>\n", + " <td>0.00006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>311</th>\n", + " <td>openai/gpt-4-32k-0314</td>\n", + " <td>32767</td>\n", + " <td>0.00006</td>\n", + " <td>0.00012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>310</th>\n", + " <td>openai/gpt-4-32k</td>\n", + " <td>32767</td>\n", + " <td>0.00006</td>\n", + " <td>0.00012</td>\n", + " </tr>\n", + " <tr>\n", + " <th>106</th>\n", + " <td>openai/gpt-4.5-preview</td>\n", + " <td>128000</td>\n", + " <td>0.000075</td>\n", + " <td>0.00015</td>\n", + " </tr>\n", + " <tr>\n", + " <th>78</th>\n", + " <td>openai/o1-pro</td>\n", + " <td>200000</td>\n", + " <td>0.00015</td>\n", + " <td>0.0006</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " id context_length pricing_prompt pricing_completion\n", + "302 openrouter/auto 2000000 -1 -1\n", + "133 deepseek/deepseek-r1-distill-qwen-32b:free 16000 0 0\n", + "59 nvidia/llama-3.1-nemotron-ultra-253b-v1:free 131072 0 0\n", + "113 cognitivecomputations/dolphin3.0-r1-mistral-24... 
32768 0 0\n", + "57 nvidia/llama-3.3-nemotron-super-49b-v1:free 131072 0 0\n", + "114 cognitivecomputations/dolphin3.0-mistral-24b:free 32768 0 0\n", + "54 moonshotai/kimi-vl-a3b-thinking:free 131072 0 0\n", + "53 agentica-org/deepcoder-14b-preview:free 96000 0 0\n", + "52 arliai/qwq-32b-arliai-rpr-v1:free 32768 0 0\n", + "231 meta-llama/llama-3.1-8b-instruct:free 131072 0 0\n", + "226 meta-llama/llama-3.1-405b:free 64000 0 0\n", + "125 qwen/qwen2.5-vl-72b-instruct:free 131072 0 0\n", + "45 shisa-ai/shisa-v2-llama3.3-70b:free 32768 0 0\n", + "87 google/gemma-3-12b-it:free 131072 0 0\n", + "92 rekaai/reka-flash-3:free 32768 0 0\n", + "131 mistralai/mistral-small-24b-instruct-2501:free 32768 0 0\n", + "81 open-r1/olympiccoder-32b:free 32768 0 0\n", + "60 meta-llama/llama-4-maverick:free 256000 0 0\n", + "236 mistralai/mistral-nemo:free 128000 0 0\n", + "62 meta-llama/llama-4-scout:free 512000 0 0\n", + "83 google/gemma-3-4b-it:free 131072 0 0\n", + "93 google/gemma-3-27b-it:free 96000 0 0\n", + "79 mistralai/mistral-small-3.1-24b-instruct:free 96000 0 0\n", + "251 mistralai/mistral-7b-instruct:free 32768 0 0\n", + "77 featherless/qwerky-72b:free 32768 0 0\n", + "75 deepseek/deepseek-chat-v3-0324:free 163840 0 0\n", + "242 google/gemma-2-9b-it:free 8192 0 0\n", + "73 qwen/qwen2.5-vl-32b-instruct:free 8192 0 0\n", + "135 deepseek/deepseek-r1-distill-qwen-14b:free 64000 0 0\n", + "72 google/gemini-2.5-pro-exp-03-25 1000000 0 0\n", + "70 bytedance-research/ui-tars-72b:free 32768 0 0\n", + "69 allenai/molmo-7b-d:free 4096 0 0\n", + "101 deepseek/deepseek-r1-zero:free 163840 0 0\n", + "102 qwen/qwq-32b:free 40000 0 0\n", + "66 deepseek/deepseek-v3-base:free 163840 0 0\n", + "104 moonshotai/moonlight-16b-a3b-instruct:free 8192 0 0\n", + "105 nousresearch/deephermes-3-llama-3-8b-preview:free 131072 0 0\n", + "71 qwen/qwen2.5-vl-3b-instruct:free 64000 0 0\n", + "218 google/gemini-flash-1.5-8b-exp 1000000 0 0\n", + "82 google/gemma-3-1b-it:free 32768 0 0\n", + "156 
google/gemini-2.0-flash-exp:free 1048576 0 0\n", + "204 qwen/qwen-2.5-72b-instruct:free 32768 0 0\n", + "21 qwen/qwen3-30b-a3b:free 40960 0 0\n", + "38 thudm/glm-4-32b:free 32768 0 0\n", + "157 meta-llama/llama-3.3-70b-instruct:free 8000 0 0\n", + "18 deepseek/deepseek-prover-v2:free 163840 0 0\n", + "17 opengvlab/internvl3-2b:free 32000 0 0\n", + "23 qwen/qwen3-8b:free 40960 0 0\n", + "16 opengvlab/internvl3-14b:free 32000 0 0\n", + "14 qwen/qwen3-1.7b:free 32000 0 0\n", + "202 meta-llama/llama-3.2-11b-vision-instruct:free 131072 0 0\n", + "12 qwen/qwen3-0.6b-04-28:free 32000 0 0\n", + "11 microsoft/phi-4-reasoning:free 32768 0 0\n", + "162 qwen/qwq-32b-preview:free 16384 0 0\n", + "9 microsoft/phi-4-reasoning-plus:free 32768 0 0\n", + "15 qwen/qwen3-4b:free 128000 0 0\n", + "164 google/learnlm-1.5-pro-experimental:free 40960 0 0\n", + "148 deepseek/deepseek-chat:free 163840 0 0\n", + "199 meta-llama/llama-3.2-1b-instruct:free 131000 0 0\n", + "36 thudm/glm-z1-32b:free 32768 0 0\n", + "35 microsoft/mai-ds-r1:free 163840 0 0\n", + "34 thudm/glm-4-9b:free 32000 0 0\n", + "33 thudm/glm-z1-9b:free 32000 0 0\n", + "188 qwen/qwen-2.5-7b-instruct:free 32768 0 0\n", + "172 qwen/qwen-2.5-coder-32b-instruct:free 32768 0 0\n", + "25 qwen/qwen3-14b:free 40960 0 0\n", + "197 meta-llama/llama-3.2-3b-instruct:free 20000 0 0\n", + "141 deepseek/deepseek-r1-distill-llama-70b:free 8192 0 0\n", + "29 qwen/qwen3-235b-a22b:free 40960 0 0\n", + "143 deepseek/deepseek-r1:free 163840 0 0\n", + "27 qwen/qwen3-32b:free 40960 0 0\n", + "215 qwen/qwen-2.5-vl-7b-instruct:free 64000 0 0\n", + "31 tngtech/deepseek-r1t-chimera:free 163840 0 0\n", + "200 meta-llama/llama-3.2-1b-instruct 131072 0.000000005 0.00000001\n", + "198 meta-llama/llama-3.2-3b-instruct 131072 0.00000001 0.00000002\n", + "139 liquid/lfm-7b 32768 0.00000001 0.00000001\n", + "46 qwen/qwen2.5-coder-7b-instruct 32768 0.00000001 0.00000003\n", + "243 google/gemma-2-9b-it 8192 0.00000002 0.00000006\n", + "232 
meta-llama/llama-3.1-8b-instruct 16384 0.00000002 0.00000003\n", + "84 google/gemma-3-4b-it 131072 0.00000002 0.00000004\n", + "140 liquid/lfm-3b 32768 0.00000002 0.00000002\n", + "115 meta-llama/llama-guard-3-8b 131072 0.00000002 0.00000006\n", + "223 sao10k/l3-lunaris-8b 8192 0.00000002 0.00000005\n", + "253 nousresearch/hermes-2-pro-llama-3-8b 131072 0.000000025 0.00000004\n", + "254 mistralai/mistral-7b-instruct-v0.3 32768 0.000000028 0.000000054\n", + "252 mistralai/mistral-7b-instruct 32768 0.000000028 0.000000054\n", + "268 meta-llama/llama-3-8b-instruct 8192 0.00000003 0.00000006\n", + "219 microsoft/phi-3.5-mini-128k-instruct 131072 0.00000003 0.00000009\n", + "237 mistralai/mistral-nemo 98304 0.00000003 0.00000007\n", + "160 amazon/nova-micro-v1 128000 0.000000035 0.00000014\n", + "24 qwen/qwen3-8b 128000 0.000000035 0.000000138\n", + "193 google/gemini-flash-1.5-8b 1000000 0.0000000375 0.00000015\n", + "155 cohere/command-r7b-12-2024 128000 0.0000000375 0.00000015\n", + "187 mistralai/ministral-3b 131072 0.00000004 0.00000004\n", + "117 deepseek/deepseek-r1-distill-llama-8b 32000 0.00000004 0.00000004\n", + "258 deepseek/deepseek-coder 128000 0.00000004 0.00000012\n", + "203 meta-llama/llama-3.2-11b-vision-instruct 131072 0.000000049 0.000000049\n", + "80 mistralai/mistral-small-3.1-24b-instruct 131072 0.00000005 0.00000015\n", + "20 meta-llama/llama-guard-4-12b 163840 0.00000005 0.00000005\n", + "189 qwen/qwen-2.5-7b-instruct 32768 0.00000005 0.0000001\n", + "97 microsoft/phi-4-multimodal-instruct 131072 0.00000005 0.0000001\n", + "88 google/gemma-3-12b-it 131072 0.00000005 0.0000001\n", + "124 qwen/qwen-turbo 1000000 0.00000005 0.0000002\n", + "132 mistralai/mistral-small-24b-instruct-2501 28000 0.00000006 0.00000012\n", + "173 qwen/qwen-2.5-coder-32b-instruct 32768 0.00000006 0.00000015\n", + "159 amazon/nova-lite-v1 300000 0.00000006 0.00000024\n", + "316 gryphe/mythomax-l2-13b 4096 0.000000065 0.000000065\n", + "10 microsoft/phi-4-reasoning-plus 
32768 0.00000007 0.00000035\n", + "147 microsoft/phi-4 16384 0.00000007 0.00000014\n", + "26 qwen/qwen3-14b 40960 0.00000007 0.00000024\n", + "259 google/gemini-flash-1.5 1000000 0.000000075 0.0000003\n", + "107 google/gemini-2.0-flash-lite-001 1048576 0.000000075 0.0000003\n", + "63 meta-llama/llama-4-scout 1048576 0.00000008 0.0000003\n", + "294 mistralai/mixtral-8x7b-instruct 32768 0.00000008 0.00000024\n", + "264 allenai/olmo-7b-instruct 2048 0.00000008 0.00000024\n", + "163 qwen/qwq-32b-preview 32768 0.00000009 0.00000027\n", + "158 meta-llama/llama-3.3-70b-instruct 131000 0.00000009 0.00000035\n", + "266 neversleep/llama-3-lumimaid-8b 24576 0.00000009375 0.00000075\n", + "207 neversleep/llama-3.1-lumimaid-8b 32768 0.00000009375 0.00000075\n", + "265 neversleep/llama-3-lumimaid-8b:extended 24576 0.00000009375 0.00000075\n", + "212 mistralai/pixtral-12b 32768 0.0000001 0.0000001\n", + "28 qwen/qwen3-32b 40960 0.0000001 0.0000003\n", + "49 openai/gpt-4.1-nano 1047576 0.0000001 0.0000004\n", + "186 mistralai/ministral-8b 128000 0.0000001 0.0000001\n", + "118 google/gemini-2.0-flash-001 1000000 0.0000001 0.0000004\n", + "234 meta-llama/llama-3.1-70b-instruct 131072 0.0000001 0.00000028\n", + "65 mistral/ministral-8b 131072 0.0000001 0.0000001\n", + "240 google/gemma-2-27b-it 8192 0.0000001 0.0000003\n", + "256 microsoft/phi-3-medium-128k-instruct 131072 0.0000001 0.0000003\n", + "255 microsoft/phi-3-mini-128k-instruct 128000 0.0000001 0.0000001\n", + "94 google/gemma-3-27b-it 131072 0.0000001 0.0000002\n", + "22 qwen/qwen3-30b-a3b 40960 0.0000001 0.0000003\n", + "142 deepseek/deepseek-r1-distill-llama-70b 131072 0.0000001 0.0000004\n", + "307 mistralai/mistral-7b-instruct-v0.1 2824 0.00000011 0.00000019\n", + "205 qwen/qwen-2.5-72b-instruct 32768 0.00000012 0.00000039\n", + "190 nvidia/llama-3.1-nemotron-70b-instruct 131072 0.00000012 0.0000003\n", + "134 deepseek/deepseek-r1-distill-qwen-32b 131072 0.00000012 0.00000018\n", + "220 
nousresearch/hermes-3-llama-3.1-70b 131072 0.00000012 0.0000003\n", + "58 nvidia/llama-3.3-nemotron-super-49b-v1 131072 0.00000013 0.0000004\n", + "30 qwen/qwen3-235b-a22b 40960 0.00000014 0.000002\n", + "90 openai/gpt-4o-mini-search-preview 128000 0.00000015 0.0000006\n", + "136 deepseek/deepseek-r1-distill-qwen-14b 64000 0.00000015 0.00000015\n", + "214 cohere/command-r-08-2024 128000 0.00000015 0.0000006\n", + "238 openai/gpt-4o-mini 128000 0.00000015 0.0000006\n", + "103 qwen/qwq-32b 131072 0.00000015 0.0000002\n", + "196 liquid/lfm-40b 32768 0.00000015 0.00000015\n", + "41 google/gemini-2.5-flash-preview:thinking 1048576 0.00000015 0.0000035\n", + "40 google/gemini-2.5-flash-preview 1048576 0.00000015 0.0000006\n", + "239 openai/gpt-4o-mini-2024-07-18 128000 0.00000015 0.0000006\n", + "61 meta-llama/llama-4-maverick 1048576 0.00000017 0.0000006\n", + "67 scb10x/llama3.1-typhoon2-8b-instruct 8192 0.00000018 0.00000018\n", + "130 deepseek/deepseek-r1-distill-qwen-1.5b 131072 0.00000018 0.00000018\n", + "3 arcee-ai/spotlight 131072 0.00000018 0.00000018\n", + "216 qwen/qwen-2.5-vl-7b-instruct 32768 0.0000002 0.0000002\n", + "262 meta-llama/llama-guard-2-8b 8192 0.0000002 0.0000002\n", + "293 mistralai/mistral-7b-instruct-v0.2 32768 0.0000002 0.0000002\n", + "86 ai21/jamba-1.6-mini 256000 0.0000002 0.0000004\n", + "122 aion-labs/aion-rp-llama-3.1-8b 32768 0.0000002 0.0000002\n", + "229 perplexity/llama-3.1-sonar-small-128k-online 127072 0.0000002 0.0000002\n", + "145 minimax/minimax-01 1000192 0.0000002 0.0000011\n", + "291 mistralai/mistral-small 32768 0.0000002 0.0000006\n", + "112 mistralai/mistral-saba 32768 0.0000002 0.0000006\n", + "119 qwen/qwen-vl-plus 7500 0.00000021 0.00000063\n", + "39 thudm/glm-4-32b 32000 0.00000024 0.00000024\n", + "32 thudm/glm-z1-rumination-32b 32000 0.00000024 0.00000024\n", + "37 thudm/glm-z1-32b 32000 0.00000024 0.00000024\n", + "13 inception/mercury-coder-small-beta 32000 0.00000025 0.000001\n", + "292 mistralai/mistral-tiny 
32768 0.00000025 0.00000025\n", + "126 qwen/qwen2.5-vl-72b-instruct 32000 0.00000025 0.00000075\n", + "194 thedrummer/rocinante-12b 32768 0.00000025 0.0000005\n", + "279 anthropic/claude-3-haiku:beta 200000 0.00000025 0.00000125\n", + "235 mistralai/codestral-mamba 262144 0.00000025 0.00000025\n", + "280 anthropic/claude-3-haiku 200000 0.00000025 0.00000125\n", + "55 x-ai/grok-3-mini-beta 131072 0.0000003 0.0000005\n", + "269 meta-llama/llama-3-70b-instruct 8192 0.0000003 0.0000004\n", + "146 mistralai/codestral-2501 262144 0.0000003 0.0000009\n", + "76 deepseek/deepseek-chat-v3-0324 163840 0.0000003 0.00000088\n", + "149 deepseek/deepseek-chat 163840 0.00000038 0.00000089\n", + "270 mistralai/mixtral-8x22b-instruct 65536 0.0000004 0.0000012\n", + "0 mistralai/mistral-medium-3 131072 0.0000004 0.000002\n", + "48 openai/gpt-4.1-mini 1047576 0.0000004 0.0000016\n", + "127 qwen/qwen-plus 131072 0.0000004 0.0000012\n", + "8 arcee-ai/arcee-blitz 32768 0.00000045 0.00000075\n", + "176 thedrummer/unslopnemo-12b 32000 0.00000045 0.00000045\n", + "278 cohere/command-r 128000 0.0000005 0.0000015\n", + "19 deepseek/deepseek-prover-v2 131072 0.0000005 0.00000218\n", + "285 cohere/command-r-03-2024 128000 0.0000005 0.0000015\n", + "7 arcee-ai/virtuoso-medium-v2 131072 0.0000005 0.0000008\n", + "6 arcee-ai/coder-large 32768 0.0000005 0.0000008\n", + "271 microsoft/wizardlm-2-8x22b 65536 0.0000005 0.0000005\n", + "96 thedrummer/skyfall-36b-v2 32768 0.0000005 0.0000008\n", + "144 deepseek/deepseek-r1 163840 0.0000005 0.00000218\n", + "305 jondurbin/airoboros-l2-70b 4096 0.0000005 0.0000005\n", + "318 openai/gpt-3.5-turbo 16385 0.0000005 0.0000015\n", + "319 openai/gpt-3.5-turbo-0125 16385 0.0000005 0.0000015\n", + "245 ai21/jamba-instruct 256000 0.0000005 0.0000007\n", + "2 arcee-ai/caller-large 32768 0.00000055 0.00000085\n", + "308 pygmalionai/mythalion-13b 8192 0.0000005625 0.000001125\n", + "315 undi95/remm-slerp-l2-13b 6144 0.0000005625 0.000001125\n", + "206 
qwen/qwen-2.5-vl-72b-instruct 32768 0.0000006 0.0000006\n", + "289 nousresearch/nous-hermes-2-mixtral-8x7b-dpo 32768 0.0000006 0.0000006\n", + "150 sao10k/l3.3-euryale-70b 131072 0.0000007 0.0000008\n", + "121 aion-labs/aion-1.0-mini 131072 0.0000007 0.0000014\n", + "217 sao10k/l3.1-euryale-70b 131072 0.0000007 0.0000008\n", + "5 arcee-ai/virtuoso-large 131072 0.00000075 0.0000012\n", + "295 neversleep/noromaid-20b 8192 0.00000075 0.0000015\n", + "221 nousresearch/hermes-3-llama-3.1-405b 131072 0.0000008 0.0000008\n", + "233 meta-llama/llama-3.1-405b-instruct 32768 0.0000008 0.0000008\n", + "224 aetherwiing/mn-starcannon-12b 16384 0.0000008 0.0000012\n", + "179 anthropic/claude-3.5-haiku-20241022:beta 200000 0.0000008 0.000004\n", + "95 thedrummer/anubis-pro-105b-v1 131072 0.0000008 0.000001\n", + "180 anthropic/claude-3.5-haiku-20241022 200000 0.0000008 0.000004\n", + "51 alfredpros/codellama-7b-instruct-solidity 4096 0.0000008 0.0000012\n", + "50 eleutherai/llemma_7b 4096 0.0000008 0.0000012\n", + "267 sao10k/fimbulvetr-11b-v2 4096 0.0000008 0.0000012\n", + "276 sophosympatheia/midnight-rose-70b 4096 0.0000008 0.0000008\n", + "123 qwen/qwen-vl-max 7500 0.0000008 0.0000032\n", + "161 amazon/nova-pro-v1 300000 0.0000008 0.0000032\n", + "171 infermatic/mn-inferor-12b 16384 0.0000008 0.0000012\n", + "300 undi95/toppy-m-7b 4096 0.0000008 0.0000012\n", + "177 anthropic/claude-3.5-haiku:beta 200000 0.0000008 0.000004\n", + "178 anthropic/claude-3.5-haiku 200000 0.0000008 0.000004\n", + "228 nothingiisreal/mn-celeste-12b 16384 0.0000008 0.0000012\n", + "68 scb10x/llama3.1-typhoon2-70b-instruct 8192 0.00000088 0.00000088\n", + "74 qwen/qwen2.5-vl-32b-instruct 128000 0.0000009 0.0000009\n", + "249 cognitivecomputations/dolphin-mixtral-8x22b 16000 0.0000009 0.0000009\n", + "4 arcee-ai/maestro-reasoning 131072 0.0000009 0.0000033\n", + "317 meta-llama/llama-2-70b-chat 4096 0.0000009 0.0000009\n", + "250 qwen/qwen-2-72b-instruct 32768 0.0000009 0.0000009\n", + "230 
perplexity/llama-3.1-sonar-large-128k-online 127072 0.000001 0.000001\n", + "287 openai/gpt-3.5-turbo-0613 4095 0.000001 0.000002\n", + "277 cohere/command 4096 0.000001 0.000002\n", + "138 perplexity/sonar 127072 0.000001 0.000001\n", + "137 perplexity/sonar-reasoning 127000 0.000001 0.000005\n", + "303 openai/gpt-3.5-turbo-1106 16385 0.000001 0.000002\n", + "42 openai/o4-mini-high 200000 0.0000011 0.0000044\n", + "210 openai/o1-mini 128000 0.0000011 0.0000044\n", + "211 openai/o1-mini-2024-09-12 128000 0.0000011 0.0000044\n", + "44 openai/o4-mini 200000 0.0000011 0.0000044\n", + "129 openai/o3-mini 200000 0.0000011 0.0000044\n", + "116 openai/o3-mini-high 200000 0.0000011 0.0000044\n", + "312 mancer/weaver 8000 0.000001125 0.000001125\n", + "201 meta-llama/llama-3.2-90b-vision-instruct 131072 0.0000012 0.0000012\n", + "272 google/gemini-pro-1.5 2000000 0.00000125 0.000005\n", + "1 google/gemini-2.5-pro-preview 1048576 0.00000125 0.00001\n", + "248 sao10k/l3-euryale-70b 8192 0.00000148 0.00000148\n", + "181 neversleep/llama-3.1-lumimaid-70b 16384 0.0000015 0.00000225\n", + "306 openai/gpt-3.5-turbo-instruct 4095 0.0000015 0.000002\n", + "182 anthracite-org/magnum-v4-72b 16384 0.0000015 0.00000225\n", + "128 qwen/qwen-max 32768 0.0000016 0.0000064\n", + "169 mistralai/pixtral-large-2411 131072 0.000002 0.000006\n", + "286 mistralai/mistral-large 128000 0.000002 0.000006\n", + "85 ai21/jamba-1.6-large 256000 0.000002 0.000008\n", + "154 x-ai/grok-2-1212 131072 0.000002 0.00001\n", + "47 openai/gpt-4.1 1047576 0.000002 0.000008\n", + "100 perplexity/sonar-deep-research 128000 0.000002 0.000008\n", + "227 meta-llama/llama-3.1-405b 32768 0.000002 0.000002\n", + "153 x-ai/grok-2-vision-1212 32768 0.000002 0.00001\n", + "168 mistralai/mistral-large-2407 131072 0.000002 0.000006\n", + "98 perplexity/sonar-reasoning-pro 128000 0.000002 0.000008\n", + "111 perplexity/r1-1776 128000 0.000002 0.000008\n", + "167 mistralai/mistral-large-2411 131072 0.000002 0.000006\n", + "166 
openai/gpt-4o-2024-11-20 128000 0.0000025 0.00001\n", + "225 openai/gpt-4o-2024-08-06 128000 0.0000025 0.00001\n", + "260 openai/gpt-4o 128000 0.0000025 0.00001\n", + "192 inflection/inflection-3-pi 8000 0.0000025 0.00001\n", + "91 openai/gpt-4o-search-preview 128000 0.0000025 0.00001\n", + "213 cohere/command-r-plus-08-2024 128000 0.0000025 0.00001\n", + "191 inflection/inflection-3-productivity 8000 0.0000025 0.00001\n", + "89 cohere/command-a 256000 0.0000025 0.00001\n", + "64 all-hands/openhands-lm-32b-v0.1 16384 0.0000026 0.0000034\n", + "175 eva-unit-01/eva-qwen-2.5-32b 16384 0.0000026 0.0000034\n", + "290 mistralai/mistral-medium 32768 0.00000275 0.0000081\n", + "195 anthracite-org/magnum-v2-72b 32768 0.000003 0.000003\n", + "284 anthropic/claude-3-sonnet 200000 0.000003 0.000015\n", + "283 anthropic/claude-3-sonnet:beta 200000 0.000003 0.000015\n", + "309 openai/gpt-3.5-turbo-16k 16385 0.000003 0.000004\n", + "184 anthropic/claude-3.5-sonnet 200000 0.000003 0.000015\n", + "183 anthropic/claude-3.5-sonnet:beta 200000 0.000003 0.000015\n", + "275 cohere/command-r-plus-04-2024 128000 0.000003 0.000015\n", + "274 cohere/command-r-plus 128000 0.000003 0.000015\n", + "109 anthropic/claude-3.7-sonnet:thinking 200000 0.000003 0.000015\n", + "110 anthropic/claude-3.7-sonnet:beta 200000 0.000003 0.000015\n", + "99 perplexity/sonar-pro 200000 0.000003 0.000015\n", + "244 01-ai/yi-large 32768 0.000003 0.000003\n", + "246 anthropic/claude-3.5-sonnet-20240620:beta 200000 0.000003 0.000015\n", + "247 anthropic/claude-3.5-sonnet-20240620 200000 0.000003 0.000015\n", + "56 x-ai/grok-3-beta 131072 0.000003 0.000015\n", + "108 anthropic/claude-3.7-sonnet 200000 0.000003 0.000015\n", + "152 eva-unit-01/eva-llama-3.33-70b 16384 0.000004 0.000006\n", + "257 neversleep/llama-3-lumimaid-70b 8192 0.000004 0.000006\n", + "241 alpindale/magnum-72b 16384 0.000004 0.000006\n", + "165 eva-unit-01/eva-qwen-2.5-72b 16384 0.000004 0.000006\n", + "120 aion-labs/aion-1.0 131072 0.000004 
0.000008\n", + "174 raifle/sorcererlm-8x22b 16000 0.0000045 0.0000045\n", + "263 openai/gpt-4o-2024-05-13 128000 0.000005 0.000015\n", + "222 openai/chatgpt-4o-latest 128000 0.000005 0.000015\n", + "170 x-ai/grok-vision-beta 8192 0.000005 0.000015\n", + "185 x-ai/grok-beta 131072 0.000005 0.000015\n", + "261 openai/gpt-4o:extended 128000 0.000006 0.000018\n", + "301 alpindale/goliath-120b 6144 0.0000065625 0.000009375\n", + "313 anthropic/claude-2.0:beta 100000 0.000008 0.000024\n", + "297 anthropic/claude-2.1 200000 0.000008 0.000024\n", + "299 anthropic/claude-2 200000 0.000008 0.000024\n", + "298 anthropic/claude-2:beta 200000 0.000008 0.000024\n", + "314 anthropic/claude-2.0 100000 0.000008 0.000024\n", + "296 anthropic/claude-2.1:beta 200000 0.000008 0.000024\n", + "304 openai/gpt-4-1106-preview 128000 0.00001 0.00003\n", + "43 openai/o3 200000 0.00001 0.00004\n", + "273 openai/gpt-4-turbo 128000 0.00001 0.00003\n", + "288 openai/gpt-4-turbo-preview 128000 0.00001 0.00003\n", + "151 openai/o1 200000 0.000015 0.00006\n", + "282 anthropic/claude-3-opus 200000 0.000015 0.000075\n", + "281 anthropic/claude-3-opus:beta 200000 0.000015 0.000075\n", + "208 openai/o1-preview 128000 0.000015 0.00006\n", + "209 openai/o1-preview-2024-09-12 128000 0.000015 0.00006\n", + "321 openai/gpt-4-0314 8191 0.00003 0.00006\n", + "320 openai/gpt-4 8191 0.00003 0.00006\n", + "311 openai/gpt-4-32k-0314 32767 0.00006 0.00012\n", + "310 openai/gpt-4-32k 32767 0.00006 0.00012\n", + "106 openai/gpt-4.5-preview 128000 0.000075 0.00015\n", + "78 openai/o1-pro 200000 0.00015 0.0006" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(\"pricing_prompt\")[col_names]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<Axes: xlabel='pricing_prompt', ylabel='pricing_completion'>" + ] + }, + "execution_count": 45, + "metadata": {}, + 
"output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABqwAAAHJCAYAAADwyhjGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXl0VFXWt58aMlZSGSAEQoAMShImIQgKwYi2imCLLWqD2gqKiC1qK+DYyiAo6CvtAMqkyCCCoiKDjAoSgwwKUQhhzCBDIASSVCWVqZKq7498uVBGEAJHbjjnWetddm6d+9Rv34vd683mnG1wu91uFAqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLhPFSB1AoFAqFQqFQKBQKhUKhUCgUCoVCoVDIjWpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLimpYKRQKhUKhUCgUCoVCoVAoFAqFQqFQKC4pqmGlUCgUCoVCoVAoFAqFQqFQKBQKhUKhuKSohpVCoVAoFAqFQqFQKBQKhUKhUCgUCoXikqIaVgqFQqFQKBQKhUKhUCgUCoVCoVAoFIpLivlSB1BcXrjdblwu96WOoUuMRsNFfTYX2yfCKWNGGWsW4dS7T4RTZZTDJ8KpMsrhE+GUMaOMNYtwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjPn2XA0ajAYPBcE5rVcNKcVFxudwUFDgudQzdYTYbCQmxYLeXUlXl0p1PhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4dS7T4RTZdRnRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzXg5EBpqwWQ6t4aVOhJQoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUU1rBQKhUKhUCgUCoVCoVAoFAqFQqFQKBSXFNWwUigUCoVCoVAoFAqFQqFQKBQKhUKhUFxSVMNKoVAoFAqFQqFQKBQKhUKhUCgUCoVCcUlRDSuFQqFQKBQKhUKhUCgUCoVCoVAoFArFJUV3DavMzEweeughOnbsSFJSEm+++SaVlZV/ep/b7WbGjBn07NmTDh060L9/f3755Zc66/Ly8njyySfp1KkTXbt25b///S8lJSV11q1bt46+ffvSvn17evXqxZdffllnTWVlJW+88QZJSUl07NiRhx56iKysrHrXtGjRInr16kX79u3p27cv69evr7OmuLiYl156ia5du9KpUyeeeuopjh8/Xmedy+Vi9uzZ3HrrrbRr146kpCRGjBjhseaBBx4gLi6uzv9lZmbW8SkUCoVCoVAoFAqFQqFQKBQKhUKhUIjCfKkDnI7NZmPgwIFERUUxefJk8vLymDhxIuXl5YwaNeqs986cOZP33nuPkSNHEhcXx/z583n44YdZsmQJLVq0AMDpdPLII48AMGnSJMrLy3njjTcYMWIE06dP11w///wzTzzxBHfffTcvvfQSmzdv5r///S8Wi4Vbb71VWzd+/HhWrFjBCy+8QHh4ONOmTWPQoEF88803BAYGnldN33zzDa+88gqPPfYY1157LStWrOCJJ55g/vz5dOzYUVv39NNPc+DAAcaMGYOPjw/vvPMOQ4YM4csvv8RsPvU6R40axfr163n88ce58soryc/PZ9u2bXW
eW2JiIs8//7zHtcjIyD97VQqFQqFQKBQKhUKhUCgUCoVCoVA0WI6edJB5rASLt5HGVt9LHUeBzhpWCxcuxOFwMGXKFIKDgwGorq5m7NixDB06lPDw8D+8r6KigunTp/Pwww8zaNAgADp37sytt97KRx99xJgxYwBYvXo1+/fvZ8WKFcTExABgtVoZPHgwO3bsoEOHDgBMnTqVDh068OqrrwJw7bXXcujQId577z2tYXXs2DG++OILRo8ezd133w1A+/btueGGG1i4cCFDhgw5r5ree+89brvtNp5++mntO/ft28f777/PzJkzAUhLSyM1NZWPPvqIHj16ABAdHU2fPn1Ys2YNffr0AWDTpk0sXryYr776iri4OO053XbbbXWendVq9WiIKRQKhUKhUCgUCoVCoVAoFAqFQnG5UlLmZMbSXaRnF2jX2kWHMvSOtlh8vS5hMoWujgRMSUmhW7duWmMHoHfv3rhcLjZu3HjG+7Zv305JSQm9e/fWrnl7e3PzzTeTkpLi4Y+Li9OaVQBJSUkEBwezYcMGoOaYvy1btnjspALo06cPmZmZHD58GIDU1FRcLpfHuuDgYJKSkup855/VdOjQIXJycjzy137npk2btOMDU1JSsFqtJCUlaWtiYmJISEjw+M7PP/+crl27ejSrFAqFQqFQKBQKhUKhUCgUCoVCoZCdGUt3kZFT4HEtI6eA6Ut2XaJEilp0tcMqKyuLu+66y+Oa1WolLCzsD2dDnX4f4NGIAoiNjWXOnDmUl5fj6+tLVlZWnTUGg4Ho6GjNcfDgQZxO5x+6AJ544glycnIA8PX1xc/Pr866L774ok5NM2bM4NNPP6WgoICEhASCg4O176z9p9Vq5cknnyQ1NRUvLy86dOiA0+nk0KFDxMbGkpWVRXR0NOvXr+edd94hOzubiIgIgoKCPJ7Pr7/+SqNGjejZsydHjx4FoGPHjkycOJHo6Ght3cmTJ9m2bZvW2PLz8+P+++9n5MiRGAyGMz7vP8Ns1lUfVBeYTEaPf+rNJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zChjzSKcF8t39KTDY2dVLS43pGcXcMJeTtNQ/0uaUWZ01bCy2+1YrdY614OCgrDZbGe9z9vbGx8fH4/rVqsVt9uNzWbD19cXu92uzZY6k7/2n7/PUdvAqaioYPLkyUybNo3t27czceJEj1lUVqvVI6vdbmf37t1s2bLFY77Wr7/+ypEjRzy+c/To0ZjNZm2+1vjx4z0+t9vtuN3uOvO1pk6dSlhYmPad+fn55Obm4u/vT2JiItu3b+fEiRMMHjyYlStXas+ppKSEtm3bkpycTHV1NV988QUffvghRUVFvPbaa2d83mfDaDQQEmKp170yYLX6/fmiS+gT4ZQxo4w1i3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoxw+EU4ZMmYeKznr545K1wX/flvEc5QFXTWs9MzixYsBGDRoENdddx2rV68mJyeHhQsXnnW+FsDWrVvrzNfq1KkTe/fu9ViXlZXFypUrtd1dBQUFjBs3jszMTBITE4GaHWC/n6+1YsUKcnNzNY/b7cbb25s1a9aQmZnJgw8+yLPPPsszzzzDsmXLtJlbX3/9NaGhodp9jz76KElJSSxevJhx48ZhNJ5/J9jlcmO3l573fZc7JpMRq9UPu72M6mqX7nwinDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYMsCu7gEMnHLQMs9AmKvTPb/gTLlZGf6+znyxm8TZSWOiol1vEc7wcsFr9znnXma4aVlarleLi4jrXbTYbQUFBZ72vsrKSiooKj11Wdrsdg8Gg3Wu1WikpqdtBtdlsNGvWDEB
b+/scP/zwA4C2rnb3Vu0sqn79+mnfeXpWPz8/iouL68zX8vb2Ji8vz+M7o6OjPY4ijIqKAmD37t0ABAQEYLPZ6szXatKkCb/99huHDx8mMjISq9VK06ZNady4MZmZmQA0b96cpk2bcuDAAe2+05tVAP7+/rRp04aff/6Z0tJSAgIC6jyrc6GqSv3LeCaqq10X9flcbJ8Ip4wZZaxZhFPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcF4MX15hKa/N/ZmSsirtWoCfmVcGXk1YcP2O2judC80YFuRHu+hQMnIKcLlPXTcaoE1UKI2tvhf8DES8a1nQ1WGKMTExdWZVFRcXk5+fX2em1O/vA8jOzva4npWVRUREBL6+vmf0u91usrOzNUfLli3x8vKqs652blXtupiYGAoKCmjUqJHH2t/PyQoJCfG4r7amsrIySkpKKC8v1z77fQMpOzsbg8HAyZMntc/dbrfHHCqAwsJC7bsBrrjiijM8qZojDc9Gfn4+RqOx3s0qhUKhUCgUCoVCoVAoFAqFQqFQyMnvm1UAJWVVjJvz8yVKVJehd7Sts+urTVQoQ+9oe4kSKWrR1Q6r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSko6432JiYkEBATw+uuvc/DgQQoKCoiPjyc3N5ebbrrJw7906VIefvhh0tLS8PLyokOHDhQVFXH99dcDNbufrrnmGj777DO+/PJLsrOziYiIoKSkhNDQUCIjIwHo0aMHRqMRh8PB3Llz+eSTT2jfvj07d+5k2LBh2ndGRkZy8OBBHnnkEY+aDAaDNl+rRYsWmEwmdu/eTXJyMnl5eTz33HN8++23hISEaLu9rrzySgDGjRvHf/7zH8rKyvjss8+0XVS1s66Cg4P56aef6NatG6WlNcfzrVy5kmPHjtG27al/6W688UZtjtbpnK05eC6Yzbrqg+oCvQ4ZFOmUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSp5h2ZJ+o0q2opKati98FC2sc0qpf7YtYcFODDc/cnkl9Uhr28GquvibDgC587JeJdy4bB7Xa7/3zZX8OhQ4e45ZZbsFgsPPLII/z222989dVXtG7dmmXLlmnrBg4cSG5uLmvXrtWuDR48mNTUVG666SYSExOZPXs2x48fZ+7cuVxzzTUAlJaWav/5kUceoaSkhE8++YTg4GA2bdqkuebPn8+rr75KbGws99xzD2vXrmXbtm0kJiayYMECbV2fPn3IzMwkKiqKO++8k5kzZ1JaWsq6deu0owMnTpzIxx9/TGBgoEdNzZo14+jRo6SkpBAeHk779u2prKykbdu27Nq1i44dO5Kenk779u3x9/dn1qxZ/PDDDzzyyCN4eXkRGxvLnj17iI2NxWg0sn//ft566y1uv/12kpOTKS4uplGjRlx99dXa/K3GjRvz7bff4ufnx88//8zgwYO54ooruPPOOzl06BDz5s3D5XKxYMECOnXqVK936Ha7MRjOfg6oQqFQKBQKhUKhUCgUCoVCoVAoLi8WrNnDp6v3nvHz+3rFce8t8X9hIkVDQ1c7rFasWIG3tzfx8fFMnToVi8VCUlISmzZtIi8vj/DwcABcLhfV1dXafRUVFaSlpdG1a1fS09NJSUkhPj4el8vFypUrtSbVunXrqKysJCkpidmzZ2M2m+nevTupqans2LGDDh06aOtiYmIwmUxMmjSJiIgITCaTx9F/x44dIzs7Gy8vL/Ly8pg6dSodOnRgx44dLF++nCFDhgDw22+/AdC6dWuPmjZu3OgxXysuLg6omYEFcOTIEaZMmcL06dO1NbVNsGuuuYaff67ZQtmiRQseffRR7rvvPm3d119/jcPh4LXXXmPFihVAzZyswMBA/PxqOsVhYWG43W4OHDjAhAkTcLlc+Pn
58f7779e7WVXzbtzY7aX1vv9yxWTS/yBElVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM2NDqHlXdgGHTjhoGWapc1Te+RARevZdSs0b+VNY6KiX+3hRGcUXcTcUNIx3fTlgtfqd864zXTWsUlJSSEpK4oMPPtCu2e12unbtysaNG+nXrx8A8+bN87hv+/btOBwOXnrpJRISErTrEyZM8NiFVdvImjVrlnbN7XZz7bXXsmHDBjp06EBlZSVbtmxh5MiRDBo0SFt32223ceDAAQ4fPkxkZCSpqam4XC7cbjevvvqqlu2JJ54gJSVFa1gdPnwYgFGjRhEfH6/V1KVLF4KCgjzma+3bt481a9YQFxfHQw89RM+ePXnhhRe04xBr52slJydz22238eKLLzJhwgR++eUXzQE1s65CQ0OZNm0aW7Zs4cEHH6Rnz55s2LBBq6dVq1Y0btyYHj16sH//fo4ePcpnn32mNQUvBDVQ7szocRCiaKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU8aMMtYswiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwXQxnXmFpnZlTAX5mXhl4NWHB/ufta9MqlAA/8x8eCxjgZyahZch55y0pczJj6S7Sswu0a+2ia+ZNWXy9zjvjH9EQ3rUs6KphlZWVxV133eVxzWq1EhYW5rG7CSAzM5Px48eTlpaG0VjTnaudL1VLbGwsc+bMoby8HF9fX7KysoiJiWHGjBl8+umnFBQUkJCQQJMmTTT/wYMHcTqdhIaG8uSTT5KamoqXlxdNmjQBID09ncjISLKysggICKC0tJQPP/yQ0aNHExERQVRUlEfW/Px8vLy8GDVqFEeOHMHhcHDVVVdhNBq1HVNwar7WgAEDAJgyZQo7duz4w/la8+fPp6SkBIB//etfNGrUiNjYWK3+yspK3nnnHX799Vd27Nih5f79bCq3282iRYtwuWr+5bnlllt48MEHGTFixHm9N4VCoVAoFAqFQqFQKBQKhUKhUDQsft+sgppZU+Pm/Mx7/0mul/OVgVczbs4fN8Hqw4ylu8jIKfC4lpFTwPQluxjev2O9nAr9oquGld1ux2q11rkeFBSEzWbTfrbZbAwcOJCoqCgmT57MwoUL+fbbb3n77bcZNWqUts5qteJ2u7HZbPj6+mK32zl8+DBr165l5MiRxMXFMX/+fNatW0dgYKDmBnjvvffw8/Nj0qRJlJeX89prrwHw7rvvEhAQwK+//orDUbN98eqrr2bUqFFs3ryZqVOnemQvLi6mcePG/Prrr9x2223aDi+Xy0WLFi20dd26dcNoNLJnzx4ArrzySlavXk1ERIR2VCFAx44dSU1NJSIiAgCz2czWrVt5+umntTXl5eUsWLCAFi1a0LRpUw4ePEhmZiaDBw9m586dtG/fHqg5WtHlchEXF0fr1q1Zt24dM2bMoKCggNGjR+Pt7X2eb5D/n0kNlfs9eh2EKNIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwqoz6zKjXmndknvjDnVBQ07TafbCQ9jGNztvbrHEAH4zoSUZOAQfzL+yYwaMnHR47q2pxuSE9u4AT9nKahp7/TrBaGsK7lg1dNazOlYULF+JwOJgyZQrBwcGkp6fz/fffs3DhQoYOHXrGY+3cbjcZGRkMHjxYO+6vc+fOXH311Rw5csRj7aFDh1i5cqW2K8lgMPD000/jdDoZNmwYbrcbk8lE27ZtefXVVwG49tprmTNnDqWlpR7feezYMW666Sa2bdvG2rVrufLKKyksLOTYsWPaui+++AIvLy+uueYavv/+e/bs2UOnTp345ZdfPOZ3LV++nKuvvlo7atDhcNCyZUu2b9+uuaxWK23btuWnn37yqOmjjz6ioKCAiRMnAnDixAk
A9u7dy969p4bhffHFFwwZMoSoqKhzeyGnYTQaCAmxnPd9smC1XpzzVUX5RDhlzChjzSKceveJcKqMcvhEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinynj5+47kl7Bvdx4RjS1EhAXUy5FbcOjs33GylOTOLevlBkgKsZBU77tryDxWctbPHZWui/K7aD2/a9nQVcPKarVSXFxc57rNZiMoKEj7OSUlhW7duhEcHKzdV1VV0w0+fdaV3W7HYDBo95pMJqqqqujdu7fm8vb29tjBVbs2MjLS4wi92vlTHTt25K233mLixIl8/PHH9OnTxyPrddddx+rVq7VZV76+vjgcDl5//XWPGjp06EB+fr5HTbXzu+Li4vjPf/7DPffc4zG/69ChQ+Tk5PDss89it9t58cUXWbRoEcuXL+fNN9+ksrJS2xX1ySefcOTIEf7xj39gt9v54YcftGMNAcrKyqiurmbs2LHaMYQAd911F+np6eTn59erYeVyubHbS/98oWSYTPof4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mvJi+kjInUxfvZGfWqV1H7WNCefzO9lj8zm+eU0To2ZsqzRv5U1joqFdOuDh1+3sZzvq5xdt4yTOK9F0uWK1+57zrTFcNq5iYmDqzqoqLi8nPz/doHv1+1lXtZyEhIR73Z2VlERERga+vL4B27N/pLrfbTUlJCWVlZZSXl9OyZUsMBoO2tpbs7GwtD9Q0yYA6u7lqZ0tlZWURGRlJQEAATqfTo1lVXFxMRUWFR3PuXOZ31f4zOjqaX3/9VVsXGxuL0+nk0KFDxMbGAlBQUMDgwYOxWCzY7XbMZs9XXVlZidvtrnPsX+08sMzMTLp06UJ9UAPlzkxDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzXgzfB1/trDPPaVd2Ae9/tfO85zm1aRVKgJ/5D48FDPAzk9Ay5KLUfyF1hwX50S46lIycAlzuU9eNBmgTFUpjq+8lz/hX+GRCVw2r5ORkpk2b5jHLatWqVRiNRpKSTm0g/P2sq8TERAICarY+1u6UcjqdrFmzhuTkZG1dREQEO3bs4OjRo9ruoU2bNmlH+NlsNsLDw/Hx8fHY/QSwYsUK/P39cTqdAFxxxRUAZGRkaDu2bDYbaWlpHjnCwsI4fvx4nZoMBgPl5eVnrKmW03d/1f7z9+tqf6793OFwMGTIEJxOJ4MGDWLChAl/6PXz82Py5Mm8+eabFBYW8uqrr2oztE6fGXa+qBlWdWkI56GqjPrziXDKmFHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVGfGS+WT8Q8pzEPd2XMrK0eTasAPzNjHu56wb/jvVh1D+vXng9+t6usbXTNrjK9ZBTlkxFdNawGDBjAvHnzGDZsGEOHDiUvL48333yTAQMGeOxkqq6u5qOPPuLRRx8FwMfHh6FDh/K///2Pffv2sWnTJhYsWEBRURGDBw/W7rvyyitZvXo1Tz75JMOHD6esrIw333yTNm3akJGRoa0LCgri+PHjjBkzht69e7NlyxaWL19OQkKCtqZRo5qBc/PnzycqKorw8HCmT5+OxWLxmGEVGxvL7t2769TUsWNH0tPTtXVut5t9+/axatUqAO0/Oxx1tzSuX79eayytX7+ekydPenz+5JNPsmfPHl577TVycnIASE9Px2q10rFjR6BmFpbb7SY3N5fWrVtTWFjIW2+9pTXkDIazb7c8E2qG1dlpCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjBfqEzHPKSTEwoLxt5G29zh7fis
gvlUoneKa/PmN58GF1h0SAq8Pu47c/BJyTzguaG7XmdDbu5YZXTWsgoKCmDNnDuPGjWPYsGFYLBbuvvtunnnmGY91tbOoTmfIkCFMmzaN/fv38+ijj5KQkMBHH31EixYttDUhISG43W5atGjB8OHDMZvN3HzzzcTHx7N7927t2L4mTZrQokULtm3bxhdffEFERATjx4/niy++0NbU/jMpKYlJkybhcDhITEzk9ddfZ8iQIdrnjRs3xmq1YjKZPGoymUwcOnRqsJ2XlxfLli1j2bJlAHz99dd8/fXXGI3GOt/5yiuvaPe99NJLHs8PauZ4ATz//PMezwdg7969QM2Mrnbt2rFjxw727dsH1Bxn+O9//5sPPviAsLCwc3pnv0fNsPpjTCb9n4eqMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU69+0Q4VUZ9ZjxeVEZxeTVWXxNhwfVvZIic5xTbLJBOcU2w28suaCbU6Vzs5xjgY+LqhHBdZxTx5/tywGptoDOsoGZH0uzZs8+6pmPHjgQHB3tcKykpobS0lNdff51+/fr94X21s6ueeuopPvjgA+36xIkTPWZdxcTEsG/fPq15BDU7oP7v//5PO5qwZcuWeHl5cfXVVzN58mRt3bp16zy+KyYmhqKiIt59912POVZPPvmkxyytdu3aERwczPvvv69dKy4upkuXLh4ugPfff5+bbrpJWzdv3jzeeOMNrTlX25QC+Oqrr3jxxRfZtGkToaGhHs9w/vz5QM3uq7vuuouJEycSHR3NBx98wFVXXfWHz/BcUOdznpmGcB6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2WUwyfCqTLK4RPhVBnl8IlwyphRxppFOGXIWFLmZMbSXR7H+LWLDmXoHW2x+Hqdt++vmOckw3v5K5wiMspCgzxMMTk5mR9//BG73a5d+6NZV7+ndtbVypUrtWt/NOsqOTmZPXv2aMfpQc2sq6KiIq6//noAvL29ueaaa1i9erXHd6xYsYLY2FgiIyMB6NGjB0ajkTVr1mhrbDYbqampdb7zz2pq0aIFUVFR2rGBp39nt27d8Pb2PvNDOwu1zT8fHx/mz5/P1Vdf7dFMUygUCoVCoVAoFAqFQqFQKBQKxbkzY+kuMnIKPK5l5BQwfcmuejuH3tGWNlGhHtfaRNU0wRSKywHd7bDKzMxk/PjxpKWlYbFYuOOOO3j66ac9mjFnmnXVvn17+vfvT0FBAQkJCVRVVWG321m7di1Q05C57777mDFjBh999BHe3t4EBwfXmXXVq1cvJk2axO23347L5SIkJITKykp69uxJhw4dtHVDhgxh0KBBXHXVVbhcLpo0acKRI0d4++23tTVNmzbllltuYfTo0YwdOxY/Pz/8/f0JCAhgwIAB2rp+/foxbdo0unfvDtQcS1hYWOgxv6u4uJjAwECWLVvGihUr8PPzIyAggPz8fG23FMALL7zA4sWLPZ5rt27dABg0aBAvvvii9p83bdqkrfnPf/4DwJgxY+rx5k5xocPuLkcawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqJ+PRkw6PnVW1uNyQnl3ACXs5TUP9z9sbFODDc/cnkl9Uhv0iHDNYiyzvRbRTREbZ0FXDymazMXDgQKKiopg8eTJ5eXlMnDiR8vJyRo0apa37o1lXrVu35pdffuHZZ58lLi6O+fPn891333nMYnI6naxfv57Q0FCqq6spLi7m+PHjtGvXzmPW1a+//sqxY8eIiIjgxIkT2Gw2Kisr6d27t0feFStW4Ovri9Vq5eTJk+Tn5xMYGEiPHj08avrpp59o3LgxpaWllJWV4XA46N27N4GBgdq6999/H5fLRfPmzcnNzSU/P5+qqir69u2rramsrKRVq1bExsayadM
mTp48SXV1NWaz2eOIxMcff5zU1FTy8/PrPOPS0lKqq6sxmUwYjUa8vLwwGAxUVlYSGxvL448/ru0iqw9Go+G8h/vJREMY4Kcy6s8nwiljRhlrFuGUMaOMNYtwyphRxppFOPXuE+FUGeXwiXCqjHL4RDhVRjl8IpwyZpSxZhHOyz1j5rGSs37uqHRd0O9QRf3+9XJ/L3+VU0RGWdBVw2rhwoU4HA6mTJmiNWCqq6sZO3YsQ4cO1XYageesq4qKCrp3787gwYMZNGgQAJ07d+bWW2/1OHZv9erVHDhwgBUrVmhH3qWmpjJ48GB27Nih7Z6aOnUqV111FQsXLtTuHTFiBDNmzOAf//gHAMeOHeOLL75g9OjR9O/fH4CioiJuuOEGFi5cyJAhQ7SaSktLWb9+vVbTZ599xtixY3nuuecIDw8nLy+Pzz//nBdffJEHHngAqJmZ1bdvX6ZPn87UqVMBaNSoEZMmTfJ4Zg6HQzua8LHHHgNq5mulpqZqa7Zs2cKDDz4IwOeff05OTg7z5s2jSZMmREVFMW3aNP72t7/x1FNPceutt57nW/PE5XJjt5dekONyxGTS/wA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8qoz4wy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTJeHOfxojKKL3D3kr+X4ayfW7yNFBY66uUGOd+LrBkvB6xWv3PedaarhlVKSgrdunXz2C3Uu3dvRo8ezcaNG+nXr98f3rd9+3ZKSko8dkB5e3tz8803a8cB1vrj4uI85jMlJSURHBzMhg0b6NChA5WVlWzZsoWRI0d6fEefPn1Yvnw5hw8fJjIyktTUVFwul0eDJzg4mKSkJFJSUrSG1bnUtGfPHqqrqz3mbxkMBnr06MEnn3xCZWXlGedT+fv74+Pjg9PpPONzbdKkCQaDgQEDBnDXXXdhsYjdAaUGyp2ZhjDAT2XUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqkyyuET4VQZ5fCJcKqMcvhEOGXMKGPNIpx6zFhS5mTG0l0eR/m1i66ZD2Xx9TovV1iQH+2iQ8nIKcDlPnXdaKiZOdXY6ntR6pfhvYj2iXCKyCgLujpMMSsry6OZBGC1WgkLCyMrK+us9wF17o2NjSU3N5fy8vIz+g0GA82aNWPBggV07NiR6667DqfTScuWLeu4Tv+urKwsGjVqxGeffabNturfvz8BAQEeWbOysggPD+fJJ5+kU6dOdO3alTfeeIPGjRtr6yorKwHYtm0bffv2pX379vTq1YusrCwqKys5fPiw5qusrGTixIl0796dq666ihtuuAFA2/kFsHLlSv7973+TnJxMx44deeCBB3C73QwePJj27dt7PIPffvuN22+/HaiZYdWzZ88zPmeFQqFQKBQKhUKhUCgUCoVCobgcmbF0Fxk5BR7XMnIKmL5kV718Q+9oS5uoUI9rbaJqGmAKheKP0dUOK7vdjtVqrXM9KCgIm8121vu8vb3x8fHxuG61WnG73dhsNnx9fbHb7R5zo6BmxlRmZia+vr5MnjyZrVu3MmPGDL7++mtuvPFGD1ft+trvrK6u5r333mPkyJHa3KxvvvkGl+tU99Rut7NmzRpCQ0OZNGkS5eXlvPHGGzgcDs3VqlUrAF555RX++c9/8tJLL7F582btKMDTax8/fjyLFy/Wmly1c7NO38E1e/ZsmjdvzgsvvEBISAgjRowAYMmSJTzxxBPauuDgYPr27YuPjw/z58/HaDRy9OhRpkyZ4rHufDGbddUH1QUNYYCfyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnr7zx60uGxs6oWlxvSsws4YS+naaj/eTmDAnx47v5E8ovKsF/gEYO/R5b3ItInwikio2zoqmF1KVi4cCFVVVXEx8dz3XXX4e/vz4wZM1i7di15eXkec7NOp7q
6mqKiIh599FGPuVlJSUmUlJwaqudyuThx4gTz5s3TdjZZrVYGDx7MyZMnAWjdujVWq5Xy8nLuvPNOoqKi2LVrFwaDAbfbjcFQc+Zp7dysZ555hmuvvZb8/Hw+/fRTfvjhB959911efvlloGYGV2hoKACZmZmcPHmSTp068fHHH/P4449jNNb8C1NUVMTixYs9sgLMnDmz3g0ro9EgbOjf5UBDGOCnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRzovpe3vBNtIzT9Lhisb8Z0BivRyZx0rO+rmj0lXv33mK/F2pnt+LKKesGWVBVw0rq9VKcXFxnes2m42goKCz3ldZWUlFRYXHLiu73Y7BYNDutVqtHs0kqJkxFRgYSOPGjQG0tS6Xy2Nult1u9/i8rKwMt9tdZ25WVFQU6enp2jWz2UxAQECduVkGg4HCwkKg5pi/0tJSGjduzIABAwBo3rw5t912G8uXL6e6uhpAm5v1z3/+U8vRs2dPunbtytKlS7WGVW2zCmDZsmWYzWb+9re/kZaWRmlpKQEBAQBMnDiRxx9/nL59+7Jw4UJmz57NDz/8wIkTJygvL8fX1/eMz/xMuFxu7PbS877vcsdk0v8AP5VRfz4RThkzylizCKeMGWWsWYRTxowy1izCqXefCKfKqM+MMtYswiljRhlrFuGUMaOMNYtw6t0nwilbxtQdR5ixdLf287c/HeLbnw7x2B1t6N4+4rxc/l6Gs35u8TZSWOioV07Z3osop6wZLwesVr9z3nWmq4ZVTExMnVlVxcXF5Ofn15k99fv7ALKzs4mPj9euZ2VlERERoTVeYmJi2Ldvn8e9mZmZlJeXa46WLVvi5eWFj49PnVlUp3+X2Vzz6GobXbVUVVXhcrm0ho/ZbMbLy3MoX0lJCW63m6qqKgAOHjxIVVUV48aNIyYmhvLycqKjo/nf//4HgMPh0DI0atTIo3lnNBpp3rw5+/fv/8Nn880339CtWzf27NlDeHi41qyq5bXXXuOOO+7weG4Xihood2YawgA/lVF/PhFOGTPKWLMIp4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPhlDGjjDWLcF4M3+nNqtOZtiSDrglNz8sVFuRHu+hQMnIKcLlPXTcaauZONbb6XnBeWd6LaKesGWVBVw2r5ORkpk2b5jHLatWqVRiNRpKSks54X2JiIgEBAaxcuVJrvDidTlasWAFAx44dsVgstGvXjj179pCTk0NUVBRQs3vL5XJx/fXXAzW7pLp27cqWLVv45JNPmDt3LgkJCQQEBBAbG0tkZCQATZo0AeDRRx8lJycHLy8vrr/+eg4cOKB5fX198fb25sSJE9x2220cPHiQiIgIOnXqpH1X7VqomTG1efNmHA4HHTp04NChQx6f2+12fH19eeihh0hLS8NisXD77bdz4sQJj7lZZWVlfPDBByxevJj8/HxsNht2u53nnntOW1NZWcnw4cP54Ycf8PLyYuHChfTu3Zvi4mKuvPLKeu2uqkXNsKpLQzgPVWXUn0+EU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwqky6jOjjDWLcMqYUcaaRThlzChjzSKceveJcMqUcfqSnWf9/OMVGQzp2+68nMP6teeDxTvZmVWgXWsbHcrjd7a/oN93yvReRDplzSgbumpYDRgwgHnz5jFs2DCGDh1KXl4eb775JgMGDPCYJTVw4EByc3NZu3YtAD4+PgwdOpTJkycTGhpK69atmTt3LsePH6ddu3a89tpr5OXlMWHCBKxWK08++STDhw+nrKwMl8tFTEwMHTp00PyRkZFs3LiRoKAgHnnkERYtWsQvv/zCK6+8oq3x968Zsrdnzx7uv/9+/P39mTVrFm73aS14wMvLi+rqamw2G0899RTbtm1j8eLFhISEeBxfCLBmzRoeeOABXC4XCxcupLKy0uPzffv
2ceTIEby9vXniiSc4ePAgc+fOxeVyaXOpAF599VXWrFlDmzZtKCoqwul04na7KSoq0tbs3buXtWvX0qhRI5o1a0Z6ejpbt26loqKC//znP/V8g2qG1Z/REM5DVRn15xPhlDGjjDWLcMqYUcaaRThlzChjzSKceveJcKqMcvhEOFVGOXwinCqjHD4RThkzylizCOeF+vYftp/1872HbOf9O8qQEHh92HXk5peQe8JBRGMLEWEBf37jOSLDe/krnLJmlAVdNayCgoKYM2cO48aNY9iwYVgsFu6++26eeeYZj3Uul4vauU61DBkyBLfbzaxZsygoKCA0NBRvb28+/PBDgoODAaiurmbMmDFcddVVDB8+HLPZjLe3N8nJyZqnoqKCb775Bm9vb9xuN//73/9o1qwZwcHB2u4pgCNHjgDQt29fVqxYgcPh4MorryQjI8NjblZZWRmBgYFcccUVTJkyBYvFQkxMDEeOHNHWOJ1OoGbH1dy5cwkODua2225j2bJlVFdXa+vKy8txu90UFhbyzjvvEBYWRlxcHBkZGR6zt1auXMlDDz3EZ599ho+PDxEREVx55ZWsWLGC4cOHA7Bu3Tr8/PwwmUzs3r1bu7dZs2Zcc801VFZWajvAzgc1w+qPMZn0fx6qyqg/nwinjBllrFmEU8aMMtYswiljRhlrFuHUu0+EU2XUZ0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhbAgZjxeVUVxejdXXRFhw/X+pf2WkleOFZWf8PK5FUL1nTgX4mLg6IRy7vazejtNpCO9FZdRvxssBq7WBzrACiI2NZfbs2WddM2/evDrXDAYDQ4cOZejQoQDcf//9tG3bVmtWAfTu3ZvRo0fTu3dvZs6cqa07fPiwtmb79u2UlJRgMBh49tln6devHwATJkzQdnQBHDt2DICHHnqIN954AwC3281VV12Ft7c3vr6+VFZW4nA4aNKkiUdN3377LcOGDdPmX+Xk5AA1xwvW5gc4cOAAv/zyizY3q6KiAoDVq1drDSq73U6XLl20n2tnY/n7+9OqVSuOHj3Khx9+yMyZMz12fx09epSysjLKyk79F3thYSEAXbp0YcyYMdx7771neQtnRp3PeWYawnmoKqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfCKWNGGWu+GM6SMiczlu4iPbtAu9YuOpShd7TF4ut13r7Bt7Vl4868M37+UJ82ups5pcf3ItonwilrRlnQXcPqYpGVlcVdd93lcc1qtRIWFkZWVpZ2LTk5malTp/LAAw+wc+dO7Wi938/Nio2NZc6cOZSXl+Pr64vNZsNsNvP6669z8OBBCgoKiI+Pp7q6mpCQEAAOHjyI2+3m+PHjPPzww6SlpeHl5cWVV14JoM3DOnjwIF5eXixZsoRvvvmG7OxsIiIiKC0txWQyaeuKioowGAw888wz7N27F4fDQfv27QEICwsDwGQyERQUxFtvveVRYy3ffPMNt912G0OGDGH37t3s2bOnzrObOHEi3bt3r+eTVygUCoVCoVAoFAqFQqFQKBSKU8xYuouMnAKPaxk5BUxfsovh/TvWy/lo3wRmLN39h9cVCkXD5LJtWNntdqxWa53rQUFB2Gw27ec+ffrwzjvvsHv3bh577DFWr15NRkYGsbGxHnOz5s6di9vtxmaz4evrS0lJCY0bN2bLli3cdNNNJCYmMnv2bKqqqrTvrf0es9nMTz/9xCOPPEJJSQmffPIJAKGhoR5ZMzMziY2NZfjw4axdu5acnByP2VQlJSWEhISwceNG+vXrR6tWrbSdYk2aNNHWJSYmsmbNGo+6mzdvzrFjx+jcuTNQ04CzWq0kJiZyzTXXMHXqVLp168ahQ4fo06dPnfla58OFDCG8XGkIA/xURv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinjBllrFmEU8aMMtYswql3nwinXjM
ePenw2FlVi8sN6dkFnLCX0zTU/7y9PTo0p0eH5ny0LJ3dB20ktAxi8O3t6p2zFlnei0ifCKesGWXjsm1YnSsrVqzA29ub+Ph4pk6ditFoxGAwcODAAfLy8rSm1enH6dX+nJ+fT9euXUlPTyclJYX4+HgKCws5ceKEx1qn00lSUhKzZ8/GbDbTpUsXtmzZwsGDB7U1paWlxMTEYDKZmDRpEhERETRr1oyjR496fGdBQQHdu3dnw4YNOBwOOnTowNatWz1cP/74Y506a2duVVVVeVy3Wq20bNkSgJCQEE6cOHFBzSqj0XDeAw1loiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRn35tu89zt7NvxHfKpROcU3+/IbzQK81i3TKmFHGmkU49e4T4dRbxsxjJWf93FHpuqDfJ4588Jp633s2Lvf38lf4RDhlzSgLl23Dymq1UlxcXOe6zWbT5j0BpKSkkJSUxAcffADA/PnzefXVV3G73dpOJoCBAwcyatQo7V6TyUR1dTUvvfQSCQmntpled9112s6q2rWRkZHMmjVLW5Odnc2tt95KdnY2AAEBAZSVldG/f38GDRqkrXvqqac4evQohw8fJjIyEl9fXxwOB++8845HDR06dCA/Px+Affv2UVJSwtSpU7nxxhuBmrlc9957L2az2WOm1+955ZVXtF1f9cXlcmO3l16Q43LEZNL/AD+VUX8+EU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8a8glLGfryVkrJTf2E0wM/MmIe70iTk/HcQiMgoyifCKWNGGWsW4dS7T4RTrxn9vQxn/dzibaSw0FEvN+j/Oer1vYj0iXDKmvFywGr1O+ddZ5dtwyomJsZjVhVAcXEx+fn5xMTEaNd+P+uq9rOQkBCP+7OysoiIiMDX1xeAwMBAj/VQswOqpKSEsrIyysvLadmyJQaDQVtbS22jqrahVnuE4OlHEELNEYC13x0ZGUlAQABOp9OjWVVcXExFRYXmOnDgAIBHE2358uX4+PhQUVFBXl4eAQEB2mdbt25l48aNAPz73/9m5MiRdOnSpe4DPQ/UQLkz0xAG+KmM+vOJcMqYUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRjl8Ipwqoz58v29WAZSUVTFm1lbe+0/yGe46P/RW81/hlDGjjDWLcOrdJ8Kpt4xhQX60iw4lI6cA12mHWBkN0CYqlMZW34uSV+/PUW/v5a/wiXDKmlEWGmzDKjMzk/Hjx5OWlobFYuGOO+7g6aefxtvbG4Dk5GSmTZvmMctq1apVGI1GcnJy6NmzJwUFBVRUVGiNIaiZ/xQQEIDL5WLFihXMnz8fs9lMVVUVffr00dZFRESwY8cOFi9ezKeffkp2djYhISGUltbsLrLZbISHh+Pj40N+fj5vvPEGS5cuxeFwYLFY8PX1xel0AnDFFVcANUf5ff7556SlpeHn54fdbtdcAGFhYRw/fpzHHnuMjIwM8vLy6NWrFwaDgfLycqBmVhXACy+8QG5uLnl5eVqTq7KykoiICK0Gt9ut5QX45Zdf+Ne//sXgwYN57rnn6v1u1AyrujSE81BVRv35RDhlzChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinLBl3ZJ6o06yqpaSsit0HC2kf06jefj3WLNopY0YZaxbh1LtPhFPPGYf1a88Hi3eyM6tAu9Y2OpTH72x/wb9L1Ptz1PN7EeUT4ZQ1o2wY3L8fztQAsNls3HbbbURFRTF06FDy8vKYOHEiffv2ZdSoUR5roqOjPdbExMSwa9cuRo4cSVxcnHYE37fffkuLFi0AmDp1Ku+88w6+vr4MHTqU9evXs3PnTrp27crcuXMBmDJlCpMnTwZqjgFs3749c+fO1ZpfKSkphIeHk5ycTF5eHl5eXjz44IMcP36cZcuWYTab6dy5M3PnzmXbtm3cd999GAwGWrV
qxZ133snXX39NTk4Obrebt956i9tvv53nnnuOpUuX4ufnR5cuXdiwYQM+Pj60adOG9PR00tPTqa6uJjk5maKiIvr06YOPjw+LFi0Cahpeqamp2nMcOXIkq1at4sYbb2T16tUMHjyYRYsWER0dzeeff16vd+N2uzEYzr7NV6FQKBQKhUKhUCgUisuRBWv28OnqvWf8/L5ecdx7S/xfmEihUCj0RW5+CbknHEQ0thARFvDnNygUCqlokDusFi5ciMPhYMqUKdpMpurqasaOHcvQoUMJDw8nKCiIOXPmMG7cOIYNG4bFYuHOO+/kiy++4OGHH9YaVWazmerqaj766CPGjBkD1MycAjAYDEydOpWEhAReeOEFJkyYwI4dO+jQoQMhISFAzZyqbdu28euvv9KrVy92795NRkaGdmxfSEgIeXl5hIaGMnfuXCIiIvjvf//LhAkTKCoq0hwARqMRm83G1KlTSUxMpEePHsybN4/anmLjxo0JCgoiISGBLVu2ADXzqzp27MihQ4cAMJlMzJ49mzlz5vDjjz9y7NgxjEYjN954I99++y3p6em0a9cOgO+//x6n08nq1asB+Oijj4CaOVj1Rc2w+mNMJv2fh6oy6s8nwtkQMu7KLuDQCQctwyy0iQrVXT4RTpVRnxllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpS8aI0LMPWW/eyP+yntMiwiljRhlrFuHUu0+EsyFkDPAxcXVCOHZ72QX99+Hp6P05NoT3ojLqN+PlwGU/wyolJYVu3bppzSqA3r17M3r0aDZu3Ei/fv0AiI2NZfbs2dqaTZs2MWfOHHr37q1d69ixI8ePHyclJUW7tm7dOgBGjRqludxuN1OnTmXDhg106NCBli1bAnDXXXfx/PPPa/c+/vjjZGRkcOLECSIjI7WZV8uWLdMaU263mzfffFObO1U76yomJobly5drruXLlzNv3jzy8/OBmnlZNpuNd999l6CgIOLi4rjhhhv45ZdfPGZpXXnllYwfP57y8nK6detG3759efDBB/n22285fvy4tm7gwIHMmjWLtLQ07drYsWO1BlZ9UedznpmGcB6qyqg/nwinHjPmFZby2tyf6wynfmXg1YQFX9hw6ouR769wqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCOflnrFNq1AC/Mx/eCxggJ+ZhJYhUsxpEeGUMaOMNYtw6t0nwqkyyuET4VQZ9emTiQZ5mGJWVpZHgwbAarUSFhZGVlbWWe8DPO5NTk7m2LFjHDlyRJsD9euvv2IwGEhKStLWGQwGoqOjNUejRjVnTh87dkxb43Q6SU9P9/iu2qZaYWGhtm7Tpk04nU4cjpq/ReDt7Y3JZNJmVtXy/fffYzKZtHt79OiB0WhkzZo12pry8nJSU1NJTk6uU++6desoLS3l9ttvZ9u2bXVqr73/2muvpU2bNtx666188803tG/f/ozPUKFQKETx+2YV1JzzP27Oz5cokUKhUCgUCoVCcf68MvBqAvw8/35w7V/EUigUCoVCoVCcmQa5w8put2O1WutcDwoKwmazaT9nZmYyfvx40tLSsFgstGrVCm9vb3x8fLQ1AwYM4MMPP6SyspK1a9dSWVnJ0aNHCQ0NpX///hQUFJCQkEBVVRUHDhzA37/mb/mXlZUBsHLlStauXYu3tzfBwcHarqnaHI0aNcJsNnP77bfjcrkICQmhsrKSmJgY7Rg/qNl1lZeXx1VXXYXL5aJJkyYcOXKE8PBwzdW0aVNuueUWRo8ezdixYwH4+OOPCQgIYMCAAZprxYoVrFy5kg0bNgDw448/smjRIv72t78RFRWlrZkyZQpwqpmWnZ0NoM3yqi8XOijxcqQhDPBTGfXnE+HUa0aRw6n1WrNInwinjBllrFmEU8aMMtYswql3nwinyqjPjDLWLMIpU8ZmjQP4YERPMnIKOJh/8Y66Pj2b3moW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyppRNhpkw+p
csNlsDBw4kKioKCZPnkxeXh5jx46lqsrzF6JBQUEMGzaMCRMm8N///peAgAC8vLwoKCjghRdeIC4ujvnz5/Pdd99hNp96XLUeq9WK0WikuLiY48ePEx8fz86dO7V1J0+epKqqisjISE6cOIHNZqOyspKkpCSPhpXL5cJsNhMUFMTJkyfJz88nMDBQa5DV1vTTTz/RuHFjSktLcTqdlJSUcP311xMYGKitW7VqFTk5OTidTgDtmMHaGV1waoeYxWKhvLwcX19fWrduTXV1NV9//TXPP/88Xl5e5/3cjUYDISGW875PFqzWs59nfql9IpwyZpSx5gt15hYcOuvnR06Wkty5Zb39oL+a/wqfCKeMGWWsWYRTxowy1izCqXefCKfKKIdPhFNl1JcvKcRC0p8vqxd6rVmkU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPKarVqO5lOx2azaXOiFi5ciMPhYMqUKdqxfJs2bWL58uUcPHhQm0EF4O/vj8FgYOvWrRgMBjp27EhMTAyDBg0CoHPnztx6661UVFRo/tqm1LPPPss999wDQGpqKoMHDwbQ1u3fvx+z2cx3332nfd+IESNITU3V1hw7dgy32023bt348MMPASgqKuKGG27g+PHjHjWVlpayfv16goODiYuL4+abb2blypU899xzhIeHA/DOO+9gNBpxu93Ex8dz9OhRFi1aRJMmTbQMzZs3B+D111/n1ltv1a6vXLmSp59+moMHDxIbG3t+LwZwudzY7aXnfd/ljsmk/wF+MmY8XlRGcXk1Vl8TYcEX/j8kyzZms+dgEW1aBXNb9+gL9oE870XkcGq91izSJ8IpY0YZaxbhlDGjjDWLcOrdJ8KpMuozo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpyyZrwcsFr9znnXWYNsWMXExNSZVVVcXEx+fr42oyklJYVu3bppzSqAPn36sHz5cpYtW8awYcO061lZWURERODr68umTZtwuVy4XKf+QHl7e3PTTTcxb948zb97924ASktPNWeSkpKwWCw4HA5iYmKorKzk2LFjVFdXezTTanNcddVVQE2jC8BoPPXSgoOD6dq1K99///1Za4qPj2ft2rVs3LiRfv36eXjeeOMNAO68807i4+PP6xlfCGqg3JlpCAP8ZMhYUuZkxtJdpGcXaNfaRYcy9I62WHzPf2dhRk4Bby38Rft5Z+ZJPluXyXP3dSS+5cU5+uNyfy9/xXBqvdX8V/hEOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpa0ZZaJCHKSYnJ/Pjjz9it9u1a6tWrcJoNJKUVLPhPisrS2v01NKjRw8MBgMpKSnaNafTyZo1a0hOTtbuA8jJySEnJ0dbZ7PZcLlczJgxg6SkJDZv3kyjRo1YvXq1tsZgMODt7U1AQACRkZEcPHiQ6upqDAYDN998Mx06dKB///4UFRUBcMUVV2jf6efnR2pqKh07dqRr167897//1Y70O70mk8lE3759ad++PVCzgyssLMyjgVdZWcl9993Hxx9/DMDmzZvrNPi2bNkCwNNPP01cXBwJCQn8/e9/58svv8RqtXrsQFMoLjdmLN1FRk6Bx7WMnAKmL9lVL9/pzarTefPTP76u+GPUcGqFQqFQKBQKhUKhUCgUCoVCXhrkDqsBAwYwb948hg0bxtChQ8nLy+PNN99kwIAB2rF4drud1atXs3r1atauXQuAj48PoaGh7Nixgzlz5tC6dWsWLFhAUVGRdpSf3W7Hy8uL6OhonnzySYYPH87JkydZsmQJUHOEXkVFBS+//DIRERH88ssvjBkzht69e7NlyxYKCwuJi4sDappctVRUVPCvf/2LtLQ0Xn75ZQA6duwI1Bz/V9ucioyM5MYbb2TBggWUlJRgNBq1mmw2G2vWrKF79+7cfPPNTJkyhZUrVxIcHMyePXu07xoyZAjbtm2
jY8eO/PLLL5SWlnLffffxv//9j/j4eEJDQyktLcXHx4dOnToRFRVFRkYGO3bsYP/+/fWeX1WL2dwg+6BCaQgD/GTJePSkw2NnVS0uN6RnF3DCXk7TUP8/uPOPWfJD1lk/X7n5N27vUf/jAWV5LyBuOLWeaxblE+GUMaOMNYtwyphRxppFOPXuE+FUGfWZUcaaRThlzChjzSKcMmaUsWYRTr37RDhVRn1mlLFmEU5ZM8qGwe12uy91iPqQmZnJuHHjSEtLw2KxcMcdd/DMM8/g7e0NQNu2bWnatClut5t169Zp99122234+fmRn59PQUEBCQkJvPjii3Tq1AmAqVOn8sEHH/Dtt98yfvx4UlNTqa6uxul04nK5SElJITw8nG7dulFYWMhrr73G7Nmzyc7O1o4VbNy4MbNmzWLz5s0MHDiQ22+/nbCwMJYsWYLD4cDlclFZWclbb73F7bffzoMPPsiWLVuYPn06s2fPJi0tDW9vb+x2O0ajUTt+MD4+HovFQklJSZ3n4e/vT1paGseOHaNnz56c6bVOmDBBOzrw6aefZseOHZw4cQKDwYDFYuHkyZOMGTOGe++9t17vxe12YzAY6nWvQvFX8PPuPMZ+uPmMn49+5FquTgg/Z99LH6SyM/PkGT9vH9uI1x/vcV4ZFQqFPliz5Td2HsjnqivDuKlrq0sdR6FQKBQKhUKhUCgUCoXisqZB7rACiI2NZfbs2Wf83Gq10qdPH0aMGOFx3W63c8MNNzBy5Mgz3ldZWUlwcDCTJ08G4P7776ekpIS9e/dqc6iaNm1KQUEBBoOBZcuWafcPGDBAW3P48GEAEhMTue+++3j++ecBePHFF/nqq6+0dSdPnsRkMtGzZ0969uwJ1DR+rrrqKkwmE1BzzJ/b7aZ9+/YedX/33Xc8/vjj9O3bFzg1D2vr1q0EBQURFxfHc889R1paGjabTWtWAbzzzjseta9evZqnnnpKa5DVB5fLjd1e+ucLJcNk0v8AP1ky+nudvaFq8TZSWOg4Z1/ryKCzNqziWwSfl+/3yPJeGpJPhFNl1FfG7Fwbr87+iVrF99uPMGXRL4x+qCtRzay6yCjKJ8IpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxppFOGXNeDlgtfqd866zBtuw+jNiYmLqzG0qLi4mPz+/zmyr398HkJ2dTXx8PFAzO6p58+baDiqAK6+8kn379nl8h9vtJjs7W5s55XDU/JK6rKzM4zv8/PwAaN68ufZ5dXU1NptNa2IZDAZ8fX21htXBgwf/0NW0aVMALVdWVhaNGjXSPLXExsbyxRdf1Km3urqaqqoqDhw4wPvvv4/JZCIwMPCMz+dcUAPlzkxDGOB3uWcMC/KjXXQoGTkFuE7biGg0QJuoUBpbfc/LfVu3KL7ccOZjAXtf2+qi1H+5v5eG6BPhVBn14Tu9WaU5XTD2463MfO7GC0z3/306q/mvcMqYUcaaRTj17hPhVBnl8Ilwqoxy+EQ4VUY5fCKcMmaUsWYRThkzylizCKesGWXhsm1YJScnM23aNOx2O1Zrzd+GXrVqFUajUWso1ZKZmcn48eNJS0vD398fLy8vli9frjWsbDYbVVVV3HbbbR7+JUuWsGHDBpYvX05BQQGRkZEUFRVx/fXXA1BaWorBYGDVqlX88ssvpKam4uXlpc2HCggI0HwGg4F3332Xn3/+mezsbMLDw7Hb7bRs2VLLALBnzx5effVVVq9ejcPh0OZbtWjRAqjZQebr68uoUaP49ddfAZg1axYPP/ywx0wtqGl+JSUlaY01b29vqquradOmzQU9ezXDqi4N4TxUmTIO69eeDxbvZGdWgXatbXQoj9/Zvl5/fl/8VyITPtn+h9cv9N8Hmd5LQ/GJcKqM+sn4fdrhOs2qWqpd8GP6UZI7Nq+3X481i3bKmFHGmkU49e4T4VQZ9ZlRxppFOGXMKGPNIpwyZpSxZhFOvftEOFV
GfWaUsWYRTlkzysZl27AaMGAA8+bNY9iwYQwdOpS8vDzefPNNBgwYoDV5oOa4v+3bt9O5c2cmT55MXl4eY8eO5aOPPiIsLIzWrVvjcrkoLy9n8ODB2n29evXiueeeY9++fdx99900adKEWbNmYTKZCAkJ0daZTCZ27NjBb7/9xtChQ9m1axdr1qzxyGo2m2nWrBnz58+nS5cuPPXUU3z66ae43W6tuVWLy+Vi4cKFPPDAAwDMmzcPAB8fH21NWVkZq1evplWrmnkbFRUV7N27F5fr1G/gJk6cSGpqKk6nk169enHixAm2bdsGQEZGhkdz7nwwGg2EhFjqda8MWK1+uvaJcOoxY0gIvD7sOnLzS8g94SCisYWIsIA/v/EMdA+xsKxTCxZ9t4+0vcfpFNeEe/7W+oIy/h4Z3ktD84lwqoyX3pd1tPisnx/ItXPHDRf+77eeav6rnDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOlVEOnwinjBllrFmEU8aMMtYswilrRlm4bBtWQUFBzJkzh3HjxjFs2DAsFgt33303zzzzjMe6Y8eO4Xa7mTJlCsHBwQBUVVUxZswYZs6cic1mw2g00rt3b20XE9Q0jqqrqwkKCmLFihWYzWZ69+7N5s2b+eijjxgzZgxWq5WqqioAQkJCmDJlChEREXTr1o1NmzZx8OBBwsPDsVqt5Obm0qRJE7Kysti5cyeJiYmUlJRw7NgxrR4Ap9NJdHQ0CxcuxGKx0KdPH5YtW0ZaWhr33HMPVquV0tJSysrKKCoqAmqOQlyyZAlG46nObkxMDLNnz8bLy4vvvvuO8PBwbr/9dpYtW8bixYt59tln6/Xc1QyrP8Zk0v95qDJmDPAxcXVCOHZ72QXNmaqlV5cW3PO31hfNB3K+F737RDhVRv1kjGkWyPdn+fyKCKuaS3eJfSKceveJcMqYUcaaRThlzChjzSKcMmaUsWYRThkzylizCKfefSKcIjIeLyqjuLwaq6+JsOAL/yW8jM9RxppFOGXNeDlgtaoZVkDN3KbZs2efdU3Tpk2Ji4vTmlUAffr0YcyYMQwfPpx+/fpx//33a8fm1ZKamgrAgw8+yBNPPKFdnzBhAmvXrgVOzcOKiopi9erVHms2b97M5s2b6dKlC1FRUezcuZMXX3yRQYMGATXzsDp37ozD4eDw4cO0bNkSk8lEdXU1Cxcu1BpY69atY9myZRw4cED7zvLycrZu3UpQUBAvvPAC6enpREdHaw0sgLvuuotXX32VESNGeHznN998g9PpPLcHfAbU+ZxnpiGch6oy6s8nwiljRhlrFuG83DP2aB/BnJV7/vBYQJMRurdrpubS6cQnwql3nwinjBllrFmEU8aMMtYswiljRhlrFuGUMaOMNYtw6t0nwnkxfCVlTmYs3UV6doF2rV10KEPvaIvF1+ssd54bsjxHkT4RTpVRnz6Z0F3D6vR5UhaLhTvuuIOnn34ab2/vs97ndruZOXMmn376KQUFBSQkJPDiiy/SsWNHj3V5eXmMHz9emydVXl7OgAEDPNZYrVasViv/93//x+jRo/H396e0tNRjHtaKFSsAOHr0qDYHqlOnTlx99dXk5uZSXl5OYmIiRqOR6upqHnroIW1GVkVFBaGhoWRlZQEQHx/PsmXL2L17N7169dJ2W9U2ybKysoiMjKRp06bk5eXxxhtv8O233+J0OgkKCiIoKIgjR44A0KNHDwC6du3qUdP+/fu56aabtJ9NJhP9+vXjk08+ITExkc2bNzNp0iQAbrjhhnN+XwqFQqFQXG68PPBqxs/52aNpZTLWXFcoFAqFQqFQKBQKGZixdBcZOQUe1zJyCpi+ZBfD+3e8NKEUCsVlj64aVjabjYEDBxIVFaXNk5o4cSLl5eWMGjXqrPfOnDmT9957j5EjRxIXF8f8+fN5+OGHWbJkiXaUn9Pp5JFHHgFg0qRJlJeX88wzz5CSksJLL72kuX7++WdsNhsxMTG8/fbbbNiwgVm
zZnHvvffy4osvkpeXp+2iWr16NS+88ALh4eE888wzbN68Gbfbjc1mIzw8nMDAQA4dOgTA448/zqpVq9i1axf+/v7YbDYA2rdvD8DXX39N79696dOnj8fOsNp1rVu35siRI6xYsYKHH36Y7OxsVqxYgdVq1XZPNW3alDZt2rBr1y4ee+wxNm7cyN69ewkICOC5557TnE888QRt2rShZcuW3HPPPdp1Ly8vxo8ff97v7nTMZjVU7vc0hAF+KqP+fCKcMmaUsWYRTpkyxjYP5uOXbiJ1Ry77DttoHRlEjw4RFyOibmsW6ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNIpx694lwXizf0ZMOj51VtbjckJ5dwAl7OU1D/S9pRpFOvftEOFVG/WaUDYPb7XZf6hC1TJ8+nWnTprF+/XrtiL7PPvuMsWPHsn79esLDw//wvoqKCrp3787999/P8OHDAaisrOTWW28lOTmZMWPGALB8+XJGjhzJihUrtOP62rRpQ3V1NYsWLaJDhw4ADB48mJ9//pm+ffsybtw4AB599FG2bt2K2+3GYrEQERHBzp07efXVV+nfvz8A9957L9u3bwcgJSWF8PBwunbtSnFxMY0bN6aoqIiEhAS6dOnChx9+yNVXX838+fPZtm0b9913H40aNaKsrAyz2czNN9/M7t27ycjI4K233uL222/n3//+N+vWrSMyMpK8vDwiIiK48847eeeddzCZTGRkZADwzjvvMHPmTIKCgigqKsLb25svv/yS2NhY7ZnNmDGDjz/+mMLCQgwGA76+vpjNZoqLixk5cqTW2Dtf3G43BoOhXvcqFAqFQqFQKBQKhUKhUCgUsrB973H2/lZAfKtQOsU1udRxNH7encfYDzef8fPRj1zL1Ql//HtahUKhuBB0tcMqJSWFbt26ecyT6t27N6NHj2bjxo3069fvD+/bvn07JSUl9O7dW7vm7e3NzTffrO2EqvXHxcVpzSqAoKAgHA4HGzZsoEOHDlRWVrJlyxZ8fHy0OVEA/fv3Z8OGDXz33XdERkYycuRIdu7c6XF83oIFC/jHP/7B7t27tXudTidhYWGkpKRo6+x2Ox9++CGVlZUAlJWVATBgwACeeuopbd0777xDRkYGFosFgIKCAgwGA99++61HU+iTTz6huLhY+9lkMuHt7c2PP/6ozbA6vVkF0LNnTyZNmsSIESOYNm0aq1atYsSIEeTn5/Puu+8yYMAAAgIC/vB5nw2Xy43dXnre913umEz6H+CnMurPJ8IpY0YZaxbhlDGjjDWLcMqYUcaaRTj17hPhVBn1mVHGmkU4ZcwoY80inDJmlLFmEU49+/IKShn78VZKyqq0awF+ZsY83JUmIfXbuXQxM/p7nf0vo1u8jRQWOurlVn925KhZhFPWjJcDVqvfOe8601XDKisri7vuusvjmtVqJSwsTJv3dKb7AI9GFEBsbCxz5syhvLwcX19fsrKy6qyJiYlh//79muPgwYM4nU6qqqo81tY2fGrnSVVV1fwPSkFBAU2anPobEF5eXhiNRnx9fQGoqqrS1tYSGBiIwWDAbK55/OXl5UBNc+t0vLxqBhie7nK73djtdo9mmtls1ly1lJeXc+2111JUVITZbObzzz/nn//8p/b5gQMHgJpjCB977DGthoCAACorK8nLy6tXw6omp/qX8Uw0hAF+KqP+fCKcMmaUsWYRThkzylizCKeMGWWsWYRT7z4RTpVRDp8Ip8ooh0+EU2WUwyfCKWPGi+H7fbMKoKSsijGztvLef5IvyA0XnjEsyI920aFk5BTgOu1sLqMB2kSF0tjqe8HPQP3Z0adTZdSnTyZ01bCy2+1YrdY614OCgrQ5TrVkZmYyfvx40tLSMBgMmEymOkfRWa1WbZ6Ur68vdrudwMBAZsyYwaeffkpBQQGhoaEUFxdz8uRJ4NS8KIPBwMqVKxk3bhxeXl4kJyd7fO7n54fBYGDq1KlkZ2eTnZ1Ns2b
NOH78uEeGqqoqTp48yUsvvcSGDRtwOBxER0fjdrsJCQkBoLS0ZkfS999/T3p6OmlpaVgsFnx8fIBTDavanwcNGsTJkyfJy8vjqaee4sSJEwQGBmrf2bJlS0aOHEmbNm2YPn06W7du5ZVXXuG7775j+vTpADRv3hyA48ePs3TpUj744AOqqqrw8fHBYDAQEVH/WR1qhlVdGsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhFPGjDLWLMIpY0YZaxbh1KtvR+aJOs2qWkrKqth9sJD2MY3q5b6YNQ/r154PFu9kZ1aBdq1tdCiP39n+gn73p/7sXBxUxotDQ8goG7pqWJ0rNpuNgQMHEhUVxeTJk1m4cCHffvstEydOZNSoUWe9d/fu3Xz11VeMHDmSuLg4Zs+ezdGjR9m1axepqals2bIFAH9/f44dO8akSZMoLy/nueee8/CYTCYCAgJYtWoVnTp14plnnmHRokWUlZVhNJ76A1k7H2rx4sXcf//9+Pv78/HHHwPUac7t27ePwsJChg0bxrZt21i/fr3H5z4+Pvj4+LBnzx46d+5MXl4eixcvxsvLy6Nhdcstt7BhwwZsNhsnT56kdkxZamoqeXl5hIeH06xZMwwGA8XFxXTo0IE777yTmTNnUlhYSKNGjfDz8zvPt1KD0WggJMRSr3tlwGqt33P9q3winDJmlLFmEU69+0Q4VUY5fCKcKqMcPhFOGTPKWLMIp4wZZaxZhFPGjDLWLMIpY0YZaxbh1Jsvt+DQWT8/crKU5M4tL+g7LkbNISHw+rDryM0vIfeEg4jGFiLC6nca0x+h/uzo06ky6tMnE7pqWFmtVo9ZTLXYbDaPI/AWLlyIw+FgypQpBAcHc/DgQb799lsWLFjA0KFDCQ+vGfpnt9sxGAzavYGBgWRkZDB48GAGDRoEQOfOnUlMTMTpdDJs2DBtN1NJSQnvvvuudizge++9R3Z2NgUFBVrWsrIymjVrxtGjR3n77bdJSEigefPmHDt2TMsaGBhIUVERbdq04csvv8RsNtOrVy+WLVvGkSNHALR8Xl5eBAUF8e677xIREUG3bt3YtGmTdqSg1WqldevWdOnShSVLlgBgNBq55pprKCws1L7z5MmT/Oc//6nzHKuqqvjxxx+58847mTJliraLKysri59//hmA0NBQTp48yaFDh2jRosV5vT9QM6zOhMmk//NQVUb9+UQ4ZcwoY80inDJmlLFmEU4ZM8pYswin3n0inCqjPjPKWLMIp4wZZaxZhFPGjDLWLMJ5sX3Hi8ooLq/G6msiLLj+v4yOCD37vc0b+etqPlSAj4mrE8Kx28vqnet0ZPyzI2PNIpyyZrwcsFob6AyrmJiYOrOqiouLyc/P95gnlZKSQrdu3QgODtbuA3C5XGzcuJF+/foBNfOmIiIitCaU1WqlqqqK3r17a67amVPe3t78/PPPVFZW0qFDB8LCwjy+89lnn+Xxxx/n8OHDALRq1Yqqqir69+/Pv//9b21d//79OXLkCIcPHyYyMpLg4GCKioqYPXu21pgqLi5m2bJl5Ofne+RPSEhg0aJFmmvmzJls2rSJnJwcunbtSkxMDJs2bWLRokU8//zzxMXF0b9/f7755htat26t3RcZGcnevXuZOnUq3333HYsWLSI+Ph6ADh06AJCdnU1JSQklJSUez7uiogKAbdu21athBWqG1dloCOehqoz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGOXwinCqjHD4RTpVRDp8IpwwZS8qczFi6i/TsAu1au+hQht7RFouv13n72rQKJcDP/IfHAgb4mUloGaLmQ+nEqXefCKfKqE+fTOjqMMXk5GR+/PFH7Ha7dm3VqlUYjUaSkpK0a1lZWR7NpMTERAICAvD399caXk6nkzVr1mizpwCaNm0K1BznV8umTZuoqKiguLiY8vJyvL29sVgsOJ1Oj2wrV67E19eXEyd
OADUNK0DbcQU1O8H27t2rZQRo3LgxBoPBY77WqlWrMBgMFBUVAdCiRQuMRqM2y6qWdevW4e3tzcGDB7XnY7PZ2LRpk7amoKCAjIwMjzoBcnNzmTFjBi+//LL23T4+PrRs2RKAl156icceewyTycQTTzzB9OnTiY6Oxs/Pj44dO9KzZ08UCoVCoVAoFAqFQqFQKBQKmZmxdBcZOQUe1zJyCpi+ZFe9na8MvJoAP899BAF+Zl4ZeHW9nQqFQnE5oKsdVgMGDGDevHkMGzaMoUOHkpeXx5tvvsmAAQO0Y/4ACgsLWbRoESNHjgRqGjFDhw7lf//7Hz/99BObNm1iwYIFFBQUsHfvXjp27IjFYqFly5YYDAZGjBjB8OHDKSsr480336RNmzZkZGRgs9nw9fXFYrGQl5dHp06dcDqdhIaGcvz4cRISErDZbEDNziyA+fPns3DhQsxmM/7+/lgsFsrKyrR1zZs3Z/v27SQnJ+N0OgkKCsLhcNCxY0fS09M96j9w4AAdO3akurqakJAQTpw4QbNmzTRXp06d6Ny5M0OHDtXumT9/Pq1bt+aWW27RrvXr148TJ05QXV3N/fffr82jSkpK0nJ/++23TJs2DYApU6Z45IiNjdV2r9WHCxm8eLnSEAb4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUsWYRThkzylizCKeMGWWsWYTzYviOnnR47KyqxeWG9OwCTtjLaRrqf97eZo0D+GBETzJyCjiY76BlmIU2UaH1zlmLLO9FtFPvPhFOlVG/GWVDVw2roKAg5syZw7hx4xg2bBgWi4W7776bZ555xmOd2+3G5fLcUjdkyBBmz57N3r17efTRR2ndujU+Pj6YTCYmT55MXl4eY8eOBSAqKorhw4djNpu5+eab6dq1K88//7zmqqiowGAwYLFYKCwsxG634+XlRWBgoLamdq6Uv3/N/yiVl5djt9tp06aNtgsLanZduVwurFYrhYWFlJaWUl5eTmRkpEfDyu12YzKZ8Pf3x2azYbfbCQwMxNvb28OVk5OD1WqltLSUyspKKioqaNOmDWbzqVdZVlZGXl4eJpMJk8mkNakiIiK0Nffccw/e3t5MnTqV22+/nYSEBCZPnqw1+dxut8eusHPFaDQQEmI57/tkoSEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDhVRjl8Ipwqoxw+Ec7LPWPmsZKzfu6odF3Q78GSQiwk/fmy8+Zyfy9/lVPvPhFOlVGfPpnQVcMKanb3zJ49+6xrQkNDufvuuz2uGQwGTCYT//rXvxg5ciTTp09n2rRpTJkyRdsttGnTJpYvX86zzz7L5MmTtXs///xzDAYDQUFBVFRUYLfbiYmJYcWKFQBUVlZy6623kpmZSdeuXQHYuXMnAM8//zz33HMPAKmpqQwePBhAm1e1f/9+zGYzKSkp2veNGDGCH374QVtz7Ngx3G433bt358MPPwSgqKiIG264gePHj2vrFi5cSFlZGevXryc4OJi4uDhuuukmli5dyjPPPKPtQjt48CBXXHEFCxYs0L6zS5cu/PDDD9jtdqxWK+Hh4cyePZsBAwbwwgsvALBjxw7WrFlDeno6GzdupEePHn/6vn6Py+XGbi/984WSYTLpf4CfyqifAayno/eaRTj17hPhVBn1mVHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMI58Xw+Xud/S9zW7yNFBY66uUGfdYs2iljRhlrFuGUNePlgNXqd867znTXsDoXYmJitBlRtRQXF5Ofn6/NtkpJSaFbt24eR9v16dOH5cuXs2zZMoYNG6Zdz8rKIiIiAl9fXzZt2oTL5fLYweXt7c1NN93EvHnzNP/u3bsBPOZOJSUlYbFYcDgcxMTEUFlZybFjx6iursZms2mNp9ocV111FVDT6AIwGk+9tODgYLp27cr3339/1pri4+NZu3Y
tGzdupF+/fhw6dIiqqioOHDhAly5dPJ7Rb7/9RpcuXdixYwclJSUUFBQQHx8P1OwqW7t2Lb169eKrr77S5mbVBzVQ7sw0hAF+KuP5c7EHsP4Reqv5r3Dq3SfCqTLK4RPhVBnl8IlwyphRxppFOGXMKGPNIpwyZpSxZhFOGTPKWLMI54X4woL8aBcdSkZOAS73qetGA7SJCqWx1feiZNVTzX+VU8aMMtYswilrRllokA2r5ORkpk6dygMPPMDOnTuxWCzEx8djNBpJSqrZSJuVlcVdd93lcV+PHj0wGAwsWrSIRYsWaQ2b3NxcbrrpJu0+gJycHB5++GHS0tLw8vIiLCwMl8vFtddeC9Q0fxo3bsxnn33Gl19+SXZ2NhEREbjdbgICAoiMjOTAgQNUV1djNBp55pln2Lt3Lw6HgyuuuAJA+2dWVhb+/v5s3brVoyZfX1+AOjUtWrRI24m1YMECrFarlrv2n2PGjGHdunVs27aNqqoqKioqAJg4cSJeXl6EhoZiMBh4/vnnPY5D/OqrrwC09QqF4s852wDW4f07XppQCoVCoVAoFAqFQqFQKC6YoXe0ZfoSz7+k2iaq5i+pKhQKheLi0iAbVn369OGdd95h9+7dPPbYY/z222989dVXtG7dWjsWz263s3r1alavXs3atWsB8PHxwc/Pj6NHj3LTTTeRmJjI7Nmzyc/Pp3fv3tp9Xl5eGAwGfvrpJx555BFKSkqYN28eAC1bttTWtWzZku3btxMbG8vw4cNZu3YtOTk52qwom80GQLNmzbQdUK1atWLGjBkAJCQkaK7Q0FByc3Pr1AR41LR3715mzpzJLbfcQk5ODhaLhfz8fLZu3erxnStWrODgwYOMHz8eHx8fHn/8cQAiIyO1nVx///vfWbFiBf/4xz9IT0/n8OHDOJ1Oqqur6d+/f73fj9mshsr9noYwwE9lrB+iBrBezIwifSKceveJcKqM+swoY80inDJmlLFmEU69+0Q4VUZ9ZpSxZhFOGTPKWLMIp4wZZaxZhPNi+YICfHju/kTyi8qwCxgDcPo/9eYT4ZQxo4w1i3DKmlE2GmTDasWKFXh7exMfH8/UqVOxWCwkJSWxadMm8vLytAaPy+XC7T61X7eiooKysjLCwsJIT08nJSWF+Ph4XC4XK1eu5JprrgHA7XbjdDpJSkpi9uzZmM1m4uLi2LNnD7t379b8ubm5xMTEYDKZmDRpEhEREQQGBlJYWOiRNzc3l+7du7NhwwYcDgcJCQmkpaWRlpbGAw88AIDD4ahTU6tWrfjtt988atqyZQsAa9asAWp2gsGpmVq1bN26lY8++qjOHKqtW7dqRwW+/vrrXHHFFSxevJicnBz8/f2prq7m9ttvx9+/fr9gNxoNFzRs8nKnIQzwUxnPD9EDWGvRU81/lVPvPhFOlVEOnwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmi+m80h+Cft25xHR2EJEWMAF+0T+vkum9yLKJ8Kpd58Ip8qoT59MNMiGVUpKCklJSXzwwQfaNbvdTteuXbWdTFarlT59+jBixAhtzfbt23G73Vx33XVMmDBBuz5hwgRtF5bVaqWqqorWrVsza9Ysbc1nn33GqFGjSEtLo2fPngQGBnLw4EFeeOEFBg0apK3r3bs3WVlZHD58WJtZ5Xa7eeedd7Sfc3Jy6NWrF/v379e+s6SkhOTkZI+aJk6cyMcff6zVFBAQQFFREe+//752hCFA586dKS0tpbKyUvuO2iZeLbNmzeLhhx8mIyNDu+bt7c1jjz1Go0aNePnllxk6dChvv/32Be2ucrnc2O2lf75QMkwm/Q/wUxnVAFa9OPXuE+FUGfWZUcaaRThlzChjzSKceveJcKqM+swoY80inDJmlLFmEU4ZM8pY88V0lpQ5mbp4JzuzTp2G0j4mlMfvbI/Fr/5zpvVcsyifCKeMGWWsWYRT1oyXA1ar3znvOmuQDas/mk9ltVoJCwvTZjj
FxMRo/7mW2mZNp06dPK7HxsYyZ84cysvLiYmJAaBJkyYea7Kzs/H29ubgwYPa5+np6dp6qGlMnThxQst47bXXYjQa8fPz0xpJtZ8B5Ofna1mdTifNmzf3+M4jR47g5eWlrQ8LC6OoqIjo6GhtTXFxMQ6HA7fbzaFDh7Q8YWFhGAynfpGelZWF0WgkLy+vzvNcvnw5MTExpKWl0bx5cxITE+usOR/UQLkz0xAG+KmM54cawCrOqXefCKfKKIdPhFNllMMnwiljRhlrFuGUMaOMNYtwyphRxppFOGXMKGPNF8P5wVc768yZ3pVdwPtf7bwoc6b1WLNonwinjBllrFmEU9aMstAgG1Z2ux2n08lDDz1EWloaFouFO+64A6vVqs1wSk5OZtq0adjtdqxWK1Czwwrg8OHD9OzZk4KCAhISErjhhhtwu93YbDYSExMxGAzk5eXx5JNPkpqaitlspqqqitDQUM3fpk0b1q1bx5YtW3jrrbfIzs4mJCQEu90O1MyS8vb21ppMb7zxBkuXLsXhcGCxWAgODqakpOYosdpj+w4cOKDV5OfnR3FxsUdNV1xxBfv372f16tUsWbKE3NxcQkJCtOdSm9/b25ujR49y8803c/z4ccLDwykvLyc8PJzi4mJt/eTJk5kyZYr2c21jbMGCBdx77731fj9qhlVdGsJ5qCpj/RnWrz0f/O5vb7WNrvnbWxf674Neaxbp1LtPhFNl1GdGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNYtwyphRxppFOGXMKGPNF8spcs60XmsW6RPhlDGjjDWLcMqaUTYaZMPK7XazaNEi2rRpw+TJk8nLy2PixIkYjaf+IAwYMIB58+YxbNgwhg4dSl5eHj/88AMGg4FZs2YxcuRI4uLiGD58OG+//bZ2n4+PD1arlf3791NUVMSQIUNYv349O3fuxMvr1Lbhrl27AvDhhx9y3XXX8be//Y25c+fWydqmTRvWr1/PvHnzePDBBzl+/DjLli3D19dXm6/VtGlTDAYDP/74I1FRUTz22GN8/fXXFBYW4nQ6Nde1117LypUreffdd/n73/9+xu/09/enqKiImJgY7r//ftasWcO2bdsICQnBx8fHY21tM+6ee+5h0aJF/N///Z/HUYLni5phdXYawnmoKuP5ExICrw+7jtz8EnJPOC7a+dino7ea/wqn3n0inCqjHD4RTpVRDp8Ip4wZZaxZhFPGjDLWLMIpY0YZaxbhlDGjjDVfqPOvmDOtt5r/Cp8Ip4wZZaxZhFPWjLLQIBtW3t7eVFRUMGXKFIKDgwGorq5m1KhRmM01JQUFBTFnzhzGjRvHsGHDsFgsdOzYkS1btvDggw9qc6diYmIoLCzE7XZrx/YFBgZis9lwu91MnTqVhIQEXnjhBSZMmKA1mRo1agTUzIratm0bv/76K7169SIvL4/U1FTNFR4eDkBoaChz584lIiKC//73v7zxxht4e3trNfn4+FBRUYHNZmPq1KkkJiby97//ncmTJ2s1NWvWDIDg4GC+/fZbLBYLDzzwAN9//z1ZWVnadyYmJrJ//36ys7PZuHEj0dHRPPzww8yaNYvGjRt7PEuXy0WHDh3IysoiLi6Ovn37XtC7UTOs/hiTSf/noaqMF+4L8DFxdUI4dnvZBc2tOh291yzCuSu7gEMnHLQMs9AmKlR3+UQ4VUZ9ZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKeMGWWsWYRTxowy1nyxnCLnTOu1ZpE+EU4ZM8pYswinrBkvB6zWy3yGlclkIiAgQGtWAVx33XUAVFZWatdiY2OZPXu29vNHH33Eli1baNeunXZt/vz59OvXjz179uDr66td9/HxYePGjdrPLpeLiRMnajuemjZtCkC3bt14//33tXVvv/02qamp+PvXbC+uqKjQvqdFixbaupkzZ1JWVqb9bDabCQwMJDU1VbuWm5vL5MmTtZpq8/3zn/9kxIgR2rrDhw+TlZWlZYqPj2f
btm1s2bJFm2OVmZnJrFmztEZbLb6+vrz77rvceOONDB8+nIuBOp/zzDSE81BVRv35RDj1mDGvsJTX5v5MSVmVdi3Az8wrA68mLLh+RzZczHx/hVNllMMnwqkyyuET4ZQxo4w1i3DKmFHGmkU4ZcwoY80inDJmlLHmC3X+FXOm9VbzX+ET4ZQxo4w1i3DKmlEWGmTDqrq6msLCQh544AF27tyJxWIhPj4ewGPX0u+p3an06aef8uabb1JQUEB8fDwHDx6kurqa8vJyfH19MRgMVFRU8PDDD5OWloaXlxcdOnTA7XZrxwIeO3YMgF27dtG3b1+ys7OJiIjQmkqlpTW7jGqP4Bs9ejR79+7F4XDQvn17CgoKPLKeS03l5eUAbNiwgTVr1pCbm0t0dLQ2N+vYsWPExsYSERGBzWYjOTkZu91OeHi4trPqxhtv9HgmZWVl3Hjjjbjdbt5++22+//57Jk+eXKexpVAoFKL5fbMKoKSsinFzfua9/yRfolQKhUKhUCgUCoVCoWhIDL2jLdOX7PKYZdUmKpShd7S9hKkUCoVCcS40yIZVRUUFbreb3bt389hjj/Hbb7/x1Vdf4e3tTVXVqV92Dhw4kNzcXNauXQvUNJEMBgM//fQTN910E4mJicyePZvi4mIAbDab1nAyGo389NNPPPLII5SUlPDJJ5/g5eWl7Viy2WwAHD16FH9/f4YPH87atWvZtm2bx+fV1dV4eXmxceNG+vXrR6tWrZg5cybV1dXa7qtzranWuXfvXjp37syAAQNYtGgRR48e9fg8KysLq9VKRUUFjzzyCEeOHGHx4sWYTCbuv/9+7Tvz8/Nxu90EBgYSGBhIWFgY27Zt4+6772b9+vX1fj9msxoq93sawgA/lVF/PhFOvWbckXmiTrOqlpKyKnYfLKR9TP0a6XqtWaRPhFPGjDLWLMIpY0YZaxbh1LtPhFNl1GdGGWsW4ZQxo4w1i3DKmFHGmi+mMyjAh+fuTyS/qAx7eTVWXxNhwRc+T0bPNYvyiXDKmFHGmkU4Zc0oGw2yYQVgMpmIj49n6tSpWCwWkpKS2Lhxo8cxey6Xi+rqau3nqqoq3G43Xbt2JT09nZSUFOLj4ykrK9OaVlCz68jlctGlSxdmz56N2Wyme/fupKameqwDiIiIwGQyMWnSJCIiImjbti27du3SPi8tLcXpdNK9e3c2bNiAw+GgQ4cObN++HZfLc1vgudQEkJCQwIkTJ5g0aRLR0dE0b96cI0eOaJ8PGTKExx9/nAkTJjB37lxKSkowm81UVVWxZ88e7UjEI0eO0L59e3bu3Mnw4cO57777uOuuu0hPT+fQoUMeRxieK0aj4YKHV17ONIQBfiqj/nwinHrLmFtw6KyfHzlZSnLnlvX2w8Wt+Uh+Cft25xHR2EJEWMBF8+rtvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtZ8MZ2ifj+l55pF+UQ4ZcwoY80inLJmlIUG2bAyGo00b96cTz75RLtmt9vp0qWLdjwewLx58zzuq92BNHLkSK666irt+kMPPcSPP/5IUFAQUNPYCgwMZNasWdoat9tN27ZtteZR7Yyq6667jldffVVbt2DBAnbt2qXNuqrN884772h+gFtvvZXDhw+fV021s6zuuecej51SY8aMYcGCBVqm0NBQAF577TWcTiffffcdb7zxBk888QTHjx/X7quqqiIsLIy9e/dq12qbeYcPH65Xw8rlcmO3l573fZc7JpP+B/ipjPrziXDqNWNE6Nn/h7x5I39dDMYtKXMydfFOdmadOlqifUwoj9/ZHoufV729en0vIn0inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzylizCKfefSKcKqM+M8pYswinrBkvB6xWv3PeddYgG1Z/RO1RfRdr7ZnWuN3uP7xenxz1df3+vjOte+ONN1i5ciUzZ87k0KGa3QsxMTHa53fffTc
vvvgiq1atokePHhw9epQ1a9YA0KRJk3Ou4/eogXJnpiEM8FMZ9ecT4dRbxjatQgnwM//hsYABfmYSWoboYjDuB1/tJCOnwOParuwC3v9qJ8P7d7wgN+jvvfwVPhFOvftEOFVGOXwinDJmlLFmEU4ZM8pYswinjBllrFmEU8aMMtYswql3nwinyiiHT4RTZdSnTyYaZMPK5XJx5MgRHnjgAXbu3InFYiE+Ph4Aq9V6xvtqdzi99dZbHDx4kIKCAuLj48nOzgZOzbAym82cOHGChx9+mLS0NLy8vOjQoQPV1dXaLqbS0ppdRKmpqfTt25fs7GwiIiIICKg5GsrLy8sjzzPPPMPevXtxOBy0b9+ew4cPexwJeC41eXt7A/Dll18yb948cnNziY6OxuFweGQCmDx5Mh9//DF+fn78+9//xuVy0aNHD6KiorQ1t99+Oxs3buQ///mPds3f35/AwEBatmx57i/kd6gZVnVpCOehqoz684lw6jnjmIe7MmbWVo+mVYCfmTEPd72g/165WPmOnnR4DO2txeWG9OwCTtjLaRrqf0kzinTKmFHGmkU4ZcwoY80inHr3iXCqjPrMKGPNIpwyZpSxZhFOGTPKWLMIp959Ipwqoz4zylizCKesGWWjQTasAKqrq9m9ezePPfYYv/32G1999RXe3t74+Z06VmrgwIHk5uaydu1aAMxmMwaDgS1btnDTTTeRmJjI7NmzPY4RBPDz88NoNPLTTz/xyCOPUFJSwieffIKXlxeBgYEea48cOUJsbCzDhw9n7dq1bNu2zeNzf39/vLy82LhxI/369aNVq1bMnDmTqqoqjEbPP7jnUhNARkYGnTt3ZsCAASxatMjjaEGAZcuWMWXKFHx9fXnkkUdYsmQJhw8fJicnh+PHj2u7p7p3787JkyeJiYmhY8eOpKamcvz4cRo3bozZXL8/GmqG1dlpCOehqoz684lw6jFjSIiFBeNvI23vcfb8VkB8q1A6xdV/t+fvudB8mcdKzvq5o9J1wf/9p8f3Itonwql3nwinyiiHT4RTxowy1izCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ5fCJcKqM+vTJRINsWPn4+FBZWUl8fDxTp07FYrGQlJTExo0bPRotLpeL6upq7Wd/f3/cbjdXX3016enppKSkEB8fT2lpKSUlJR4zplwuF126dGH27NmYzWa6d+9Oamqqdhxf7dqmTZtiMpmYNGkSERERxMXFsXfvXu1zk8mE0+mke/fubNiwAYfDQYcOHdi2bRs+Pj7nVVOtMy4ujhMnTjBp0iSio6Np2rQpx44d0z5funQpAOXl5UyePFn7jsOHD/PBBx8wZswY3G43J0+exGQyceTIEXJzc2ndujXh4eHs3LmTDRs20LNnz/N+N2qG1R9jMun/PFSVUX8+Ec6GkDG2WSCd4ppgt5fVe26ViHz+Xmc/8tXibdTFnC1RThkzylizCKeMGWWsWYRT7z4RTpVRnxllrFmEU8aMMtYswiljRhlrFuE8XlRGcXk1Vl8TYcEX/svjhlCzyqjPjDLWLMIpa8bLAav1Mp9hZTKZCA0N5ZNPPtGu5ebmcsMNN1BZWaldmzdvnsd9VVU1x0zdf//99OnTR7ver18/9uzZg6+vL1AzI8rHx4dZs2Zpa1wuF23atMHpdAI1jSqAdu3a8f7772vr3n77bfbu3asdHVhRUQHAq6++SosWLbR11113HWVlZedVU22+66+/nhEjRmjrnnzySY4dO+aR6ZdffqFfv37Mnz+fmTNn0q1bN+68807Ky8s1N8Add9zBhAkTNNfq1at56qmn2LRpU70aVqBmWJ2NhnAeqsqoP58Ip4wZL9QXFuRHu+hQMnIKcJ02StBogDZRoTS2+upizpZop4wZZaxZhFPGjDLWLMKpd58Ip8ooh0+EU2WUwyfCqTLK4RPh1GPGkjInM5bu8jjSvV10KEPvaIvF1+uS5/srnCqjHD4RTpVRnz6ZaJC
HKVZXV1NUVORxlN/GjRuBU3Oe/ojanUoZGRnaNafTyZEjR6iurtaaOQaDgYqKCnJycrR1mzdvxu12a7Opjh07BuCxBmDfvn3AqXlStbuoNm/erK2x2WwUFhZy+u6vc6mpNt/+/fs9vjMrK8sjU1ZWFv7+/syZM4eJEyfSrVs3AGJiYrS1FkvNsVV5eXkertpGVm1zT6FQKBSnGHpHW9pEhXpcaxNV8//4KBQKhUKhUCgUCoXi0jNj6S4ycgo8rmXkFDB9ya5LlEihUCgU50qD3GFVUVGBwWDgmmuuISAggKuuuopffvmFoKAgj0bL72dYlZaWYjAYmDlzJrNmzSIqKorGjRtrO51sNhu+vr5UV1djNpu59dZb8fPzo3379uTk5BAaGorBYNDWAhw4cICEhATCwsJo27Yt69ev9/jc6XTi7e3NK6+8wtixY7nyyisxmUz4+PhoDahzranWuX79ehISEmjevDnR0dFkZ2drnx86dIgffvgBh8OB0Whk3LhxfP755wwYMACn00lBQc3/YAcHB2M2m9m4cSMJCQkEBQXRqVMnfvrpJwBCQz1/IXs+mM0Nsg8qlIYwwE9l1J9PhFPGjBfTFxTgw3P3J5JfVIb9Ih8tcfo/LwZ6fo6inHr3iXCqjPrMKGPNIpx694lwqoz6zChjzSKcMmaUsWYRThkzyljzxXIePenw2FlVi8sN6dkFnLCX0zTU/5LlE+1UGfWZUcaaRThlzSgbDa5hZbPZcLlchIeHExoayoEDB0hNTSU+Pl47rq+W38+w+umnn3C73Vx//fXs3LmT7OxssrOzGThwIB9//DFQ02DKz8/Hz8+P2NhYdu/ezdatWwkPD6dNmzbaDKs9e/YA0KFDBwoKCsjNzWXdunXceeedLF68WPvOtLQ0nE4nSUlJ/Prrr+zZswez2Uz//v1ZuHDhedX0yy+/ANC9e3cyMzM5cuQIhw4d4sEHH2Tu3LkAOBwOrWaXy0VRURFbtmxhy5YtADRq1EhzVVVVYTAYsFqtFBcXs27dOozGmn+ZaneSnS9Go4GQEEu97pWBhjDAT2XUn0+EU8aMF9Mn6r/nZHwvIpx694lwqoxy+EQ4ZcwoY80inDJmlLFmEU4ZM8pYswinjBllrPlCnZnHSs76uaPSdcH//5zeav4rfCKcMmaUsWYRTlkzykKDa1jVNnl69erFf//7XwA+++wzxo4dS0hICEFBQdra02dYVVRU8PPPPwMwefJkfHx8qKys5NZbbyU9PR2DwUBQUBCrV6/G6XTSvXt3ZsyYAUBqaiqDBw8mKCiI2NhYAJYvXw7AU089xXXXXQfAiBEjtO8ICgri2LFjZGdnY7FY+OijjwAoKirihhtuYN++fVrWc62pdqfYK6+8QkxMDAADBgwgLS1N+87Y2Fh69uzJsWPH+Oyzz4CaYwt79epFp06dcLlc2jNISEggNjaWb775RjvusLbZFRYWVq/343K5sdtL63Xv5YzJpP8Bfiqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUZ8ZZaxZhPN4URnFF3GHOuj/OTaE96Iy6jOjjDVfLKe/l+Gsn1u8jRQWOurl1mvNIn0inDJmlLFmEU5ZM14OWK1+57zrrME1rFJSUggODtZmLQH07t2bUaNGcfLkSa2R83u2b99ORUUFANnZ2cTHx+Pt7c3NN9/MF198QUREBL6+vqSkpGC1Wjl+/Lh2b1JSEkFBQfz222/cfPPNVFZWsmPHDoxGI1lZWVrDqk+fPlojKyYmhtTUVNxuNw6HA5vNRlBQEMHBwSQlJfHzzz9z5ZVXnnNNhw4d0jJlZWVpdfbp04cJEybg5eVFixYttO/etGkTbrcbg8FAcHAwUDOv6pprrgFg9+7d3HXXXYwYMYL//ve/5OfnU1FRwT333APAVVddVe93pAbKnZmGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLK4RPh1GPGkjInM5b
u8jiiq110zQxQi2/9Tvb4PXp/jnp8L6J9IpwyZpSx5gt1hgX50S46lIycAlzuU9eNhpr5w42tvhecV281/xU+EU4ZM8pYswinrBllQXeHKWZmZvLQQw/RsWNHkpKSePPNN6msrNQ+z8rK4sorr+THH3/EbrcDYLVaCQwMxO128/bbb9OhQwf69++vHaFXex+Av78/Tz/9NJ06daJr1678+uuvlJSUkJSUpK2LiYlhz5493HrrrbRv355bb71Vmzl1/fXXc/DgQaqqqoiLi+Ojjz4iKSmJjh07MnPmTACaNWtGZGQkWVlZhISEYDAYuPfee7WacnNzKSwsJDk52aOmlJQUbrrpJtq3b8+//vUv/Pxq/tZZUlKSlj8iIoKJEyfStWtXOnXqxKpVq3C5XFx11VV4e3sDkJycjM1m44cffiAtLY3u3bsDcPToUe07KyoqcLvdvPLKK/Tu3Zt//vOfjBs3DoCmTZuesfGnUCgUCoVCoVAoFArFuTJj6S4ycgo8rmXkFDB9ya5LlEihUFzuDL2jLW2iQj2utYmqaZQrFAqFQt/oaoeVzWZj4MCBREVFMXnyZPLy8pg4cSLl5eWMGjUKALvdTteuXcnJyWHYsGEMHTqUvLw8SkpqzqgdPHgwcXFxzJ8/n3vvvZemTZuyfv167HY73t7e+Pv7k5OTw913301YWJjWZOrXr5/mj46Oxu12U1BQwLBhw0hPT2ft2rVYLBY6dOjAtm3bgJrm1+7du+natSvJycnaEYTXX3+95rJarZSVlZGTk8MDDzwAwJw5c4Ca4/xq14WEhFBZWUl1dTVPPfUUGzZsoKysjMjISMLDw7UZVH5+fmRmZnLLLbfQpk0bPvzwQwBuu+027Tl26tSJsLAwhgwZAoDBULMd+oorruCWW24BoFWrVixYsACXy8WAAQMwm83Mnz8fgPj4+At6j2az7vqgl5yGMMBPZdSfT4RTxowy1izCKWNGGWsW4ZQxo4w1i3Dq3SfCqTLqM6OMNV8s59GTDo+dVbW43JCeXcAJezlNQ/0vacaG5BPhVBn1mVHGmi+mMyjAh+fuTyS/qAz7RTyKVM81i/KJcMqYUcaaRThlzSgbumpYLVy4EIfDwZQpU7Rj7Kqrqxk7dixDhw4lPDwcAF9fX+bMmcO4ceMYNmwY/v7+uN1u4uLiGDRoEACdO3emc+fO2Gw2ze9yuThx4gSDBg1i1apVFBQUEBISwvHjxz3W7dmzh7Zt29K8eXOmT5+O2WzG19eX2vlOtaSlpXHffffx888/8+6772pzn4qLi7U1drsdg8FA//79WbZsGQ6Hg5CQEAoKCigtLSUwMBCArVu3cv3111NZWcmUKVOwWCyYTCacTqfHd2ZmZjJo0CDWr1/P+vXrady4MSUlJRQWFnqsmzlzJmPGjPHYZfbCCy9gNte88uuuu45du3YRFBTEp59+itFo1Oo7cuTIeb230zEaDRc8vPJypiEM8FMZ9ecT4ZQxo4w1i3DKmFHGmkU4ZcwoY80inHr3iXCqjHL4RDj1ljHzWMlZP3dUui7K//+o9+eot/fyV/hEOGXMeDF9R/JL2Lc7j4jGFiLCAi6aV8/vRdTvp/RcsyifCKeMGWWsWYRT1oyyoKuGVUpKCt26ddOaVVAzy2n06NFs3LiRfv36YbVaKS4uJjY2ltmzZwOwadMmBg0aRNu2p7b2ent7c99997F27Vqg5tjAqqoqWrduzYsvvsiLL74IwGeffcaoUaNIS0ujZ8+eBAYGcvDgQR566CGt+VWbIysri8OHDxMUFATUNMCefvpp7eecnBx69erF/v37te8sKSkhOTmZ0aNHM3r0aAAmTpzIxx9/rNUUEBBAUVER//znP7npppu07+zcuTP5+flUVlZq32GxWHjhhRe0/Bs3buThhx8mIyPD41leccUVFBcX88orr2A2mxk9ejQrVqygR48eADRp0gQAh8NBVVUVXl5ePPHEE7z//vvk5eXV8w2Cy+XGbi+
t9/2XKyaT/gf4qYz684lwyphRxppFOGXMKGPNIpwyZpSxZhFOvftEOFVGfWaUseaL5fT3Mpz1c4u3kcJCR73coP/nqNf3ItInwiljxovpKylzMnXxTnZmndrt2D4mlMfvbI/Fr/5z5NR7kaNmEU4ZM8pYswinrBkvB6xWv3PedaarhlVWVhZ33XWXxzWr1UpYWJg2wykmJkb7z7XUNms6derkcT02NpY5c+ZQXl6uzWSqbdTUkp2djbe3NwcPHtQ+T09P95jh5Ha7OXHihJbx2muvxWg04ufnpzWSaj8DyM/P17I6nU6aN2/u8Z1HjhzBy8tLWx8WFkZRURHR0dHamuLiYhwOB263m0OHDml5wsLCtCP+ar/TaDTWaTLNnTsXk8nEvffey5IlSwA4fPiw9rnT6cRkMpGamkpeXh4RERFUVVXx9ttvU1lZSXl5Ob6+vtQHNVDuzDSEAX4qo/58IpwyZpSxZhFOGTPKWLMIp4wZZaxZhFPvPhFOlVEOnwin3jKGBfnRLjqUjJwCXO5T142Gmnkyja2+FyWv3p+j3t7LX+ET4ZQx48XwffDVzjpz5HZlF/D+VzsZ3r/jBblBvRe9OlVGOXwinCqjPn0yoauGVe3Mp98TFBSkHdmXnJzMtGnTPNZu374dODU7qhar1Yrb7cZms5GYmIjBYKCg4NT/SDudTtasWUNQUBAbN26kY8eO2menHxG4adMm7Ha7dt3b21trMs2YMYNPP/2UgoIC/P39CQoK0uZp1e5mysnJ4cknnyQ1NRWz2UxpaSlWq1X7jiuuuIL9+/ezZcsWnnnmGbKzs7WjAmu/MzExEV9fXxwOB2+88QZLly7F4XBgNptp3LixxzGEeXl5TJ48mZiYGDp37ozRWNO9bNasmbamVatWVFdXc/fdd1NQUIDBYMDf3x+j0YjL5cJut9e7YaVmWNWlIZyHqjLqzyfCKWNGGWsW4ZQxo4w1i3DKmFHGmkU49e4T4VQZ9ZlRxpovpnNYv/Z88LvdHW2ja3Z3XOj/76j356jn9yLKJ8IpY8aL5RM5R069l4uDynhx0HtGGWsW4ZQ1o2zoqmF1LgwYMIB58+YxbNgwhg4dSl5eHj/88ANGo1GbcQUwcOBADhw4oP3s4+NDUFAQe/bsYc6cObRu3ZoFCxZQVFREZWUlfn5+TJ48mc2bN/Phhx8yZswYAgICKCsr480336R79+78+OOPmq9NmzasX7+et99+mwEDBuBwOFiyZIk2IwqgadOmGAwGfvjhB5o0acLgwYNZsWIF2dnZlJWVaeuuvfZaVq5cydixY7n++uvp0aMH8+bNw+0+7a+gAS1atGD//v188sknPPDAAxw4cICUlBQqKiq05tLkyZNZunQpTqeTqqoqBgwYwOLFiwEoLT11VJ/VasXb25uSkhL69u1LdnY2W7du1b7z9F1c54OaYXV2GsJ5qCqj/nwinDJmlLFmEU4ZM8pYswinjBllrFmEU+8+EU6VUQ6fCKceM4aEwOvDriM3v4TcE46LPj8H9P8c9fheRPtEOGXMeKG+v2KOnHov+nSqjHL4RDhVRn36ZEJXDava+VS/x2azaUfvBQUFMWfOHMaNG8ewYcOwWCwkJiayadMmKioq8PHxAWrmS1VVVWEwGLR7IyMjcTqdzJo1i4KCAhISErj99tv5/PPP6dq1K9dddx3NmjXjww8/pKSkhGeeeQYvLy9uvvlm7r//fvr166e5WrVqBUBwcDCLFi0iIiKCV199lTfffJOqqiotu7+/Pw6Hg4qKCmbOnEliYiIPPfQQL7/8MhUVFcCpnU/+/v5s2bKF9PR0/vWvf7Fz505++ukn7TubN2/O/v37CQgIYO7cuURHR/PWW2/x3HPP4XQ6AfD19dWON8zKyqKkpIRWrVpRVFTEt99+y2+//UarVq3w9/enVatWHDhwgIULFwI1DbGKigry8/M95oidD2qG1R9jMun/PFSVUX8+EU4ZM8pYswinjBllrFmEU0T
GXdkFHDrhoGWYhTZRoRfsawg1q4z684lwqoz6zChjzSKcAT4mrk4Ix24vu6C5Vaej9+fYEN6LyqjPjBfLJ3KOnHovctQswiljRhlrFuGUNePlgNXaQGdY/dF8quLiYvLz8z1mSsXGxjJ79mzt502bNrFp0yays7OJj48HYN68eUycOJE1a9Zou49iY2PZt28fGzZs0O697777MJlM2n0tW7bEbDZTVVXFqFGj6NevHwDr1q3TMgLaMXvvvfceXbp00XwfffQRR48e1X729fXFYDCwZcsW7Zrdbufll1/Wjg6MjIwEoHfv3rz++uvaupdffpmffvpJ+67aXU8rV670aCiNGTNGa1g1atRIu+50Ojly5AhHjhwBapp4I0eOZNGiRSQkJLB8+XLcbje//fYbbrebqKgo/va3v+Hv74+XV/0Hb6rzOc9MQzgPVWXUn0+EU8aMMtYswiljRhlrFuG8GL68wlJem/szJWWn/nJQgJ+ZVwZeTVhw/Y6zOR091izaKWNGGWsW4ZQxo4w1i3DKmFHGmkU4Zcx4ob6/Yo6cei/6dKqMcvhEOFVGffpk4qI0rA4cOMChQ4c85j6dzj/+8Y9z8iQn151PtWrVKoxGI0lJSR5rMzMzGT9+PGlpaVqDZfny5VrjqXY+VXJysod/6dKlvPHGG6xcuZKCggKcTicul0ubf+Xt7c211/4/9s48Lqp6///PmYFhGRwWRQx3MAEXRE3NjRat1G56M7tZplbmpSK7V/O2+C3TLDNvZmVqWpq7Vje7LmlmlpFmVmqK+wK4gCDIMjAsAzPz+2N+HJ1QbzLzsYOf83o8eiTnfM7zvF7ngzjDez6f98389NNPzJs3jylTpuDr60twcDAtW7ZUikvV/tavX8+UKVNIT08nIiKCs2fPUlVVRXl5Of7+/vj4+FBYWMirr77Kpk2bsFqtREZGAijb71X//9ixYzz66KPs2bMHk8mkHD99+jQtW7ZUCm9Dhw4lLy8Po9FIq1at3Fal9e7dm8DAQMLCwrDZbBQUFBAQEKA809atWytjZ82axfvvv19jHm666aY/NF+aNGnSpEmTJk3XUr8vVgGUlFUxZfGvvPePxMtcpUmTJk2aNGnSJK+SBrVl3poDbr2s2rQII2lQ2z/RlSZNmjRp0nRpeVSwOnXqFP/617/Yt29fjX5L1dLpdH+4YHWp/lTTp09n6NChbv2phg0bxu7du+ncuTOzZs0iJyeHyZMns2DBAsLDw936U40aNUq57q677uKNN95g4cKFDBkyhIYNGzJnzhx0Oh2hoaHKuL///e9s27aNrKwskpOTOXDgAF9//TVt2rRx86vX61m1ahVdunThmWeeYcWKFcp2gEVFRfj7+ysrlVatWsXw4cMB1+ovvV6P1WpVxgLs27ePyMhIkpOTSUlJ4eeff3Y7b7fbAcjNzeXhhx8GYPHixcCF/lTh4eGUlZXhdDp5+umnadGiBf/5z39Yu3YtNptNWa0F8Ouvv2IwGBQuuLYdfOedd/7QfF1OnjbOvR5VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUxv8fadyKtRrKpWSVkVh04V0D6q/iXP/y+pNbNIpoweZcwsgimjRxkzi2DK6FHGzCKYMnr0Ji84yI/nhnUit7AMS7kds7+B8BDPe6to8+IdaR69I7V7lDGzCKasHmWTRwWriRMncvToUSZMmMBNN92krDqqrS7Vn2rIkCGMHTvWbVx2djZOp5P3339f2RqvqqqKSZMm8eGHH1JUVERcXBwLFiygadOmynUOh4PS0lJatmzJhg0b8PHxQafTERQUxIIFC5g0aRLgKgiBa7XV+++/T2RkJI888giLFi1i3759xMfHK7yGDRuSlpZGamoqnTp1IiQkhIMHDyr3dDqd2O12oqKiWLVqFSaTiaFDh7JixQqys7Pdcvn4+KDX63n33Xdp2bIl9913H59//rlSsDKZTDRr1owuXbooxbHu3buzfft2ZUtAcBXShg4
dygMPPABAZmYma9eupby8nPz8C5+oKS8vx+Fw4OPjQ3BwME2aNOHAgQNs3bqV+++/v1ZzqNfrPG7YeT2rLjTw0zyqjyeCKaNHGTOLYMroUcbMIpie8rLyT1/xfOb5UhI7N/PoHmrLfC2YMnqUMbMIpoweZcwsgimjRxkzi2B6k7f7yDmO/HSS2OZhdIxp6DWumjOL+l2NbN87IngimJpHOXgimJpHdfJkkkcFq927d5OUlKSsHPKGft+f6lJq1KgRMTExbn2cBgwYwKRJkxg3bpzSd+pSfktLS5k5cyZxcXEAdO/encaNG5OSkqKMS0lJwcfHhwcffJDx48cDrsLTf//7X77//nvi4+MxmVz/0I8cOZLHH39cuXby5MkcPHiQkpISt1Vhq1atIjg4WPl6/fr1ylZ+1cfbtWvHJ598oozZv38/n3/+OadPu35BExYWRllZGVOnTnXrddW7d28KCwuVr4ODg922CRw8eDAdOnRgwIABbiusevXqxdGjR9mzZ49ybOrUqUybNo3BgwdjMBgu+RyvJIfDicVSetXXXe8yGNTfwE/zqD6eCKaMHmXMLIIpo0cZM4tgeosXGXblF/yN6weqpmm4TPMikql2ngim5lGdHmXMLIIpo0cZM4tgepOXk1/K5I9/rtEPc9JjXWkYWvt+mGrOLIopo0cZM4tgyuhRxswimLJ6vB5kNgf84VVnHhWsQkNDqVevnieIWiktLY377rvP7ZjZbCY8PJy0tLQrXgcQFRWlHIuKisJqtZKVlaX0nTp27BhVVVVu43Q6HS1btlQY1QWrgAD3X56Ul5cDrlVN0dHRBAQEYDAY3IpVTqeTiooKdDodAM2aNbsk69y5c27MqKgo8vLyKCoqUngWi4W8vDy34lhUVFSN57B9+3YA2rdvX8PvzTffjMVioUWLFiQkJFBSUkJ+fj7h4eGXfZZXktZQ7vKqCw38NI/q44lgyuhRxswimDJ6lDGzCKanvDbNwwgK8LnktoBBAT7ENQtVXdNwGeblWjDVzhPB1DzKwRPB1DzKwRPBlMXj74tV4NpaeNLCn73SD1ONmUUzZfQoY2YRTBk9yphZBFNWj7LIo4LV0KFDWbt2LcOGDavVapzaymKxcKntB4ODg5Xt8y53ndFoxM/PTzmWmJjI+++/j9PppFu3bgQFBVFSUoJOp6Nnz56X5UdGRgLw+eef8+GHH5Kfn09sbCxnzpwBLvSdql+/PsePH+exxx5jz549+Pr6Eh8fT3l5ufLMjEYjOp2Oo0ePMnDgQNLT04mMjCQkJASj0YjD4frm7tWrF3q9nrFjx3LkyBGsVitBQUE4HA4GDhzolmnu3LkMHz6c1NRUAgIClEyDBg1SxjVr1ozx48fTpk0bdu7cydy5czlx4gRGo9Gtp9fVSuthVVN1YT9UzaP6eCKYMnqUMbMIpoweZcwsgulN3qTHujJp4aU/Fe3J6w81ZxbFlNGjjJlFMGX0KGNmEUwZPcqYWQTTWzytH6bmUW08EUzNozo9yphZBFNWj7LJo4JVixYtcDgcDBo0iPvuu49GjRpxqcLVnXfe6clthGrAgAHMnDkTgOHDh3P+/HlWr15NcHCw26qlkSNH8ttvv9G5c2fAVWQCOHDgAH379qVTp04sWrSI8+fPu/GrC1u//PILjz/+OCUlJSxbtozAwEC3vlM6nY7z588TEhLCuHHj2Lx5M7t27XLb9rBRo0a0aNGC7du3M3jwYIqLi9m8eTM6nY4HH3zQLdM777zDoUOHGD16NCtXrsRmsxEVFaVkOnz4MOvWraNfv37YbDY++eQT/Pz8qKioAFyrwGojrYfVlVUX9kPVPKqPJ4Ipo0cZM4tgyuhRxswimN7ghYaaWPna3ew5co7DJ/Ol6jshiimjRxkzi2DK6FHGzCKYMnqUMbMIpqc8rR+mGKaMHmXMLIIpo0cZM4tgyupRFnlUsBo7dqzy5zfffPOSY3Q6HYcOHfLkNjVkNpvdejRV6+Kt8i5
3nc1mo6KiQllltWHDBnx8fKisrGTp0qWYTCaCgoIoKioiJydHKfA4HA4cDofCr96+r3Xr1uzfv5+UlBRiY2OprKykoKBAGVdQUABAly5dWLRoET4+PvTo0YNt27a5eTUYDNSrVw+DwcCMGTOIjIwkPj6eAwcOKOOys7NJT0+nR48ebN68meLiYiIjI8nPz2f9+vWMHj1ayWQ0GomJiWH27NnY7XYSEhLYt2+fkqlBgwaYzWbmzJlDdnY2Op2O4OBgAgICKCws5NSpU0RHR1/13Gg9rC4tg0H9+6FqHtXHE8GU0aOMmUUwZfQoY2aAddvTOXyqkDbNQ7i7R0uPeSI8Bhr13Ng0FJNRX+u+VRerLsyL5lF9PBFMzaM6PcqYWQRTRo8yZhbB9BZP64epeVQbTwRT86hOjzJmFsGU1eP1ILP5GvWwWrJkiSeX11qX6tFUXFxMbm6uW9+pS10HkJ6eTmxsLAApKSnccMMN2O12vv32W8BViNuwYYOykglcWW+++WaFkZeXB0Dv3r157rnnlHs89dRTbNmyRRlXWFgIwMyZM5XCk9PppH379kofLJvNRlVVFREREaxbt05hrV+/nmeffVZZZbVt2zacTiejRo3i6aefZsiQIbz++us8/fTTpKSkKAWrlJQUevbsSbNmzdi7dy8LFiygbdu2dO3aVcnUoEED3n77bU6dOsXAgQNZtWoVixYtYseOHVczFZeUtj/n5VUX9kPVPKqPJ4Ipo0cZM4tgyuhRlswHM/J5a9VvytepJ87zybcneO6hBGKbhXno0DseS8oqmb/2APvT85Vj7VqGkTSoLSZ/X08tqnJeRDNl9ChjZhFMGT3KmFkEU0aPMmYWwfSUp/XDFMOU0aOMmUUwZfQoY2YRTFk9yiKPNlPs2rXrH/rP20pMTOTHH3/EYrEox7766iv0en2NvlMXq1OnTgQFBbFx40bl2IkTJ8jPzycxMVE51qdPHwD27NmjHNuxYweFhYXccsstAJw+7VpKvnv3brd75ObmAtCgQQMAysvL0el0fP3118oYi8WC3W6nXr16AJw6dQqn08nJkyfdMlXfo2FD11Y3aWlpBAcH8+yzz3LzzTczefJkAKKjo90KeGlpaVgsFhYtWsS0adPo3r07ZrOZ8PDwGoW+119/nUGDBikFvOLiYsxmM82aNbvsc9SkSZMmTZo0XX+6uFh1saavuPTxP0Pz1x7gYEa+27GDGfnMW3PgT3KkSZMmTZo0aapLennkTQQFuH92OyjAh5dH3vQnOdKkSZMmTZo0XSyPVlhdrOPHj5OZmQlA48aNadWqlbfQNTR06FAWLVpEnz59KC8vx2g0YrPZuP/++2v0ncrKymLz5s0A+Pn58fe//513332XFStWUFZWRmVlJT4+PowaNUq57q677uKFF15g9erVrFmzBr3eVdfr1asX8fHxgKvo5OPjw969e+nWrRslJSWYTCaKiooA1/aE/v7+lJWV0bJlS1599VWmTZuGzWbD398fg8GgrJyqvsbPz69GJnBtqwhw7tw5iouLMZlMHDx4kA4dOhAZGUlUVJSykgtcq7p++eUXBg4cSJMmTRg2bBi//vorwcHB5OTkKOO6dOmiFMhWrVqlHO/Xrx++vrX/lLInTc+vV9WFBn6aR/XxRDBl9ChjZhFMGT3KlHnND2lXPL/xp5Pc06t22wN6y+PZ81a3lVXVcjhhf3o+eZZyGoUF/qkeRfFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9CbvhgZBzHn2Vg5m5HMq10qzcBNtWni+klzNmUUxZfQoY2YRTBk9yphZBFNWj7LJ44LVN998w7Rp05RiVbWaNGnCCy+8oKxW8racTqfbny/+uloOhwO73X7Jay++5lLX/hFVF5L+yPV/5H5Op9ON+ftx1SuzLBaLUmg6deoUp06dqsEBWLt2LWvXrlWOFxUVcfbsWQAqKiooLXX1mvL19UWn0+Hn54evry8TJ078n3kuJ71eR2i
oqdbXX++qCw38NI/q44lgyuhRxswimDJ6lCHz0TNFVzx/+HQhIzz8991TjyeyS6543mpzePwaRG3zci2YMnqUMbMIpoweZcwsgimjRxkzi2B6k9cz1MTl9+epvdScWRRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9//33PPPMM0RGRjJ27Fiio6MB1zZ7n376KWPGjOGDDz5w227PG1q1ahUVFRV89913yiqlTz75hMmTJ/PUU08pq6yWLl3qdl1FRQXz589n9OjRjBs3DoDu3btjs9lYsGABkyZNAmDTpk1UVlZy//3389prrwGu/lGjRo1i3759xMfHYzabqayspEOHDnz66afKPYYMGUJqaqrSryogIIAjR44wefJkHnjgAcC1Aqp79+7KqqjqsTabjZSUFCXTnDlzePfdd5UCVKtWrdi2bRv9+/dnxowZyj0TExPJz7/wiePQ0FCGDBnCmDFj+Mtf/kJSUhITJkwgKCiIhIQEABYvXozJZFLuCTB58mQOHTqEr68vNpsNo9F41XPjcDixWEqv+rrrXQaD+hv4aR7VxwM4V1hGcbkds7+B8BDP/7HT5kWOzCKYMnqUKXPrJsGknjh/2fOxTUP+9Cbkgb66K543GfV/ukdRPBFMGT3KmFkEU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9Xg8ymwP+8KozjwpWc+bMISYmhuXLlxMYeGELlj59+vDwww/z0EMPMXv2bK8XrFJSUujevbtS2AHo378/r7zyCtu3b2fw4MGXvG737t2UlJTQv39/5VhUVBTnzp1TijYAW7ZsAVw9r6rVs2dPQkJC+P7774mPj1d6PHXu3NntHtX9pvLy8mjSpAn+/v44nU769eunjAkODsZgMFBcXAxAs2bN0Ol0NGvWzC1T9T2q+2KFhIRgt9uVPlrVCgkJ4dy5c0qRKSoqirS0NBYsWIDZbGbw4MFMmDCBkpISoqKiAFefq+qtCLt06eLG69KlC5MmTeLBBx+85HP8X9Iayl1edaGBn+ZRHbySskrmrz3gtv1Vu5ZhJA1qi8m/9lt2VkubF3UyNY9y8EQwPeXd3b0Fn39/+W0B+9/c/E9vQh4eHEC7lmEczMjHcdEidL0O2rQIo4HZ/0/3KJongimjRxkzi2DK6FHGzCKYMnqUMbMIpoweZcwsgql2ngim5lEOngim5lGdPJnk0WaKR44c4a9//atbsapagYGB3HvvvRw5csSTW1xSaWlpSuGlWmazmfDwcNLSLvyy5cSJEzz66KMkJCTQs2dP5s2bB+B2bWJiItnZ2WRmZlJeXg7A3r17AZg5cybx8fE88MAD7N27l5YtWyr8+vXrA7BhwwY6duxI165defHFF0lNTVU8AkoB6oEHHqB9+/bcddddTJ8+ncrKSqxW16eAjUYjBoOBzMxMevbsSUJCAo8++ijr16/HYDBQUFAAXCiGvffee0qmKVOmkJGRgdPp5PTp00qm7du3M2/ePO666y5iY2MB1xaGPXu6Fr0PHDjwks/WYDCwZMkSbr/99quYEU2aNHlb89ce4GBGvtuxgxn5zFtz4E9ypEmTputdzz2UcFXH/wwlDWpbo89EmxauYr4mTZo0adKkSZMmTZo0adKkqW7LoxVWfn5+yiqdS6moqAg/Pz9PbnFJWSwWzGZzjePBwcGKn6KiIkaOHEmLFi2YNWsWOTk5TJ48Gb1e7+Zp6NChfPTRR9hsNjZv3ozNZiMrKwuAUaNGKSvIHnzwQXx9fZXiXPXqqOzsbO6//37Cw8NZsGABVVVVyv3B1UcLID8/n+TkZPbv38/ChQuJiIhw28avqqqKqqoq2rVrR2JiIsuXL+fs2bM0bNhQYVVUVACQlZXF8OHDAde2hz4+Pm73HDp0KLNnz8ZkMvHxxx8TFBRESUkJCQkJynaJ1au3EhMTSUxMpLKyko8//phz586xf/9+unXrVuv58fHRmsr9XnWhgZ/mUT28s+etbiu
rquVwwv70fPIs5TQKq/lBgWvpUSRT7TwRTM2jOj3KlrldVAOWvNSXL39M5+DJQto0D+HuHi095nrTY3CQH88N60RuYRkWL2+XevH/1cYTwZTRo4yZRTBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT86hOjzJmFsGU1aNs8qhg1a1bN5YsWULv3r3p2LGj27m9e/eydOlSZUXPtdaqVauwWq28//77yiqnb7/9li1btpCTk6MUboKDg0lOTuaNN97g//7v/zCZTDidTmJjY3nkkUcA17Z/nTt3VopRAD///DMAAwYM4IcffiA/P58mTZq4rfAC1yo0o9FIt27dmDdvHj4+PjRt2pSSkguNw7OzswHo0KEDhYWFvPvuu0RERGA0GrHb7cq4HTt2ADBo0CDWrVuH1WqlefPmpKenu90zNTUVnU6Hn58fOTk5+Pq6tg+7/fbbazyn++67T9mu8OjRo2zevJm5c+cyYsQI5bqrkV6v87jh+fWsutDAT/P45/NOZJdc8bzV5vD475k2L+pkah7l4IlgepP38N3tvMa6WN70KOq1hprnRRRTRo8yZhbBlNGjjJlFMGX0KGNmbzIzc0s4eiiHyAYmIsODvMKsltqfo5rnRRRPBFPtPBFMzaMcPBFMzaM6eTLJo4LVv/71L4YOHcpDDz1EfHw8LVu6PoWbnp7Ovn37qF+/PuPHj/eK0YtlNpuVFU4Xq6ioiODgYODSfa46d+7Mli1b2Lp1Kw888IByPDAwEJ1Ox88//8yePXt45JFHCA8PV84bjUYeeughVqxYofAPHHBtyzV48GBmzpwJgNPppEuXLhQXFxMcHIzNZiM3N5eAgABmzZql8LZs2cJTTz1FaGgoANu2bQMgISGBCRMmKOOefvpptm7dqtzz2LFjAIwePZo33ngDcK02q+5BVT3utdde49577+WLL75g6dKlrFixgi+//JKqqqrLrk4DmDZtGrfccgv//Oc/OXXqFNHR0VeYhUvL4XBisZRe9XXXuwwG9Tfw0zyqhxfoq7vieZNRT0GBtVZsbV7kyCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYMnqUMbM3mSVllcz9IpXUtAu7T7SPCuOpe9tjCvCsr6/an6Oa50UUTwRT7TwRTM2jOj3KmFkEU1aP14PM5oA/vOrMo4JV06ZNWbt2LfPmzSMlJYUNGzYAEBkZyYgRI/j73/+u9HrypqKiomqsZCouLiY3N1fpT5WWlsZ9993nNqZNmzYA/Pbbb24Fq7S0NCIjI/H391e4ubm5Ne5ps9mUrfTOnTuHTqcjLS2N3r17A64eUeHh4RQXFxMVFcWpU6dwOByUlpa6FdOqC0HVRbG0tDR8fHzIzMx0u2eTJk2orKxUMlV7uriHl9lsJigoiNLSUpo2bQq4CobVq66GDRum8N59913effdd9u3bpxybNGkSY8eOJSQkhD59+tRYKVcbaQ3lLq+60MBP8/jn88KDA2jXMoyDGfk4nBeO63WuXi0NzP4e+9XmRZ1MzaMcPBFMzaMcPBFMGT3KmFkEU0aPMmYWwZTRo4yZvcGcszq1Rl/fA+n5zF6dyrgHEjx055Lan6Ma50U0TwRT7TwRTM2jHDwRTM2jOnkyyaOCFUD9+vWZMGGC28og0UpMTGTu3LkMHz6c1NRUTCYTsbGx6PV6ZQvCS60k6tSpE3q9nh9++IFbb72V/Px8YmNjycrKom/fvsp1BoOBI0eO8Nhjj7Fnzx58fX2JjIwEXKugAEpKSoiIiOCTTz7h888/Jz09ncjISM6fP09AQABNmjRh165dgKuQNXbsWI4cOYLVaiU2NhaAG2+8UblnvXr12L59u1umoCDXUvfqTFarldDQUObPn8+///1vsrKyaNmyJTabjYYNG2I0GgF48MEH+fTTTwkNDaWoqAiDwUB5eTlDhgxh4MCB+Pr6YjQaMZlMFBQUAK4eW5999hmfffYZgHK8NtJ6WNVUXdgPVfOoLl7
y4PbM+d2nCdu2dH2a0JO/Y9q8eEeaR+9I7R5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzN5iiuzr6y2PdYkngimjRxkzi2DK6FHGzCKYsnqUTR4XrP4MDRgwgHfeeYdDhw7xxBNPcPLkSVavXk3r1q2V3lSAUoDZvHkzAH5+fvj7+5Obm0vfvn3p1KkTixYtIjc3l/79+yvX6fV6DAYDv/zyC48//jglJSUsXboUuLBKC1wryXbv3k10dDTjxo1j8+bNZGRkKMWtat1www1s376dwYMH07x5c+bPnw/gtprJZDJRVFRUIxPglql58+b89ttvdO7cmaFDh/LZZ59hs9lo0qQJABUVFaxfv57IyEhGjx5NixYtmDFjBnv37uXnn3/m9ddfB6Bhw4ZERESQmJhIZGQkNpuNdevWKT23qgtzVyuth9WVVRf2Q9U8qoMXGgpTk3uTlVtCVp7V6/u1a/OiTqbmUQ6eCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyuhRxsyeMq9FX19Q/3NU27xcC54Iptp5IpiaRzl4IpiaR3XyZNJVFaxefPFFdDodU6ZMwWAw8OKLL/7Pa3Q6HVOnTq21wUtpw4YNGI1GYmNjmTt3LiaTiZ49e7Jjxw5ycnKIiIjAbDZTWVmJXn+hmllRUUFpaSk33HAD+/fvJyUlhdjYWBwOBxs3bqRbt27KdeBa2bRo0SJ8fHyIjo7m+PHjnDp1SuGfOnWKqKgoDAYDM2bMIDIyksDAQCwWC3Chp1RWVhY9evTg+++/x2q1cuONN5KamsqhQ4cA17Z+BQUFNTI1adKEM2fOuGVKT08nPj6evLw8ZsyYQcuWLTEYDOTk5ACwePFigoOD+fTTT/HxcU1v06ZN2bt3L6dOnWLPnj1KoSwmJobNmzeTl5eHTqejVatWgGvLwuprr1ZaD6tLy2BQ/36omkf18QCC/AzcFBeBxVJW675VF0ubFzkyi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKaMHutC5nOFZRSX2zH7GwgP8c4v17zhUWRfX1D/XNeF7x0ZPcqYWQRTRo8yZhbBlNXj9SCzWVAPq507d6LT6XA4HBgMBnbu3Pk/r9HprvwiozZKSUmhZ8+ezJkzRzlmsVjo2rWrspIpKiqKkJAQZs+erYzZtm0bAEOGDOHpp59Wjr/xxhvKKqzq3lAtWrRg4cKFbmNOnDjBTz/9RJcuXWjRogWpqamMHj2aRx55BACn00nnzp0pKSnhzJkzNGvWDIPBgN1u55133lEKWN9++y1PPvkkx48fV+5ptVpJTEzkww8/VO75xBNPcObMGSVT48aNSU1NJSkpSdnCsLi4mJtuuomsrCxsNhtpaWmcOXOG7t27X/LZrVmzRilYvfPOO27n1q5dy7/+9S+lcFVbaftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXmUg+cNZklZJfPXHnDbdq9dyzCSBrXF5O/rDYseebwWfX099VgXeSKYMnqUMbMIpoweZcwsgimrR1l0VQWrb7/99opfXyulpaVx3333uR0zm82Eh4eTlpYGuPpcffDBB269rDZs2ADAoEGDalybmZlJhw4dMJlM6HQ6t5VZlZWVbN68mfr16yv82NhY1q1bx65du1i0aBH5+fk0adIEq9WqeGzSpAmNGjUiJyeHl156iW3btuHr60twcDAhISFkZmYC0KtXLwDy8vIYOHAg6enpREREkJOTg8lkUu7ZqlUrUlNT2bhxI6+88gpWq5XIyEj0ej12u53Tp08zevRo7r33XrKysli6dCnHjh3D6XSi1+upqKjgnnvucctusVh47733+Oqrr8jNzQVwy65JkyZNmjRp0qRJkyZNmjRpur40f+0BDmbkux07mJHPvDUHGPdAwp9j6ndKGtSWeWvci2ptWriKapo0adKkSZOm61Me9bDKysoiLCwMf3//S54vLy8nPz+
/Rk8nT3VxEepiBQcHU1RUBMDQoUNZunQpycnJJCUlkZOTw+bNm9Hr9TRt2lS5ZtiwYezatQuA1157jYqKCv7v//6P9PR0Fi9eTOvWrVm5ciWFhYW0a9dO4bdv3x6Ar7/+miFDhtCwYUMWLlyIXq/H4XAo41q1akVmZiY7d+4kKSmJAwcO8PXXX9OwYUMKCgoAaNSoETqdjoMHD9KlSxeeeeYZVqxYgc1mIyQkRGElJCTwxRdfsGHDBkaMGAHA0qVL8fX1paKigqKiIjp16kSDBg149tlnadGiBXPmzOHdd98lNTWVJk2a0LlzZwCmTZuG3W7nm2++wdfXl4SEBDZv3kxQUBA33nijR/Pj46MVvH6vutDAT/OoPp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimDJ6VGvms+etbkWgajmcsD89nzxLOY3CAv9UjwDBQX48N6wTuYVlWARsW3jx/693ngimjB5lzCyCKaNHGTOLYMrqUTZ5VLDq06cP06dPr7Fqp1rffvstzz77rNKr6VoqODiYxYsXM2XKFJKTkzGZTLRr1459+/a5jcvOzsbpdK0v79q1KxEREbz11lsUFBTw4YcfUlRURFxcHAsWLOCDDz5Q+ltVX3PDDTewYcMGfHx86N+/P9u3b+fcuXMKv7y8HIDQ0FDef/99IiMjeeSRR1i0aBEGg0EZ53Q6ldVUqampdOrUidatW7N9+3ZlTPW9mzRpwqpVqzCZTAwdOpTPPvvMLdOqVauwWq28//77mEwmXnzxRfR6PVlZWUo/rOjoaN555x3Onz+PwWCgsLAQgAULFpCQkFDr567X67zS+PR6VV1o4Kd5VB9PBFNGjzJmFsGU0aOaM3/yzRH2Hs2lY0xD7u/T2ivMasn0HOsKTwRTRo8yZhbBlMmj9rNW86g2nqfME9klVzxvtTm88r7eW7lF/o5B7XOttu+da8ETwVQ7TwRT8ygHTwRT86hOnkzyqGBVXbS5nCorK4VsL2c2mykuLq5xvKioSOkTBRAdHc2iRYuUr5cvX86uXbuoqKjAz88PcK1uCgoK4siRI8q1N9xwAwUFBYwbN47Bgwe78W+44QYAzpw5A8Df//53HnroIWXMiy++yOrVqxVWdUFo06ZNyhin08nKlSuVgpXNZgMgPj7eze+WLVvYunWr8gxPnToFwIwZM4iPj1fGHTp0iN27dyv3TElJoXv37gQHB/Pcc89RWlrK0qVLefjhh5V+WPfffz/vvvsu//jHP3jyySd56KGHKCkp8ahYBeBwOLFYSj1iXI8yGNTfwE/zqD6eCKaMHmXMLIIpo0c1Zz6Qfp43l+9Rvk49cZ4lGw7x4sOdiGsRpgqPongimGrniWDK6FHGzCKYMnnUftZqHtXG8xYz0PfKvcZNRj0FBdZasUGe51iXeCKYMnqUMbMIpoweZcwsgimrx+tBZnPAH151dtUFq5KSEiwWi/J1YWEhWVlZNcZZLBY2bNhAeHj41d7ifyoqKkrp61St4uJicnNziYqKuuJ1AOnp6cTGxgKuXlONGzcmMjJS2drwxhtv5OjRo273cDqdpKen07NnTwClV1VZWZnbPQICXNXTxo0bK+ftdrtbMU2n0+Hv768UrKoLUb9nNWrUCEDxVX3+4hVcF9+zeqvD6h5fb775Jhs3buTDDz/kpptucuvxdebMGXJzcwkNDeWRRx5h165d+Pv789JLL/Hiiy9iMtX+E0xaQ7nLqy408NM8qo8ngimjRxkzi2DK6FGNmS/+BerFemPZbha+cHutuRdLhudY13gimDJ6lDGzCKYMHrWftWKYMnpUW+bw4ADatQzjYEY+jos+h6zXuXpENTD7e8Xv9f4c6yJPBFNGjzJmFsGU0aOMmUUwZfUoi666YLVo0SJmz54NuAovU6dOZerUqZcc63Q6+ec//+mRwUspMTGRDz74wK2X1VdffYVer1cKSpdSp06dCAoKYuPGjUrBqqioiKqqKu6
++243/po1a5RVVACff/45hYWFfPjhh6xatYrmzZuj0+nYsmULo0aNUsYdPnwYgKCgILd733HHHZSXlxMXF8eYMWMoKSmhSZMmigeAffv2kZCQgNFo5I477qBZs2YAbj23DAYDL7zwAhUVFURGRvL3v/+djIwMdDodRqMRuFAszMzMxNfXl/nz5xMREeHW4ysvLw+A119/XVnB5XQ6Wb16NcXFxbz77rt/bDIuIa2HVU3Vhf1QNY/q44lgyuhRxswimDJ6VGvmNT+kXfH8xp9Ock+vlrXmy/Ic6xJPBFNGjzJmFsGUxaP2s1bzqEaeN5nJg9sz54tUUtPylWNtW4bx1L3tPX5PL9NzrCs8EUwZPcqYWQRTRo8yZhbBlNWjbLrqglXPnj0JDAzE6XTy73//m7vvvpu2bdu6jdHpdAQEBNC2bVvat2/vNbPVGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOJSIiQhk3cuRIsrKy2Lx5MwB+fn4kJSUxa9YswsLCaN26NQ6Hg/Lycrei01133cWLL77Ijz/+yHfffcf58+d5+eWXCQ4OZsaMGeTk5DB58mQAfvvtNyZNmkT//v3ZuXMnu3fvdvNavRKroqKChx9+mD179jB69GgMBoOygqqqqkoZ36RJE26//XZWrlypXFu9Mis3Nxe73U5xcTGDBg3C6XQyYcIEdDqd29aLVVVVZGZmkpCQQP/+/fniiy946KGHCAgIUHpqORwOZWz9+vVp1qwZTz/9NMuWLeOrr77i9OnTboWyPyqth9WVVRf2Q9U8qo8ngimjRxkzi2DK6FFtmY+eKbri+cOnCxmhor4TongimGrniWDK6FHGzCKY17tH7WetOKaMHtWYOTQUpib3Jiu3hKw8K5ENTESGB/3vC69CMjzHusYTwZTRo4yZRTBl9ChjZhFMWT3KoqsuWHXs2JGOHTsCri3q7rzzTlq39m7j2f+l4OBgFi9ezJQpU0hOTsZkMjFkyBDGjh3rNs7hcGC3292OjR49GqfTycKFC8nPz0ev19O/f3+34oyvry/16tWjXr16jBs3Drvdjk6nY/Xq1cqqqB07drB+/XomTpzI8uXL+c9//kNkZCSDBg1izZo1BAcHU1FRgcViISoqiltvvZX//ve/WK1WfH19CQgIoH79+gCkpqYC8PTTT/PLL7+wePFijEaj4r26YHXs2DF8fHyYNGkSH374IVlZWdSrVw+Hw6FsC5idna3k+O233/jtt9+UrwsKCpQVVtXbOnbr1o0dO3bw1FNPcdddd9GmTRv69u3LsWPHalWw0npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGtmVs3CSb1xPnLno9tGqL1nbjOeCKYMnqUMbMIpiwetZ+1mkdv8H7Ym8WxzCJaNwmmV3ykxzzwvscgPwM3xUVgsZR59D19sdQ+LyKYaueJYMroUcbMIpgyepQxswhmXfB4ID2f03lWmoWbaONhz9PrSWazwB5WF+vpp592+7q4uJjAwECqezOJVHR0NIsWLbrimKVLl9Y4ptPpSEpKIikpCYBhw4YpK5mqVVxcTH5+PuPHj2fw4MEMGzaM4OBgpVgFMGDAANavX8/58+dZt26dcnzatGlKP6wdO3bgcDhwOp08//zzPP/88wBMnTqVpUuXKj21Dh06BLi2EazO5HQ66dy5M1arlaioKGw2G9nZ2djtdu68807uv/9+ALZs2cJTTz1Fq1atANi2bRsAt9xyC/Pnz1d8JSUlsXXrVvr16wfAzp07AYiLi7vkc6yoqLjis72StP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPast8d/cWfP795beq6n9zc63vxHXKE8GU0aOMmUUwr3eP2s9acUwZPKaftTB16a9U/95r6+5MFqw/yEsjb6J5hFkVHkXzRDBl9ChjZhFMtfNEMDWPcvBEMDWPtVNOQSmvL/mVkrILO6kFBfjw8sibCA8J9IZFaeTxZoqpqamMGjWKDh060K1
bN37++WcA8vPzefLJJ5XCiFqVmJjIjz/+qKw4gpr9sNLS0pTiUrV69eqFTqcjJSVFOVZZWcnXX39NYmKich1ARkYGGRkZyjidTofD4eDmm28G4OTJkzRo0IBNmza5jTEajQQFBdGkSRNOnTqF3W5Hr9fz9ddfK+MaNmwIoBSs0tLSCAwM5JdffnHL5HS6OqlWZ9q/fz/h4eGsW7eO7t27065dO4YOHcqnn34KUGObR02aNGnSpEmTevTcQwlXdVyTJk2aNF29tJ+1mmqri4tV1bI74LXFv/45hjRp0qRJkyZNQvX7YhVASVkVU7R/+69aHq2w2r17NyNHjiQiIoKBAwfy2WefKefCwsIoKSnhk08+oVu3bh4b/b1OnDjBa6+9xp49ezCZTAwaNIh//vOfGI3GK17ndDr58MMPWbFiBfn5+dx44434+fnV6Ic1cOBAXnvtNbZt20ZpaSkrVqxg48aNbNmyBXD1wwoLC2Pv3r307t2b/Px8ZRu/6n5YFosFX19fmjdvztChQ7Hb7ZSVlSnFo2bNminj4uPj2bp1K4mJiRQUFGAwGCgrK1O2W6zeyu/222/ntddeY+bMmRQVFeHj45rChIQEhdWgQQNKSkro27cvdrudqqoqKioq0Ol0So+v3NxciouLKS8vx8/PD7vdTmpqKnv27OHOO+9UvNVGnjZovR5VFxr4aR7VxxPBlNGjjJlFMGX0qObM7aIasOSlvnz5YzoHTxbSpnkId/do6Q2LUj3HusITwZTRo4yZRTBl8qj9rNU81kZb95ypUayqlt0BP+4/S2JC41rz1ZhZNFNGjzJmFsFUO08EU/OoTo8yZhbBVKvHfSfyahSrqlVSVsWhUwW0j6pfa75s8qhgNXPmTKKjo/n0008pKSlxK1iBqz/SF1984ZHBS6moqIiRI0fSokULZs2aRU5ODtOmTaO8vJyJEyde8doPP/yQ9957j/HjxxMTE8Py5cs5ceIElZWVSj+swYMHs337dnQ6HTNmzCA5OZmysjKqqty/8fz9/XE6nRQXF6PT6QgKCuLcuXMcOHBA6f+k0+mIi4tj48aN6PV6fH19MRgMFBcXu21FGBwcTFBQEBaLBYfDQb169SgvL3dbJQXQunVrvvnmG3Q6HXq9Xhl37tw5t3F2ux273U5FRQX+/v74+flRVFTEkSNHiImJwel0Ul5eDri2IiwqKsLX15eqqqoaWyRejfR6HaFeaD58vaouNPDTPKqPJ4Ipo0cZM4tgyuhRzZkfvrudVziXkkzPsa7wRDBl9ChjZhFMmTxqP2s1j1ejtLPFVzx/PMvCoNs87wOupszXiimjRxkzi2CqnSeCqXmUgyeCqXm8emXln77i+czzpSR2rv3iENnkUcEqNTWVcePGYTQa0el0Nc5HRESQl5fnyS0uqVWrVmG1Wnn//fcJCQkBXAWayZMnk5SUpKwi+r0qKiqYN28ejz32GI888ggAnTt3pl+/fsTGxrJq1SoA1q9fz+LFi9mwYQNRUVGEhIRw8803s2HDBvbt20d8fDzgWqXUsGFDfvjhB+Uezz77LO+99x79+vXDbDZjs9n48ssvmTRpEg888AAAixYt4o033uCrr77iqaeewmw2c/DgQaqqqti6dauSqW/fvpw5c4acnByCg4MB+Pzzz/nLX/7CjBkzANd2g3fddRffffcdTz/9NGazmYKCAoqLi9myZYvSd+vf//43H330Ed988w0xMTGYzWb0ej19+/Zl1qxZiv/evXvz888/U1lZia+v71XPjcPhxGIpverrrncZDPI1GZTRo4yZRTDVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEcy64PFcYRnF5XbM/gbCQ2r3i6uoG+qx9QrnW0WaKSio/YdEZZwXGT3KmFkEU+08EUzNozo9yphZBFOtHiPDrvyaoXH9QI/+7b8eZDYH/OFVbB4VrHx8fHA4Lj+ROTk5BAZ6v6lYSkoK3bt3Vwo7AP379+e
VV15h+/btDB48+JLX7d69m5KSEvr3768cMxqN3HHHHWzevNmNHxMTo/StioqKwmazERISwvfff098fDznz5/HZrPRpUsXt3sMGDCA9evXc+bMGeV6h8NBv379lDHZ2dn4+/uzY8cOnnrqKaKiovj666/p0aOHksnpdGKxWHA6nWzfvp2//OUv+Pj4kJOT4+a/uk/W4cOHsdlsREVFUVJSAkC9evWUcSdPnkSv1yvbEbZs2ZJ9+/YpPa2qFRwczLlz5/jtt99qZPuj8nbTu+tJMjQZFM0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY9y8EQwNY+1U0lZJfPXHmB/er5yrF3LMJIGtcXkf3Uf5OzVPpLFGw9fcltAgx56tLvBK/llmBfRPBFMtfNEMGX0KGNmEUwZPcqYWQRTbR7bNA8jKMDnktsCBgX4ENcsVPt9+VXIow0fO3TowKZNmy55rrS0lNWrV9e66HElpaWlERYWxqOPPkpCQgI9e/bkgw8+IDw8XCngXO46gO+++45bb72V+Ph4HnjgAYxGI1lZWcoWeWlpaURGRjJmzBg6duxIamoqW7dupUmTJgqjejVWeHg4AwcOpH379tx1110cP35cYXTq1AlfX18CAgL44IMP6NmzJx06dGDFihU0btxYYSUmJlJWVoavr6+SqWvXrhQVFREaGkpaWhpGo1HpZ3X06FHuuusu2rdvz3PPPUdoaChVVVWcPn2aXr16KVsP9u/fnw4dOhATE8N3332Hj48PgwYNAqBHjx4Aygq47OxsEhISOHbsGODqEaZJkyZNmjRp0qRJkyZNmjRd75q/9gAHM/Ldjh3MyGfemgO14r008iZ+/yFig951XJMmTZo0adJ0/enlkTcRFOC+NigowIeXtX/7r1oerbB65plnePjhh/n73//O3XffDcCRI0c4c+YMCxYsID8/n6eeesorRi9WUVERGzZsoE2bNm49rPR6PUVFRZe9zmKxYDAYmDNnjlsPq8WLF+N0OikqKsLf35+ioiJOnTpFREQEM2bMID8/n5dffpnDhw/jcDj4/PPP+eijjwBYsmQJ999/PxMmTGDChAm8/fbbikc/Pz9iY2NJTU1l+fLlDBs2jL1797Jnzx7Onj2LzWYD4K677uLZZ59l06ZNtGrVihEjRrBq1SoMBgMOh0PJdOutt3Lw4EHeffddBg4cSIcOHVizZg16vV65Z3R0NIMGDWL16tUUFRUpfbfsdjvz589XemsNHDiQCRMm8PHHHxMVFcXSpUuprKx0e8a1lY+P9xrfXS+SpcmgSJ4Iptp5IpgyepQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2Cq1ePZ81a3lVXVcjhhf3o+eZZyGoVd3c4x0Y1D+HhCX7bty+LomSJaNwmmV3xkrT1eLFnmRSRPBFPtPBFMGT3KmFkEU0aPMmYWwVSzxxsaBDHn2Vs5mJHPqVwrzcJNtGkR5g2L0smjglWHDh2YP38+kyZN4vnnnwdg2rRpADRr1oz58+cTGxvrucvfyel0YrPZavSwmjhxImVlZZe9rqqqCrvdzuOPP+7Ww+qWW26hoqJCGVdaWkpRURGrVq1y29bv5Zdf5tChQ8yYMYPbbruNL7/8klatWvHqq68C0LhxY86fP6+s1AJo2rQpqamp+Pn5sWzZMuLi4pg/fz5PPvmksp2ir6+v0gMsMzOTlStXcscddxATE8PUqVOVTC1btgQgKCiIjRs3EhkZyeuvv86SJUs4cuSIcs9nnnmGb7/9FqvVisFgwG6306VLF5599lmWL19OdHQ0er2eESNGsHjxYp5//nlsNhsNGzbk3LlzAJfsSfZHpNfrCA011epaGXS9Nxm8FjwRTLXzRDC9ycvMLeHooRwiG5iIDA/yGlfNmb3NFPUMQa7nWFd4IpiaRzl4IpgyepQxswimjB5lzCyCqTaPJ7JLrnjeanPU+j3uPbfcWKvr/oiu93m5Fjw
RTLXzRDBl9ChjZhFMGT3KmFkEU80ee4aa6Pm/h2m6gjwqWAF0796dTZs2cejQITIyMnA6nTRt2pR27drVuujxv6TX64mMjKzRw2rixIlYLJbLXle9aqhPnz7KMaPRSGxsLD/++CPBwcGAq7BVr149pVgFcP/99zNp0iSioqJYv349hw4d4ssvv6Rjx47KmKVLl7Jy5UomTZqkrFYqLi4G4JtvvlH4AE2aNOHMmTPK1waDgcaNG/P1118rxywWC1OnTlUyVa/IGjduHMOGDVPG7d+/nyNHjij9wpYsWYLRaOSHH35g/fr1vPjii7z99tsMHz6cOXPmMGPGDAD++c9/cu7cOb788ksACgsLFWZ4ePhln+OV5HA4sVhKa3Xt9SyDQY4mgyJ5Iphq54lgepNXUlbJ3C9SSU278InU9lFhPHVve0wBV7fXvyiPInjeZIp6ht70KIongql2ngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCqVaPgb5X/r2FyaivdaN0tWYWyRPBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOr2DwuWFUrLi6OuLg4b+GuWldTHPsjYy83xul0es1HbVm/v+73444fP05UVBRGo1E5ZjAYiImJ4dSpU8oxf39/2rZty6FDh3j77bfZu3cvr7zyCuBaPVdbaU3kLq/rvcngteCJYKqdJ4LpDd6c1ak19vo/kJ7P7NWpjHsgwSM2qDOzt5minyHI8RzrGk8EU/MoB08EU0aPMmYWwZTRo4yZRTDV5jE8OIB2LcM4mJGP46K32nodtGkRRgOzv8d+1Zb5WvBEMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkqypY/fLLL7W6SZcuXWp13eXkcDjIysrCYrFgNpsB+OqrrwCUrwFOnDjBa6+9xp49ezCZTEr/pi1bthAfHw9AZWUlhw8fBlB6WPn4+JCXl0evXr2wWCzExcVx9913Y7fblVVMpaWuVURffvkl69atw9fXlzvuuIOCggLAtc3fxX7uueceCgoKiIyMZPjw4Zw5c0bZErA60+nTp+nevTtlZWV07NhReW7VjOoC1OzZs3nrrbcwmUwMGjSIXbt2uXmqqqpiz5499O7dW9niz263c/jwYaWoaLPZeP311/n000/x8fHhr3/9Ky+99BIACQkJbqvLrlZaD6uakmnPVlE8EUy180QwvcUTsde/tz2K4nmLKfIZesujSJ4Iptp5IpiaR3V6lDGzCKbaeSKYmkd1epQxswimmj0mD27PnN+tem/b0rXq3ZP3t2rOLIongimjRxkzi2CqnSeCqXlUp0cZM4tgyupRNl1VwWr48OFXvYJIp9Nx6NChqzZ2Jel0OoxGI8nJySQlJZGTk8P06dMJDg4mIMC132RRURGDBg1Cr9cze/ZscnJymDx5MjqdjoULFxIWFkbr1q1ZuXIlVqv78v7q7fwAnnzySbZs2cLUqVMJCQmhXr16gKsoBFBSUkJiYiLt27dn0aJFNVjVX+fn5zNixAhycnKYMmUK/v7+biulqotXwcHBjBw5ki+++IL33nsPs9msZKruZZWfn88999xDw4YNWbJkieLlYlZFRQV6/YW/GBMmTODkyZO89tprAJSXl/Ppp5/i7+9PVFQU+/fv57PPPgPgX//611XPSbW0HlZXlkx7toriiWCqnSeC6SlP5F7/1VJbZm8zr8UzhOv/OdZFngim5lEOngimjB5lzCyCKaNHGTOLYKrRY2goTE3uTVZuCVl5VtX3ZhXB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiqypYLVmyRJSPq1JwcDB9+vThzJkzJCcnYzKZGDJkCOvWrVP6RK1atQq73U6DBg3o3bs3ADt27GD9+vU8/PDDLFy4kPz8fOLi4hgxYgQfffQRwcHBVFRUUFxcTPPmzYmJiWH+/PkYDAYCAgLQ6/UKPzU1FYBhw4bxyy+/sGPHDkJDQ5UCVfW4Y8eOYTAYGD58OP/973+
xWq00aNCAiooKgoJcL4Czs7MB6Ny5M0ajkblz5xIQEIDBYMButyus3bt3A/DEE0+wceNGsrKyCAsLU1ZRVY/7+OOP2blzJ3PmzFHYhYWFzJ8/X1m1deLECXQ6HQ0bNuTIkSOAaxUWQEREBGVlZUqh7Gqk9bC6tAwG9e+HqnlUH08E01u8urTXv1rnReQzBHmeY13iiWBqHtXpUcbMIphq5wGcKyyjuNyO2d9AeIjnb0plnBcRTLXzRDA1j95hBvkZuCkuAoulzKPXYaL8iWBqHtXpUcbMIphq54lgah7V6VHGzCKYsnq8HmQ2C+ph1bVr11oZ8raioqIoKChg0aJFyrHi4mI+/vhjZSu7lJQUbrvtNubMmaOMGTBgAOvXrycoKIjvv/9eOT5t2jQiIyPx9/dnx44dOBwO9Ho9s2bNUsZMnTqVpUuXKvzqVWPNmzdn4sSJgGtFWefOnbFarURFRWGz2cjOzsZut/PEE0/w/PPPA64tCZ966imFtW3bNgCCgoKYP3++cs+kpCS2bt2qjDtx4gQA7du3Z+zYsQBYLBa6dOmCwWBQtjzU6/V0796d7t27ExMTA8C8efMICwtT2Onp6djtdjIyMpRjx48fB6Bv374MGDCAmTNn/tEpcZO2P+flVRf2Q9U8qo8ngukpry7u9a+2ebkWz9BTj9eCJ4Kpdp4IpuZRDp4IpowevcErKatk/toDblu7tmsZRtKgtpj8fT21KOW8iGCqnSeCqXmUgyeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFV1WwupLOnz9PZmYmAI0bN6Z+/freQtdQYmIic+fOZfjw4aSmpmIymYiNjUWv19OzZ08A0tLSuO+++9yu69WrFzqdjs8++4zPPvuM/Px8YmNjycrKom/fvsp1ABkZGTz22GPs2bMHX19fwsPDcTgc3HzzzQCcPHmSBg0a8Mknn/D555+Tnp5OZGQkTqeToKAgmjRpwvHjx7Hb7ej1esaOHcuRI0ewWq20atUKQPl/WloagYGB/Pzzz26Z/P39AZRMmZmZBAcHM3/+fP7973+TlZVFy5YtMRgMREZGKj2uwFXAe+ONN5SvJ0yYwKuvvkrDhg0B6N27N4888gi//vorx48fp7y8nAYNGpCXl8fs2bNp0aKFV+dMkyZN15+SBrVl3hr3X9a1aeH6ZZ2mPybtGWrSpElT3dT8tQc4mJHvduxgRj7z1hxg3AMJf44pTZo0adKkSZMmTZo0aarj8rhgtWPHDv7973/X6FMVFxfH+PHj6dGjh6e3qKEBAwbwzjvvcOjQIZ544glOnjzJ6tWrad26NREREYBr5dGmTZvYtGkTmzdvBsDPz4+AgADOnj1L37596dSpE4sWLSI3N5f+/fsr1/n6+qLT6fjll194/PHHKSkpYenSpQA0a9ZMGdesWTN2795NdHQ048aNY/PmzWRkZBAZGQm4+mgB3HDDDWzfvp3BgwfTvHlzZRVVXFycwgoLCyMrK6tGJsAtU7t27fjtt9/o3LkzQ4cO5bPPPsNutytjqjVixAiysrKUrw8ePMgDDzzA3LlziY2NJTw8nE2bNtGrVy8SEhJYtmwZTZs2JS8vj+zsbKWAVxt50pT2elVdaOCneVQfTwTTm7zgID+eG9aJ3MIyLF7eDuni/6uN502mqGd4sTcZnmNd4Ylgah7V6VHGzCKYauWdPW91+6BBtRxO2J+eT56lnEZhgX+qR5FMGT3KmFkEU0aPMmYWwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfUomzwqWG3evJl//OMf1K9fn8cff1xZlZOens6aNWsYPXo077zzDnfccYc3vCrasGEDRqOR2NhY5s6di8lkomfPnuzYsYOcnByleONwOHA6L+yzVFFRQVlZGeHh4ezfv5+UlBRiY2NxOBxs3LiRbt26Aa6t/SorK+nZsyeLFi3Cx8eHmJgYDh8+zKFDhxR+VlYWUVFRGAwGZsyYQWRkJPXq1aOgoMDNb1Z
WFj169OD777/HarUSFxfHnj172LNnD8OHDwfAarXWyNS8eXNOnjzplikjI4P4+Hjy8vKYMWMGLVu2xNfXlzNnzij327NnDwcPHnTzkJOTA8CcOXN47733AFi9ejVhYWFKYezuu+9mz549rFixgocffrhWc6PX6wgNNdXqWhlUFxr4aR7VxxPB9CZP1N95NWf2NlPkz02ZnmNd4GXmlnD0UI7WzF2lTLXzRDBl8uitv38nskuueN5qc3j8c12meRHJVDtPBFPzKAdPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWL3zzjvceOONLF++nKAg9zd9TzzxBA8++KCQglVKSgo9e/Z0609lsVjo2rWrspLJbDYzYMAAnn32WWXM7t27cTqd9O7d2227vDfeeENZhWU2m6mqqqJ169YsXLhQGfPJJ58wceJE9uzZw6233kq9evU4deoUL7zwAo888ogyrn///qSlpXHmzBmCg4MBVwHsnXfeUb7OyMjgrrvu4tixY8o9S0pKSExMdMs0bdo0Pv74YyVTUFAQhYWFvP76624roDp37sy5c+ew2WwYjUZSUlIwm838/PPPxMbG8txzzzFq1CjuvfdeAgMvfNrz4p5W4CpYZWdn85///OfqJ+X/y+FwYrGU1vr661UGg/ob+Gke1ccTwZTRo4yZRTBl9OhNXklZJXO/SCU17cKqjPZRYTx1b3tMAbXvdyPjvIhgqp0ngimTR2///Qv01V3xvMmop6DAetVckGteRDLVzhPB1Dyq06OMmUUwZfQoY2YRTLXzRDA1j+r0KGNmEUxZPV4PMpsD/vCqM48KVqdPn+bZZ5+tUawCCAoKYsiQIbz99tue3OKSulR/KrPZTHh4uNKDKioqSvlztapXHXXs2NHteHR0NIsXL6a8vJyoqCgApddTtdLT0zEajZw6dUo5v3//fmU8uApTeXl5isebb74ZvV5PQECAUqyqPgeQm5ureK2srKRx48Zu98zMzMTX11cZHx4eTmFhIS1btlTGFBcXY7VacTqdnD59mujoaNLS0mjZsiU6nfub6Us9k99r165dbplqI62h3OVVFxr4aR7VxxPBlNGjjJlFMGX06A3enNWpNfrdHEjPZ/bqVK/0u5FxXkQw1c4TwZTBo7f//oUHB9CuZRgHM/JxXNjMAb3O1Yewgdnf4/wyzMu1YKqdJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUeFayioqLIz8+/7Pnz588r2wR6UxaLhcrKSh599FH27NmDyWRi0KBBmM1mpW9UYmIiH3zwARaLBbPZDLhWWAGcOXOGW2+9lfz8fOLi4rjttttwOp0UFRXRqVMndDodOTk5jBkzhm3btuHj40NVVRVhYWEKv02bNnz77bfs3LmTt956i/T0dEJDQ7FYLICrf5XRaFSKTG+++SZr167FarViMpkICQmhpMS1nUivXr0AOH78uJIpICCA4uJit0ytWrXi2LFjbNq0iTVr1pCVlUVoaKjyXKrHWSwWAgMDmTBhAgAzZ85k7969mEwmZQxAamoqK1as4IcffgBcfa+OHTvG7NmzPZofrYdVTdWF/VA1j+rjiWDK6FHGzCKYMnr0Fk/rdyOfRxkzi2B6gyfq71/y4PbM+d2qrbYtXau2PHktLMu8iGaqnSeCqXlUp0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNHhWs/vWvfzFu3Djat2/vtkUduPpbffLJJ8ycOdMjg5eS0+nks88+o02bNsyaNYucnBymTZuGXn/hG2Ho0KEsXbqU5ORkkpKSyMnJ4YcffkCn07Fw4ULGjx9PTEwM48aNc/Po5+eH2Wzm2LFjFBYWMnr0aL777jtSU1Px9b2wZUjXrl0B+Oijj+jduzd9+vRhyZIlNby2adOG7777jqVLlzJixAjOnTvHunXr8Pf3V/prNWrUCJ1Ox48//kiLFi144okn+O9//0tBQQGVlZUK6+a
bb2bjxo28++67/OUvf7nsPUtLSzl69CiHDh0CoFOnTuzbtw+LxUKDBg2UcZ999hkpKSmYTK499o8fP063bt08KjJqPayurLqwH6rmUX08EUwZPcqYWQRTRo+e8rR+N2J4Iphq54lgXu8eRf39Cw2Fqcm9ycotISvPqvWlUylT7TwRTM2jHDwRTM2jHDwRTBk9yphZBFNGjzJmFsGU1aMs8qhgtXTpUkJDQxkzZgwNGzakWbNmAJw6dYpz587RokULlixZ4lZU0el0zJ071yPTRqORiooK3n//fUJCQgCw2+1MnDgRHx9XpODgYBYvXsyUKVNITk7GZDKRkJDAzp07GTFihNJ3KioqioKCApxOp7JtX7169SgqKsLpdDJ37lzi4uJ44YUXeOONN5QiU/369QEwmUzs2rWLvXv3ctddd5GTk8O2bdsUVkREBODqF7VkyRIiIyP5v//7P958802MRqOSyc/Pj4qKCoqKipg7dy6dOnXiL3/5C7NmzVIy3XDDDQCEhITwzTffYDKZGD58OFu3biUtLU25Z25uLqWlpZSWunpJ7dy5U7nPxQWrH374gby8PGUbQ6fTyc6dO9m4cSNjxoyp1dxoPawuLYNB/fuhah7VxxPBlNGjjJlFMGX06C2e1u9GPo8yZhbB9AZP5N8/gCA/AzfFRWCxlHnEqZYs8yKaqXaeCKbmUZ0eZcwsglkXPJ4rLKO43I7Z30B4iOe/pKwLmWX0KGNmEUwZPcqYWQRTVo/Xg8zma9TD6ujRo8CFQkpmZiYABoOBG264gYqKCmVMtX7fV6k2MhgMBAUFKcUqgN69ewNgs9mUY9HR0SxatEj5esGCBezcuZN27dopx5YvX87gwYM5fPgw/v7+ynE/Pz+2b9+ufO1wOJg2bZqy4qlRo0YAdO/e3W0LvZkzZ7Jt2zYCA11bi1RUVCj3adq0qTLuww8/pKysTPnax8eHevXqsW3bNuVYVlYWs2bNUjJV+/vb3/7Gs88+q4w7c+YMaWlpiqcbbriBzMxMDh065LbqrEOHDkrBDeC7774jPz+fhx56iJycHDp16sSCBQvwVNr+nJdXXdgPVfOoPp4IpoweZcwsgimjR0954cEBxDYL4fCpwhrn4pqHaP1uVMRUO08E83r3eC36TXnq8VrwRDBl9ChjZhFMGT3KmFkEU40eS8oqmb/2gNv2s+1ahpE0qC0mf98rXHlt/F0LpoweZcwsgimjRxkzi2DK6lEWeVSw+vbbb73l46pkt9spLCx0609VXVy6eNXS71W9UungwYMMGDAAgMrKSjIzM7Hb7ZSXl+Pv749Op6OiooKMjAxle7yffvoJp9OpbAuYnZ0NQEZGhts9qgt01aub/Pz8lOurC1ZFRUUUFBS4bTH4RzKVl5cDcOzYMbd7pqWlKZ6io6MxGAzKPXv06AFAeno65eXlbgVDq9XK6NGjqayspHXr1srz0aRJkyZNmjR5V5f9uI7zcic0adLkLSUNasu8Ne6/TGzTwvXLRE2aNGnSpMkTzV97gIMZ+W7HDmbkM2/NAcY9kPDnmNKkSZMmTZrqsOpkhcJms+Hv7+/Wn2r69OkEBwdTVVWljBs5ciRZWVls3rwZcBWRDAYDixcvJjw8nNatW7Ny5UqluNStWzeCgoKoqKjAbDYzZswYxo0bR1lZGdOnTycsLEwp+BQVFQFw4sQJOnbsSGVlJWFhYZw7d87tvN1uJygoiEmTJvHqq6/i4+NDYGAgfn5+SgGqOpOvry+JiYlUVlYSHByM1Wp1y1TN/O6770hISMButxMaGkpubq7beYPBgNlsJikpCXAVzXx8fNxWpAHceuutWCwWwLVKCyAmJoa3336bu+++u9bz40mj6etVdaGBn+ZRfTwRTBk9yphZBFNGj97inT1v5dAlVlcBHDpVSJ6lnEZhgbViyzgvIphq54lgyuQxOMiP54Z1IrewDIsXt2u62JvaMotkyuhRxswimDJ6lDGzCKZaPZ49b3X7MES
1HE7Yn56vvcZTAVPtPBFMzaM6PcqYWQRTVo+yySsFq8rKSnJycrBYLG5bzlWrbVvvfnpRp9Nx//33c+TIEaU/1ZAhQ0hJSXEb53A4sNvtbsf0ej1PP/00CxcuJD8/X1lZZLPZeO2116ioqOCll16iefPmtGjRgnHjxuHj48Mdd9xBTk5OjXw6nQ6TyURBQQEWiwVfX1+3bQkdDgfl5eXUq1ePqqoqysvLsVgsNGzYkJycHDdWeXk5ERERFBQUUFpaSnl5udKX6mIZjUYCAwMpKirCYrEQGBhIScmFhtJVVVWUlpYSEhJCaWkpFRUVlJWV0axZMxyOC0sRq4tVv9e4ceNqXbDS63UeN46/nlUXGvhpHtXHE8GU0aOMmUUwZfToKe9EdskVz1ttDo//7ZRxXkQw1c4TwZTJo8jXqGrNLJIpo0cZM4tgyuhRxswimGrzqL3GE8MTwVQ7TwRT8ygHTwRT86hOnkzyqGBlsVh48803WbdundLb6WI5nU50Oh2HDh3y5DY1ZDabMRqNbv2pANavX+9W4Fm6dGmN6yorK3nkkUeU1Ufz5s3j/fffR6fTcccdd+Dv78+cOXM4efIkS5YsISIiQrl+6NChSr+ugADXN12/fv2YOXMm4Fol1bdvX3JychQf58+fp6qqihUrVhAVFQXAtm3bGDVqlJtXvV5PgwYN3Ipuzz77LBs3blTGVRebnnjiCZKTkwEoLCwkMTERQBlXWFiIw+Hgyy+/VFZVffLJJ7zyyivExcUp/K5duxIYGMi8efMYPny48mdP5HA4sVhKPWJcjzIY1N3QFbzv0ds8EUy180QwZfQoY2YRTBk9eosX6Hvl/p0mo56CAmut2DLOiwim2nkimDJ6lDGzCKaMHmXMLIIpwqO33xfVhcyaR/V41F7jyedRxswimDJ6lDGzCKasHq8Hmc0Bf3jVmUcFqxdeeIHvvvuOAQMG0KFDB+rVq+cJ7g8rKipK6dtUreLiYnJzc5Wi0OWuA1c/p9jYWABSUlJo1KgRdrsdf39/ADp06EBmZibbt29n8ODBgKv4lp6eTs+ePQHIy8sDUApY4Fr51K5dO3JycpR7FRYWAlC/fn1lXM+ePfH19cVkcn3SxmazUVVVVWM11W233cb69euVolP1doPVvbAAQkJCaNGiBUePHlWOFxcXo9fr3Xj9+/dn4sSJSkaR0hrKXV5qb+gKdaPJoNo9yphZBFPtPBFMzeP1yQsPDqBdyzAOZuTjuGihtl7n6qPTwOzvsV8Z50UEU+08EUwZPcqYWQRTRo8yZhbB9AZP9PsiNWYWzdQ8Xr2013hieCKYaueJYGoe5eCJYGoe1cmTSR4VrLZv387w4cOZMGGCt/z8ISUmJjJ37lyGDx9OamoqJpOJ2NhY9Hq9UlC6lDp16kRQUBBTp07l1KlT5OfnU1VVhZ+fH4MGDVLG9enThw0bNvD+++8zZcoUfH19iY+Pp7CwkFtuuQWA06dPA65+Utu2bSM9PZ3IyEiqtyBs0KAB4NrmT6fTMXbsWI4cOYLVaqV9+/bY7XalwHfq1CmcTicZGRlumZo3bw5Aw4YNAVfxy2AwsGzZMmbPnk1WVhYtW7YkJycHHx8fjEYjACUlJVRVVTFw4EBOnz5NWVkZn3zyCYBbH6tz585x6tQpYmJiADAYDMydO5cnn3zSswnSJExaQ1dNmjRpqptKGtSWeWvcf7HWpoXrF2uaNGnSpEmTpquT9r5Ik1qkvcbTpEmTJk2avCuPClYhISFKUeVaasCAAbzzzjscOnSIJ554gpMnT7J69Wpat27ttoXfyJEjycrKYvPmzQD4+fmRkJDAtm3b6Nu3L506dWL69OmUlpbSv39/5brbb78dnU7H2bNneeKJJygpKWHZsmWEhYURHx8PuLZDNBgMpKWlER0dzbhx49i8eTO7du0CoKioCH9/f8rKyqhXr56yWqt58+Z8+OGHOBwOgoKClLHg6gX2+0zg6pNVfU+TycTevXvp3Lk
zQ4cO5bPPPqOwsBC9/sKSutLSUnQ6HRkZGURFRXH48GEmTJiA0WgkLCxMGZebm0uLFi2Ii4vjxx9/xGKx8M4773Dw4EGmT5+ubHt4tfLx0ZrK/V5qb+jqLY8ieSKYaueJYMroUcbMIpgyevQmLzjIj+eGdSK3sAyLl7cuuvj/3pCan6Moptp5IpgyepQxswimjB5lzCyC6S2eyPdFas0skql59IypvcaTy6OMmUUwZfQoY2YRTFk9yiaPClZ/+9vf+PLLL3nwwQfdCiaitWHDBoxGI7GxscydOxeTyUTPnj3ZsWMHOTk5StHK4XAoK54AKioq2LNnD127dmX//v1Kv6jAwEA2btxIt27dAPj2229xOp00bNiQRYsW4ePjQ48ePdi2bRv79u1TilYOh4OoqCgMBgMzZswgMjKSZs2acerUKeWeVVVVWCwWevTowffff4/VaiU+Pp5ffvlF2VawWj4+Pm6Zunbtys8//6wUtMBVjIqPjycvL48ZM2bQsmVLwsLCKCgoUMbodDqeeuopsrOz+fLLLwHXNoIOh8NtnqZPn05ycnKN7RW//vprnn/+eZo0aXLVc6PX64Q2tK7rUntDV6gbTQbV7lHGzCKYaueJYGoer3+eqH8jZZwXEUy180QwZfQoY2YRTG/yMnNLOHooh8gGJiLDg7zGVXNmUUwZ5uVavC9S87yIYmoePZP2Gs+7UrtHGTOLYMroUcbMIpiyepRFHhWskpOTsdls3HfffQwaNIiIiAgMBkONcXfeeacnt6mhlJQUevbsyZw5c5RjFouFrl27uvWdWrp0qdt1u3fvxmq1MmHCBOLi4gDo3r07jRs3VopX1XwfHx8GDhzI+PHjAVcPq5tvvpnvv/+e+Ph4TCYTTqeT++67j8cff1y5dvLkyaxYsYKSkhK31V7vvPOOW0+prl27UlxcDKAcb9u2LcuWLVPG7N+/n/vuu0/ZflCn01FVVUVSUhJ9+/ZVxj366KP8+OOP2Gw2jEYjZrOZyspKpk6dyk033cSLL77IG2+8waBBg9w89O3blyNHjrg9o7/+9a8cPny4VsUqAIfDicVSWqtrr2cZDOpu6Are8SiSJ4Kpdp4IpoweZcwsgimjRxkzi2DK6FHGzCKYaueJYMrmsaSskrlfpJKadmG1TPuoMJ66tz2mgNr3IVJzZlFMmeZF5PsiNc+LKKbmUZ0eZcwsgql2ngim5lGdHmXMLIIpq8frQWZzwB9edeZRwSonJ4edO3dy6NAhDh06dMkxOp3usudqq7S0NO677z63Y2azmfDw8BqrhX5/HUBUVJRyLCoqCqvVSlZWFuXl5fj7+3Ps2DGqqqrcxul0Olq2bKkwTCbXp2d+v21eeXk5AJmZmURHRxMQEIDBYHArFDmdTioqKpSt/po1a3ZJ1rlz59yY/v7+wIX+WNWqqKgAXH21oqOjiYqKqvEcSkpKyM3Ndct0KeXm5l6y6Hg10hrKXV5qb+jqqcdrwRPBVDtPBFNGjzJmFsGU0aOMmUUwZfQoY2YRTLXzRDBl8ThndWqNPkQH0vOZvTrVK32I1JhZNFOGebkW74vUOC+imZpHOXgimDJ6lDGzCKaMHmXMLIIpq0dZ5FHBasKECRw4cICkpCTi4+OpV6+et3xdURaLBbPZXON4cHCw2/Z5J06c4LXXXmPPnj2YTCaaN2+O0WjEz89PGZOYmMjs2bNxOp1K36ns7GwAZs6cyaRJk4iLi+PFF19040dGRgLw0Ucf8dZbb+Hr60ufPn348ccfgQt9qerXr8/x48fp168fmZmZREZGcvvtt1NeXq4UhoxGIzqdjtTUVHr27InVaqVjx474+flhNBpxOFzf3E2bNgXgueee49y5c5hMJvr168eBAwfc7pmYmMgHH3zAxIkT2bBhAwBDhgwBoGfPnpd8pq+//jpLliwBoF27dlcxGzWl9bCqKW/tX5o8uD1zfvdpx7YtXZ929PS514U9W9XuUcbMIphq54lgah7V6VH
GzCKYMnqUMbMIptp5IpgyedT6EKnTY12ZF1Hvi9Q6LyKZmkd1epQxswim2nkimJpHdXqUMbMIpqweZZNHBatdu3YxevRonnnmGW/58ZqKiooYOXIkLVq0YNasWeTk5DB58mSqqqrcxg0dOpSPPvqIiooKfv75Z2w2m9IPatSoUcTExLB8+XIefPBBfH19uemmmwCUXlBZWVncf//9hIeHs2DBghr86uJYfn4+ycnJ7N+/n4ULFxIREUF+/oUX106nk5KSEtq0aUNiYiLLly/n7NmzNGzYsAbrzJkzDB8+HHBte+jj4z6NgwYN4r333mPdunV07dqVrVu3Ul5eTkJCgrJN4a+//spHH33EHXfcgcPhYOXKlcr1r7zySi2futbD6n/J0/1LQ0NhanJvsnJLyMqzen0/eagbe7aq3aOMmUUw1c4TwdQ8ysETwdQ8ysETwZTRo4yZRTC1PkTqZMoyL6LfF6ltXq4FU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYNWjQwG2ru2sls9ms9H+6WEVFRYqfVatWYbVaef/99wkJCQFgx44drF+/nlOnTinb8AUHBzNy5EhmzZrFSy+9RGBgIE6nk6ioKB555BEAOnfuTOfOnamsrFT4qampANxxxx388MMP5Ofn06RJE2UrvupxJ06cwGAw0K1bN+bNm4ePjw9NmzbFYrEoY6pXdLVp04bCwkLeffddIiIiMBqNlJaWKuN2794NQP/+/Vm3bh1Wq5XmzZuTnp7uds9Vq1bRoEEDmjVrxvbt2wG47777ePHFF5VnFR4eTmVlJTNnziQvLw+n04ler6dv377Ex8fXem60HlaXlsHg3f1Lg/wM3BQXgcVS5lHfqovlbY/e5olgqp0ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpgyedT6EHmXea6wjOJyO2Z/A+Ehtf8FSV2aF/D++yIZv3c0j+r0KGNmEUy180QwNY/q9ChjZhFMWT1eDzKbr1EPq0cffZRVq1YxZMgQpafTtdClejQVFxe79WhKSUmhe/fuSrEKYMCAAaxfv55169aRnJysHC8pKaFx48Z8++237Nixg0ceeQSn88Jm2EajkQcffJClS5cq/Oq+XF26dOH9998HXKukOnfujNVqJSoqCpvNRnZ2Nna7nddee00pKG3ZsoWnnnqKDh06ALBt2zbAVUSaP3++ct+kpCS2bt2q3PPEiROAq2A1Y8YMwLU9YpcuXTAYDMqWgf/5z38YNmwYTz75JKtXr+bFF19kzJgxGI1Ghd28eXMWLFjAypUrmTJlCq1atcJisRAeHn71E/I7aftzXl51YT9UzaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKYNHrQ+Rd5glZZXMX3vAbRu/di3DSBrUFpO/71Xz6uK8iGCqnSeCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFHhWsbDYbPj4+3HnnnfTv359GjRpR3ZepWjqdTlmp5C0lJrp6NF3cy+qrr75Cr9crPZrS0tLo06cPjz76qNLD6u6770an05GSkqIUrCorK/n6669JTExUrgNIT0+nV69eWCwW4uLiiIyMxOFwcPPNNwNw8uRJwsLCmDNnDu+88w6+vr7ccccd+Pr6EhQURJMmTTh+/Dh2ux2dTsc999xDQUEBkZGRDBgwAIBWrVop9wwMDGT79u10796dsrIyOnbsiM1mAy70ncrMzCQoKIhXXnmF8ePHYzKZGDRoEL6+vjRq1Aij0ciZM2fIzc1l+fLlzJo1C7vdDkBycjIfffSRUlhMS0tj/vz5fPHFF4BrdZrFYqG8vNyrc6VJkyZNmjRp0qRJkyZNl1LSoLbMW+NebGnTwlVs0fTHNH/tAQ5m5LsdO5iRz7w1Bxj3QEKtmNq8aNKkSZMmTZo0afqz5FHB6s0331T+vGzZskuOEVGwGjp0KEuXLiU5OZmkpCRycnKYPn06Q4cOVXo0FRUV8fnnn+Pr68v
s2bPJyclh2rRpGI1G9u3bx+LFi2ndujUrV66ksLCQUaNGAa4VS3q9HofDVQF98skn2bJlCxs2bABQthIsKiqioqICq9VKYmIi7du3Z9GiRVitVmJiYpQx4Fp5lZ+fz4gRI8jJyWHOnDkAJCQkuN3TbrcrWxR+8cUXZGRkoNPp3DL5+PhQUlLCPffcQ8OGDVmyZAmVlZXceOONAOTl5QFQUFBAdHQ0gYGB/Pbbb+zevZvBgwcze/ZsWrVqxY8//si6desA10oug8HABx98wMaNG7n33nvp0qVLrefHkya316vqQgM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCKZvH4CA/nhvWidzCMixe2M6uWmrO7E3m2fNWt6JStRxO2J+eT56lnEZhgVfNrSvzIoKpdp4IpuZRnR5lzCyCqXaeCKbmUZ0eZcwsgimrR9nkUcFqy5Yt3vJxVQoODmbx4sVMmTKF5ORkTCYTQ4YMYezYscoYp9OJ0+kkLCyM3r17A2C325k4cSKxsbEsXLiQ/Px84uLiWLBggbKdXlVVFQ6Hg4cffphz584xf/58DAYDRqNRWfEEUFpaitVq5ZVXXmHlypXs2LGD0NBQrFYrfn5+bn5btWpFYmIi//3vf7FarYSFhZGfn09AQIDCKikpYcyYMfz666/MnTuXgIAA9Hq929aETqcTh8PB//3f/7Fs2TKysrIICwsjJydHWTlVXWjT6XQcPXrUzUdGRgarVq3ipZdeIi4ujqqqKgDmzZvnluvhhx/myJEjtZobvV7ncRPe61l1oYGf5lF9PBFMGT3KmFkEU0aPsmXefeQcR346SWzzMDrGNPQaV7bnKIIngimjRxkzi2B6kyfq/YOaM3uDeSK75IrnrTaHR8+2rsyLCKbaeSKYmkc5eCKYMnqUMbMIpoweZcwsgimrR1nkUcGqcePG3vJx1YqOjmbRokWXPa/X62ncuDFff/21cqx///5MnDiRiIgI1qxZc8nrqldFDRw4UOkxBa5+XT/++KPSh6qqqop69erx0EMP8dBDDwGuglLbtm0pKysDIDDQ9Wm2zp078/zzz/P8888DsHLlSiZNmkRlZSXgWmEFMHz4cJ5++mnlnv369ePMmTNumSIjIxkxYgQjRoxQru3SpYvCqPY3fPhw5X4AI0eO5KeffqJHjx4AzJkzh4EDB/Lyyy8rY+655x6ys7MZP348DocDvf7qK8EOhxOLpfSqr7veZTCov4Gf5lF9PBFMGT3KmFkEU0aPsmXOyS9l8sc/U1JWpRwLCvBh0mNdaRh69Z/QF+FRFFPtPBFMGT3KmFkEU0aPas0c6Ku74nmTUU9BgbVWbJDnOdYlngim5lGdHmXMLIKpdp4IpuZRnR5lzCyCKavH60Fmc8AfXnXmUcGqWqWlpfzyyy9kZmYCrkJWly5dlIKNWqTTXfkF/dWOvdyYi1dFeerjallNmzbFaDTidDqpqqqitLSUb7/9ll27dgFQUVEBuHp0bdu2jbVr19ZgvvXWW9x+++1ER0f/YZ8XS2sod3nVhQZ+mkf18UQwZfQoY2YRTBk9ypL598UqgJKyKiYt/Jn3/pHoERvkeY4ieSKYMnqUMbMIpowe1ZY5PDiAdi3DOJiRj+Oit416navnVAOzv1f8Xu/PsS7yRDA1j3LwRDBl9ChjZhFMGT3KmFkEU1aPssjjgtXSpUt55513KC0tdSuumEwmxo4dy8MPP+zpLWolh8NBZmYmw4cPJzU1FZPJRGxsLABms/my11WvUHrrrbc4deoU+fn5xMbGkp6eDrhWYPn7++Pj40NeXh6PPfYYe/bswdfXl/j4eOx2u1KoKy11rTTatm0bAwcOJD09ncjISIKCggDw9fV18zN27FiOHDmC1Wqlffv2nDlzRtni749mMhqN3HjjjaxatYqPP/4YcK3Muvvuu1m3bh1t27oa5b799ttK8QpgxowZ7N27F6PRyIcffkhkZGStn73Ww6qm6sJ+qJp
H9fFEMGX0KGNmEUwZPcqUed+JvBrFqmqVlFVx6FQB7aPq14ot03MUxRPBlNGjjJlFMGX0qObMyYPbM+eLVFLT8pVjbVuG8dS97T1+XybTc6wrPBFMzaM6PcqYWQRT7TwRTM2jOj3KmFkEU1aPssmjgtV///tfXn/9dRISEhgxYgRRUVEApKWlsXTpUl5//XWCgoL461//6g2vVy273c6hQ4d44oknOHnyJKtXr8ZoNCq9o8C1VV5WVhabN28GwMfHB51Ox86dO+nbty+dOnVi0aJFypZ71aruMfXLL7/w+OOPU1JSwrJly/D19aVevXpuYzMzM4mOjmbcuHFs3rxZWe1UrcDAQHx9fdm+fTuDBw+mefPmfPjhh1RVVdXYlu+PZKpXrx7l5eU0atSI7OxsmjRpwrp16+jQoQPNmjUDICEhQRn//fffc/jwYQCioqK4+eaba/nEtR5W/0t1YT9UzaP6eCKYMnqUMbMIpoweZciclX/6iuczz5eS2LmZR/eQ4TmK5olgyuhRxswimDJ6VGPm0FCYmtybrNwSsvKsRDYwERke5CV3LsnwHOsaTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIs8Klh9/PHHdOnShUWLFmEwGJTjsbGx3HXXXTzyyCN8/PHHf0rBys/PD5vNRmxsLHPnzsVkMtGzZ0+2b9+Oj8+F2A6HA7vdrnwdGBiI0+nkpptuYv/+/aSkpBAbG0tpaSklJSXKCqzqa6vz+/j40KNHD7Zt26asNKse26hRIwwGAzNmzCAyMpKYmBiOHDminDcYDFRWVtKjRw++//57rFYr8fHx7Nq1Cz8/v6vO9PHHH7Nz507eeustsrOzOX/+PG3atOH48ePY7Xa3ubLZbIwfP17pp3Vx4as20npYXVoGg/r3Q9U8qo8ngimjRxkzi2DK6FGmzJFhV/73v3H9wFr3QZHpOYriiWDK6FHGzCKYMnoUkflcYRnF5XbM/gbCQzz/hUaQn4Gb4iKwWMo86lt1serCc1S7Rxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vB5kNl+jHlbp6ek8//zzbgWQahkMBvr168ebb77pyS1qLYPBQFhYGMuWLVOOZWVlcdttt2Gz2ZRjS5cudbuuqsq1Hc6wYcMYMGCAcnzw4MEcPnwYf39/wNVbys/Pj4ULFypjHA4Hbdq0UYo/jRo1AqBdu3bMnj1bGTdz5kyOHDmibB1YvTXfq6++StOmTZVxvXv3pqys7Koz6fV6unfvzueff05MTAzJyck0aNCA5557jvz8fMLDw5WxY8aMwWKxMG3aNF544QXuuOOO//Fk/7e0/Tkvr7qwH6rmUX08EUwZPcqYWQRTRo8yZG7TPIygAJ9LbgsYFOBDXLNQj/3K8BxF80QwZfQoY2YRTBk9eoNXUlbJ/LUH2J9+YQu/di3DSBrUFpO/r6cWpZwXEUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LIo80U69Wrx5kzZy57/syZM0q/pmstu91OYWGh21Z+27dvB1x9ni6n6pVKBw8eVI5VVlaSmZmJ3W6nvLwcAJ1OR0VFBRkZGcq4n376CafTqfSmys7OBnAbA3D06FHgQo+r6lVUP/30kzKmqKiIgoICLl79VdtMALt27SIoKIjQ0FDl2NKlS9m6dSsPPfQQ99577xWv16RJkyZNmjRd33p55E0EBbh/likowIeXR970JznSpEmTputP89ce4GBGvtuxgxn5zFtz4E9ypEmTJk2aNGnSpEmTeuTRCqtbbrmFZcuW0a5dO+6++263cxs2bGD58uXcc889Hhm8lE6cOMFrr73Gnj17MJlMDBo0iH/+859uRRubzYa/vz/JyckkJSWRk5PD9OnT8fPz44svvuDzzz8nLi6OqqoqLBaL0sOqtLQUg8HAggULWLRoEUajkZCQEGWlU1FREf7+/uh0OgIDA7nnnntwOByEhoZis9kICwtDp9MpY6v9duj
QAYfDQcOGDcnMzHQ7b7fbMZlMvPLKK0yePJmAgAACAwPx8/NTCmTVmQwGA7169cJut9OgQQMsFgvBwcHKyrDDhw/z1ltv0bZtW1JSUgB46623cDgcjBgxQinIzZ8/nxkzZqDT6fjkk0+U/BkZGWRnZyurw2ojT5v7Xo+qCw38NI/q44lgyuhRxswimDJ6lC3zDQ2CmPPsrRzMyOdUrpVm4SbatAjzmCvbcxTBE8GU0aOMmUUwZfToLd7Z81a3lVXVcjhhf3o+eZZyGoUF/qkeRTJl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWj7LJo4LV+PHj+e233xg/fjzTpk2jRYsWgKvokZeXR1RUFM8++6w3fCoqKipi5MiRtGjRglmzZpGTk8O0adMoLy9n4sSJyjidTsf999/PkSNHSE5OxmQy0bp1a3799VcSEhL45z//yfLly9myZYvbFnl2u10pBtntdoqLizl37hyNGzd2WylVUVFBaWkpTZo0IS8vj6KiImw2GzExMTU8+/r6EhwczPnz58nNzcVkMlFSUqKct9lslJWVER4eTmlpKWVlZVitVlq3bs3x48eVcU6nE5vNRuPGjcnNzaWgoACbzea2aqpBgwbYbDY++OAD9HrXX4ywsDC6du1K//79lXHffvutwrTb7eTm5gLw6aef0rBhQ8aMGVOr+dHrdYSGmmp1rQyqCw38NI/q44lgyuhRxswimDJ6lC1zz1ATPb1GuyDZnqMIngimjB5lzCyCKaNHT3knskuueN5qc3j8XkrGeRHBVDtPBFPzKAdPBFNGjzJmFsGU0aOMmUUwZfUoizwqWIWFhfHFF1+watUqUlJSyMrKAqB169aMHj2aBx54QNnuzltatWoVVquV999/n5CQEMBVZJo8eTJJSUlEREQAYDabMRqNLFq0CHAVmHr06EFgYCBdunShe/fudO7cmX79+pGYmKjwq1c/LViwgNjYWAC2bdvGqFGj0Ol0BAcHA2C1WgkJCWHLli3Ktc8++yzffPMN0dHRAEovqwceeICXXnoJgMLCQm655RYAhZWZmYnD4WDt2rVKpk8++YRXXnnFrRgFEB0dzYYNG5Svhw4dyoEDBxRWSEgIZ86c4fHHH+df//oXMTExPPbYY4waNcqNU1hYyBNPPOF2vEuXLgBuha2rlcPhxGIprfX116sMBvU38NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR+8wzxWWUVxux+xvIDzE818WeMtfoK/uiudNRj0FBdZasevCvMjoUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwerweZzQF/eNWZRwUrcPVfGjlyJCNHjvQU9YeUkpJC9+7dlcIOuAosr7zyCtu3b2fw4MEAREVFkZaWpozZvXs3JSUl6HQ6oqKiAFffpzvuuEPZDg8u9J26WD179sTPzw+j0Yi/vz82mw2r1UrDhg3dxvXv35/169fToEED4ELvqotXcIWEhBAbG8tvv/2m+MjLywNQthKsZk2cOFEpRJ0+fRqHw1GjJ9jtt9/Onj17aNasGQA//vgjmZmZjBgx4orPMT09nQ8++IAPPvigxrm7776bffv21brYqDWUu7zqQr2YNyQAAQAASURBVAM/zaP6eCKYMnqUMbMIpoweZcwsgimjRxkzi2CqnSeCqXmsnUrKKpm/9oDbtnvtWoaRNKgtJn/fP91feHAA7VqGcTAjH4fzwnG9Dtq0CKOB2d/jZ6rGeRHNE8FUO08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLPCpYFRYWkp2draxE+r2OHDlCo0aNlKKLN5SWlsZ9991Xo49VQEAAx44dU8YlJibywQcfYLFYMJvNSvFKr9eTkZHBrbfeSn5+Pg0bNiQrK4vy8nL8/f0pKirCx8eH//znP+Tk5LBt2zZ8fHyorKxUCk+nTp3C6XRy7tw5Vq1axYoVK0hPT6devXoANGnSRBnn6+vL1q1
bKSwsZO3atVitVsVH9bjCwkJ0Oh3Lli3j119/Zc+ePQQEuD4JWL9+fSU3wNGjR1myZAnLly8nKysLk8m1ZUT1dox79+4lJCSEn3/+mUmTJgEwffp0NmzYwNy5c5Ui25IlSwCoqqpi06ZNfPPNN5w/fx69Xk/Xrl3x9fX8DZ0mTZo0adKkSZMmTZo0XSvNX3uAgxn5bscOZuQzb80Bxj2Q8OeY+p2SBrVl3hr3olqbFq6imiZNmjRp0qRJkyZNssujgtUbb7xBeno6n3766SXPv/LKK0RFRTF16lRPbuMmi8WC0Wis0cfq5ZdfJiUlheeffx5wbZW3dOlSkpOTSUpKYseOHQC0b9+ehQsXMn78eGJiYnj66adxOp0cOnSIjh07UlJSQlxcHMuWLaNBgwaMHj2a7777jn379mGxWABXHy1wrZx65ZVX6N27N3369GHx4sWAazVUtdcGDRqwZ88eUlNTGTFiBOfOnWPdunUAFBcXU69ePUpKSmjTpg2zZs2iefPmPPHEE/z3v/8lPz+f8+fPu93TYDDw+uuv85e//MXtnunp6XTt2pXc3FysVivPPfccRqNR8bl//37++te/kpKSgo+PD926dQMgISGBsrIy5fneddddhIWFKf2vaiMfH62p3O9VFxr4aR7VxxPBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM1j7Zlnz1vdikDVcjhhf3o+eZZyGoUF/mn+qhUc5MdzwzqRW1iGxcvbFl78f29Ilu+dusQTwdQ8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9SibPCpY/fTTTzz44IOXPX/bbbexatUqT25xSe3du7dGH6t3332XEydOkJOTQ0REBMHBwSxevJgpU6aQnJyMXq9Hr9dz7NgxHnvsMR555BEAIiIiKCkpYeXKlXTs2BEAk8mE0+nE6XQyd+5c4uLiiI6O5sSJE+zbt0/xUb9+fSoqKti1axd79+7ltttuY/369WzatEnpWeXr64tOpyMsLIwlS5YQGRlJnz592LJlC6tWrWL06NGAq5+VwWCgqKiIuXPn0qlTJ4qKisjIyCAnJ0e5p8lkwt/fn2+++QaTycSAAQNYu3Ytn3/+OQ888ABOp1PpnVVeXg5Abm4uAOfPn+frr79mwIABAOzYsQObzcb06dOx2Wy89NJLHDhwgK+++qrWc6PX6zxuFHw9qy408NM8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIprd4u4+c48hPJ4ltHkbHmIb/+4KrkFozi2SqzeOJ7JIrnrfaHB6/T/FmZlHvmdQ2L9eCJ4Kpdp4IpuZRDp4Ippo9ZuaWcPRQDpENTESGB/3vC/6g1JxZFE8EU0aPMmYWwZTVoyzyqGCVn59PaGjoZc+HhIQoK4S8JbPZzPHjx2v0saouMF3cxyo6OppFixYBsHz5cl599VWsViv9+/dXrnvkkUd4+eWX+eWXXxT+6dOniY2NZc2aNcq4oUOH4uvry/fff69cf/ToUZ577jml+JWRkcH69es5d+4cZ86cwWw2k5+fj9PpZN26dcrWiDNnzuSHH34gJSWF0aNHK1sW3nLLLcyZM0e5Z69evZRM1VsD5uTkMHv2bPr27QvA9u3bWbt2LQcOHMBms2E2mwFXYWvXrl1KX6z33nuPDz74gJSUFKVg9emnn9KtWzcGDRoEuFafDRo0iM2bN9OvX79azY/D4cRiKa3VtdezDAb1N/DTPKqPJ4Ipo0cZM4tgyuhRxswimDJ6lDGzCKa3eDn5pUz++GdKyqqUY0EBPkx6rCsNQ2u34sbbHkXxRDDV6jHQV3fF8yajnoICa63Yas0skieCKaNHGTOLYMroUcbM3mSWlFUy94tUUtMurLxtHxXGU/e2xxRQ+xYYas4siieCKaNHGTOLYMrq8XqQ2Rzwh1edeVSwCg8P5+DBg5c9f+DAAcLCwjy5RQ1FRUWxd+9eoqKilGPFxcXk5eVRr149pdfTpa671J/T0tIICQn
h7NmzlJeXExUVxbFjx+jQoYMyxul0kp6eTv369UlLS6NZs2b4+PhQVVVVg3Xxn6OioigpKSEsLMytj1daWprC+qOZunTpopxr2bKlG6vay+nTp7nxxhsBaNy4sVKsulgXe6xeFfb666/zxRdfUFFRgU6n47fffqt1wQrQGspdQXWhgZ/mUX08EUwZPcqYWQRTRo8yZhbBlNGjjJlFMD3l/b5YBVBSVsWkhT/z3j8SPbUHqC/ztWCqzWN4cADtWoZxMCMfh/PCcb3O1SOqgdnfY79qy3wteCKYMnqUMbMIpoweZczsDeac1ak1ehoeSM9n9upUr/Q0VGNm0TwRTBk9yphZBFNWj7LIo4JV3759WbFiBYmJifTp08ft3DfffMPq1asZOnSoRwZ/r8TERH799VelPxPAV199hV6vp0GDBkqvp9+rU6dOGI1Gqqqq8PPzA6CyspKvv/6atm3bsn37doqKikhMTHRbWQWurfMKCwuJi4tj+/btdO3aVTkXEHBhed+GDRto0aIFGRkZFBUV0atXL3Q6HXq9nvnz57NixQrOnz9PVVUVHTp0YP/+/W6ZbDYbY8aMYdu2bTidrndZYWFhFBUV0bRpU8LDw8nNzWXfvn2MHTuW9PR0dDodkZGRnDp1yu2e586d49FHHyU1NZXi4mJat26tsKqVk5PDsmXLajyrFStWMHbsWOU5Xa20HlY1VRf2Q9U8qo8ngimjRxkzi2DK6FHGzCKYMnqUMbMIpjd4+07k1ShWVaukrIpDpwpoH1W/1nw1ZhbNVLPH5MHtmfO7T9S3ben6RL0n71HUnFkUTwRTRo8yZhbBlNGjjJm9xawrPQ1FMTWP6vQoY2YRTFk9yiads7oyUgsVFxfz0EMPcfz4cWJjY5XVPceOHePw4cNER0ezYsUKZZs6b6ioqIiuXbvSpEkTJk+eTE5ODtOmTeOee+7h559/pmPHjkyZMoWRI0eSlZXF5s2blWsfe+wxtm/fzoQJE2jdujUrV65k27ZtjBs3jilTppCSkkJYWBgdO3YkMDCQN998k7KyMqZPn05UVBS//PIL/v7+vP3226xdu5a1a9fStGlTXn/9dXbu3MmcOXOYMmUKL730Em+99Rb33HMP/fv3Jz09Hb1ez5AhQ/jtt9/IyMjA4XDgdDo5cOCAksnHx4eIiAhuv/12/vOf/ygrpP7yl78wZcoUJk+ezIoVKwBXfzCA77//HofDVa1duXIlnTp14rbbbiMrK4umTZvSsGFDZWvA2267jbS0NDZt2sTTTz/NN998g9PpJCkpidLSUtatW0dgYCBnz57ltddeY8iQIVc9P06n85IruzRp0qRJkyZNmjRputZa+fVhVmw6ctnzD90Vw4N3xl5DR5quhbJyS8jKs3q9Z4kmTZo0aVK/fj2Uw+SPfrrs+Vcev5mb4iKuoSNNmjRp0nQ18miFVb169fjkk0/46KOP2Lx5M5s2bQKgWbNmPPXUU4waNYrAQM/2hf+9goODCQ4Oxul0kpycjMlkYsiQIYwdO5Y+ffooW+85HA7sdrvbtbfffjvbt29nwYIFFBQUEBcXx4IFCzh27Bg6nY7g4GB8fX2Jjo6msLCQcePG4ePjwx133EGjRo3YsWMHXbt2pXfv3txwww2sXbuW06dPM2rUKCIjI3nttde46aabFJ8APXv2JC0tDaPRyJo1a+jUqRPTp09n2LBhVFVVKWMDAwMpLS0lLy+PDRs28OCDD9KtWzeSkpKoqKgA4NZbb2XFihX4+vqybds2WrZsyZw5c/j444/ZuXOncs927dpht9ux2Wzs3bsXgOeee44DBw4oY+Lj49myZQtOp5MlS5bQpEkTHnroIUaNGsU999zD8ePHazU/Wg+rS8tgUP9+qJpH9fFEMGX0KGNmEUwZPcqYWQRTRo8yZhbB9AYvMuzKzY4b1w+sdV8jUGfm3+tcYRnF5XbM/gbCQzxv/lwXvneC/AzcFBeBxVLm0fyK8ieCqXlUp0cZM4t
gyuhRxszeYmo9DTWPavQoY2YRTFk9Xg+6Zj2sAAIDA3nmmWd45pln/ufYyspKfvvtN2JjY6lXr16t73njjTcSEhLC7NmzlWPFxcXk5uYqfaCWLl1a47ro6GgA5s+fT2zshU9Sbtq0icjISPz9/QGIiYnh6NGjfP/998qYhx56CIPBoFx3cR+rV199lcGDBwPw7bffAhf6ZPn6upo5fvjhh259qOrXr8/Zs2eVrwMCAtDr9ezatUs5ZrFYACgpKQGgSZMmAAwcOJCpU6cq47Zs2cLOnTvR612T3qpVK3bu3MnOnTv5+eefGTFiBF26dGH9+vW0bt0agL///e988cUXpKWl8e2339boNVZdJKuNtP05L6+6sB+q5lF9PBFMGT3KmFkEU0aPMmYWwZTRo4yZRTA94bVpHkZQgM8ltwUMCvAhrlmoV7yqKXO1Ssoqmb/2gNu2SO1ahpE0qC0m/9o3nK/W9f69cy14IpiaRzl4IpiaRzl4Iphq86j1NBTDE8GU0aOMmUUwZfUoi67pZopFRUWMGDFC6d1UWyUmJvLjjz8qBR240MeqZ8+el72uU6dOBAUFsXHjRuVYdR+rxMREN/7hw4fJyMhQjh09epTKykpuueUWAIxGIzfffDO+vr6kpaUp4zZs2EB0dLRSXKreDvHYsWPKmKKiIs6ePYvNZqO8vBwAHx8fysrK3DJVr1ir3rWx+v9nzpxxy3Xw4EEATp8+rfgvKipix44dypisrCwOHjzolrO6+DZgwADi4uLo06cPb7zxBtnZ2bRt2/ayz1GTJk2aNGnSpEmTprqil0feRFCA++f0ggJ8eHnkTX+So2uj+WsP1Gg4fzAjn3lrDvxJjjRp0qRJk6Zro6RBbWnTwv2D2W1auD60oUmTJk2a1C2PV1hdrTxomaVo6NChLF26lOTkZJKSksjJyWH69OkMHTqUiIgL+9D+7W9/48iRI+h0OkwmE4MGDWLUqFHMnTuXsLAwpY9VYWEho0aNUq678847qV+/PnfffTcAjRs3pri4mKioKOLj4918bNu2jY8++ogVK1bQrFkzDh8+zMyZM9386nQ6pkyZwmuvvUaDBg0IDAwkICAAm81GUVER/v7+ykqs7t27o9fradq0KTk5OURGRiqrnYqKigDYuXMnbdu2JTAwkMjISKUYVn2+Y8eO3HjjjTz++OPK854yZQoxMTHceeedAOzbt4/z58+j0+koLCxUCmuLFi2iXr16SvbayJOGxter6kIDP82j+ngimDJ6lDGzCKaMHmXMLIIpo0cZM4tgeot3Q4Mg5jx7Kwcz8jmVa6VZuKnGL7FqK7Vm1hrOq5sngql5VKdHGTOLYMroUcbM3mQGB/nx3LBO5BaWYfHytrje8CeSqXlUp0cZM4tgyupRNl3zgpU3FBwczOLFi5kyZUqNPlbVKioq4uDBg+h0OubMmUNOTg7Tpk3jnnvu4emnn2bhwoXk5+crfayaNm2qXLto0SIKCwuJjo7m5MmTZGZmArit3qqsrOS9997Dx8cHo9FIeXk5R48epXXr1vTv318Zl5mZidPppHXr1pw9e5bz58+Tk5PDvffeyxdffKGMKywsRKfT0bx5c06fPs3JkycxGo20aNFCGWO1uvbYveGGG7Db7Zw/f56jR49y88038+OPPyrjvvzyS44dO0ZsbCzp6elUVFSQm5vLq6++io+Pa8o3btxIVVUV//znP9m6dSv79+9XimdWq5Xi4mICAq7+H3O9XkdoqOmqr5NFZrPnL5BE8kQwZfQoY2YRTLXzRDA1j3LwRDA1j3LwRDBl8tgz1MTl92LwTGrLfCK75IrnrTaHx6/ZZfreEcUTwdQ8ysETwdQ8ysETwVSzR1G/n1JzZlE8EUwZPcqYWQRTVo+yqE4WrMDVj2rRokWXPb9q1Sp8fX357rvvCAkJAcButzN58mS+++47kpKSLnldRUUF8+bNY9SoUYwbNw4Am81Ghw4d+PXXX5VxmzZt4tixY4SFhTF
48GDGjx/Ptm3bGDVqFPv27VNWYu3evRuAzz77DD8/PwCeffZZtm3bhk6nIzg4mOzsbEpKSmjfvj3/+c9/AFcB67bbbiMjI4OEhAQAtm3bBsALL7xAv379APjkk0+YNGkS4CrkAbz33nv85S9/YcaMGezcuZMRI0YQExPDypUruf322wEYPXq00rfqiSeeAGDdunWMHz8egEOHDtGwYcP/NQ015HA4sVhKr/q6610Gg/ob+Gke1ccTwZTRo7ebzYtgyjgvoP7nKOu8aB7VxxPBlNGjiMwrNh/h8MlC2rQIYWjfGI953vKoNZxXN08EU/OoTo8yZhbBlNGjjJlFMNXOE8HUPKrTo4yZRTBl9Xg9yGwO+MOrzupswep/KSUlhe7duyvFKoD+/fvzyiuvsH37dgYPHnzJ63bv3k1JSYnbKimj0Uh4eDjp6elu/FatWnH8+HGioqIA1wqskJAQvv/+e+Lj47HZbEofrPT0dLeeUevXryciIgJ/f3+lEFW99R9ASEgIPXr0YOvWrQo/NTUVnU5HTk6OW6aJEycCEBUVxenTp8nIyOBf//qXW64ePXqwbNkybDYbRqNRKVZdrDZt2lz5of5BaQ3lLq+60MBP86g+ngimDB5FNJvXGth7h1fXnqMs8yKaKaNHGTOLYKqRt+vIOWZ/caEvb0Z2MRt+Os0zQ9qR0OrqP/T1e3nqUWs4Xzd4IpiaRzl4IpiaRzl4IpgyepQxswimjB5lzCyCKatHWVRnN1M8ceIEjz76KAkJCfTs2ZPp06djs9mU82lpaUqhp1pms5nw8HBWr17NrbfeSnx8PA888AC//fab23UAJpOJMWPG0LFjR7p27YrBYKC8vJxz584p4/z9/dHpdHz00Ue0b9+efv36YTabFcapU6ew2+34+/szceJEevbsSUJCAvPnzwcgLi5OYdWrV49jx44xdOhQJVNaWhpVVVXccsstAGRkZBAZGcny5cu56667aN++PQ8//DC+vr6EhITQpEkT5d4RERFMmDCBJ598EoAdO3ZQWVnJ6dOnL/k8s7Oz+etf/wqAXq/3WvFKkyZNmv4siWg2rzWw946056hJk6a6rouLVRfrvf9c+vifIa3hvCZNmjRp0qRJkyZNmuqa6uQKq6KiIkaOHEmLFi2YNWuW0p+qvLxcWW1ksVgwm801rrXb7fz666+88MILxMTEsHz5ch577DHWrFlD06ZNsVgsGI1GkpOTAZgxYwbl5eW88sorAIwZM4YxY8aQlZVFUVERTqeTm266iYkTJ/LTTz8xd+5czp49q/gEaNy4MXv37uXuu+8mNjaWjz76CIBOnTopXkNDQyktLeXw4cM8/vjj5ObmsmrVKgBle0GLxUJMTAw7duygbdu2jBs3jnXr1lFZWUmTJk3c7vnSSy+RnZ1Nr1692LRpE9nZ2QD8+uuvREdHAzBq1Ci6detGTEwMs2fPVgp+999/P+Hh4bWeHx+fOlsHFaa60MBP86g+ngimLB5FNJvXGth7h1eXnqNM8yKSKaNHGTOLYKqVt2zT4Sue//TbYzx0Z+22B/RmZq3hvHp5IpiaR3V6lDGzCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2VQnC1arVq3CarXy/vvv1+hPlZSURERExCWvq6iooKCggJiYGB555BEAOnfuTL9+/ViwYIHSC8rhcHDs2DE2bNigrNI6evQoc+fOpby8nOTkZGw2G/7+/tx44428+uqrANx888189NFHVFVVud03LS2Nvn37smvXLjZv3kxUVBRFRUUcOXJEGVNcXIyvry/dunVjwYIF+Pj40LhxYzIzM8nJyVEyHTp0iJtuugmLxcKMGTOIjIzEx8eHvLw8t3sePux6I71p0ybA1RMLYOXKlTzwwAMAtGzZks8//5ysrCxsNht6vR6Hw8E//vGPq50SRXq9TlhTy+tBdaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9BYvM7eEo4d
yiGxgIjI8qNaco6eLrnj+8KlCVf0cqwsN53cfOceRn04S2zyMjjGeb6lYLbV+L4pkah7l4Ilgah7l4IlgyuhRzZm99XrnUpLpOdYVngim5lGdPJl0TQtW9erV44033uDGG2/0iPNH+lOZzWaKi4vdrtu9ezcOh8Ntuzuj0cgdd9zB5s2bAde2gVVVVbRu3dptS8EbbrgBgFtvvZWxY8dy3333ceDAAQYMGOB2j6ZNm5KWlsaZM2cIDg4GwOl0MnXqVOXrjIwM7rrrLo4dO6bcs6SkhMTERObMmaOwpk2bxscff6xkCgoKorCwkEcffZS+ffsq4zp37sy5c+ew2WzKPUwmE7t27UKnczVc3r59O4899hiNGzdWrnvppZew2Wx0794dgCeeeIL33ntPuaY2cjicWCyltb7+epXBoP4GfppH9fFEMGXxKKLZvNbA3ju8uvQcZZoXkUwZPcqYWQTTW7ySskrmfpFKatqF1Z3to8J46t72mAKuvm9e66bBZGQXX/Z8bLMQ1fwcE8H0Ji8nv5TJH/9MSdmFD/wFBfgw6bGuNAyt3Wpbb3sUwRPB1Dyq06OMmUUwZfQoY2YRTLXzvMn09usdER5F8UQw1c4TwdQ8qtfj9SCzOeAPrzrzqGD1yy+/XPG8TqfDaDTSqFEjGjZsiJ+fH/fee68ntwRcK5buu+8+t2PV/amqezhFRUUpf67WwYMHAejYsaPb8ejoaBYvXkx5eblSpGrY0P2Tfenp6RiNRk6dOqWc379/v1tRy+l0Kiud0tLSuPnmm9Hr9QQEBCiFpOpzALm5uYrXyspKt2ISQGZmJr6+vsr48PBwCgsLadmypTKmuLgYq9WK0+nk9OnTip/w8HC3wlNaWhp6vZ6cnBy3ezz66KNYrVY++ugjpT+Xp9Iayl1edaGBn+ZRfTwRzOvdo4hm81oDe+/w6uJzlGFergVTRo8yZhbB9JQ3Z3Vqjb55B9Lzmb06lXEPJFw1b2if1nz9y5nLnv/b7Teq7ueYCKY3eL8vVgGUlFUxaeHPvPePRI/YoM7MopmaRzl4IpiaRzl4IpgyelRjZm+/3rmUZHiOdY0ngql5VCdPJnlUsBo+fPgfXo3TvHlznnnmmRorkmoji8VCZWUljz76KHv27MFkMjFo0CDMZrPSwykxMZEPPvjArZfV7t27AThz5gy33nor+fn5xMXFcdttt+F0OikqKqJTp07odDpycnIYM2YM27Ztw8fHh6qqKsLCwhR+mzZt+Pbbb9m5cydvvfUW6enphIaGYrFYAFcvKaPRqBSZ3nzzTdauXYvVasVkMhESEkJJiWtbpF69egFw/PhxJVNAQADFxcVumVq1asWxY8fYtGkTa9asISsri9DQUOW5VPv39/cnJyeHYcOGceDAAcrKyoiPj6dRo0Zuq84efPBB5ZmMGjVKOb569Woef/zxWs+P1sOqpurCfqiaR/XxRDBl8pg8uD1zfvcJs7YtXZ8wq+3PKRFMkGteoO48R9nmRRRTRo8yZhbB9AZPVN+8f94fzzuf7bvkcTX9HBPB9BZv34m8GsWqapWUVXHoVAHto+rXiq3WzCKZmkd1epQxswimjB5lzCyCqXaet5gi+wR7y6NIngim2nkimJpH9XqUTR4VrD766CPeeustbDYbf/vb32jWrBkAJ0+e5LPPPsPf358nn3ySzMxMPvnkE5599ln0ej39+vXzyLTT6eSzzz6jTZs2zJo1i5ycHKZNm4Zef+EbYejQoSxdupTk5GSSkpLIycnhhx9+QKfTsXDhQsaPH09MTAzjxo1j5syZynV+fn6YzWaOHTtGYWEho0eP5rvvviM1NRVf3wtLaLt27ao8g969e9OnTx+WLFlSw2ubNm34f+yde1xUdf7/n3NhuAxyk4siKkIqqJhSWmpSpl3UzI10czVvWV8qat1ca7VatXLT3G2r1aQ0De92WU0tzeymoWZ5SRHvgoKgqAwwMHKdmd8f/Dg6oW7CfOzg57wej31
snPM5z/N6nc/AjLz5fN7fffcdS5YsYdSoUZw9e5Z169bh5eWF01nzp+XNmjVDp9Oxbds2IiMjefLJJ/nss88oLCykqqpKYd1+++1s2LCBd955hwceeOCK92zRogXHjx8nPz+f6Oho9u/fT0ZGBn379uXIkSMArFu3TilWvfbaa0BNQW/16tXEx8fXe260HlZXV2PYD1XzqD6eCKYMHgMD4fXk3uSdKyXvvM0te3iLYF4qGeYFGt9zlGVeRDNl9ChjZhHMhvBE9c3re3sb+t7ehgVr9/PL4bN0aR/KuAc71ddmHd3o8wKQZ8m56vncggsk3NKqQfdQW+brwdQ8ysETwdQ8ysETwZTRo9oyX48+wXDjP8fGyBPB1DyqkyeTGlSw+uGHH/D09OTjjz/GZDK5nBs+fDgjR47kl19+4fnnn+dPf/oTDz/8MPPnz29wwcpkMlFRUcGcOXOUPlZ2u50pU6ZgNNZE8vf3Z9GiRbz22mskJydjNpvp0qULO3bsYNSoUYwZMwao2Y6vsLAQp9OpbNvXpEkTiouLcTqdpKSkEBsby6RJk5gxY4ZSZGratOav/Wp7Re3du5f77ruP/Px80tLSFFZYWBgAQUFBLF68mPDwcF566SXeeOMNl2fm6elJRUUFxcXFpKSkEB8fzwMPPMDs2bOVTLV9tAICAvj6668xm82MHDmS77//nszMTOWebdu2paKiAr1ez8GDB4GaflibN29Wxqxdu1a599///neX57t7925iY2Px9r72byyth9Xl5e79S93NE8GU0aOMmUUw3c0rKS0HoLS0nEJj/Xv0XSpfTwO3xoZhtZbVu0/JpZJxXgAycwrJOW+jorwSbzfMTWPIrHmUw6OMmUUw3cET2TcPYOhd0Yx7sJP2flAPhQdd/d8aLZr6qKYXmEzzcqnOFpVRUm7Hz8tASEDDf+ki43MUkTkjy0LOeRutQsx0iAxqME/GeRHBVDtPBFNGj2rNLPrzjizPsTHxRDA1j+r1eCPouvWwWrduHU899VSdYhXUFGAGDRrEe++9x/PPP4+npycPPvggc+fObcgtATAYDPj6+irFKoDevXsDUFlZqRyLjo4mNTVV+XrBggXs2LGDTp0u/vXjsmXLSExM5NChQ3h5ebn437p1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWloaPj41S20rKiqU+7Rs2VIZN3/+fMrKypSvjUYjTZo0IS0tTTmWl5fH7NmzlUy1/v74xz/y17/+VRl36tQpMjMzFU9RUVFs376dHTt2sHr1aiZPnswdd9xBamoq7dq1A6B///5s2bLlss/3n//8JxkZGS4rz65F2v6cV1Zj2A9V86g+ngimDB5Ly6qYtzbDZWuETm2CSBrcEbNXw5rO1kptma8H0x28/MIL/GPxTpetoHy9jfx99K2EBNR/q4paqTGzaKbmUQ6eCOaN7vF69M1rqMfrwRPBbCivQ+sgfL2Nl90W0NfbSGyrQNX1ApNhXkD8ZyhZnqO7eY3t85MIpoweZcwsgql2XkOZ2ucdcUy180QwNY/q5MmkBm2mWFZWxvnz5694/ty5c1y4cHG1TZMmTVy27auv7HY7RUVFSr8oQCkuXa54VqvalUoHDhxQjlVVVZGbm4vdbqe8vOav8HU6HRUVFZw4cUIZ9+OPP+J0OpVtAc+cOQPgMgZgz549AIwdO5ZevXopK5x+/PFHZUxxcTGFhYXY7XaXTAUFBSQkJNC5c2ceeeQRVqxY4ZKp1t/q1avp2rUr3bt356WXXuLYsWMunhISEiguLqZfv368/PLLACxatIgDBw6QkJAA1BT4/vCHP6DT6TAYDC4Znn32WZKTk6/4HDVp0qSpMWje2ow6TWcPnLDw/pqM38mRplr9+pctUNOv5LVFO38nR5o0abqRlTS4Y51VCB0ia375run31d9H34qvt+vfUNb+Al7T7yftM5Q6pX1+0qRJ09Wkfd7RpEnTjaIGrbC67bbbWLx
4MV26dKFPnz4u57799lsWL17M7bffrhw7ePAgLVq0aMgtgZpVVF5eXi79qWbNmoW/vz/V1Rc/wI0ePZq8vDw2bdoEwIULFzAYDCxatIiQkBDatWvHihUrlJVOxcXFeHl5odPp8Pf359lnn2XChAmUlZUxa9YsgoKC0Ol0yliAzMxMpk2bpqxY2rFjh3LvyMhIpk2bhoeHB7NmzUKv1xMWFsb777+Pp6enUoCCmmKU0+nE09OTZ599lvXr1zNv3jyaNGmiZLJYav7RcO7cOQYMGECbNm1YuHChi39AKYRZLBbuvPNOvv32W9577z1atGjBvffeC0BISAjR0dGYTCY6d+7Mbbfdxvr168nMzOT48ePcdNNN9Z6fhjSavlHVGBr4aR7VxxPBlMWj1nRWvR73HT9/2b+mh5pfuhzMLiQuqmm92GrNLJKpeVSnRxkzi2C6i+fv68kLI+I5V1SG1Y3bm13qTW2ZRTLdyWse7Mvcv97FgRMWss+5d4uzS/9fbTwRTHfxRH6Gkuk5upvXmD4/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lE2NahgNWXKFEaNGsXTTz9NWFiYsuVdTk4O+fn5hIeHK/2RKioqOH36NEOHDm2waZ1Ox9ChQzl8+LDSn2rIkCF1trhzOBwuq5gA9Ho9zzzzDAsXLsRisRAbG8tTTz3F22+/7cJPSEigoqKCCRMmYDQaueeee8jPz1d6WNXqr3/9K2vWrOHTTz/FbDbj4eFBVVUV7du3Z9CgQaxatYpdu3aRmJjIm2++ic1mIz4+nkceeYTFixcrz8bpdBIXF4evry9z5szBx8cHX19fpUAGsG/fPgCeeOIJvvrqKzZt2kRYWBinTp1y8ZSSkkLnzp1p27YtX3zxBQChoaEYjUZllRnA999/z913361kb9GiBZMnT2bz5s1UVVUpq8muRXq9zi2NHG9UNYYGfppH9fFEMG90j1rTWXHMhvLyLDlXPZ9bcIGEW1o16B5qy3w9mJpHOXgimGr2mHuulCMH8wkPNhMe4ttgnsjPqDLNiwher0AzvdxGuyg1ZxbFbCjvenyGkuE5upvXGD8/iWDK6FHGzCKYaue5k6l93lG3Rxkzi2DK6lEWNahgFR4ezrp161i5ciVpaWnk5uYCNb2jRo8ezSOPPKL0cvL09GT+/PkNdwz4+flhMplc+lMBfP755/j7+ytfL1mypM51VVVVjBkzhqSkJOX4xx9/rKyqqh3ncDiYPXu2y/XDhg2jefPmAMrY9u3bs27dOgBGjBiBh4cH27dvV8536NCBXbt20aVLF1599VWF9dZbbyljdu/eDUC7du14/fXXlTEzZsxg8eLFyrjMzEwAEhMTmThxIgBOp5NbbrkFm82Gv78/lZWV7Nixg4kTJzJmzBhuvfVWJk+ezMSJE3nhhRc4deoUERERHD9+nF27drF8+XLlfomJiXh7e/OXv/yF7OxsoqOj/8dM1JXD4cRqvfC/B0omg0H9Dfw0j+rjiWDK4lFrOqtej+FBV//Q1qKpT73nRq2ZRTI1j+r0KGNmdzJLy6pIWZ1OeubFVR5xUUE8/VAcZu/6989Rc2ZRPBFMGT3KlFnkZyiZnqO7eY3p85MIpoweZcwsgql2ngim5lGdHmXMLIIpq8cbQX5+3r951VmDClYA3t7ejB07lrFjxzYU9ZsVFRWlFG9qVVJSwrlz54iKirrqdQBZWVnExMQoxzMzMwkPD8fLy0sZd+TIEZdrnU4nWVlZ9OpV87d/rVq1wsPDg8zMTHr37q1wbr31Vpd71d6ntpfVpfesHVObpbCw0GVMixYtcDgcysq12vOXXqvT6QgMDKSsrIyWLVuSnZ1NVVVVnecQGRmpXBsREcHevXuBmm0SH3roIQ4fPkxoaCjdunW74vP7rdIayl1ZjaGBn+ZRfTwRzBvdo9Z0VhyzobwOrYPw9TZedlsbX28jsa0CG+xXbZmvB1PzKAdPBFONHueuSq/TPycjy8K
7q9KZ8EiXBrpTZ2bRPBFMGT3KkPl6fIaS4Tm6m9cYPz+JYMroUcbMIphq54lgah7l4Ilgah7VyZNJDS5Y/R5KSEjgvffew2q14ufnB8CXX36JXq9XCkoAx48fZ/r06ezZswez2czAgQPx9fVlw4YNSiGpqqqKr776ioSEBOW63r17s2bNGu644w6sViuxsbEMHDiQoqIi7rzzTgBMJhNdunRh7ty5vP3223h4eGC1Wjlx4gTR0dFEREQAcMcddwDwySefsHLlSsLDwxk5ciRpaWk8/fTTAFitVgwGA1u2bKFHjx6UlZXRtWtXAgICAOjUqRNQ0+fKbDYzdepUJk6ciNlsZvDgwVitVvz9/TGZTEofq507d/KPf/yDnJyarQNqC2a158+fPw/Ak08+CdSsgDObzaxduxYvLy9atWpV7/nReljVVWPYD1XzqD6eCKZMHpMT45j7q7/Q79im5i/0G/pzSq2ZRTLdyZv2WHemLfzJ5Zcuvt5Gpj3WvUFzo+bMopiaR3V6lDGzu5ha/xzNoww8EUx38kR9hpLtObqb11g+P4lgyuhRxswimGrniWBqHtXpUcbMIpiyepRNDS5Y/fDDD3z66afk5ORgtVrr9HjS6XR8/fXXDb2Ni4YNG8aSJUtITk4mKSmJ/Px8Zs2axbBhwwgLCwNqCjODBw9Gr9fz7rvvkp+fz8yZM7nppptYuHAhQUFBtGvXjhUrVlBUVMS4ceMU/qU9oZ566im++eYbXn/9dW677TY6d+4M1BS6zpw5Q1FREQkJCcTFxfHuu+9y5MgRl35YtayKigrGjh1Lfn4+r732Gv7+/gwbNkwZ53DUVFz9/f0ZPXo0q1evZtu2bQAEBwcrYyorK7HZbAwaNIjQ0FAWL15MVVUVcXFxLs/o/fff59577yU6OppvvvmGadOmAXDmzBmgpvgFNYW3xx57DJvNxsqVK5Vj9elfBVoPq/+lxrAfquZRfTwRTBk8BgbC68m9yTtXSt55m9t6oFwqtWW+Hkx38AIDzayYPpA9h89y6KSFmNZBdG0f6gZ3NVJjZtFMzaMcPBFMtXnU+ueI4YlgyuhRlsyiP0PJ8hzdzWtsn59EMGX0KGNmEUy180QwNY9y8EQwNY/q5MmkBhWsPvjgA958802aNm1K586dad++vbt8XVX+/v4sWrSI1157jeTkZMxmM0OGDOG5555TxqxcuRK73U5wcLCyZZ/dbmfatGk8/vjjLFy4EIvFQmxsLAsWLFC23auoqOCDDz7g0Ucf5ezZs8ybNw+DwYC3t7cyBmDjxo2cOnWKqVOnsmLFCrZv345Op8PpdNKiRQtlXEpKCkajkdjYWD777DNsNhvBwcF4e3vTpEkTZZzT6eTJJ59k7969pKSk4O3tjV6vx+FwKD2sKisrcTgcvPTSSyxdupS8vDyCgoLIz88nJCREeTZQU3T66quvXPgAhw8fBuDQoUPAxd5iOp2OiIgIjh07htVqpbS0FF/fa/9HidbD6vIyGNS/H2pj8LhuaxaHsovo0DqAgT3bNJjXGDJrHhvO8/U0cGtsGFZrWYP6Vl0qtWcWwRThMbp5E7q2D3Xb3DSGzDJ6PFtURkm5HT8vAyEB7vnQrvbn2BjmRa0etf45mkc1epQxM7j/M5SMz1HGz08imDJ6lDGzCKbaeSKYmkd1epQxswimrB5vBPn5XaceVosXL+b2229n3rx59V6RU19FR0eTmpp6xfNbtmyhT58+zJ07VznWv39/pk6dSps2bdi8efNlr9u9ezelpaUMGTKE2NhY5fiMGTPYtGmTC799+/YMHz6c4cOHAzBixAj27t3L5s2b6dy5M5WVlfz4449UV1czfPhwEhMTAfjmm294+umnOXXqFBEREVitVqBm+8BLi2733HMPZ86cUXprVVdX4+3tzahRoxg1ahRQs5Kse/fu6HQ1/7iv/f8HH3yQf/zjHwrr5Zdf5pNPPiE5ORmA0tJSJVefPn2UcV26dKGsrIzKyso
rPtv/JW1/ziurMeyHqkaPB05Y+NfKX5Sv048X8NG3x3lheBdiWgU10KE6M4tmyuhRxswimDJ6lDGzO5ilZVXMW5vhsr1bpzZBJA3uiNnLPZ8b1f4c1TgvonkNZWr9c8TwRDBl9ChjZhFMGT3KmFkEU0aPMmYWwVQ7TwRT8ygHTwRT86hOnkxq0GaKVquV++6777oXq36LMjMziYqKcjnm5+dHSEgImZmZV70OqHNtdHQ0eXl5ylZ6l+MnJCRgt9uVVUzZ2dlUV1fX6a0VHR3tci+73Y5OpyMtLU0ZU1VVRWFhIXr9xSkqLy+ntLSUEydOKMcyMjIAlNVQtVsQ1vasqlVtL6varQe9vb0xGo3KtoO1cjgceHh4EBTU8CKAJk3u0qXFqks1a/nlj2vSpEmTpt9f89ZmcOCExeXYgRMW3l+T8Ts50tRYlDS4Ix0iXT+LdoisKXZq0qRJkyZNmjRp0qRJk6YbVw1aYRUXF0dWVpa7vFyTjh8/zvTp09mzZw9ms5nBgwfzl7/8BZPJBNQU0/z8/Opc5+/vz48//shdd92lbAk4efJkunTpolxnMpkoKipi+vTppKWl4eHhQWxsLE6nk+LiYry8vLBarTRp0oRvv/2Wt99+m6ysLMLCwtDpdOzYsYO0tDR27NgBQN++fUlNTWXt2rXYbDaMxprHXlxcDMCFCxcICAhgwYIFfPnll+Tl5QE1K6pqV0xBTcEqODiYUaNG4XQ6KSgoQKfT4enpiaenpwvz4MGD9OnTB6vVSmVlpbJiqvZ8VFQUO3bsYOnSpXzxxReUlJRgMBioqKhocLGqIQ1fb1Q1hgZ+avW45ocrF5gBNvx4kkF31G97QLVmFsmU0aOMmUUwZfQoY2Z3MU8X2FxWVtXK4YT9WRbOW8tpFuTzu3psTDwRTDV79Pf15IUR8ZwrKsPqxu0k1ZxZFE8EU0aPMmYWwZTRo4yZRTBl9ChjZhFMtfNEMDWP6vQoY2YRTFk9yqYGFaymTZvGE088QadOnRg0aJC7PP1PFRcXM3r0aCIjI5k9ezb5+fnMnDmT8vJypkyZctVrLRYLFouFSZMm0b59e5YtW8Zjjz3GmjVrXHpUPf744wC8+eablJeX88orr9RhnTt3jmeeeYYhQ4bw4osv8uOPP5KSkkJ1dTXJyclK8ay6uppPPvmESZMmERYWxvjx4wGU1VoATZo0obKyktzcXJxOJ82bN+fMmTPY7XZljE6n4+abb+abb77Bw8MDT09PwsLCyMrKoqCgwMWbt7c3FouF6upqgoKC8PLyIjs7m+zsbOLj4xk6dCipqak4HA7Ky8ux2+1Knyubrf77YOv1ugY3wr6R1Rga+KnN45FTxVc9fyiniFEqa74uw7xcD6baeSKYmkc5eCKYavN4/EzpVc/bKh1u+byg9ueotnm5Hjx3MkV9plRzZlE8EUwZPcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLD6y1/+QnV1NS+88ALTpk2jWbNmLlvYQU2RZe3atQ0y+WutXLkSm83GnDlzCAgIAGq21XvllVdISkoiLCwMPz8/SkpKXK6rqKjAYrFw8803M2bMGABuueUW7r//fhYsWMC0adPw8/OjsrKSI0eOsGHDBmXbvz179rB48WKys7MV/r59++jcuTOvvvoqALfffjsrV66kqqqKPXv2cOzYMQYOHMjmzZuZNm0aQ4YMASA1NZWhQ4eyZ88ehg4dip+fHwUFBTidTtLS0pRMTzzxBFu2bCE/P1+5586dO3nggQd48803lVydOnXiyJEjQM0KMqgpOn3zzTdEREQA8N133/Hkk0+Snp7OH/7wB6KiooiOjiYzM1MpUPXt25fs7GwOHTrE6dOnad68+TXPjcPhxGq9cM3X3egyGNTfwE+tHttF+JN+vOCK52NaBqim+bpM8yKSqXaeCKbmUZ0eZczsLqaPh+6q580mfYMaxav9Oap1XkTyAM4WlVHi5hVRas+
seZTDo4yZRTBl9ChjZhFMGT3KmFkEU+08EUzNozo9yphZBFNWjzeC/Py8f/OqswYVrAICAggICKB169YNwVyztmzZQo8ePZTCDkD//v2ZOnUqW7duJTExkaioqDq9qtLS0nA6nfTu3Vs5ZjKZuOeee9i0aRNwsXdVZGSkS48qvV6PTqfjxx9/pFu3bkRGRpKens4TTzyhjHE6nVRWVmKz2Th16hStWrXCYDBgt9u5//77lXHnz58H4NixY8o9bTYbCQkJLpkMBgOAkqlFixakp6fTv39/ZUxJSQlVVVXk5eVRWVnp4rlJkybKf9f2sLp0m8SmTZvSokUL/vrXv+Lv709YWBh33nmnkqW+0hrKXVmNoYGf2jwO7BHJfzdfeVvA/re3Vl3zdRnm5Xow1c4TwdQ8ysETwVSbxxB/b2JaBXAou6jOudjWAQT7ebnFr9qfo9rmRRSvtKyKeWszXLaB7NSmpueU2avhvW7VmFk0U/MoB08EU/MoB08EU/MoB08EU0aPMmYWwZTRo4yZRTBl9SiLGlSwWrJkibt8XJMyMzN5+OGHXY75+fkREhKiFKkSEhJISUlh5MiRpKenYzablWLN4MGDXa6Njo5m0aJFlJeXEx8fj16vx2q1Kn2uYmJiyMvLo2nTpgo/JiaGdevWsWbNGt555x08PDzo3LmzslopMzOTiIgImjVrxpkzZxg5ciRZWVmEh4cTEBBAYGAgubm5ANxxxx1ATV+uXr16YbPZiIuLU3zX3vOmm24iPT2dlJQUJk6ciNlsJiYmBr1ej91uJycnh+joaFq1akVeXh533303FRUVtGrVioKCAgwGA4mJiQBUVlbi5eXF5s2b2bZtG5WVlbz44oucOXOG2267jfDwcBFTp0lTvfTC8C7MWv7LZY9r0qRJkyZ16oprrOr/NzGaVKp5azM4cMLicuzACQvvr8lgwiNdfh9TmjRp0qRJkyZNmjRp0qSp0alBBavfS1ar1WWlUK38/f0pLq7pdzNgwADefvttDh48yJNPPsnJkydZtWoVgEuvqtGjR3Ps2DGcTifFxcWEhYVhMpmwWCz069eP+Ph4UlNTOXfuHDfffLPCj42NBeDo0aM88cQTlJaWsnTpUgIDAyksLFTGhYSEkJubS3V1NRMmTGDTpk3s2rWLW2+9lb179wLQrFkzAHJzc0lMTKR169bMnz9fKTbVsmJiYgDIyspyydSqVSuys7OVcQkJCSxdupSqqiqqqqo4fvw4ULPFYG32wsJCtm3bRlhYGGfOnAHg9ddfp1WrVrz//vsNmh+jUWsq92s1hgZ+avbYKSqYxS/344ttWRw4WUSH1gEM7NmmwVw1ZxbFlNGjjJlFMGX0KGNmdzFPF9g4eJnVVQAHs4s4by2nWZBPvflqf45qnRcRvNMFNpeVVbVyOGF/lqVBc63WzCKZmkd1epQxswimjB5lzCyCKaNHGTOLYKqdJ4KpeVSnRxkzi2DK6lE2XVPB6ueffwagW7duLl//L9WOv55av349JpOJmJgYUlJSMJvNSmGnticUgMPhwOG4uDyvoqKC8vJyQkJC2L9/P1u2bCEmJgaHw8G5c+fw9fUFYPfu3UBNESk1NRWj0UjPnj1JS0tz8XH69GmMRiMGg4E333yT8PBwOnfuTFZWljKmtmDUqlUrNm/ejM1mo3Pnzuzbt8+lD9cvv/wCQNu2bZVMvXr1Ytu2bS733Lx5M56enuh0OoxGI82aNaOwsJDU1FQeeughoqOj8ff3JyoqikOHDqHX63E4HDz44IPs2rWLv/zlL7z33nvodFfvP3E56fU6YQ2ybwQ1hgZ+avb46MBObmNdKjVnFsWUyeNXO06SfuwcN7cNoV9392xh+9HXh9l75Bxd24cytG87tzBBrnkRyVQ7TwRTbR6Pnym96nlbpcMtnxfU/hzVNi8ieNdjrtWW+XowNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7LomgpWI0eORKf
TsXfvXkwmk/L1leR0OtHpdBw8eLDBRi+Vn5+fSyGnVsXFxfj7+wM1fa569erF3LlzlfMLFixg1qxZfP/99zzyyCNAzbaGH3/8MVOmTMHf39+lEPXBBx8o186YMYPly5fTpUsXADIyMgAYP3680hPL6XTSrVs3SkpK8Pf3p7KyknPnzuHt7c26desU1jfffMPTTz9NYGAggFLk6tOnDy+++KIy7plnnuH7779XMh09elTxUturymq1KgVBf39/cnJyyMnJwd/fn7S0NEwmEwCpqanMmDGD2bNn8/bbb+Pp6cnZs2cZO3Ys7dq1Y/LkyUyePJmsrCyGDx/O1q1bla0Kr0UOhxOr9cI1X3ejy2BQfwM/zaP6eCKYMnnMyivm1dSfqUV8vzuXOZ/8wtSx3YlsXneV7m9RRlYBbyzbo3ydfryAxesPMvnReGIjg+rtVaZ5EclUO08EU60efTyu/kcvZpOewkJbvdig/ueo1nkRwRM512rNLJKpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ9dUsFq8eDGAUgSp/fp6KyoqSunrVKuSkhLOnTunFHIu1+eqQ4cOQM1KpdqCVe3Y8PBwvLy8FO65c+fq3LOyspJWrVoBcPbsWXQ6HZmZmUrBSqfTERISQklJCVFRUWRnZ+NwOLhw4YJLMS06Ohqo2S6w9v5Go1HpaVWriIgIqqqqlEy1njIzM5Vjfn5++Pr6cuHCBVq2bMn27dsBiIyMVOYJalZlAcr2gBaLRenPdekKs9pnlJ2dfblH/5ukNZS7shpDAz/No/p4IpgyeLy0WKUwHfDKhz8x/4W768W8tFh1qWYs3c3CSfVjXioZ5uV6MNXOE8FUm8cQf286tQniwAkLjkt6Vul10CEyiGA/L7f4VftzVNu8iOBdj7lWW+brwdQ8ysETwdQ8ysETwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KomsqWHXv3v2qX18vJSQk8N5777n0svryyy/R6/X06tULqFl5VFVVxdixY9mzZw9ms5mBAwei1+tdVnxVVVXx1VdfkZCQoFxnMBg4dOgQd9xxB1arldjYWKVAVLvCqrS0lODgYObOncvbb7+Nh4cH99xzDyUlJXh7exMREcGuXbuU+wwaNIjCwkLCw8NJTEwELhaRrFYrvr6+fP/99/To0YOysjK6du2KXl9TdazNZLPZ8PPzY+rUqUycOBGz2czgwYOprq4mNDQUk8mk9LHav38/Xbp0Qa/X06ZNG/r27QtAUFCQ8v/e3t4cOHBA6Y3Vo0cP7rnnHgBatGhR7/nReljVVWPYD1XzqD6eCKYsHr/fc6pOsapWdgds23+ahC7X9nNuzQ+ZVz2/4ceTDLqjfr3VZJkX0Uy180Qw1ewxOTGOuavTSc+0KMc6tgni6YfiGvxZQe3PUc3zIoInaq7VnFkUU/OoTo8yZhbBlNGjjJlFMGX0KGNmEUy180QwNY/q9ChjZhFMWT3KpmsqWP1a1dXVlJeXK32dfq3S0lK8vLwwGht0mzoaNmwYS5YsITk5maSkJPLz85k1axbDhg1TelM5nU4WL16Mh4cH7777Lvn5+cycORMPDw8OHz7MokWLaNeuHStWrKCoqIhx48YpfKfz4p+HPvXUU3zzzTesWrUKuLgCyel0cuHCBWw2GwkJCcTFxZGamorNZqN9+/Yufp1OJxaLhVGjRpGfn8+///1vAG6//XZlTHV1NXa7HX9/f0aPHs3q1as5ceIEOp3OJVNFRQVWq5VBgwYRGhrK4sWLqaqqUnxdes82bdpw991388svv/DOO+8AcO+99wI1q8H++Mc/snz5crp27QrUrJzbunUrbdu2pUePHvWaG62H1dXVGPZD1TyqjyeCeaN7zDxdd9vYS3Usz8rgPtfWe+rIqeKrnj+UU8QolfVpEcGU0aOMmd3BDAyE15N7k3eulLzzNsKDzYSHXP4zY32l9ueoxnk
RwRM912rMLJqpeZSDJ4KpeZSDJ4KpeZSDJ4Ipo0cZM4tgyuhRxswimLJ6lEUNqiRNnz6dnTt38vnnn1/2/J/+9Cduu+02Xn755Ybcpo78/f1ZtGgRr732GsnJyZjNZoYMGcJzzz2njDGZTJSVlREUFKRs2We325kyZQo333wzCxcuxGKxEBsby4IFC2jZsiUAPj4+OBwOhg0bhsViYd68eRgMBkwmE5WVlcq2fk6nE5vNxtSpU1mxYgXbt28nMDAQm81G06ZNFZ9Qs53gXXfdxWeffYbNZiMwMJDCwkKaNWsGgMFgoLS0lGeffZadO3eSkpKCt7c3Op0OLy8vl0xVVVW89NJLLF26lLy8PIKCgsjPz1dYtff8xz/+werVq1m6dCkOhwMvLy/Ky8tdimQTJ04kKCiIOXPmADVFsxYtWvDBBx+4bCd4LdJ6WF1eBoP690PVPKqPJ4Ipi8eo5k34/irnbwr3u+aeKu0i/Ek/XnDF8zEtA1TTp0UEU0aPMmYWwfT1NHBrbBhWa1mD+lZdKrU/x8YwLxlZFnLO22gVYqZDA3rwXSp3z7WM86J5VKdHGTOLYMroUcbMIpgyepQxswim2nkimJpHdXqUMbMIpqwebwT5+QnqYfVr/fDDD/zhD3+44vn77ruPtWvXNuQWV1R0dDSpqalXPG8wGAgJCeH7779XjtUWrtq2bcvHH3982euqq6sBuO222xgwYIByPDExkUOHDikFJKfTiaenJ8OHD2f48OEAOBwOOnToQFVVFYBSRIqKiuJvf/sbf/vb3wB46623eO+99/Dx8QGgoqICgMGDB/PMM8+4+C0rK3PJ5Ovry6hRoxg1ahQAeXl59OnTh8rKSuVeUNPbasmSJcq1DzzwAMeOHVMKc1BTAAsPD8fHx4fi4mJCQ0Pp2bOn4ru+0vbnvLIaw36omkf18UQwb3SPd8SFs2jDoctuC2jQQ89Oza+ZPbBHJP/dfOVtAfvf3lp1fVpEMGX0KGNmEUwZPaoxc37hBf6xeCelZdXKMV9vI38ffSshAT7usKj656jGeRHNE8GU0aOMmUUwZfQoY2YRTBk9yphZBFPtPBFMzaMcPBFMzaM6eTKpQQWrs2fPKtvVXU6hoaHk5+c35Bb1lt1up6ioyKXP1datWwHqrB46fvw406dPZ8+ePeh0OgDS09OVglVVVRW5ubnY7XbKy8vx8vJCp9NRUVHBG2+8wYYNG7BYLEREROB0OvHw8ADgzJkzCv/ZZ58lLS0NDw8PpVB14ULNSiRPT08APvzwQ3bu3ElWVhZhYWEUFBQo52ozFRYW8uqrr7Jx40ZsNhvh4eEumVq2bElERARvvPEG//nPfzh69CghISGcPXuWmJgYl+zDhw936bN15swZli1bxogRI4iOjm7wHGjSpEnT76mXR9/K9EU7XYpWBn3N8frqheFdmLX8l8se16RJk6bGoF8XqwBKy6p5bdFO/jM+4XdypUmTJk2aNGnSpEmTJk2aNDWwYBUQEEBWVtYVzx8/fvyK/a1Eq7KyEi8vrzp9rvz9/ZVVVAAjRoxg9+7d3HLLLcyePZuVK1fy9ddfk5qaSrNmzZQ+V7UrnYqLi5WClZeXFwsXLmTIkCGEhoaycOFC4OKKqeLimn4nWVlZWCwWkpKSyMjI4KuvvnI5b7fb8fHxYdmyZXTr1o0///nPLF++XCmQXZpJp9OxcuVKRo4cCcCSJUvQ6XQuK7HuuecePvzwQ+Xr/Px89Ho9U6dOdXlGmZmZeHl5ceedd7Jx40aaNGlCVFSU4r++amgj9RtRjaGBn+ZRfTwRTJk8RrcI4MMX+5G2L48jp4ppF+HPHZ3DG8TsFBXM4pf78cW2LA6cLKJD6wAG9mzTICbINS8imWrniWBqHtXpUa2Z9x0/X6dYVavSsmoOZhcSF9W03ny1P0e1zotIngimjB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnqUTQ0qWPXu3ZuVK1cyaNA
gOnTo4HIuIyODjz/+mPvvv79BBusrnU7H0KFDOXz4sEufqy1btriMO3PmDE6nkzlz5hAQEMD+/fv57rvvcDgczJ8/n+LiYmJjY3nqqad4++23Xa6tqqqiTZs2rF+/HqPRSP/+/fniiy/Izc2t4ycwMJA5c+YQHh7OsGHDWLlyJdnZ2cp5h8NBaGgomZmZpKenEx8fj5eXF5mZrttPVVdXExUVxcqVKzGbzQwbNoxly5Zx/PhxZcwLL7xAREQE8+fPJz8/H51Oh06n49ixY3Tt2hWAo0ePUlhYCMDGjRsBKCkpYe/evXzzzTd15vO3Sq/XERhorte1MqgxNPDTPKqPJ4Ipk8dBd7Z1C+dSPTqwk9uZINe8iGSqnSeCKZvHMa9+SUFxBcEBnnz4d/d91lRzZncw8yw5Vz2fW3CBhFta1ZtfK7U/R7XNy/XgiWDK6FHGzCKYMnqULfNHXx9m75FzdG0fytC+7dzGle05iuCJYMroUcbMIpgyepQxswimrB5lUYMKVuPHj+eHH35g6NCh3H333dx0001ATTHku+++IygoiPHjx7vF6LXKz88Pk8lUp8/V559/jr+/v/J1s2bNaN++PQEBAcp1drsdgAkTJpCYmAjAxx9/jE6nU641GAzY7XbeeustYmNjFd7WrVuVlVO1YyMiIpSiENSsuFq5cqWyOs3X15fy8nKee+45xowZo4z785//TGZmJqdOnSIiIgIvLy9sNhsrV650yfDpp59y7tw55Wu9Xs+jjz7Ko48+yqRJk9i/fz89e/Zk5syZJCYmYjAYmDlzJk2bNiUmJkYpxD344IP07t2b0aNH43A40OuvvRLscDixWi9c83U3ugwG9Tfw0zyqjyeCKaNHGTOLYMroUcbMIpju5L332T627T+rfH2+qIJBf11DQudmPP5g/QvJas7sTmZ40NX/0dSiqQ+FhbZ6sUH9z1Gt8yKSJ4Ipo0cZM4tgyuhRtswZWQW8sWyP8nX68QIWrz/I5EfjiY0MUoVHUUy180QwZfQoY2YRTBk9yphZBFNWjzeC/Py8f/OqswYVrMLCwvjvf//Lm2++yTfffMOmTZuAmgLMoEGDeO65567a40qkoqKi6qxOKikp4dy5c0RFRSnHMjMzefjhh12ug5oVUZden5mZSXh4OF5eXgA0adLEZTyA0+mktLSUsrIyysvLadWqFTqdThlbq9pCVUlJCVBTJAPqPKvS0lLl3hEREfj6+lJVVeVSrCopKaGiokJhXUkdO3Zk0aJFWCwWQkJCyMrKoqCggK1bt9KtWzdl3Mcff8zHH3/M+vXr693HSmsod2U1hgZ+mkf18UQwZfQoY2YRTBk9yphZBNMdvEuLVZdqy74zjBlQv9Xhl0qNmd3J7NA6CF9v42W3BfT1NhLbKtAtftX+HNU2L9eDJ4Ipo0cZM4tgyuhRlsyXFqsu1Yylu1k46e4GsUGe5yiSJ4Ipo0cZM4tgyuhRxswimLJ6lEUNKlgBhIaG8sYbb+B0OrFYLAAEBQWh0+kabK4hSkhIICUlhZEjR5Keno7ZbCYmJga9Xk+vXr2UcVarVSkYAcTHxysrnpYuXcrixYuJiYkhLy+Pfv36KePCw8PZt28fTz31FHv27MHDw4POnTtz4ULN6qLi4mLCwsLw9PQkNzeXBx98kKysLMLDwwkICMDHx4eqqioAZWXawoULmT59Ojabjbi4OPbt26ewAEJCQjh79mydTDqdzqXXFcAnn3zCBx98QHZ2NkajkXXr1uHr60tgYCAAkyZN4tlnn73ss/v73/9OeHj9+7xoPazqqjHsh6p5VB9PBFNGjzJmFsGU0aOMmUUw3cUb//bmq56f+O4PvD3+znqx1ZpZBHPaY92ZtvAnl6KVr7eRaY91b/BnOLU/RzXPiyieCKaMHmXMLIIpo0eZMq/5IfOq5zf8eJJBd9Sv96tMz1EUTwRTRo8yZhbBlNGjjJlFMGX1KJsaXLCqlU6no2nT+jdpdrcGDBjA22+
/zcGDB3nyySc5efIkq1atol27di4rmex2OwsWLOD//u//APD09KRLly6kpaXRokULRowYQWpqKufOnaN///7KdbUrq37++Wcef/xxSktLWbp0Kb6+vsrKKABvb28KCwsJCQlhwoQJbNq0iV27dhEREaGMqX1u+/btIzExkdatWzN//nwqKytdMrVs2ZL9+/fXyRQcHKwUtQ4dOsTf/vY3Dh06xKBBg/D29ubYsWP88MMPDB8+HKOxZspre1RNmDCB2267DYDk5GRsNhubN2/m0Ucfrddz13pYXV2NYT9UzaP6eCKYMnqUMbMIpoweZcwsgtlQXmFp1VXPW0qqGvwZRG2ZRTADA82smD6QPYfPcuikhZjWQXRtH+omdzVS+3NU47yI5olgyuhRxswimDJ6lCHzkVPFVz1/KKeIUSp7nxbBVDtPBFNGjzJmFsGU0aOMmUUwZfUoi66pYDVnzhx0Oh1PPfUUer2eOXPm/M9rdDodycnJ9TZYX61fvx6TyURMTAwpKSmYzWZ69erF9u3byc/PV4pWBoOB6uqLf2FaUVGhrJgqKiri7bffJiYmBofDwYYNG5TizpkzZwC45ZZbSE1NxWg00rNnT9LS0lx6XVVVVWE2mzEYDLz55puEh4fTuXNnjhw5QufOnZUxUFNE2rx5Mzabjc6dO/PLL79QWVmpsGr7VP0609atWwkKCgIgODiYnJwcvL292bhxIzqdDqPRSEREBKdOnarznFq3bk2XLl2AmmJdYGAgGRkZ9X7uWg+ry8tgUP9+qJrHhvMysizknLfRKsRMhwbs0X6p1J5ZBPNsURkl5Xb8vAyEBDT8Db4xZNY8qtOjjJlFMN31PR3o63HVolVQE49691+ScV6imzeha/tQrNayBvWtulRqf46NYV40j+r0KGNmEUwZPcqUuV2EP+nHC654PqZlgGrep0Uw1c4TwZTRo4yZRTBl9ChjZhFMWT3eCPLzE9TDqrZg9cQTT2AymVRdsNqyZQu9evVi7ty5yjGr1Ur37t3ZunUriYmJAHTp0oWAgABlzO7du7HZbOh0Ol599VVl3IwZM5QeXXCxYDVp0iRiYmKAmh5WN998MyaTCS8vLyorK7HZbISGhrJu3Trl2q+//prk5GSCg4MBOHHiBAD3338/SUlJyrhHHnmEX375RVnNdf78eQDeffddpYhltVrp1q2b8nVZWRk2m413332Xfv36MWnSJPbv388f//hHZs2aRWVlJSaT6bLP7Ntvv+WVV15h48aN1/Ko60jbn/PKagz7oWoer135hRf4x+KddbZX+vvoWwkJ8HGHRdVlFsEsLati3toM9mdZlGOd2gSRNLgjZi+P393f9WBqHuXgiWCq0aO7v6fffKY3j8389orn/5Xcu8HPQIZ5Ec0TwVQ7TwRT8ygHTwRT8ygHTwSzobyBPSL57+YrbwvY//bWqnufFsFUO08EU0aPMmYWwZTRo4yZRTBl9SiLrmkzxUOHDnHw4EGl4HHo0KH/+b+DBw8KMf6/lJmZqRR6auXn50dISAiZmRc/RCUkJLBt2zasVqtyHVCn11V0dDR5eXlKr6ji4mKMRiMbNmxQxlRXV+N0OpU+UdnZ2TidTs6ePasUpQDlXrXbAmZnZ+Ph4cHmza69GWw2GwaDQRlXVFSETqfjq6++UsY4nU50Oh0hISEu/tu0cd0bOjo6mqqqKnJyclyOT5s2jdjYWHr06MGkSZP49ttviYuLu8JT1aRJ0+X062IVQGlZNa8t2vk7OWqcmrc2gwMnLC7HDpyw8P6a+q/61KRJ0+8nEd/Td8SFXdNxTZo0adKkSdP10wvDu1zTcU2aNGnSpEmTpl+r3j2sKisr+eGHH2jRooWywkhNslqtVFVVMXbsWPbs2YPZbGbw4MH4+fkp/Z4Ahg0bxpIlS0hOTiYpKYnt27cDEBcXxyOPPILFYiE2NpaCggKcTifFxcV4eXlRWlpK27ZtmTdvHgsWLMBkMhEQEIDdbleKR7X
3adq0KYMGDcLhcBAYGEhFRQWAso2f1WqladOm7N69m5tvvhmHw0FoaCinTp3CYDAoXmvvOXXqVF555RW8vb3x8fFBr9cTHh7ucs/333+fbdu2cf78eQwGg7LCKycnh+joaA4ePEiLFi3o06cPLVu2ZOHChaxevRqAt956q0HPvqENu29ENYYGfprH+mnf8fN1ilW1Ki2r5mB2IXFR9e/vp8bMIpinC2wuqzBq5XDC/iwL563lNAuq32o1tWYWyRPBlNGjjJndxRT1Pf1/g+P4v8FxPPefzRRYq2jq58Fbf76z3j5rJcu8iOSJYKqdJ4KpeVSnRxkzi2DK6FG2zJ2igln8cj++2JbFgZNFdGgdwMCebf73hf9Dsj1HETwRTBk9yphZBFNGjzJmFsGU1aNsqnfBysPDg/Hjx/PSSy+psmDldDr55JNP6NChA7NnzyY/P5+ZM2ei17u+WPz9/Vm0aBGvvfYaycnJ6PV6dDod+/fv5/nnn6d9+/YsW7aMvXv31uHn5uYSFBSE3W6npKSEs2fP4uPjg5eXl8vYgoICWrRowfnz5ykuLqaysrKOX5vNhpeXF35+fhQUFHDu3Dk8PT2x2+0u9zx16hTBwcFcuHBB2f7P29u7zjZ/a9asUf67urpaKUbV9rFq164dbdq0YePGjVgsFhyOi0sUa7c7rI/0el2DG57fyGoMDfw0j9emPEvOVc/nFlwg4ZZW9ebXSk2ZRTCPnym96nlbpaPBP1vUlvl68EQwZfQoY+aGMkV/T6dOHVDva6+mG31ergdPBFPtPBFMzaMcPBFMzaMcPBFMd/IeHdjJbaxLJdtzFMETwZTRo4yZRTBl9ChjZhFMWT3KonoXrHQ6HZGRkRQWFrrTj9tkMpmoqKhgzpw5So8qu93OlClTMBpdY0dHR5OamgrAokWLeP311xk9ejRjxowB4JZbbqF3794UFRUpvaKcTidWq5UNGzYoWw+mpaUxbtw4nE4ngDI2KiqK9evXK/dLSkri+++/V84bDAZKSkp49dVXeeSRR4Ca7f969erlUogymUxUVlaydu1aJdNHH33kkqmWedddd/H+++8r1w4YMIDjx4/To0cPAFq3bs2CBQuorKzkgQceICkpiRdffBEPDw8yMjIYMKB+vwhyOJxYrRfqde2NLINB/Q38NI/144UHXf0NqEVTnwY1s1djZhFMHw/dVc+bTXpVNWn+YW8eR3OLaRfhzx2dwxvMU+u8iOSJYKqdJ4KpVo8iv6fB/bnPFpVRUm7Hz8tASEDD/2Gh1nkRyRPBVDsPtNeOLB5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53Vu2AFNYWXmTNncv/999fpF/V7y2Aw4OvrqxR2AHr37g1w2RVOtaqurtnaq1Oni38RZDKZaNGiBSUlJcrqKafTiaenp0vunj17otPpqKqqAqBZs2ZA3X5SMTExfP/99/j41GyFU7tFYM+ePZUxAQEBBAUFUVZWdk2Zav21a9fO5Z7R0dEcP35c8VSrBQsW4OfnR2JiIi+++OIVn8u1SGsod2U1hgZ+msdrU4fWQfh6Gy+7LaCvt5HYVoFu8aqmzCKYIf7edGoTxIETFhzOi8f1OugQGUSwn5cqmjRnnbby+pKd1H7m+H53Lgs+P8DLo2+ldZhfg9ju8iiaKaNHGTM3lBni701MqwAOZRfVORfbKsAt39PQ8NylZVXMW5vhsn1hpzZBJA3uiNnL43f3dz2YMnp0B0977cjpUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdSggtXevXsJCAhg0KBBdO/enRYtWtTZDg/g5Zdfbsht6iW73U5hYSEjR44kPT0ds9msbF346+3zLlXtSqXly5cza9YsLBYLMTExZGdnY7fbKS8vx8vLC51OR0VFBY899hh79uzBw8ODzp0743Q68fCo+Udq7dZ6GRkZPPjgg2RlZREeHq48ows
XalYieXp6AjB16lQOHz6MzWYjLi4Oi8Xi4vW3ZCovLwdg9erVrFmzhvz8fF544QUyMzMVT9HR0QDk5eWRkpJC27ZtiY+PB6CqqkrpraVJk6bfpr+PvpXXFu10KVr5ehv5++hbf0dXjU9Jgzvy/hrXX/51iKz55Z9adGmxqlZ2B0xftJP5L9z9+5jSpEml0l1pkdXVF19dV81bm8GBExaXYwdOWHh/TQYTHuny+5jS1CikvXY0adKkSZMmTZo0adKkyf1qUMFq6dKlyn9v3779smN0Ot3vUrCqqKjA6XRy8OBBnnzySU6ePMmqVaswmUzKKiqA0aNHk5eXx6ZNm4CaIpJOp+Pnn3+mX79+xMfHk5qaSklJCQDFxcVKwUmv1/Pzzz/z+OOPU1paytKlS/Hw8ED3/39DU1xcDMDp06fx8fFhwoQJbNq0iV27drmct9vteHh4sHXrVhITE2ndujXz58/Hbrcrq69+a6Za5rlz5+jYsSP5+fl89dVXZGVluZyfOXMm69evp7KykpCQENq3b89///tfzGYznTt3btCzNxq1pnK/VmNo4Kd5rL+aB/sy9693ceCEhexzNlqFmOkQ6Z7Cr1ozi2D6+3rywoh4zhWVYXXz9kru8Pf9nlN1ilW1sjtg2/7TJHRpUS+2mudFFE8EU+08EUy1ejxdYOPgyaLLnjt4sojz1nKaBfnUm+8uj5cWyGvlcML+LEuDPKp1XkTyRDDVytNeO/J5lDGzCKaMHmXMLIIpo0cZM4tgqp0ngql5VKdHGTOLYMrqUTY1qGB16NAhd/kQIoPBQExMDCkpKZjNZnr16sXWrVtdttlzOBzY7Xbl6+rqapxOJ927d2f//v1s2bKFmJgYysrKlKIVQFlZGQ6Hg27dupGamorRaKRnz56kpaW5jAMIDw/HYDDw5ptvEh4eTseOHcnIyFDOX7hwgaqqKnr27MnmzZux2Wx07tyZ3bt343C4/nb0t2QCmDRpEitWrAAgNzeX559/npkzZyrn7XY7+fn5eHh48MMPPxAWFgbA448/Trdu3er7yNHrdQ1qon6jKvdcKUcO5hMebCY8xNdtXFmbDKrVY69AM73cQqortWYWwRT1M6Sh/jJPl1z1/LE8K4P7tLvqmP8lNc+LKJ4Iptp5Iphq83j8TOlVz9sqHW75Xle7R7XNy/XgiWCqjae9dsTwRDDVzhPB1DzKwRPB1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiBhWsanXkyBE2b95Mbm4uABERESQkJNTpo3Q9pdfradGihcsqMKvVSrdu3bBarcqxJUuWuFxXuwJp4sSJ3HzzzcrxsWPHsm3bNvz9/YGawlaTJk1YuHChMsbpdNKxY0eleFTbo6p37968+uqryrgVK1aQkZGh9Lqq9fP2228rfID777+fU6dOXVOm2uvvvPNOxo4dS/v27Rk7dqwyF7Xnv/rqKwA+++wzQkNDAZRCldVqxc+vfr1YHA4nVuuFel17I6q0rIqU1emkZ178K9y4qCCefigOs3f9+xsYDHI2GVS7Rxkzi2CqlRfVvAnfX+X8TeF+FBba6sWWcV5EMNXOE8FUq0cfj6vv+2c26ev9/QLq96jWeRHJE8FUK0977cjnUcbMIpgyepQxswimjB5lzCyCqXaeCKbmUZ0eZcwsgimrxxtBfn7ev3nVWYMKVpWVlUyZMoU1a9bgdDrR62tu6nA4ePPNNxk0aBDTp0+/as+o6yndFZsp1G/slcY4nU63+bhWVlRUFACZmZnKf9d+7eHhQcuWLYGL/bUGDhzocv0777zDO++8w759+5TeWtcqraHcRc1dlV6nv0FGloV3V6W7pb+BrE0G1e5RxswimGrj3REXzqINhy67LaBBDz07NW+wXxnnRQRT7TwRTLV5DPH3plObIA6csOC45KOMXlfTmy7Yz8stftXuUW3zcj14Iphq42mvHTE8EUy180QwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFN
Wj7KoQQWrf/7zn3z22WcMHz6cRx99lFatWqHT6Th58iRLlixhxYoV+Pv789JLL7nL72+Ww+EgLy/PZbXQl19+CVBn9dDx48eZPn06e/bsUY599dVXSi+nqqoqZfvD2h5WRqOR8+fP88Ybb7BhwwYsFgsRERHY7XZlZdWFCzUrjXbu3Mmzzz5LWloaHh4e+PrWbAnn4eHh4uedd95h586dZGVlERYWRl5enkvBqjbTq6++ysaNG7HZbISHh7swWrZsSWRkJJ9++inLli0DYM6cOTRp0oTbbrtNKR726dOHtLQ0AEwmE+Xl5dRujThhwgTFW32k9bCqkdbfQD6PMmYWwVQzb+rY7rzy4U8uRSuDvuZ4Q372yTgvIphq54lgqtljcmIcc3+1yrhjm5pVxg39rKB2j2qeF1E8EUw187TXjlweZcwsgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6lE0NKlitXbuWwYMHM2XKFJfjUVFRTJ06ldLSUtauXfu7FKx0Oh0mk4nk5GSSkpLIz89n1qxZ+Pv74+19cQ/JESNGsHv3bm655RZmz57NypUr+frrr1mwYAEhISG0a9eOFStWYLO5buvh4+ODl5cXCxcuZMiQIYSGhirbA/662HP8+HHOnz9PUlISGRkZynZ8l7J8fHxYtmwZ3bp1489//jPLly/Hbrcrq9ZqMzmdTlauXMnIkSOBmi0NdTodRuPFqRw3bhx///vflWJW06ZNycnJcdniMCQkhKqqKkwmE3/961+Jjo5m9OjRQM2WhUlJSfV67loPq4vS+huI4Ylgqp0ngimjR3fwAgPNfPbPwXz900n2Hj3HzW1D6Ne9tRvc1UjGeRHBVDtPBFONHgMD4fXk3uSdKyXvvM3tfRxB/R7VOC+ieSKYauRprx05PcqYWQRTRo8yZhbBlNGjjJlFMNXOE8HUPMrBE8HUPKqTJ5MaVLCqrq52KYL8Wl27duW7775ryC3qLX9/f/r27cupU6dITk7GbDYzZMgQ1q1b59In6syZMzidTubMmUNAQADZ2dl8/fXXOJ1O5s+fT3FxMbGxsYwaNYoPPvhAudbPz4/c3FzatGnD+vXrMRqN9O/fn88//5xz584pHmoVGBjInDlzCA8PJzExkVWrVmGxWBRWZWUloaGhZGZmkp6eTnx8PEajUekLBtCkSROKioqIiopi5cqVmM1mhg0bxrJly1zGFRYWYjKZlGJXeXk5I0aMYOXKleTn5xMWFkaTJk0AePjhhxkxYoRybUhICPn5+fV+7loPq4vS+hvI51HGzCKYaucBdI8JpV/31litZQ3qw1MrGedFBFPtPBHM5ZsOc+hkER0iAxjWr70bHLrfo6+ngVtjw9z2/QLq99gYXjsyehSRWXvtyOHxbFEZJeV2/LwMhAS455cPas8sgimjRxkzi2DK6FHGzCKYaueJYGoe1elRxswimLJ6vBHk53edeljdcccdpKWlMXz48Mue/+GHH+jVq1dDblFvRUVFUVhYSGpqqnKspKSEDz/80KW3U7NmzWjfvj0BAQHKdVDTO2rChAkkJiYCMHPmTMLDw/Hy8gJqikx2u5233nqL2NhY5Zr169dTWFgIoGyRGBISwsaNG5V7fvPNN6xatYpTp04B0Lp1a6qrqxk+fDhPPfWUMu6RRx4hOzubU6dOERERQUBAAEVFRaxcuVIphpWUlLBs2TKlSAawZcsWevfuzdy5c2nfvj1jx45l6NChLF++nK1bt5KYmMhNN90EgNl8cYXP4cOHGTp0KOfPn2/Ak9d6WNVK628ghieCqXaeCKaMHmXMLIIpo0c1Zt51+Czvrt6vfH3iTAnrf8zhz0M60eWmUHdYlOI5NjaeCKaMHmXMLIIpg8fSsirmrc1w2ea7U5sgkgZ3xOxV/y3UL5XaMl8PpoweZcwsgimjRxkzi2CqnSeCqXmUgyeCqXlUJ08mNWgzxfHjx3Pq1CmeeeYZtm/fTm5uLrm5uWzbto3
k5GTy8vIYP348RUVFLv9zh44fP87YsWPp0qULvXr1YtasWVRWVirnExIS2LZtG1arVTn25ZdfotfrOXHiBHfddRedO3fml19+UXpKAcTHx+Pr64u3tzfvv/8+Xbt2pVu3bnz00Uf06NFDGdesWTOgpj/Vgw8+SFxcHHfeeScVFRWUlJRQXl6OyWTCbDZTVVXFG2+8Qa9evejSpQtTpkzBZDIphaHWrWu2k8rKylIy9ejRg/37a34RlZmZCUBwcDA6nY41a9Zw3333ERcXx8CBA9HpdC7PNTMzk4iICF588UUA3nrrLV5++WWaNm2qsBISEtDpdHz44YfEx8fTqVMnEhISSE9Pp0WLFm6ZI02QNLgjHSKDXI51iKz5R64mTZo0adJUX11arLpU//n08sc1adKkSdO1a97aDA6csLgcO3DCwvtrMn4nR5o0adKkSZMmTZo03dhq0AqrAQMGAHDkyBG++eYbl3NOZ82SkoEDB9a57uDBgw25LcXFxYwePZrIyEhmz55Nfn4+M2fOpLy8XOmnNWzYMJYsWVKnh1VcXBwLFy5k4sSJtG/fnjFjxrBmzRqeffZZWrZsiaenJ48//jhvv/02Z86cISkpie+++4709HRycnIUD7VFnenTp9O7d2/69u3L4sWLXTx6eXlhNpvJz89nyZIljBo1irNnz7Ju3TqMRiMFBQXAxZ5Xa9eupXXr1jz55JN89tlnykqt4uJi5Z67d+/mH//4Bw888IByT6fT6VKYKy4u5osvvqC6uhqoKcLt27ePoqIihRkcHMytt97Kzp07lf5ctVsBXroCrT5qaCP1G0n+vp68MCKec0VlWN24jYisTQbV7lHGzCKYaueJYGoe1elRrZmXbjx01fMff3uU4ffWf3tAWZ5jY+KJYMroUcbMIpiyeDxdYHNZWVUrhxP2Z1k4by2nWZDP7+pRJE8EU0aPMmYWwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfUomxpUsEpOTkanu3qfHhFauXIlNptN6TsFYLfbeeWVV0hKSiIsLAx/f38WLVrEa6+9pvSweuihh/j000957LHHGDNmjMLT6XQsWLCAadOmARAREaGcS0lJITY2lkmTJjFjxgz27dtH586dMRgMQE2fql27drF3717uu+8+jh07xt69e5XrjcaaRxwUFMTixYsJDw/npZdeYsaMGZw5c8Yll16vp7i4mJSUFOLj4+nbty8ffPCBUrAymUzodDqCg4P5+uuvMZvNjBw5kg0bNriwHA6Hy7Z+O3bsUP67dhtCgI4dO5KRkYHRaOTChQsEBgZSUFDAtm3bsNvtSsZrkV6vIzDQ/L8HSiZRz0TWJoNq9yhjZhFMtfNEMDWPNz4v91wpRw7mEx5sJjzE939fcBkdySm+6vlD2UVued9R83MUxVQ7TwRTJo/u+P67ktSaWSTzRvd4/EzpVc/bKh3az1qV8EQw1c4TwdQ8ysETwZTRo4yZRTBl9ChjZhFMWT3KogYVrJ599ll3+bgmbdmyhR49eijFKoD+/fszdepUpUcTQHR0tEsPq+3bt7No0SL69++vHAsKCqJFixZs2bJFOfbDDz9gNBoZOXIkEydOBGpWjKWkpLB582Y6d+6s9H76v//7Px5//HHl2ldeeYW9e/dSWlpKWFiYcnzdunVK3ymAOXPmUFJSAqAcj4uL46OPPlLG7N+/nw8++EBZ2aXT6ZTCXL9+/ZRxhw4d4vTp01RWVmIymfDy8sJut7Nv3z6XgmLHjh2VVV1HjhwhNTWVlJQU7r77bmXME088wZYtWygpKXF5vr9VDocTq/XCNV93o8tgUH8DP82j+ngimDJ6lDGzCKaMHt3JKy2rImV1OumZF/9aPy4qiKcfisPsfW19UNq19OfEmZIrno9pFUBhoa3eXtX8HEUx1c4TwZTJozu//0R5FMUTwZTFo4/H1f8w02zSaz9rf2eeCKbaeSKYmkd1epQxswim2nkimJpHdXqUMbMIpqwebwT
5+Xn/5lVnDSpY/V7KzMzk4Ycfdjnm5+dHSEiI0qPpSteB65Z3UVFR2Gw28vLyKC8vx8vLi6NHj1JdXe0yTqfT0aZNG4VRW7Dy9natlpaXlwOQm5tLdHQ03t7eGAwGl2KV0+mkoqJCKSa1atXqsqyzZ8+6ML28vICa7fwuVUVFBQA5OTlER0crBatLi1UlJSVUV1crrGPHjgEQGxtLdXU1VVVVZGRkkJ6eDkBBQUG9ClaA1lDuKmoMDfw0j+rjiWDK6FHGzCKYMnp0B2/uqvQ6fVAysiy8uyqdCY90uSbWsL7t+OrnU1c8/8e727olvxqfo2im2nkimDJ4dOf335WktszXg3mjewzx96ZTmyAOnLDgcF48rtfV9KQN9vPSftaqhCeCqXaeCKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpXVasXPz6/OcX9/f2X7vCtdZzKZ8PT0VI4lJCTw7rvv4nQ6lb5TZ86cQafT0atXL5frjUYjW7dupUuXLkrfqb179zJixAgAqqqq2L59O3Cx71TTpk05duwYb7zxBhs2bMBisRAREUF5ebmy5V7tVn9ZWVk8++yzpKWl4eHhgb+/Px4eHjgcNS/uli1bAvDRRx8xZcoUsrKyCAsLU7YDrL1nkyZNOHXqFK+++iobN27EZrMRHh6OTqdTeovV9uBKT0+vs1JOp9MRHh5+9Um4irQeVnXVGPZD1TyqjyeCKaNHGTOLYMro0V08EX1Q/jK0M29/su+yxxv6PqzW5yiSqXaeCKYsHrU+RJrHhig5MY65v1qd17FNzeo87Wft788TwVQ7TwRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBSt3atiwYXzwwQdUVFTw008/UVlZSVFREdHR0S5b+o0YMYKdO3fi5+fH7Nmz+emnn5g3bx7r1q2jY8eOtGvXjhUrVtQpmEVERPDzzz+zcOFChgwZQmhoKAsXLkSn07msgNLr9Zw5c4aysjKSkpLIyMjgq6++wmQyKWNqVzytWrWKbt268ec//5nly5dTVVXlcs9mzZpx6tQpVq5cyciRIwFYsmQJer0evb7mm6VTp0506tRJ6fu1d+9edu3aRVVVFWFhYXVWe/1WaT2srq7GsB+q5lF9PBFMGT3KmFkEU0aPDeWJ6IPS9/Y29L29DQvW7ueXw2fp0j6UcQ92aojNOlLbc7weTLXzRDBvdI9aHyJxTBk8BgbC68m9yTtXSt55m9b/TKU8EUy180QwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7KoURas/Pz8lP5Pl6q4uNhl673LXVdZWUlFRYWyysrf35/Ro0cze/ZsXn75ZcxmM0FBQbRt29bl2tpVTN27d6d37940b96cefPm4XQ6mT9/PsXFxcTGxvKPf/yD5557TvHRpEkTHA4Hbdq0Yf369RiNRvr3789XX32F3W5X+N7e3pSWlhIYGMicOXMIDw9nzJgxpKamKlv+1TKDgoLIzMwkPT2d+Ph4goOD2bdvn3Lex8cHh8NBVFQUK1euxGw2M2zYMFasWKEUtwwGA++99x7vvPMOq1ev5uzZszRt2pSCggLOnDlDeno6cXFx1zw3Wg+ry8tgUP9+qJpH9fFEMGX0KGNmEUwZPZ4tKqOk3I6fl4GQgPp/2BTZB2XoXdGMe7ATVmtZg3qpXCq1z4sIptp5IpiyeNT6EGke3cHz9TRwa2yY9rNWZTwRTLXzRDA1j+r0KGNmEUy180QwNY/q9ChjZhFMWT3eCPLzu8F7WEVFRdXpVVVSUsK5c+dc+k5d7jqArKwsYmJilOOlpaW0aNGCb7/9FoAXXniBI0eOuFwbFhZGfn6+cl2rVq0wGo1UV1czYcIEEhMTARRG7b1qVzS99tprdOvWTeHt3r2b06dPK1/XFtA2btyoHLNaraSmplJaWvOXoREREQD06dOH119/XRn38ssvs2/fPuVetSu3VqxY4dKHau3atS6rsUJCQpg+fTpjx47
lgQce4K677mLUqFEAZGdn16tgBVoPq6upMeyHqnlUH08EU0aPMmYWwZTBY2lZFfPWZrhsI9apTRBJgzti9vK4Zt716IMiw7xcD6baeSKYN7pHrQ+ROKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHuXgiWBqHtXJk0mNcjPFhIQEtm3bhtVqVY59+eWX6PX6On2nLlV8fDy+vr5s2LBBOVZVVcVXX31FQkKCC//QoUOcOHFCOXbkyBGqqqq48847gZq+U7fffjseHh4uxbP169cTHR2tFJdqe20dPXpUGVNcXMzp06eprKykvLwcqOmPVVZW5pKptnhV23eq9v9PnXJttH7gwAEAcnJyADCbzS7HoaZIV1JSoqzWuvS5HTlyhOTkZJfjtf2yNGnSpEmTJtk0b20GB05YXI4dOGHh/TUZ9WYmDe5Ih8ggl2MdImuKYJo0aRIr7ftPkyZNmjRp0qRJkyZNmhqHGuUKq2HDhrFkyRKSk5NJSkoiPz+fWbNmMWzYMJe+U3/84x85fPgwOp0Os9nM4MGDGTduHCkpKQQFBSl9p4qKihg3bpxy3b333kvTpk0ZOHAgAC1atKCkpISoqCg6d+7s4iMtLY0PPviA5cuX06pVKw4dOsRbb73l4len0/Haa68xffp0goOD8fHxwdvbm8rKSoqLi/Hy8sLDo+Yvtnv06IFer6dly5bk5+cTHh6uFJlq+2Pt2LGDjh074uPjQ3h4uFIMqz3v7e2Nt7c3L774IgMGDGDBggWYTCZCQ0OxWC7+Au5Pf/oTR48eJTExkXHjxvHLL78A0KVLF5ec16qGNiC+EdUYGvhpHtXHE8GU0aOMmUUwZfF4usDmsrKqVg4n7M+ycN5aTrMgn2vm+vt68sKIeM4VlWF1wzaDtZJlXkQz1c4TwZTJo6jvv0u9qS2zSKaMHmXMLIIpo0cZM4tgyuhRxswimGrniWBqHtXpUcbMIpiyepRNjbJg5e/vz6JFi3jttddITk7GbDYzZMgQnnvuOWVMcXExBw4cQKfTMXfuXPLz85k5cyaDBg3imWeeYeHChVgsFmJjY1mwYIHLiqLU1FSKioqIjo7m5MmT5ObmAris3qqqquI///kPRqMRk8lEeXk5R44coV27dvTv318Zl5ubi9PppF27dpw+fZqCggLy8/N56KGHWL16tTKuqKgInU5H69atycnJ4eTJk5hMJiIjI5UxNlvNfunNmzfHbrdTUFDAkSNHuP3229m2bZvLMwoNDaVr164sXLgQnU6Hh4cHDz/8MB988IFLhsrKSpfeVgB/+9vf6js16PU6tzSuvlHVGBr4aR7VxxPBlNGjjJlFMG90j8fPlF71vK3S0aD3OVHvkTf6vFwvptp5IpgyeRT5GVWtmUUyZfQoY2YRTBk9yphZBFNGjzJmFsFUO08EU/MoB08EU/OoTp5MapQFK4Do6GhSU1OveH7lypV4eHjw3XffKX2c7HY7r7zyCt999x1JSUmXva6iooL333+fcePGMWHCBAAqKyu5+eab2blzpzJu48aNHD16lKCgIBITE5k4cSJpaWmMGzeOffv2KSuUdu/eDcAnn3yi9Kn661//SlpaGjqdDn9/f86cOUNpaSlxcXF8+umnQE0Bq0+fPpw4cYIuXboAkJaWBsCkSZO4//77Afjoo4+YNm0aUFPIg5ptCC9cuEB4eDi33norERER7N+/H6fTqYzJzc3l0KFDvP3228ycOZMxY8bw2muvATWFLJvNpmwteC1yOJxYrReu+bobXQaD+hv4aR7VxxPBlNGjjJlFMBuDx/lr93M4p5jYVv6MG9SpXgwfD91Vz5tNegoLbfVig5zzIqNHGTOLYKqdJ4KpeVSnRxkzi2DK6FHGzCKYMnqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ422YPW/tGXLFnr06KEUqwD69+/P1KlT2bp1K4mJiZe9bvfu3ZSWlrqskjKZTISEhJCVleXCv+mmmzh
27BhRUVFAzQqsgIAANm/eTOfOnamsrFT6YGVlZRETEwPAgAED+PzzzwkLC8PLy0spRF3aXyogIICePXvy/fffK/z09HR0Oh35+fkumaZMmQKgjIuKiuL8+fMsXLiQjz76SCnsZWZmKmNOnTpFVVWV0ruqtlgFMGrUKG6++WY+/vjj3/Ko60hrKHdlNYYGfppH9fFEMGX0KGNmEUw1etyanseCLw4pX58tLGPz3jP834Ox3N6h+TWxQvy96dQmiAMnLDicF4/rdTU9b4L9vNySX4Z5Ec0TwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jHDwRTM2jOnky6YYtWGVmZtK3b1/Gjh3Lnj17lB5WISEhZGZmXvU6gO+++46nnnpK2TawVatW/Pzzz5w9e5bQ0FAyMzMxGAwAvPLKK8ycOZN77rmH1q1bK4zs7Gzsdjsmk4mxY8dSWlpKeHg4gwcPBiA2Nla5p6+vr7K9X3l5OV27dsXDw4Pq6mruvPNOAE6cOEFoaCizZ8/mrbfeUjJ5enri7e1NREQEAPv27cPpdFJZWcnw4cPx8PDAZDKRlpbG008/7XLvy+mVV14hLi6uIY9fkyZNmjRpum66tFh1qeatPXjNBSuApMEdeX9Nhksvqw6RQSQN7lhvj5o0adKkSZMmTZo0adKkSZMmTZqurhu2YFVcXMz69evp0KEDs2fPVnpY6fV6iouLr3id1WrFYDAwd+5cJk6cSPv27Vm2bBmbN28G4Nlnn+XZZ58lNzcXi8WCv78/M2fOpLy8nDfeeIPz589z+PBhxQPUbLFXVFTEn/70J6xWK++88w4APXr0UO4JoNfrMZlMPProo3zxxRdkZWWh0+mU7QWLi4vx8PCgvLycu+66i+joaJYsWUJlZSXx8fFKhtzcXAwGAyaTiaFDh7J+/XrOnj2Lv78/w4YNA2q2DYSa1VgzZswA4JFHHgGgY8eOdOxY/1/KGY1aU7lfqzE08NM8qo8ngimjRxkzi2Cq1eP7a9Kvev7D9Qd44sFr2x7Q39eTF0bEc66oDGu5HT8vAyEB7tl/WpZ5EckTwVQ7TwRTRo8yZhbBlNGjjJlFMGX0KGNmEUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsumGLVjVrjCaM2eOSw+rKVOmUFZWdsXrqqursdvtPP7444wZMwaAW265hTvvvBOLxYLBYCA5OVnZvm/JkiW0b98eqCkCjRs3Dp3Otf/FTTfdxKBBg1i+fLlS5CouLqZp06YAXLhwgdLSUiZOnMi+fftYsGABtau3fp3Jbrfzz3/+k/nz57N161aaNGmCxWIhODgYqNlW8OTJkzz//POcPXuWNWvWUFRUhF6vJyoqiiZNmgAX+2Hdd999So8sd0iv1wltaN3Y1Rga+Gke1ccTwZTRo4yZRTDdycs9V8qRg/mEB5sJD/GtF+PoKetVzx/OKa73+5LI9zM1z4sopoweZcwsgql2ngim5lEOngim5lEOngim5lEOngimjB5lzCyCKaNHGTOLYMrqURbdsAUrvV5PeHh4nR5WU6ZMUVY0XU61q6L69u2rHDOZTMTExLBt2zYWLlyIl5cXt912G3a7XSlWQU0PK4PBQGRkJAA+Pj4AxMfHk5SURFJSEgArVqxg2rRpVFVVARdXWP3xj3/kiSeeUHj3338/p06dqpPpwQcf5MEHH1Su7datm8JYtGgRer2ewYMHYzQaeeqpp3jllVf49ttv0el0VFZWYjKZmD59OgAeHh4uz2P8+PG0bt36tzziy8rhcGK1Xqj39TeqDAb1N/DTPKqPJ4Ipo0cZM4tgupNXWlZFyup00jMvbrkXFxXE0w/FYfb2uCZW2wg/zhZe+Q9R2rf0p7DQVi+fss2LKKaMHmXMLIKpdp4IpuZRnR5lzCyCKaNHGTOLYMroUcbMIphq54lgah7V6VHGzCKYsnq8EeTn5/2bV53dsAWry+nXK58aOvZKY5xO52W
P18fHtbIyMzM5efKkst3gpdq9ezf//e9/+dOf/kRWVhYA//nPf/jPf/6jjHnnnXd455132LdvH56enr/Z56XSGspdWY2hgZ/mUX08EUwZPcqYWQTTHby5q9I5cMLiciwjy8K7q9KZ8EiXa2KNG9iRren5Vzw/dkCHBvuVZV5EM2X0KGNmEUy180QwNY9y8EQwNY9y8EQwNY9y8EQwZfQoY2YRTBk9yphZBFNWj7Lohi1YORwOcnNzGTlyJOnp6ZjNZmJiYoCL/ZsuJ39/fwD+9a9/kZ2djcViISYmRinwFBcX4+XlhdFo5Pz58zz22GPs2bMHDw8POnfujN1uV1ZWXbhQs9IoLS2NBx98kKysLMLDw/H1rdn2yMPDw8XPc889x+HDh7HZbMTFxXHq1Ckcjosv7N+S6YknnuChhx5i2bJlbNy4UbnWbDaTkpKirP5avHgx77//Ps2aNWPXrl3k5ORgt9vR6/VMmjRJ8VYfaT2s6qox7IeqeVQfTwRTRo8yZhbBdBfvdIGN/VmWOscdTtifZeG8tZxmQT7XxHxycAfeW3Pgsscb8p4k07yIZMroUcbMIphq54lgah7V6VHGzCKYMnqUMbMIpoweZcwsgql2ngim5lGdHmXMLIIpq0fZdMMWrKCmZ9XBgwd58sknOXnyJKtWrcJkMuHtfXEPydGjR5OXl8emTZsAMBqN6HQ6duzYQb9+/YiPjyc1NbXONoLe3t7o9Xp+/vlnHn/8cUpLS1m6dCkeHh5Kn6ha5ebmEh0dzYQJE9i0aRO7du1yOe/j44OHhwdbt24lMTGR1q1bM3/+fKqrq9HrXV/c/ytTdHQ0hw4dYuPGjTz88MNER0fz3nvvYbVa2bp1K7fddhsAt912Gw6Hg//7v//j4YcfZsqUKTz22GM4HA6ysrLq3Pe3SuthdXU1hv1QNY/q44lgyuhRxswimA3lHT9TetXztkrHNb+PDExoy8CEtryzcjf7jp2n803BjB8W3xCbLpJhXq4HU0aPMmYWwVQ7TwRT8ygHTwRT8ygHTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KItu2IKVp6cnlZWVxMTEkJKSgtlsplevXmzduhWj8WJsh8OB3W5Xvvbx8cHpdHLrrbeyf/9+tmzZQkxMDBcuXKC0tFRZgVV7bbdu3UhNTcVoNNKzZ0/S0tKUbfxqxzZr1gyDwcCbb75JeHg47du35/Dhw8p5g8FAVVUVPXv2ZPPmzdhsNjp37syuXbtctuX7rZn+85//8MADD/D6668DcPToUdauXcsHH3zA+PHjMRgMVFdX89JLLzFq1Cief/555dqmTZtSVFRU7+eu9bC6vAwG9e+HqnlUH08EU0aPMmYWwXQXz8fj6tvimk36evecGjsgVvFYX8alkmleRDJl9Hi2qIyScjt+XgZCAtzzDxW1ZxbBVDsParYzzTlvo1WImQ6RQQ3myTgvIphq54lgah7V6VHGzCKYMnpsDJnd/R4I6n+OjWFeNI/q9ChjZhFMWT3eCPLz03pYYTAYCAoKYunSpcqxvLw8+vTpQ2VlpXJsyZIlLtdVV1cDMGLECAYMGKAcT0xM5NChQ3h5eQE1vaU8PT1ZuHChMsbhcNChQweqqqqAmkIVQKdOnXj33XeVcW+99RaHDx9Wtg6sqKgA4NVXX6Vly5bKuN69e1NWdrGR/G/JlJOTw4kTJ1yKUDNnzsRut7N27Vry8/MJDw9n27Zt5ObmMmrUKGXc4cOHGThw4NUe62+Stj/nldUY9kPVPKqPJ4Ipo0cZM4tgNpQX4u9NRIiZU+fqFpRahpoJ9vNSXc8pGeblejBl8FhaVsW8tRku2152ahNE0uCOmL3qv93ypVJb5uvBVCMvv/AC/1i8k9KyauWYr7eRv4++lZCAa9vW9HKScV5EMNXOE8HUPMrBE8HUPMrBcwdT9HsgqP85qnFeRPNEMGX0KGNmEUxZPcqiRluwOn78ONOnT2fPnj2YzWYGDx7MX/7
yF0wmE1CzdV5RURFWq5Xa/k5bt24FalYc3XXXXVgsFmJjY5k8eTJdunQBUFYq/fzzz2zYsIG0tDSMRiPl5eXY7XbKy8vx8vJCp9NRUVHBypUrWb58OVlZWQQGBuJ0OpX+T2fOnAEgKyuLN954g7Vr12Kz2ZSiV22Pq9pVVGvXrmXnzp3s2bMHb29vioqKlLGXZlq8eDHLli0jLy+PwMBAACV3ZmYmAJ999hlvvPEGZ8+eJSwsTOmFVVpasxXT3r17CQgIID09nVGjRik9rADuv/9+902UJk2aNGnS9CtdrlgFkHO24auiNGn6PTVvbQYHTlhcjh04YeH9NRlMeKTL72NKkxD9+hd1AKVl1by2aCf/GZ/wO7nSpEmTJk2axEt7D9SkSZMmTSLVKAtWxcXFjB49msjISGbPnk1+fj4zZ86kvLycKVOmAFBZWYmXlxfJyckkJSWRn5/PrFmz8PT0ZN++fUyaNIn27dszYcIEhg0bxqZNm2jZsiUXLlzAYDCwYsUKgoODeeKJJ/juu+/Yt2+fcu/agpXZbGbq1Kn07t2bvn37snjxYgAKCgqUsVBTRMrOzmbUqFGcPXuWdevWAZCfnw/UFKJ8fX2ZPXs2rVu35sknn+Szzz7DYrG4rLCqrKzEaDTyj3/8gwceeMDlnrWsgwcPKv/fr18/AgIC2LJlC7t37wYuFqx++OEHrFYrEydOZMiQIWzevJns7GwAwsPDGzQ/DWlwf6OqMTTw0zyqjyeCKaNHGTOLYLqLt3Tjoaue//jbowy/t3292GrNLJKpeVSPx9MFNpeVVbVyOGF/loXz1nKaBdX/r47VmFk0U628fcfP1/lFXa1Ky6o5mF1IXFTTerFlnBcRTLXzRDA1j+r0KGNmEUwZPao1s8j3QFD/c1TrvIjkiWDK6FHGzCKYsnqUTY2yYLVy5UpsNhtz5swhICAAqCn6vPLKKyQlJREWFoZOp2Po0KEcPnyY5ORkzGYzDz30EIsXL6Z9+/aMGTMGgKioKPbs2cOCBQuYNm2acg+n04nT6SQlJYXY2FgeeughVq9ezcGDBxW+j48PRqORXbt2sXfvXu677z6Xws+lCgoKYvHixYSHhzNhwgT+/e9/s337doYOHQrUrOwyGAwUFxeTkpJCfHw84eHhbN26lfz8fOWeJpOJJk2a8PXXX2M2mxk5ciTLli0jIyMDgCZNmgA1WxsuX76cJk2aEBUVxQMPPMDnn39OZmYm8fHxNGnSBIfDgdPpZPny5TgcDuLi4khPT+fjjz/m4Ycfrtfc6PU6AgPN9bpWBjWGBn6aR/XxRDDdycs9V8qRg/mEB5sJD/F1G1fNmUUx1ezxqx0nST92jpvbhtCve+t6c47kFF/1/KHsoga/j8g0L6J4Ipg3usfjZ0qvet5W6WjQa3v34bMc/vEkMa2D6No+tN6cX+tGnxcRvDxLzlXP5xZcIOGWVg26h4zzIoKpdp4Ipoyf8UQw1c4TwdQ8ysFrKPN6vAeC+p+j2ublevBEMGX0KGNmEUxZPcqiRlmw2rJlCz169FCKVQD9+/dn6tSpbN26lcTERPz8/DCZTKSmpipjtm/fzqJFi+jQoYNybNmyZcyYMYNNmzYB4Ofnh91up127dspKKICPPvqI1atXs2fPHu666y6aNGlCdnY2kyZNUopftT7Onz/PqVOn8Pf3B2qKX+vWrVO+PnHiBP/+9785evSock+bzcadd97J3LlzFdbMmTPZunWrksnX15eioiJmzpxJv379lHEff/wxZ8+epbKykoiICAA+/PBDoqKilDGffPIJn3/+udLrKiYmhrS0NFasWMHEiRMZPnw4Pj4+TJ48WdlWsD5yOJxYrRfqff2NKoNB/Q38NI/q44lgupNXWlZFyup00jMvriiIiwri6YfiMHvXv1eLmjOLYqrZY1ZeMa+m/kwt4vvducz55Bemju1OZHO/a+a1a+nPiTMlVzwf0yqAwsL6bQ0o07yI4olgyuLRx0N31fN
mk75er+18ywVe+fCnOn0ipj3WndDAhq3YkmFeRPDCg67+j88WTX1U83NMBFNGj7Jlbiyf8UQw1c4TwdQ8qtOjWjOLfA8E9T9Htc6LSJ4IpoweZcwsgimrxxtBfn7ev3nVWaMsWGVmZtZZAeTn50dISIhSbImKiqpTeDlw4AAAXbt2dTkeHR3NokWLKC8vV4o8oaGuf7malZWFyWRSVk+Fhoayf/9+l6KQ0+nk/Pnzisfbb78dvV6Pt7e3UqyqPQdw7tw5xWtVVRUtWrRwuWdubi4eHh7K+JCQEIqKimjTpo0ypqSkBJvNhtPpJCcnR/GTmZnp4m379u0AdOvWDYC2bdsCNX2zDAYDf/rTn1izZg2AUtSqr7SGcldWY2jgp3lUH08E0x28uavS6/Rqyciy8O6qdLf0alFjZtFMNXq8tFilMB3wyoc/Mf+Fu6+ZN6xvO776+dQVz//x7rYNfgYyzItongjmje4xxN+bTm2COHDCgsN58bheBx0igwj286oX+9fFKqjZcmfawp/c0ifiRp8XEbwOrYPw9TZedkskX28jsa0CVfdzTARTRo+yZG5sn/FEMNXOE8HUPMrBayjzerwHgvqfo9rm5XrwRDBl9ChjZhFMWT3KItUVrI4fP8706dPZs2cPZrOZwYMH85e//AWTyaSMsVqt+Pm5/lW30+nEbrezdOlSFi9eTFBQEPv373cZW9vHadOmTcyYMQMPDw/uueceunXrhtPppLi4mPj4eHQ6HSdPnuTBBx8kKyuL5s2bY7VaCQ4OVvpSdejQgW+//ZZly5YxefJkbDYbbdq0wWq1AjX9q0wmEyEhIRQWFjJ27Fglk7+/P0FBQZSU1PyF+R133AHAzz//zH333UdeXh6tWrUiJycHPz8/5Z5RUVEcPXqUF154gePHj1NWVsbkyZPR6XQu/oODg3nppZd48cUXKSsrIywsjNzcXIKCgpRC1R133IHRaGTRokUEBQURHx+P2VyzTU379vXrHVIrrYdVXTWG/VA1j+rjiWC6iyeyV4taM4tkqtXj93tO1SlW1crugG37T5PQpcXlB1xFfxnambc/2XfZ4w15D5FlXkTyRDBl8picGMfcX61K6NimZlVCfV7bWq8k9fKmPdadaQsvv/JNTT/HRDBl9ChT5sb0GU8EU+08EUzNozo9qjmzqPdAd3psLDwRTM2jOj3KmFkEU1aPsklVBavi4mJGjx5NZGQks2fPJj8/n5kzZ1JeXs6UKVOueu38+fMpKCigS5cu/OUvfyE1NZXNmzczbtw4xo8fT35+Pj/88AMAZ86c4c0336S8vJy//e1vysoiAE9PT8xmMzk5OXTt2pXnnnuOTz75hMLCQpo2vfgLge7duwOwefNmHn30UXx8fPjwww8xmUwuK5Tatm1LWloaR48eJTk5mV27dvHdd98RGRmpFKyaNWsGwMGDB+nTpw9Dhgxh0aJFVFRUuNwzPj6ejRs3cvLkSaKiosjIyGDOnDnce++9fPnll8q4nj17snbtWvr370+nTp1ISUnB4XAQGxurjAkODsbLy4vS0lJuueUW4uLiWL58OVCzkqu+0npYXV2NYT9UzaP6eCKYDeWJ7tUC6st8PZhq85h5+spb9wEcy7MyuE+7a+b2vb0NfW9vw4K1+/nl8Fm6tA9l3IOd6muzjm70ebkePBFMGTwGBsLryb3JO1dK3nlbg/u+aL2S1MsLDDSzYvpA9hw+y6GTFq23mEqZaueJYMr4GU8EU+08EUzNoxw8dzBFvweC+p+jGudFNE8EU0aPMmYWwZTVoyxSVcFq5cqV2Gw25syZo/SnstvtvPLKKyQlJREWFgbUbP9XW+wBqKio4P3338fHx4du3brRo0cPbrnlFvr27Ut+fj7JycmYzWZatWrF0aNH+ec//0lMTAwA7733HocPH0an07n0nPLy8uL06dO89dZbxMbG0qtXL3bt2kW7du2UMQAtW7bkv//9L0ajkfvuu4+vvvoKQGGVl5c
rX7/zzjuEh4fzhz/8gTVr1hAYGKhk0Ol0+Pn5sW/fPrZv366slDp+/LjCqt0KMDo6moyMDAAGDRrEXXfdxZdffqmM++c//0n37t2ZP38+GzduxOl00rNnT3bt2oXdbsdgMLBr1y5KS0vp2LEje/bs4dtvvyUoKAiAjIwMysrK8Pa+9m8srYfV5dUY9kPVPKqPJ4LpLp6oXi2g3swimWeLyigpt+PnZSAkwD0fatzhMap5E76/yvmbwv0atEf90LuiGfdgJ6zWsgZxaiXja0fzqE6Pvp4Gbo0Na/BrW+uVpG4eQHTzJnRtH6ran2MimDJ6lClzY/qMJ4Kpdp4IpuZRnR4bQ2Z3vweC+p9jY5gXzaM6PcqYWQRTVo83ghptD6stW7bQo0cPpVgF0L9/f6ZOncrWrVtJTEwE6van2r17N6Wlpeh0OqVvk8lkYsCAAWzatIm9e/cCMHbsWI4ePepyzzVr1nDzzTdjMpnw8vKisrKSCxcuEBoayubNm5VxX3/9NVu3biU4OBiAEydOADBkyBCSkpKUcY888gi//PKL4qO2p9Xy5cuVgpLVauWzzz5Tvs7JycHpdBIZGcnHH3+ssObNm8ebb75Jq1atlNwATzzxBFarlcmTJ/Pss8/yxRdf4OHhQcuWLZVrhw4dyvHjx1m6dCnz58/n0KFD/PTTTzgcDgwGA1lZWQBK4QsgPz8fgNOnT/Piiy/y1ltvXXW+riRtf84rqzHsh6p5VB9PBLOhPFG9WtzpUTTPHczSsirmrc1w2XqnU5sgkgZ3xOxV/6bml6ohHu+IC2fRhkOX3RbQoIeenZpre9SrhKl5vDF5Wq+kxsETwdQ8ysETwZTxM54Iptp5IpiaRzl4IpgyepQxswimjB5lzCyCKatHWaSqzRQzMzOVokyt/Pz8CAkJcSlQJSQksG3bNqVfVO05vV5Pr169lHHR0dHk5eUpq5yKi4sxGo1s2LBBGVNdXY3T6VRWO2VnZ+N0Ojl79qxSlAKUe0VERCjjPDw8XIpaADabDYPBoIwrKipCp9MpK6+gZnWWTqdTtt6r9X/kyBHlPgAFBQUAREZGAjWruSIjI122/wNYv349PXr0cOnzNW/ePFJTU3n11Vfx8PBg0aJF/OlPf8LDo+YXoHfeeSd9+vQhJCSEadOmMW/ePAYMGABAYmIiycnJaNKkSdPVlDS4Ix0ig1yOdYisKbZo+m2atzajTlPzAycsvL8m4wpXXH+9PPpWfv1HMAZ9zXFNmjSJ199H34qvt+vfmPl6G/m79j2oSZMmQdI+42nSpEmTJk2aNGn6vaSqFVZWqxU/P786x/39/SkuLla+HjZsGEuWLCE5OZmkpCS2b9+uHK/dNhDggw8+wOl0ctttt+Hr60tlZSWxsbEsXLiQoKAg2rVrx4oVK7Db7UrxqPY+fn5+DBw4EIAWLVooWxDWbptntVoJDAxk9+7ddOzYEU9PT2XLQYPBoHgoLS2ldevWTJkyhalTpxIcHIyPjw8Gg4Hw8HCXezqdTnr06IFer6dly5bk5eUBYDRenKY//vGPzJo1iy+++AKAUaNGkZWVxdKlS5Ux69at48033wRg8uTJAHTs2JEBAwZgsVgICgqivLwch8NBaWkp06ZNc3nezz//vJKzPmpok80bUY2hgZ/mUX08EUx38vx9PXlhRDznisqwunE7OzVndidTZFNzd3kEiG4RwIcv9iNtXx5HThXTLsKfOzqHN4hZK7XPtVpfOyJ5IpgyenQnr3mwL3P/ehcHTljIPmejVYi5zi+S6yNtXtwjzaN7pHaPsmVuLJ/xRDDVzhPB1Dyq06OMmUUw1c4TwdQ8qtOjjJlFMGX1KJtUVbD6rfL392fRokW89tprJCcno9fr0ev1TJo0SRlTXFzMqVOnAJg+fToVFRW8/PLLXLhwgWeeeYaFCxdisViIjY2lc+fOeHl5udyjpKSEtm3bcvLkSXJzc5WeVbVyOBwUFhbSvHlzdDodZ86c4ciRIwQ
HB7sU15xOJydOnKB9+/acPn2agoIC8vPzCQ4OdlkRBTV9rFq3bk1OTg4nT550KVTVZvrwww+JjIykoKCAkpISjh07RkJCAl27dlXGffTRR3WeWUZGBn/605+YMWMGiYmJ2Gw2Tp8+jdFoZNy4cXh6erJ48WJKS0uZP38+f/vb365xVmqk1+sa3IT3RlZjaOCneVQfTwTTnTxR3/NqzuwO5vVoag7uyz3ozrZu4VxOap9rtb12rgdPBFNGj+7k9Qo00+t/D7tmafOiTqbmUQ6eCKaMn/FEMNXOE8HUPMrBE8GU0aOMmUUwZfQoY2YRTFk9yiJVFaz8/PyUlUyXqri4WOn3VKvo6GhSU1MBWLZsGa+++qpLUWnlypUYDAYcDgf33HMPXl5ezJ07l8zMTP7whz+49J0aNmyYwvf2rnkx3X///UoPp8rKSvr160d+fr4yrqCggKqqKhYsWKBsY5iWlsa4ceNcvOr1eoKDg1m7dq1y7K9//SsbNmxQxjkcNftZPvHEE8pWfEVFRSQkJAAo41auXInNZuPzzz/n22+/ZfLkybzwwgv861//Ij8/X1lddu7cOR544AFllVVtnqysLNq2bav4OnLkCCkpKdx9990ADB48mPvuu4/FixeTnJyMr6/vlSfrCnI4nFitF675uhtdBoP6G/hpHtXHE8GU0aOIzOu2ZnEou4gOrQMY2LNNvRgim5pD43iOavcoY2YRTBk9ish8tqiMEjevdNDmpeGS0WNGloWc8+5b7Qfqf46NYV4ag0fttaPOedE8qtOjjJlFMNXOE8HUPKrTo4yZRTBl9XgjyM/P+zevOlNVwSoqKsqlVxXUrHQ6d+5cnd5Wv74OICsri5iYGAC2bNlCs2bNsNvtyuqpm2++mdzcXLZu3UpiYiJQswIqKytL6X11/vx5AJo3b67wTSYTnTp1Ij8/X7lXUVERAE2bNlXG9erVCw8PD8zmmr9Eq6yspLq6uk6xrU+fPnz++ecEBAQAcPbsWaCmR1WtAgICiIyM5MiRI8rxLVu20KNHD+U6gH79+jFr1iwlU05ODidOnOD55593uecDDzzA7NmzyczMJC4ujmPHjgEQGxvrck+o6euVn59fr4JVzfXaN+OV1Bga+Gke1ccTwZTRozt4B05Y+NfKX5Sv048X8NG3x3lheBdiWl3bL16uR1NzUOdzFM1UO08EU/N44/JKy6qYtzbDZQvRTm1qesmYvTwaalGbF5Uy1egxv/AC/1i8k9KyauVYbT+1kID6b2F7qdT+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08maSqglVCQgLvvfeeSy+rL7/8Er1erxSUanX8+HGmT5/Onj178PHxwcPDg88//1wpWB0/fpyqqioGDRqkXNO3b1/Wr1/P8uXL+c9//oPFYiEiIoKioiLuvPNOAHJycgDYsWMHzz77LGlpaXh4eKDX11QAg4ODASgvL0en0/HOO++wc+dOsrKyCAsLo7q6miZNmgCQnZ2N0+nk5MmTvPrqq2zcuBGbzaYwQkNDgZril8FgYP369axevZo9e/ZgNpu5cOECRqNR2TowMzOThx9+mE8++URZ/fXkk0/i5+enFPpq/z8sLIwXX3yRr7/+mqqqKmX1lYdHzS82WrRoAcBdd9112bmo7a+lSZMmTZou6tJi1aWatfwXFk66+5p5SYM78v4a119Ea03NNWnSdKnmrc3gwAmLy7EDJyy8vyaDCY90+X1MaZJSvy44AJSWVfPaop38Z3zC7+RKU2OQ9trRpEmTJk2aNGnS9FulqoLVsGHDWLJkCcnJySQlJZGfn8+sWbMYNmyYUnABGDFiBLt37+aWW25h9uzZ5Ofn88orr7BgwQJCQkJo164dRUVFeHh4MG7cOOW6++67jxdeeIH09HSGDBlCaGgoCxcuxGAwEBgYCIDVasVoNLJ//35ycnJISkoiIyODr776CqjZntDLy4uysjKaN2/OsmX
L6NatG3/+859Zvnw5TqcTu92ujIWaVVwrV65k5MiRACxZsgSAiooK5Z6BgYF89913hIeHk5yczJYtW/jpp5/Q6S5uGWW1Wvnll1+YP38+nTp1oqCggKCgII4fP86+fftc7jlhwgRKSkoYNmwYubm5fPHFFwCEhIQA0KlTJ0JDQ5XVXQC+vr5UVFRw//33K1sj1kdGo9ZU7tdqDA38NI/q44lgyujRXbw1P2Re9fyGH08y6I5r2x5QVFNzUO9zFMlUO08EU/OoTo/u4p0usLkUtGvlcML+LAvnreU0C6rf6gRtXtwjWTzuO36+TsGhVqVl1RzMLiQuqullz/8Wqf05qnVeRPLcxdReO+qcF5E8EUwZPcqYWQRT7TwRTM2jOj3KmFkEU1aPsklVBSt/f38WLVrEa6+9RnJyMmazmSFDhvDcc8+5jDtz5gxOp5M5c+a4bGM3bdo05s+fT3FxMU6nk4cffthlmz2Hw4HD4cDX15f169djNBrp378/P/74IwsWLGDatGku9wkMDGTOnDmEh4dz11138f3333Pw4EGleGa32wkNDSUzM5P09HTi4+MpLy8nNzfXhVNVVUWbNm1YuXIlZrOZhx56iE8//ZTt27czdOhQhWUymdDr9bzzzju0adOGrl27smfPHpf+VDt37gRg//79APz8888uX9cqOzsbk8nE8uXLiYiIYPDgwXz22Wf89NNPdOvWDYPBwMCBA1m8eDFBQUEUFRVRVVWFp6en0kerPtLrdcKa894IagwN/DSP6uOJYMrkMfdcKUcO5hMebCY8pH5bnQIcOVV81fOHcooYVc+ffyJ/bqp1XkQy1c4TwdQ83pi842dKr3reVulo8M8PmebFXe8Hl5NaM7uLmWfJuer53IILJNzSqt78Wqn9OaptXq4Hr6FM7bUjhieCqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFqipYAURHR5OamnrVMc2aNaN9+/YuvZwGDBjAtGnTmDBhAomJifTo0UPZmq9Wu3fvxul0cu+99zJjxgzl+IwZM9i0aRMAfn5+VFdX065dO9atW6eM+eijj/j+++/Zs2cPd911F02aNCE7O5tJkyYxZswYZVz//v2xWCycOnVK6V1Vu8Kq9usTJ07w6aefcvToUeWepaWlJCQkMHfuXIU1c+ZM9uzZo/Sn8vX1paioiHfffZd+/fop42655RYuXLhAZWWlcg+z2cyuXbuUFVpbt27ls88+48CBA8p1ZrMZT09P0tLSACgrK+Pee+9lyZIlTJky5apzcCU5HE6s1gv1uvZGlsGg/gZ+mkf18UQwZfJYWlZFyup00jMvrk6Iiwri6YfiMHtfe9+XdhH+pB8vuOL5mJYBFBba6uVVpnkRyVQ7TwRT86hOj+7i+XjornrebNJrP3d+g9z9fiDCoyieu5jhQVf/B3eLpj71fi2C+p+jWudFJM9dTO21o855EckTwZTRo4yZRTDVzhPB1Dyq06OMmUUwZfV4I8jPz/s3rzpTXcHqt6i2l9Ol8vPzIyQkROnhFBUVpfx3rWqLNV27dnU5Hh0dzaJFiygvLycqKgq42F+qVllZWZhMJrKzs5Xz+/fvV8ZDTWHq/Pnzisfbb78dvV6Pt7e3UkiqPQdw7tw5xWtVVZXSV6pWubm5eHh4KONDQkIoKiqiTZuLW06VlJRgs9lwOp3k5OQofkJCQly2E8zMzESv15Ofn+9yj/Lycm6//XasViuRkZH4+vpy8uRJGiKtodyV1Rga+Gke1ccTwZTB49xV6XX6vmRkWXh3VXq9+r4M7BHJfzdfeVvA/re3bnB+GeblejDVzhPB1DzemLwQf286tQniwAkLDufF43pdTb+7YD8v7efOb5C73w8uJ7VldjezQ+sgfL2Nl93azdfbSGyrQLf4VftzVNu8XA9eQ5naa0cMTwRT8ygHTwRTRo8yZhbBlNGjjJlFMGX1KIsaZcHKarVSVVXF2LFj2bNnD2azmcGDB+Pn56f0cEp
ISOC9997DarXi5+cH1KywAjh16hR33XUXFouF2NhY+vTpg9PppLi4mPj4eHQ6Hfn5+Tz77LOkpaVhNBqprq4mKChI4Xfo0IFvv/2WHTt28K9//YusrCwCAwOxWq1ATS8pk8mkFJneeOMN1q5di81mw2w2ExAQQGlpzTYvd9xxBwDHjh1TMnl7e1NSUuKSqXXr1hw9epTx48dz5swZTCYTwcHB6HQ6F/9eXl7k5+czYsQIMjIyKCsro3PnzjRr1oySkhLlObZq1Yp7772X/fv3Y7PZOHXqFBUVFfj41K8XQq20HlZ11Rj2Q9U8qo8ngimLR1F9XyY/Gs+Mpbsve7whP/tkmRfRTLXzRDA1j+r06E5ecmIcc3+1Oqhjm5rVQdrPnf8tkX3A3OVRJM+dzGmPdWfawp9cCg++3kamPda9wZ//1f4c1TwvonjuZGqvHXXOiyieCKaMHmXMLIKpdp4IpuZRnR5lzCyCKatH2dQoC1ZOp5NPPvmEDh06MHv2bPLz85k5cyZ6/cUXwrBhw1iyZAnJyckkJSWRn5/PDz/8gE6nY+HChUycOJH27dszYcIE3nrrLeU6T09P/Pz8OHr0KEVFRTzxxBN89913pKen4+FxccuQ7t27A/DBBx/Qu3dv+vbty+LFi+t47dChA9999x1Llixh1KhRnD17lnXr1uHl5YXTWfOnss2aNUOn07Ft2zYiIyN58skn+eyzzygsLKSqqkphtW/fnq+//hqr1crjjz/O+fPnWb58eZ17tmjRguPHj5Ofn090dDT79+8nIyODvn37cuTIEQBmz57NL7/8QlpaGgMGDCA8PJz169eTl5fHgQMHqKqqcsn7W6X1sLq6GsN+qJpH9fFEMG90j6L6vvQMNLOua0s++eYIew6fpWv7UIb2bVdfm3V0o8/L9WKqnSeCqXm8cXmBgfB6cm/yzpWSd97m9v5LN/q8XI8+YKCuzKKYgYFmVkwfyJ7DZzl00kJM6yC6tg/93xdeg9T+HNU4L6J57mBqrx11zotongimjB5lzCyCqXaeCKbmUQ6eCKbmUZ08mdQoC1Ymk4mKigrmzJmj9LGy2+1MmTIFo7Emkr+/P4sWLeK1114jOTkZs9lMly5d2LFjB6NGjVL6TkVFRVFYWIjT6VS27WvSpAnFxcU4nU5SUlKIjY1l0qRJzJgxQykyNW3aFLjYK2rv3r3cd9995Ofnk5aWprDCwsIACAoKYvHixYSHh/PSSy/xxhtvYDKZlEyenp5UVFRQXFxMSkoK8fHxPPDAA8yePVvJ1L59ewCaN29OSkoKZrOZ4cOH89///pcLFy4o92zbti0VFRXo9XoOHjwI1PTD2rx5szKmQ4cOLF68GKPRyNdff01YWBi33347VVVVrFu3jl27dnH77bdf89xoPawuL4NB/fuhah4bzsvIspBz3karEDMdIoMazAP1ZxbBdAdPZN8XgPu6tWRo33ZYrWUN4tRKlnkRzVQ7TwRT86hOjyIy+3oauDU2TPu5c40S/X6gxsyimdHNm9C1fajbXoug/ufYGOalMXjUXjvqnBfNozo9yphZBFPtPBFMzaM6PcqYWQRTVo83gm74HlYGgwFfX1+lWAXQu3dvACorK5Vj0dHRpKamKl8vWLCAHTt20KlTJ+XYsmXLSExM5NChQ3h5eSnHPT092bp1q/K1w+Fg5syZyoqnZs2aAdCjRw/effddZdxbb71FWlqasq1eRUWFcp+WLVsq4+bPn09ZWZnytdFopEmTJqSlpSnH8vLymD17tpKpQ4cOADzxxBP069dPGffNN99QVlam8KOioti+fTs7duxg9erVTJ48mTvuuIPU1FTatatZCdC3b186dOiAj48PKSkpCuuNN94AUApz9ZG2P+eV1Rj2Q9U8XrvyCy/wj8U762xz8vfRtxIS0LAtNmultszXg9kQ3vXo+9JQj9eDJ4Ipo0cZM4tgyuhRxswimNr7gTqZMnqUMbMIpoweZcwsgimjRxkzi2C
qnSeCqXmUgyeCqXlUJ08mNcqCld1up6ioyKU/VW1x6dJVS79W7UqlAwcOMGDAAACqqqrIycnBbrdz88034+vrS0VFBRUVFZw4cYLIyEgAfvzxR5xOp7JN3pkzZwDYs2ePSz8sg8EAwIULNauMPD09Afjzn//MiRMn8PDw4M4778Risbh4tdvtWCwWBg4cSHZ2NuHh4XTt2tUlU8uWLYmMjOTf//43U6dOxWazERcXx+nTp4mIiFDGJSQkMHfuXBITEzl69CgAr7/+OgcOHODxxx9X7nnTTTexbNkyunfvTnFxMY899hiffPIJRqORW2+99donRpMmSfXrYhVAaVk1ry3ayX/GJ/xOrjQlDe7I+2syXHqXdIgMImlwx9/RlSZNmjRput7S3g80adKkSZMmTZo0adKkqXGoURasKisr8fLyculPNWvWLPz9/amuvvhL49GjR5OXl8emTZuAmiKSwWBg0aJFhISE0K5dOxYvXozVagVg+vTpVFRU8NJLL2EymXj22WeZMGECZWVlzJo1i6CgIHS6mm1FiouLASgoKKBr166MGjWKTz75hMzMTJfzVVVV6PV6Dh06xIgRI/Dx8WHhwoU4nU6X1WAVFRU4HA6Ki4v585//zK5du1i9ejU+Pj4umZo1a8aPP/5Inz59uOWWW5gzZw5Op5Px48crY6KiovDw8ODYsWP06tWL77//ns8//5yAgADuvfdeZdzatWvx9fVV8i9cuBCA559/vl79q2rV0Ma5N6IaQwM/zWP9tO/4+TrFqlqVllVzMLuQuKim9earMbNoprt4/r6evDAinnNFZVjL7fh5GQgJcM8ewmrNLJIpo0cZM4tgyuhRxswimNr7gXukeVQfTwRT86hOjzJmFsGU0aOMmUUw1c4TwdQ8qtOjjJlFMGX1KJsaZcFKp9MxdOhQDh8+rPSnGjJkCFu2bHEZ53A4sNvtLsf0ej3PPPMMCxcuxGKxEBQUhNFopLq6mu7duxMWFsabb76JxWKhWbNmTJgwAaPRyD333EN+fr6yVV7t1oC33XYbhYWF/Pvf/6Z58+b4+fkpBSCAU6dO4XA4SExMZP369dhsNtq2bcuBAweoXY0FNVvwBQcHc9NNNzFnzhzMZjNRUVFkZ2crY86cOcPPP//M4MGD+eWXX9iyZQt2ux2j0ais+AJYuXIlRqORfv36KcW6tm3bcuzYMQoKCpS+WtHR0fzyyy/o9XrsdjsBAQHodDp+/vlnxo0bpxTnrkV6vc4tjatvVDWGBn6ax2tTniXnqudzCy6QcEurevNrpabMopm550o5cjCf8GAz4SG+DeaJ/Jkk07yI4olgqp0ngulOnru/B2sl23MUwRPBlMmj9n6geVQbTwRTNo+7D5/l8I8niWkdRNf2oW7jqjmzKKbmUQ6eCKaMHmXMLIIpo0cZM4tgyupRFjXKgpWfnx8mk8mlPxXA559/jr+/v/L1kiVL6lxXVVXFmDFjSEpKAmDEiBEEBARw+PBh5dpmzZphsVjo378/8+fPV64fNmwYzZs3B2oKUQD3338/w4cPV8ZMnjyZVatWKayCggIMBgMzZsxQxjidTm6++WalYFVZWYnT6aRt27Yumb755huefvpp9PqaimxaWhoOh4OXXnqJX375haeffpqnn36ao0ePsmXLFp544gkAtmzZQs+ePfnXv/7FqlWrmDx5Mu+++y733nsvW7duJTExEafTycmTJxkzZgyTJk2iffv2/N///R9dunRh+PDhbN26lTvuuOMaZwYcDidW64Vrvu5Gl8Gg/gZ+msf68cKDrv4G1KKpj9bM/TeqtKyKlNXppGde3LIpLiqIpx+Kw+xd/1Wfas4siieCKaNH2TJr34Pq5YlgyuhRxswimDJ6lDGzCKY7efmWC7zy4U91eshOe6w7oYH17yGr5syimJpHdXqUMbMIptp5IpiaR3V6lDGzCKasHm8E+fl5/+ZVZ42yYBUVFaVsvVerkpISzp07R1RU1FWvA8j
KyiImJgaAzMxMWrRoQXh4OF5eXkDNaqQjR4643MPpdJKVlUWvXr0AsNlqfgFdVlbmcg9v75pfXrdo0UI5b7fbKS4uVopYOp0OLy8vpWBVu4rq16xmzZoBKL4yMzNp2rQpWVlZjB8/nj/84Q+MHz+et956i08//VS5LjMzk4cfftiF5evrS0hIiJLJYrFgsViU51CrDh06uHiqj7SGcldWY2jgp3m8NnVoHYSvt/Gy2wL6ehuJbRWoNXP/jZq7Kp0DJywuxzKyLLy7Kp0Jj3RpoDt1ZhbNE8GU0aMsmbXvQfXzRDBl9ChjZhFMGT3KmFkE0x28XxeroGY77mkLf3JLD1k1ZhbN1DzKwRPBlNGjjJlFMGX0KGNmEUxZPcqiRlmwSkhIICUlhZEjR5Keno7ZbCYmJga9Xq8UlC6n+Ph4fH19ef3118nOzsZisVBRUUFFRQUPPvigC3/NmjWsXbuWZcuW4eHhQefOnSkqKuLOO+8Eavph6XQ6/vvf/7JmzRqysrIIDw9Xtgz09b24hY5Op+O5557j8OHD2Gw24uLiKCkpoWXLlsDFflcHDx50yRQdHQ2gjLNarXh6ejJmzBgA1qxZQ3p6Oh07dlQYtbyvv/6atWvXcv78eQBefPFFvL29lXFBQUF4eHjwt7/9jb/97W8AzJo1i1mzZgEXC271kdbDqq4aw36omsf6a9pj3Zm28PJ/4dnQ7we1ZnY383SBjf1ZljrHHU7Yn2XhvLWcZkH1+2tZtWYWyRPBlNGjTJm170F180QwZfQoY2YRTBk9yphZBNNdPJE9ZNWaWSRT86hOjzJmFsFUO08EU/OoTo8yZhbBlNWjbGqUBasBAwbw9ttvc/DgQZ588klOnjzJqlWraNeundKfCWD06NHk5eUpfZw8PT3p0qULaWlp9OvXj/j4eGbNmoXNZqN///7KdXfffTc6nY5z587x5JNPUlpaytKlSwkKCqJz587KOL1ez/Hjx4mOjmbChAls2rSJXbt2uXg1Go00adJE2YqvdevWzJ8/H4fDQXBwsMvYysrKOpkAZWVWeXk5p0+fxuFwMGjQILp06cK2bdtYvXp1nX5TDoeDfv36kZ+fz9dff82BAwdcVqDpdDq6d+/O1q1bGTRoEOvWraNHjx4cPXoUX19fevToUa+50XpYXV2NYT9UzeO1KzDQzIrpA9lz+CyHTlrcvoc+qC+zu5nHz5Re9byt0tHgny1qy3w9eCKYMnqUIbP2Pdg4eCKYMnqUMbMIpoweZcwsgtlQ3vXoIau2zNeDqXmUgyeCKaNHGTOLYMroUcbMIpiyepRFjbJgtX79ekwmEzExMaSkpGA2m+nVqxfbt28nPz9fKVo5HA7sdrtyXUVFBXv27KF79+7s37+fLVu2YDAYMJlMbNiwgdtuuw2Ab7/9FqfTSXh4OKmpqRiNRnr27ElaWhr79u2jc+fO+Pn5YbfbiYyMxGAw8Oabb/L/2Hvv8KiK9v//tTVlk00nEFpIgCR0kB5AaUoTfBAU9YvYUdGPCioWQFRQHhRFsSGCFBUriiAWBCEU6UgnlBAgJCSBlE02ZTe7+/sjvzPu0h4pRxfPvK7LC7N79n3ue2bOzJwp98TFxVG7dm1OnDghJpmCgoI4evQonTt3ZvXq1djtdlq0aMHmzZspLa0eHFKuNRqNPj61adOGbdu2iV1bLpcLt7t6K+GSJUtYsmTJOdMnLCyM06dP8+mnn4rPcnNzgepwiAp9+vRh3bp17Ny5E4Ddu3fTs2dPnnjiCcxm8yXljTzD6twYDP4fD1XaePl6ibVCaZ1UA5ut/LLOrfLG330GyCsqp6TChTXQQEz4pTXIwSbdBb+3mPWXnKZaLItqaGrRRi35LJ9B/9ZTQ1OLNmrRZzU0tWijFn1WQ/NK6al5hqy/+qym5pXoy5+JFtPR3/XU0NSijVr0WQ1NLdqoRZ/V0NSqjf8GrNZ/+RlWaWlppKam8t5774nPbDab2DE0ePBgABYsWODzu23
btmG323nuuedISUkB4I477iAvL4+0tDRx3YoVKwB45JFHhJbH46Fjx46sXr2aFi1aUK9e9YqtHj16iJB6AA8//DAnTpzg1KlT1KlTR5w/NX36dDEx5fF4RFhAgHr16qHT6YiPj+eTTz4RWkuXLmXbtm3k5+cDkJKSwrJly3jttdd8QhgOHDiQAwcO4HA4MJvNJCQkEB4ezrvvviuuycrKomfPnrRt21Z8ZjRWZ//nn39Op06deOihh7j33nv/ajacFxmf8/xcDfFQpY3+p6eG5pXQKy138uH3e3zCiDVrEMnIQU2xBJouSismLIhmDSLZm1mA2/Pn53odNImPJNoaeNn2aiVf1NbUoo1a8Fk+g1eHnhqaWrRRiz6roalFG7Xosxqal6v3d5wh628+q6F5Jfvy50ML6Xi16amhqUUbteizGppatFGLPquhqVUbtcJVGUwxIyNDhLZTsFqtxMTEkJGRccHfAT6/7datGydPnuTEiRNUVFQAsGPHDnQ6nc95WDqdjgYNGgiNqKjqmNgnT54U1zidTnbv3u1zr/DwcAAKCwvFdb///jtOpxO7vXrVl9lsxmAwYLPZfOxdtWoVBoNB/LZGjeoQY8eP/xkCobi4mMzMTDwej/i8W7durF+/nqKiIpxOJ1lZWTzxxBMA55yQGjBgAAAzZ85k5syZeO9Kk0gkkvPx4fd72JtZ4PPZ3swCZi7ec0l6Iwc1pUl8pM9nTeKrX5olEon6yGdQIpFIJFcL40e0JSTId/1tSJCR8SPanucXkjO50n15iUQikUgkkivBVbnDymazYbVaz/o8LCyM4uLiC/7ObDYTEBAgPhs2bBgfffQRDoeD5cuX43A4yMnJITEx8azzsPbt20dwcPWB4+Xl5QAsX76cefPm0bhxYxYuXCh2TSl2REVFYTKZePTRRxk9ejTl5eVMnTqVhIQEn4knj8dDXl4eEydOpG/fvmzcuJGlS5cSGxsrtPT66vnFOXPmULNmTWJjY5k5cyYWi4XKykpx3bBhw1iwYAH9+vXj9OnTQPWE24033ugzWed0OunatauYXCspKeGNN95gy5YtzJo16y/lxbkwGq/KeVBVuRoO8JM2+p+eGppXSi/ntN1nNaaC2wO7jxRwylZBzcjgi9IMCwng6TvakF9Uju0KhiXRUr6oqalFG7Xms3wG/VdPDU0t2qhFn9XQ1KKNWvRZDc0rqVcrOoT3xlzH3swCjuXbqRdjOWvRxaXgzz5fSU01+vJX2kY19dTQ9Hc9NTS1aKMWfVZDU4s2atFnNTS1aqPWuConrK4kYWFhjBo1ildffZXnn3+ekJAQQkJCaNWqlc91brdbnCXlzc0338ycOXMoKCggJSWF6dOn88ADD4jv9Xo9NWvWJD4+ntGjR2M0Gunduzd16tTxCWmo0+kYOHAgW7du5euvvyYuLo5JkyYxd+7cs+45YMAApk2bht1up02bNrzwwgs89thjPj7NmzeP559/npKSEgICAggODmbz5s1kZ2cTFxcHwLFjx1izZo2Pj1AdcjEvL0/s6LoY9HrdZR/K/m/majjAT9rof3pqaF6u3uGTpRf83u5wX3JdoFYdooV8+Ts0tWij1nyWz6D/6qmhqUUbteizGppatFGLPquheSX1UiMspP7vyy4af/b5Smiq2Zf35t+ejlejnhqaWrRRiz6roalFG7XosxqaWrVRK1yVE1ZWq1XsZPKmuLhYnBN1vt85HA4qKyt9dlkFBwej0+nYtGkTgYGBDBkyROygUliwYAHDhg0T+sq/vXr14sUXXxTXZWZm+nxvtVqpqKhgxowZPnpvvvmmj61Wq5XY2FimTp3qc91bb7111j1HjBjhc89169b5fA+QmJjI559/Lv4uLy/n+uuv56OPPmLChAninsHBwfz2228idOEbb7zBzJkz+f333xk0aNB50/J8uN0ebLayi/7dvx2Dwf8P8JM2+p+eGpp3TvpV/P/8cb0uWSfYpLvg9xazXh52/Q/
qqaGpRRu16LMamlq0UYs+q6Hp73pqaEob/dNGLfqshqYWbfRXn9Xsy4N20vFq0lNDU4s2atFnNTS1aKMWfVZDU6s2/huwWoP+8q6zq3LCKiEh4ayzqkpKSsjPzz/rbKszfwdw5MgRkpOTxecZGRnExcURGBgorjtw4IDPbz0eD0eOHBHnWtWrVw+TyURGRgZdu3b10fK+V0JCAqdOnTprMu3Mc7j+ik/Kv2f+NiMjA5PJRN26dc/re1BQEImJiRw9elR8lpaWRqdOncRkFUCnTp2YOXMme/fuvaQJK0AeKHcBroYD/KSN/qd3JTTvmbLyrM+Uyas5z/S4aL2YsCCaNYhkb2YBbq/Np3pd9Zk30dZAedi1H+ipoalFG7XosxqaWrRRiz6roenvempoShu1oaeGprRRG3qXq/l39OUv18a/Q08NTX/XU0NTizZq0Wc1NLVooxZ9VkNTqzZqhatywqpbt268//77DB8+nF27dmGxWEhOTkav14sJpXPRpk0bQkJCeOWVVzh27BgFBQUkJyeTnZ1Nr169fPS///577rnnHrZv347JZKJFixYUFRVx7bXXAmA2m+nQoQNffPEF33zzDUeOHCEuLo7w8HASExOpU6cOAF26dEGv1/PEE0+Qnp6O3W6nefPm7Nq1i1GjRl2UT3Xr1iU+Pp4PP/yQ1157jezsbBo0aEBVVRWdOnXCbDYDMGPGDN55551zpkFiYqL4/z179lBeXk5SUtJZ1zmdzr+aHRKJRKOMHNSUmYv3+MS/bxIfychBTf9BqyQSiUQikUgkEsn/QvblJRKJRCKR+CNX5YRVv379mD59Ovv27ePBBx/k6NGjLFq0iMaNGxMbGyuuGzFiBNnZ2SxfvhyAgIAAWrVqxdq1a+nVqxdt2rRh7ty55Ofn07dvX/G7Hj16YDKZ2Lx5M/fddx+lpaV88sknREZG0qJFC5/rXnrpJRITExk9ejTLly9n69at3HXXXeIa5fyqdevWMXjwYOrXr8+sWbOorKxkwIABF+1Tly5d+OSTT7jmmmsYNmwYX331FYcPH/a5Z2lpKf369SMpKQmr1Up+fj7ffPMNubm53HHHHeK68vJy9Ho9Tz/9NABbtmxhxYoVhIaGXtaEldEoD5U7k6vhAD9po//pXSlN7zCA5+KeKSsvKTxgWEgAT9/RhvyicmwVLqyBBmLCLz9Gr1byRU09NTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0NSijf7ss1p9eW/btJCOV4ueGppatFGLPquhqUUbteizGppatVFr6Dwej+d/X+ZfzJw5k/fee0/sVFJ2I/3++++sWrVKTPAMHz6cEydOsHJldSisyspKOnXqRNOmTc/aYdW7d28mTpwIwNKlSxkzZgypqals374do9FIixYtWLt2LV999ZWYtLr33nvJzs7GaDSKHVZhYWHY7XaWLVsGwMmTJ+nevTsdO3YUO6xatGjBzp07eeSRR7j//vsvyqcbbrgBq9VKcXGxzw6rOnXqMGvWLABWrFjB3LlzOXDgAGVlZcTGxqLT6SgsLGT9+vViJ5aysyowMBC32018fDxDhw7liy++oE2bNrz88ssXnTcejwed7sLxsCUSyV/nRH4pOafsxEVbiIsJuSSNG8cs/p/XLJl2aSFA1eBK+CyRSCQSiUQikUgkkquLXzYeZdehfFo2iqFX+/r/tDkSiUQi+Qe4KndYpaWlkZqaynvvvSc+s9lstG/fXuxkAliwYIHP77Zt24bdbue5554jJSVFfP7qq6+KXViKfnJyMnPmzBGfeTweOnbsyOrVq2nRogUOh4ONGzfy5JNP+uxuWrFiBQ8//DBZWVnUqVOHtWvX4vF4mD59us8ZVo888ghpaWliwuqv+HT8+HEyMzN59913fUIYzp8/n6lTp+JwODCbzfTs2ZOePXuK7ysrK+ncuTP9+vUTk1VQfa6V0+lkx44dPuk0a9YsH1svBrfbg81Wdkm//TdjMPj
/AX7SRv/SKy138v63u9iV8WeIjuYJkTz8n+ZYgkyXa+pZ+MOhymr5rMWyqIamFm3Uos9qaGrRRi36rIamv+upoSlt9E8bteizGppatFGLPquhqUUbtebzkexiXpq7GUVm1bYTvPPVH7xwd3via1n9wkY19NTQlDb6p41a9FkNTa3a+G/Aag36y7vOrsoJq4yMDG6++Wafz6xWKzExMWRkZFzwdwAJCQk+nycmJjJv3jwqKioIDAwkIyPjrGt0Oh0NGjQQGseOHcPpdJ5TS7lXnTp1yMjIICoq6qwJoMTERL7++uuL8kn5t0GDBmdpOZ1Ojh8/7nNGlcJvv/1GaWmpTwhCgIiICLKzs+nYsSM2m434+HiGDRtGfn7+WX5dDPJAufNzNRzgJ230D733Fu1ib2aBz2d7jhTw7qJdjL611UVpzXmmB/dMWXnB7/3hUOUr6fO50GJZVENTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1pozb01NCUNmpDTw3NK6HnPVkldN3w4sebmPV0j8vSBv/0WW1NaaM29NTQlDb6p56WuConrGw2G1br2SsswsLCKC4uFn8fPnyYSZMmsX37diwWC/Xr18dsNhMQEODzO6vVisfjobi4mMDAQGw2G1lZWVx33XUUFBSQkpLCs88+66Ov/Dtr1iwee+wxTCYTvXv35sEHH/T53mazYTAYGDhwoAgb+MADD4iwft4+bdu2jdTUVOx2O61bt2b8+PHnvOeECRPYs2cPFouFQYMG0bt3b5/vAb766is++ugjsrOzMZvNhIeH065dOx+/ExISyMnJETuzCgsLmTx5MjqdjtTU1EvImWrkGVZnczXEQ5U2+o9ezmm7z+HHCm4P7D5SwClbBTUjgy/rHt5c7jN7JfxW02ctlkU1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KKNWvRZDU0t2qgln1dtzzprskrB5Yb1u3Po1qr2JWn7q89qakob/dNGLfqshqZWbdQaV+WE1V+huLiYESNGEB8fz4wZM8jNzeXFF1+kqqrqL/32+PHjjB07lqSkJD799FPuuecemjVrhtFYnWSKTk5ODtOmTaOiooL//ve/ZGdn+2jl5+eTm5vLddddx3PPPceGDRt4/vnnGThwoM91LpeLXbt28cILLxAbG8sHH3zAXXfdhcViEdeUl5eLeys+TZkyhRMnTvho/fDDD4wfP54HH3yQ5s2b88gjj+DxeNi5cyetWrUCoKCggD179mAwGIiLi6N///4sW7aMgoICDAYDkZGRF5fg/z96vY6ICMv/vlCjWK1X5hBbtfTU0NSijZerd/hk6QW/tzvcF/2cKWdUeZ9ndaXPrbocv9Xw+Uy0WBbV0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NKWN2tBTQ/Ny9TJySi74/aFsG4O6N76se/ibz3+HprRRG3pqaEob/VNPS1yVE1ZWq5WSkrMbtOLiYhF67/PPP8dut/POO+8QHh4OwO+//87SpUs5duwY9erVE7+z2WzodDrCwsKorKzEZrPRoEEDcTbVNddcQ58+fTh8+DDt27cHYNeuXQCMHDmSHj16CLvuvfdeAGHHwYMHMRqNvPTSSwB07NiR48ePs3r1anHNyZMn8Xg8dOjQgSFDhgDQvHlzunfvjt1uF9dt27YNgOeee46WLVsC1RNdEydO9Lnn22+/Tf/+/Xn88cf56quvcLvdJCUl8e677zJr1iyRFoWFhcyZM4dZs2bxwQcfEBwcjE6no6qqimPHjp0zvOD/Qp5hdW4MBv+PhyptvHy9NTuyOXiimMZ1wujSIu6SdYJNugt+bzHrL/nMqU9fuF74fDnnVnlzJdJRTZ+1WBbV0NSijVr0Ga5cXaagxXTUos9qaPq7nhqa0kb/tFENn/ccKeD4KTv1Yiw0ib+0xYLeaDFf1ND0dz01NKWN/mmjlnxOqBXKqgt83zDO6jfvglrKFzU
1tWijFn1WQ1OrNv4bsFr/5WdYJSQknHVWVUlJic/ZS2lpaXTq1ElMVgH069ePpUuXsmTJEkaNGiU+z8jIIC4ujsDAQH7//Xfcbjdu958Fymw206tXLxYsWCD09+3bB0BZ2Z+TM6mpqVgsFux2OwkJCTgcDk6ePInL5fKZTFPsUCad1q5dC4Be/2emhYeH0759e1atWiXuefjwYaB615ZC3759mTBhAgaDgbp163L8+HEyMzN56qmnAFi6dCkJCQkMGTKEqVOnivB/TqcTgGbNmjF37lwAPB4PrVq1oqKi4q9nxjmQ8TnPz9UQD1XaePEcybHxyoItPgfEzl66l3Ej2lI/9uzwpf+LmLAgmjWIZG9mAW7Pn5/rddAkPpJoa+Bl++9v+aJFn/8OPTU0tWijVny+0nWZGjaqrenvempoatFGLfqshqYWbbwSermFZUyev4XS8j8jf4QEGRk/oi0x4Zcf8lmL+aKGpr/rqaEpbdSGnhqal6vXpXkc837cf86wgAY9dG5Wy+/eBbWQL3+HphZt1KLPamhq1UatcFUGU+zWrRvr16/HZrOJz3766Sf0er04eykjI4PIyEjuvvtuWrVqRWpqKhs3bkSn05GWliZ+53Q6+eWXX+jWrZv4HcCRI0fo0qULLVq04NZbbyU/Px+3203Hjh0BOHr0KJGRkbz33nu0bt2a9u3bM27cOEwmEyEhIdSpU4djx47hcrnQ6XTceOONNG/enBtuuIHdu3cD0LBhQ3HP4OBg1q1bR6dOnWjVqhV33303paXVIbIUn06cOEFISAgvvPCC8OmDDz7AZDIRFxeH2WwW9n/22WekpqayYcMG4uLiSExMxOl0cvz4cQBq1KhBQEAA3bt3p0WLFvTs2ZNbb70Vp9NJSEiIzw40iURyYbwHeBVcbpg0b8sla44c1PSs1bZN4iMZOajpJWv6O1r0WSLxJ9SoyyQSiUTiy5mTVQCl5VW8LOtaiUSiYcaNaMuZC+8N+urPJRKJRKItrsodVsOGDWPBggWMGjWKkSNHkpuby9SpUxk2bBixsbFAdXjAb775BpPJxLvvvivOezKbzezcuZN58+bRuHFjFi5cSFFRkQjlZ7PZ0Ov1YofVQw89xIoVK1i2bBmAmMgpLi6msrISu91Ot27daN68OXPnzsVut5OUlCSugeqdSwUFBdx5553k5uby3nvvAYjzpJR7ulwuwsLCGDFiBN9++y2ZmZnodDofn4xGI6Wlpdx4443UqFGD+fPn43Q6adSokc898/LyqFOnDqdOnSIlJQWr1erz/Zo1a9DpdHg8HiorK8nKyiIrKwuA//u//8NkMl1y/hiNV+U8qKpcDQf4SRsvDbUOiA0LCeDpO9qQX1SOrcKFNdBATPjlx7/153zRos9q6amhqUUbteTz1XTYtRqa/q6nhqYWbdSiz2poatHGK6W38/CpsyarFErLq9h3rJDmCVGXpK3FfFFD09/11NCUNvqnjVrzObF2OB8/14u1O7M5kHVlQ1N7/+tvempoShv900Yt+qyGplZt1BpX5YRVWFgY8+bN4+WXX2bUqFFYLBaGDBnCE088Ia7xeDx4PB4iIyPp2rUrUH3e04QJE0hOTmbOnDkUFBSQkpLC7NmzqVu3LgBVVVW43W7+3//7f+Tl5fHhhx9iMBgwm804HA6hX1ZWht1u54UXXmDhwoX8/vvvREREYLfbCQgI8LG3YcOGdOvWje+++w673U5kZCQFBQUEBQUJrdLSUh599FG2bNnC+++/T1BQEHq9Ho/nz9hYHo8Ht9vN888/zyeffEJ2djaRkZHk5uZisVh87vnxxx/z4IMPAhAREXFWGt588818+eWXOJ1OTCYTOp0Og8FAeXm5mCC7FPR6HRERlv99ocbYlp5H+oajJNePpHVSjSumq9VDBq+U5on8Ug7syyUu2kJcTMglaah9QKxaz5M/54sWfVZLTw1NLdqoBZ+vxsOu1dD0dz01NLVooxZ9VkNTizZerl5
2wfELfn/idBndrrm8SBdazBc1NP1dTw1NaaN/6V2Jd9Xz4a8+A9x4baMrpuWNP/uslqa0URt6amhKG/1TT0tclRNWAImJieLspXOh1+upXbs2v/zyi/hMOe8pNjaWxYsXn/N3yg6kgQMHijOmAO6++27Wr18vzqGqqqoiNDSU22+/ndtvvx2onlBq2rQp5eXlAAQHV8cgv+aaaxg7dixjx44FYOHChUycOFGcI6WENhw+fDiPPPKIuGefPn3ErifFp7i4OO68807uvPNO8dt27doJDcW+kpISvvnmG7Hb68zvv/nmG4KDg1mxYgVmsxmAAwcOcOONN7JgwQL69Olz3rS9EG63B5ut7H9fqBFyC8p48eNNZ8Won3hPe2pEXHqMeoNBm4cMXinN0nIn73+7i10ZBeKz5gmRPPyf5liCLm53oZoHxIK28uVq0VNDU9ronzZqyeer6bBrNTT9XU8NTS3aqEWf1dDUoo1XSi8u8sIDF7WjgmVd+w9r+rueGprSRv+y8Uq+q6plo1p6amj6u54amtJG/7RRiz6roalVG/8NWK1Bf3nX2VU7YXUp6HS6K3rt+a7x3hV1uXZcrFZCQgJQfS6W8v/K3yaTSewkO3ToEAkJCWKyCuCPP/4Aqie7Lgd5oNyfnDlZBdXhPibO2cTbj3W7bH2tHjJ4uZrvLdrF3swCn8/2HCng3UW7GH1rq4vS+jsOiAVt5MvVpqeGprRRG3pqaF6Jw67n/LD/vN/742HXamj6u54amlq0UYs+q6GpRRsvV69J/UhCgoznDAsYEmQkpV6ErGv9RNPf9dTQlDb6h96VfFc9H/7m89+h6e96amhKG7Whp4amtNE/9bTEv3bCyu12c+LECYYPH86uXbuwWCwkJycDiPOczoWyA+n111/n2LFjFBQUkJyczJEjR4DqHViBgYEYjUZOnTrFPffcw/bt2zGZTLRo0QKXyyV2VpWVVe80Wrt2LQMHDuTIkSPExcURElK9nVs5J0qx54knniA9PR273U7z5s3JysoSZ2n9VZ/q1q2L1Wpl7Nix4rfz588nKCiITp06iQmqmJgYli5dSvfu3Tl9+jSBgYGUlZURGBjoM9F1KcgzrKqRMer908ac03Z2Hyk463O3B3YfKeCUrYKakRe3++2Fu9vz4sebfCatDPrqzy/3edBKvlxNempoShv900Yt+Tz9y+0X/P69b3fyf0NbXZK2ltJRLT01NLVooxZ9VkNTizZeSb2J97Rn4pxzR2C4nH6jFvNFDU1/11NDU9roPzaq8a56pW1UU08NTX/XU0NT2uifNmrRZzU0tWqj1vjXTlhB9ZlV+/bt48EHH+To0aMsWrQIs9kszo4CGDFiBNnZ2SxfvhwAo9GITqdj48aN9OrVizZt2jB37lwRUk9BOWNq8+bN3HfffZSWlvLJJ59gMpkIDQ31ufbEiRMkJiYyevRoli9fztatW32+Dw4OxmQysW7dOgYPHkz9+vWZNWsWVVVV6PW+hfuv+NSkSRM2bNjAgAEDWLp0KQ6Hg5MnT9K/f39xTf/+/Vm0aBHBwcEMHjyYb7/9FpvNhtPppEePHpec5vIMqz+RMerV0btczcMnSy/4vd3hvugyHBFh4bvXBvHrpqPsOJhPy0Yx9Gpf/5JtPBf/9ny5GvXU0JQ2akNPDc3L1TucfeHd1QdP2C67fddCOqqtp4amFm3Uos9qaGrRxiuhFxFhYeGk/mxPz2P/0QJ5xq2favq7nhqa0sZ/Xk+Nd9Vz4U8+/12a/q6nhqa0URt6amhKG/1TT0v8ayesAgICcDgcJCcn8/7772OxWEhNTWXdunUYjX+67Xa7cblc4u/g4GA8Hg9t27Zl9+7dpKWlkZycTFlZGaWlpWIHlvLbdu3aMXfuXIxGI507d2bt2rUijJ9ybc2aNTEYDEybNo24uDiSkpJIT08X3xsMBpxOJ507d2b16tXY7XZatGjB1q1bCQgIuGif5s2bx1dffcWsWbOEfkJCAtu
3/7l6OjU1lY8//ph3332XmTNn4nQ6SUpK4tChQ2RnZ19yusszrP5Exqi/8jbmFZVTUuHCGmggJvzSKv5g04VDclrM+kvOl/bJNejVvj42W/llnVvlzdWQL/5uoxZ9VkNTizZqyefEuFC2HTh93u8b1ZZnWP2TempoatFGLfqshqYWbVTD58RaobROqnHF+o1azBc1NP1dTw1NaaP/2Kjmuyr4p89qa/q7nhqa0kb/tFGLPquhqVUb/w3IM6yonqSJjIzkk08+EZ9lZ2fTvXt3HA6H+GzBggU+v6uqqg7NcMcdd9CvXz/x+eDBg9m/fz+BgYFA9dlSAQEBzJkzR1zjdrtp0qQJTqcTqJ6oAmjWrBnvvvuuuO7NN98kPT1dhA6srKwE4KWXXhJnTAF07dqV8vLyi/YJYOjQoQwdOpSkpCTuvvtuNm/ejN3u27Hp1KkTq1ev5o8//mDu3Ll07NiRtm3bCvsvFRmfsxoZo/7K6ZWWO/nw+z0+4RGaNYhk5KCmWAIv7uDZmLAgmjWIZG9mAW6vI+L0OmgSH0m0NVDmi59o+rueGprSRm3oqaF5uXqPDG7JPVNWnvf7h//TQtaNfqCnhqYWbdSiz2poatFGLfqshqYWbdSiz2po/ttt/DveVS/Xxr9DTw1Nf9dTQ1PaqA09NTSljf6ppyX+tcEUXS4XRUVFPqH81q1bByDOcToXyk6lvXv3is+cTicnTpzA5XJRUVEBgE6no7KykszMTHHdhg0b8Hg84myqkydPAvhcA3DgwAHgzzOulF1UGzZsENcUFxdTWFiI9+6vi/HJ4/GIybfdu3ezbt067rjjDp9rPvzwQ+bOncvkyZNJTExkypQp6PV6brrppvOmj+TiGD+iLSFBvvPCIUFGxo9o+w9ZdHXy4fd7zjp4dm9mATMX77kkvZGDmtIkPtLnsybx1RNgEolEolVG9Gl8UZ9LJBKJRCKRSK4s8l1VIpFIJFrnqt1hdfjwYSZNmsT27duxWCwMGjSIxx9/XEzcOBwOAgMDGTVqFCNHjiQ3N5epU6cSFhbGrl27uO666ygoKMBkMmGxWEhLSwOqJ5EMBgPz5s0jLS2NzMxMXC6XCPNXXFxMYGAgOp2OsLAw7rnnHuDPySmLxYJOpxPXAmRkZDBo0CCys7MpKysTE0nK9y6Xi9DQUKZMmcLHH3/M8ePHcbvdYlJMQfFp6NChlJeXc+rUKTweD8HBwUITYN++fUycOJE//vgDgGXLljFo0CCfc66WLFnCtGnTqFGjBk8//TRQPQnXpk0bn3teCpdzWPC/jVrRIbw35jr2ZhZwLN9OvRjLWZ3PS0FLhwyqcfBsWEgAT9/RhvyicmyXGWLQGy3li5qa/q6nhqa00T9t1JrPPdvWo2fbesz4+g8OZNloXMfKo0NaXbau1tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaq9a7qbZu/+aympr/rqaEpbfRPG7XosxqaWrVRa1yVE1bFxcWMGDGC+Ph4ZsyYQW5uLlOmTKGiooIJEyYA1ZMvQ4cOJT09nVGjRmGxWBgyZAjfffcdu3fv5plnniEpKYknnniCvLw8jh8/LsLx6fV6rFYrhw8fBqB27dqcPn2a0tI/D8DU6XQ0a9ZMnB8VEBBAvXr12L9/P6dOnfKxt1mzZuzevRudTkdUVBRms5msrCyxWwsgIiKC/Px8jh49il6vp379+hw7dkxMlCn3vOaaa0hLS8NoNGKxWKhVqxbp6emcPv3nuRM//PCDmKxSWLx4MatXr2bjxo3Anzuz8vLyxDUej4etW7dy7733snr16kvKG71ed0UOAf23kRphIVUFXS0cMqjmwbNqlVUt5MvfoenvempoShu1oaeG5pXUm3C/Gi2W9tJRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTSzaqOa7irz6rqenvempoShu1oaeGprTRP/W0xFU5YfX5559jt9t55513CA8PB6p3Kb344ou
MHDmS2NhYrFYrZrOZuXPnit9VVlby8ccf07JlS+666y4A0tLS6NOnD7Nnz2bixIlYrVacTienTp3ixx9/JCEhAYDJkyczf/58jh07JvTT09Np3bo1n3/+ubhHx44dOXHiBABhYWFAdUi+iRMncuuttwKwc+dOhg4dyvbt2xk6dChWq5XTp0+j0+lYt26d8On+++8nLS2N3Nxccc8dO3YwYMAApk2bJu7ZrFkzEWYQ4Mknn2TMmDHodDoWLVrEs88+y6233soPP/yAy+XCYDAwZcoU8vPzcTgc4hwvt9tNz549yc7OJicnh1q1al103rjdHmy2sov+3b8dg8H/D/C70pprdmRz8EQxjeuE0aVF3CXrqHnwrBbzRYs2atFnNTS1aKMWfVZDU4s2atFnNTT9XU8NTWmjf9qoRZ/V0MwrKqdEhR0j/pyOV0O+SBv900Yt+qyGpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dlRNWaWlpdOrUSUzsAPTt25cXXniBdevWMXjwYBISEsjIyPD53dq1a/F4PHTt2lV8Zjab6d27N8uXLwcQE1Tx8fHi/6F615VOp2PDhg20a9eO+Ph4du3axf333y+u8Xg8OBwO7HY7WVlZ1KtXD4PBgMvlok+fPuI6ZQfWoUOHxD3tdjvdunXz8clgMAAIn2rXrs2uXbvo27evuKakpASn00l2djYOh0OERFTCEiokJyfzxRdfUFBQQExMDABVVVWEhIT4+Fi/fn2ys7N9dnZdLPJAufNzNRzgd7maR3JsvLJgC0qdvGrbCWYv3cu4EW2pH2u9aL2/4+BZLeSL2npqaPq7nhqa0kZt6KmhKW3Uhp4amlq0UYs+q6GpRRu16POV0Cwtd/Lh93t8wnw3a1B9Jo8l0HQlTPT7dPTHfFFbTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9ron3pa4qqcsMrIyKBnz57cfffdPmdYxcTEiEmqbt268cEHH2Cz2bBaqwfJly1bBlSfU6WcYZWSkkL79u3Jzs6moqKCNm3aoNfr8Xg8PProo6xduxaj0UhVVRWRkZFCPzk5mSVLlnD8+HEGDhzIkSNHiIiIwG63Cxvr1KlDzZo1yc3N5YMPPuD777/HbrdjsViwWq1iJ1aXLl0AsNvtwqegoCBKSkqwWCzing0bNmTXrl3s2rWL1157jezsbCIiItDpdLhcLo4fP05iYiIABw4cYNq0aWzatAmAd955h6CgICIiIkQ6hoaGsnz5cpKSknzSNzg4mLi4S98RI9E23pNVCi43TJq3hVlP97gkzZGDmjJzse9Lrjx4ViKRSCQSiUQi8X8+/H4PezMLfD7bm1nAzMV7GH1rq3/GKIlEIpFIJBKJX3JVTlgVFxezbNkymjRp4nOGlV6vp7i4GIBhw4axYMECRo0axciRI8nNzWX58uXodDrmz5/Pk08+SVJSEqNHjxbnPRUXFxMbG0toaChHjx6lrKyM+++/n99++41du3ZhMpmEfvPmzQH45JNP6Nq1Kz179mT+/Pk+NgI0btyYEydOsGDBAu68807y8vJYsmQJgYGBlJVVh86rWbMmOp2OrVu3Eh8fz4MPPsh3331HYWEhQUFBQqtVq1Z8++23fPDBBwwYMOCc99y/fz+vvvoqu3btombNmrRv355Vq1Zx+vRpDAYDhw8fJikpiS+++IITJ06g11dvxXO7q2cYdDodr7zyymXlj9EoD5U7k6vhAL8roblqe9ZZk1UKLjes351Dt1a1L1pXrYNntZIvauqpoenvempoShv900Yt+qyGphZt1KLPamj6u54amtJG/7RRiz5fKc2c03afRWcKbg/sPlLAKVsFNSOD/1EbryY9NTSljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJKyX03plnWE2YMIHy8nKg+vyoefPm8fLLLzNq1CgsFgtNmjRh+/bt3HPPPeIMq4SEBIqKisSEDYDRaBT3ef/990lJSeGZZ57
h1VdfpaSkxOeakJAQtm7dyo4dO7jhhhsoLCxk5cqVQis4uLrzHRkZyfz584mLi+P5559n6tSpPvcExITb+++/T5s2bRgwYAAzZswQPnlr/frrr1gsFoYPH87atWvFGVbR0dFUVlZit9s5duwYubm5AEyaNImXX36ZX3/9laSkJBo2bEheXh5utxu9Xk9UVBS1atUiLy+P7777jj59+pwVVvCvoNfrVD0c9GrHnw/wO5FfyoF9ucRFW4iLCfnfPzgHGTklF/z+ULaNQd0bX5I2qHfwrD/ni1qaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShu1oaeGpr/ZePhk6QW/tzvcV6SP7+/p6G/58nfoqaGpRRu16LMamv6up4amtFEbempoShv9U09LXJUTVnq9nri4uLPOsJowYQI2m018lpiYyNy5c8XfkyZNYvv27fTs2VN89umnn3L33Xezfv16wsLCgOrJr9DQUNatWyeu83g8TJ069azJo/79+/PSSy+J6xYuXMjKlStxOp0Awp4lS5YIfYDPPvuMrKws8bfBYKB27dr88ssv4jObzcaMGTOEhsPhAOCRRx7hjjvuENeVl5dz4MABgoODiY6OZtiwYWzfvp1169axYsUKnn32WXr27MmUKVPE2VRt2rTBbrdjNBrZs2eP0Nq6dSu3334769atE6EKLwa324PNVnbRv/u3YzD47wF+peVO3v92F7sy/lz52Dwhkof/0xxL0MXFlE+oFcqqC3zfMM5KYaH90gzFv9NRDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2+qeNWvT5SmkGmy68CNJi1sv3g39YU9ronzZq0Wc1NP1dTw1NaaN/2qhFn9XQ1KqN/was1qC/vOvsqpywOhcXsxvor1x7vmuUCZ8rYcelap35uzOv6969O9HR0UyZMkWcT/Xee++h0+kYNGgQAAUFBZSXl6PT6ejYsSM2m434+Hhuv/12AI4dO/aX/TgTeaDc+fHHA/zeW7TrrJjye44U8O6iXRcdU75L8zjm/bj/nGEBDXro3KzWFfHfH9NRTT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkPT3/XU0JQ2akNPDU1/szEmLIhmDSLZm1mA2+s1Vq+rPpM22hoo3w/8RFPaqA09NTS1aKMWfVZDU4s2atFnNTS1aqNWuConrNxuN9nZ2dhsNqxWKwA//fQTgPj7XCg7nFasWEGLFi0AcDqd7N+/H6g+AyowMBCj0cipU6fIzMwkPj4egN9//x2Xy0VOTg6tWrXCbDYDsH37dp97rF+/HgCTyeRjz/PPP8/u3bspKCigUaNGHD9+3CckoOLTgw8+yMaNGzGZTDRs2NBHQ7nnkiVL+PLLLzly5AhxcXFUVFQAiDOxwsLCmDt3LrfffjuLFi0Cqnd0vfbaa9StWxeoDitoMpkICwsT52Tl5eXx8ssvA1C79sWfM6Qgz7A6G3+Nh6pGTPkX7m7Pix9v8pm0MuirP7/csuGv6aiWnhqaWrRRiz6roalFG7XosxqaWrRRiz6roenvempoShv900Yt+nwlNUcNbs57Z0R0aNqgOqKDfD/45zWljf5poxZ9VkPT3/XU0JQ2+qeNWvRZDU2t2qg1rsoJK51Oh9lsZtSoUYwcOZLc3FymTp0qJl8URowYQXZ2NsuXLweqz50yGAzMmTOHyMhIGjduzMKFC7HbfUMQBAcHExYWxqOPPsro0aMpLy9nypQp6HQ6DAYDb731Fps2beLDDz/k4MGDTJw4kb59+7Jx40afkH6KltlsZvny5fTv35+UlBRmz55NVVUVBoPBxyeXy8WGDRu47777yM/P54svvsBoNPr4BPDHH3/QtGlTRo8ezffff09mZqbP96dPn2bo0KFUVlaSlJREeno6AQEBjB49GqgOY6jT6ejSpQu//fYbiYmJxMXF4XA4cDqdOBwO2rZte0l5I8+wujD+Fg9VjZjyEREWvnttEL9
uOsqOg/m0bBRDr/b1L8fMs/C3dFRbTw1NLdqoRZ/V0NSijVr0WQ1NLdqoRZ/V0PR3PTU0pY3a0FND0x9tjIiAV0Z1JTu/lOxT9ss6M/d8+Hs6+mO+qK2nhqYWbdSiz2po+rueGprSRm3oqaEpbfRPPS1xVU5YhYWF0bNnT7Kyshg1ahQWi4UhQ4acdU6U2+3G5XKJv61WKy6Xi1GjRjFnzhwKCgpISUnhzjvv5KOPPhK/DQsLo1GjRhgMBkaPHo3RaKRu3brk5eXRvn17unbtSq1atfjwww8B2LhxI19//TVxcXE88cQTvPHGG0LLYrHgcDho0aIFGzZsYMWKFbRq1YqdO3f67LAKCgqitLSUpKQkZs2ahcVi4frrr+fnn3+msrJS2AVQq1YtSktLmTZtGg0aNKBx48YcOHBAfP/WW2+Js7bS09OBP3dfTZ48mf79+wMwffp05s6dy+LFi8nJyUGn09G6dWs2b97MyZMnSUxMvOi8kWdYnRuD4crGL80rKqekwoU10EBM+KVXgGrGlG+fXINe7etjs5VfVlx6b650Ovq7nhqaWrRRiz6roalFG68Gn/ccKeD4KTv1Yiw0iY+8AhZqMx39XU8NTS3aeDX4/N9Pt3Ikp4TEuFCeuv2aK2ChNtPR3/XU0LwabAwJMNA2JVa+H/iZprTRP23Uos9qaPq7nhqa0kb/tFGLPquhqVUb/w1Yrf/yM6wSEhIoLCxk7ty54rOSkhI+/vhjEhISxGcLFiw463cAPXv2ZNSoUeLzKVOmEBcXR2BgoLjuwIEDfPfdd+Ka22+/HaPRSHJyMgD16tXDaDRSVVXF/fffz+DBgwFYuXKlz730+uqMePrpp2nXrp3Qu/7668nJyRF/BwQEAPDFF1+Iz2w2Gz///DOlpdW7YOrUqQNA586deeWVV8R148aN48CBA+JeSpjCTZs2+UzgdevWTWgBBAYG8uCDD/Lggw/yzDPPsHv3bu644w42b97M5SDjc56fy41fWlru5MPv9/iE8WvWIJKRg5piCTRdtN7fEVNei3FlteizGpr+rqeGprRRG3pXQjO3sIzJ87dQWl4lPgsJMjJ+RFtiwi8ulOv50EI6Xm16amhq0UZ/9PnHDZl8tSpD/L0ro5A7J/3KsJ6JXN/uyuxU10I6Xm16amhKG7Whp4amtFEbempoatFGLfqshqYWbdSiz2poatVGrXBVBlPs1q0b69evx2azic9++ukn9Ho9qamp5/1dmzZtCAkJ4ccffxSfOZ1OfvnlF7p16+ajv3//fp9QewcOHMDpdHLttdcC1edJdezYEZPJREbGny+Xy5YtIzExUUwuKedPHTx4UFxTXFxMTk4ODodDnD9lNBopLy/38ennn38GwOPx+PyblZXl49fevXsBOH78OFA9SabX68UEHIDL5aKiogKn03ne9FHst1qt1KtX74LXSf4ZPvx+D3szC3w+25tZwMzFey5Zc+Sgpmetym8SXz0JJpFIJBL/5MzJKoDS8ipenrflH7JIIpFcDt6TVd58vuLw32yJRCKRSCQSiUQikfxzXJU7rIYNG8aCBQvOOsNq2LBhxMbGiuvOPMMqICCAkSNHMmPGDJ8zrAoKCkhPT6dVq1ZYLBZuvPFGEhMTfc6wKikpISEhgRYtWgj9hx56iLVr1zJnzhzmzp1LZGQkeXl5vPnmmz726vV6Jk+ezKuvvorRaCQ4OJjAwEAcDgfFxcUEBgZiMpkwGAx069YNp9NJWFgYdruduLg4ERKwuLgYqA5B2KpVK1wuFxEREZw6dcrn+zp16rB//366detGWVkZgYGBhIeHU1xcLM7NKi0tpU+fPhiNRoqKinA6nXg8Hg4ePMhzzz2HyXTxu3UULvfg3H8jV+LAvZzTdp+dVQpuD+w+UsApWwU1Iy9+VX1YSABP39GG/KJybFcgzKCCFg9C1KLPamj6u54amtJG/7TRX33eefjUWZNVCqXlVew7VkjzhKhL1tdKOl5NempoatF
Gf/V5yicXnmh+44vtPH3HpYcH1Eo6Xk16amhKG/3TRi36rIamFm3Uos9qaPq7nhqa0kb/tFGLPquhqVUbtcZVOWEVFhbGvHnzePnll33OsHriiSd8rjvzDCuA+++/H4/HI86waty4MQEBARgMBmbMmEFubi5TpkyhV69e2O12cYaVTqejX79+Plrbtm0DwGQyUVVVhc1mw2Qy0axZM3GNy+XC7XYTFhZGVVUVFRUV2Gw26tat67Obyul04nA4iI2NpbCwkLKyMioqKnxC+imYzWaCg4MpLi7GZrNhsVh8tCwWC3q9HpfLhclkwuFwcOLECRITEzl27BgA2dnZ2Gw2zGYzVVVVuN1uPB4PJpOJLl26XGLOgF6vIyLCcsm//7dyIr+UA/tyL+uA4cMnSy/4vd3hvqy0VyvftHgQohZ9VkPT3/XU0LySelei3jkXWktHNfQuVzO74PgFvz9xuoxu11z+Tul/ezpejXpqaGrRRn/zOfN/9PEyckquSD/t356OV6OeGppas/GXjUfZdSiflo1i6NX+yoTPBP/2WS1Nrdl4tfSVtZYvamn6u54amtJGbeipoSlt9E89LXFVTlgBJCYm+pxhdS7OPMMKQKfTMXLkSEaOHAnAzJkz+eCDD3jnnXcIDw8HqieZXnzxRX777TexY6tTp044HA6hU1lZycyZMwkODuaOO+7gySefxOFw0KdPH2bPns3EiRMBOHHiBABz584V51+tXbuWe++9F51OJyak7HY74eHhpKWliXuMGTOGX3/9lQYNGgCIcH633nor48aNA6CoqEiEKVS0Tpw4gdvt5tdffxU+ffHFF7zwwgtEREQA1buwNm7cSFBQ9cPzzDPPsHPnTk6fPs1nn33G+PHjL5i258Pt9mCzlV3Sb/+NlJY7ef/bXezK+HNnVPOESB7+T3MsQRe3iy3YpLvg9xaz/rIOLzYY/P+QQX+3UYs+q6Hp73pqaF5JvStZ76hlo1qa/q53pTTjIi/c8a0dFSzbg3+ZnhqaWrTRX32OrxnC3syi836fUCtUPtP/Mj01NLVm45HsYl6auxlFZtW2E7zz1R+8cHd74mtZ/cJGNfTU0NSajVdLX1lr+aKWpr/rqaEpbfRPG7XosxqaWrXx34DVGvSXd51dtRNWV4q0tDQ6deokJnYA+vbtywsvvMC6desYPHgwAAkJCT5nVW3bto3S0lJ0Oh0JCQlA9c6n3r17ixCEACdPnjzrnqmpqQQEBGA2m0VoQLvdTo0aNXyu69u3L0uXLiU6OhpAnKkVExMjrgkPDyc5OZk//vhD2KGECNTpdD5aEyZMEJNawcFnh47T6/XUq1ePvLy8/5FqF0YeKPcn7y3addaZU3uOFPDuol2MvrXVRWnFhAXRrEEkezMLcHv+/Fyvqz5zKtoaeEXS/mo4ZNDfbdSiz2po+rueGppXQu9K1jvnQivpqKbe5Wo2qR+JQQ/n6vsa9JBSL0K2B/9SPTU0tWijv/n85LA23DNl5Xm/H31ra/lM/0v11NDUio3ek1VC1w0vfryJWU/3uCxt8E+f1dbUio1XW19ZK/mitqa/66mhKW3Uhp4amtJG/9TTEpqfsMrIyODmm2/2+cxqtRITE+MzQdWtWzc++OADbDYbVqtVfKfX60lNTRXXJSYmMm/ePCoqKggMDKS4uBij0ciPP/4odlhVVVXh8XjEbqdjx47h8XjIy8sjMzOT+Ph4ABHmr06dOuI6k8nE6tWrxQ4xqN6dZTAYxHVFRUXodDp++eUXhg4dCoDH40Gn0/lMdp2Jy+Xi4MGDdO7c+eIT0gt5hlU1apw5NWpwc947YzVY0wbVq8EuN92vhpit/m6jFn1WQ9Pf9dTQvFJ6ap11dyVtVFPT3/WulGbOafs5J6ugerDucvIZtJOOV5OeGppatNGffb69V0M++/XQOT+Xfbx/n54amlqycdX2rAu2g+t359CtVe1L0vZXn9XU1JKNV1NfWUv5oqamv+upoSlt9E8btei
zGppatVFraH7CSpmAOpOwsDCKi4vF38OGDWPBggWMGjWKkSNH8vvvv4vPlbCBAPPnz8fj8VBcXExgYCClpaU0adKEOXPmEBkZSePGjVm4cCEul0tMHin3qVOnDo8++iijR4+mvLyc6dOnAxAZGSlsjY6O5o8//mDixIn07duXjRs3cvDgQQwGg7ChtLSU5s2bM3XqVPR6PbGxscycORODwUBcXJy4rry8nNWrVwPVYQRPnjyJy+WiVq1aFBQUiPteDPIMqz9R48ypiAh4ZVRXsvNLyT5lv+LxtuHqiNnq7zZq0Wc1NP1dTw3Ny9VT+6w70EY6qq13uZp/Rz7Dvz8dr0Y9NTS1aKM/+nxb36bc1rcpE2auI/1YEUn1wnlpZOr//uFFoIV0vNr01NDUgo0ZOSUX/P5Qto1B3Rtf1j38zee/Q1MLNl6NfWUt5MvfoenvempoShu1oaeGprTRP/W0hOYnrP4qYWFhzJs3j5dffplRo0ah1+vR6/U888wzPtd5PJ6zfpucnEyvXr2YM2cOBQUFpKSk0KJFCwIDA32ue+655/j2228ZPXo0RqORbt26sXTpUp9rAgMDmTFjBtOnT+frr78mLi6OPn36sGLFCp/runfvTklJCdOmTcNut9OmTRtq166N2WwW15w+fZrHHnvsLHtfeOEFGjRoQIcOHS46neQZVn+i5plTIQEG2qbEYrOVX9aZBt5c6RirV1pPDU1/11NDU4s2aslnNesdLaWjWnpXSlOeaej/NmrRZzU0/V1PDc0nb2sj9GQf79+rp4amlmxMqBXKqgt83zDO6jf9HS3li5qaWuwraylf1NT0dz01NKWN/mmjFn1WQ1OrNv4bkGdYXQRWq5WSkrNXaBUXF4vznhQSExOZO3cuAJ9++ikvvfTSWRNUI0aM8Dkrymq1YrfbGTlypE8Yv2HDholrlH9NJhMzZswQ12RmZrJ06VIfrdLSUnr27EnPnj3FdW+++aaPrVarlfLycsaOHcvYsWPF5127dvW5rk6dOqSnp7N69WoefvhhHnjggXNOYF0sMj5nNX/HmVNajdnq7zZq0Wc1NP1dTw3Ny9WT9c7VoXe5mvJMQ/U0/V1PDU0t2qhFn9XQ1KKNWvRZDc3L1evSPI55P+4/71mOnZvV8rv+jhby5e/Q1GJfWQv58ndo+rueGprSRm3oqaEpbfRPPS2h+WCKCQkJPmdVAZSUlJCfn09CQsIFfwdw5MgRn88zMjKIi4sTu6fOpe/xeDhy5IjQqFevHiaT6azrlL+V6xISEjh16pRPqELlOm9bL8anP/74g8cee4ybbrrpikxWSXwZOagpTeJ9Qys2iY9k5KCm/5BFEonk346sd7SBzGeJRCKRaJlxI9py5iJdg776c4nkQsg+lEQikUgk/o3md1h169aNDz74wOcsq59++gm9Xk9q6vnjxrdp04aQkBB+/PFHkpOTAXA6nfzyyy9069bNR//7778nMzOT+Ph4AH7//XeKioq49tprATCbzXTo0IGff/6ZESNGiN8uW7aMxMRE6tSpA0CXLl3Q6/X88ssvDB06FKjeCbZ27Voefvjhi/bp0KFDjBw5ko4dO/Liiy9echpKzo8l0MToW1txylaB3eHGYtYTbQ383z+USCSSS0TWO9pA5rNEIpFItEz9WCuznu7B+t05HMq20TDOSudmtf5psyRXAbIPJZFIJBKJf6P5Cathw4axYMECRo0axciRI8nNzWXq1KkMGzaM2NhYcd2IESPIzs5m+fLlAAQEBDBy5EhmzJhBZGQkjRs3ZuHChRQVFXHvvfeK391www3MnDmTRx99lNGjR1NeXs7UqVO57rrraNGihbjuoYce4s4772TixIn07duXjRs3snTpUt58801xTc2aNRkyZAhTp05Fr9cTGxvLzJkzCQ0NZdiwYRfl0+nTp7n33nsJCAhgxIgR7N69W/w+JCSEhg0bXvnE1jA1I4OJiLBQWGiX20ElEsnfgqx3tIHMZ4l
EIpFomW6tajOoe2PZDkouGtmHkkgkEonEP9H8hFVYWBjz5s3j5ZdfZtSoUVgsFoYMGcITTzzhc53b7cblcvl8dv/99+PxeJgzZw4FBQWkpKQwe/Zs6tatK64xmUx89NFHTJo0idGjR2M0GunduzfPPfecj1bbtm2ZMWMG06dP5+uvvyYuLo5JkybRt29fn+vGjRuHxWJh2rRp2O122rRpw8cff0xoaOhF+XTo0CFOnjwJwF133eVzj/bt27NgwYKLT0yJRCKRSCQSiUQikUgkEolEIpFIJJJLQPMTVgCJiYnMnTv3gtecawJHp9MxcuRIRo4cecHfxsbGMmPGjP9pR8+ePenZs+cFrzGbzYwdO5axY8de8Lr/5VOHDh1IT0//nzZJJBKJRCKRSCQSiUQikUgkEolEIpGojf5/XyKRSCQSiUQikUgkEolEIpFIJBKJRCKRqIecsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/ipywkkgkEolEIpFIJBKJRCKRSCQSiUQikfyjyAkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KnLCSSCQSiUQikUgkEolEIpFIJBKJRCKR/KPICSuJRCKRSCQSiUQikUgkEolEIpFIJBLJP4qcsJJIJBKJRCKRSCQSiUQikUgkEolEIpH8o8gJK4lEIpFIJBKJRCKRSCQSiUQikUgkEsk/is7j8Xj+aSMk/x48Hg9utyxS58Jg0ONyuf1WTw1NLdqoRZ/V0PR3PTU0pY3a0FNDU9qoDT01NLVooxZ9VkNTizZq0Wc1NLVooxZ9VkNTizZq0Wc1NP1dTw1NaaM29NTQlDb6p96/Ab1eh06n+0vXygkriUQikUgkEolEIpFIJBKJRCKRSCQSyT+KDAkokUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8UOWElkUgkEolEIpFIJBKJRCKRSCQSiUQi+UeRE1YSiUQikUgkEolEIpFIJBKJRCKRSCSSfxQ5YSWRSCQSiUQikUgkEolEIpFIJBKJRCL5R5ETVhKJRCKRSCQSiUQikUgkEolEIpFIJJJ/FDlhJZFIJBKJRCKRSCQSiUQikUgkEolEIvlHkRNWEolEIpFIJBKJRCKRSCQSiUQikUgkkn8U4z9tgERyJTl8+DCTJk1i+/btWCwWBg0axOOPP47ZbL7g7zweD7NmzWL+/PmcOnUKnU5HaGgoQ4YM8fl9bm4ukyZNYu3atZhMJnr37s2zzz6LxWJh1qxZfPbZZxQUFBAXF4fL5SInJweDwYDL5SIsLIzevXuTn5/P+vXrMRqN1KxZk1OnTlFeXk7r1q0ZP348CQkJrFixggkTJnDq1CkAAgICAAgKCiI4OJjCwkKCg4OJiYnh8OHDOJ1O9Ho9gYGBOJ1OEhISeOKJJ+jevTuHDx/miSeeID09Xfhbp04datasyZYtW3j66aepVasWP/zwA2vXrqWiogIAnU6HXq8nPDycm266iZ49e/L111/z+++/c/LkSTweDwBWq5Unn3ySW2+9FY/Hw7Bhw/jjjz/OSuPY2Fjmzp1LQkICubm5PP/886xZs+as6/R6PSaTiZSUFHr37s2cOXM4ffr0WdfpdDo8Hg8BAQEkJiZy8uRJbDYbQUFBOBwOKisradiwIceOHSMuLo6bbrqJjRs3snnzZqqqqny0DAYDRqORa665hnbt2vHNN99w4sQJkeZdunShqKiInTt3otfr8Xg8VFZWijTQ6XTUrl2btm3bsn37dk6cOEFAQABOp5OAgACRzzab7ax7KwQHB1OrVi2
ys7PR6XSUlZWddY3RaKR+/fpUVFSQn59PgwYNfPJ50qRJbN26lcrKyrN+Gx8fT2FhIcXFxeIzi8VCVVUVgYGBoiyHhIT4/G7IkCHs2rVL+FmnTh1ef/111qxZwzvvvHNOX4xGIz179uSVV17x0Tt8+DCjR49m//79Qs9gMNCsWTOeffZZWrVqBcDChQt54403sNlsProBAQHceuutPPXUU1RWVvLss8/y22+/UVVVhU6nE9coz5vNZsPhcJzTxubNmzN16lQsFgtPPvkkW7Zswe12C7u8y/7jjz/O2rVree655ygsLDxLq0aNGrz00kukpqby5ptv8tVXX1FaWgpUlx+lnNSpU4fi4mKKioowm82Ul5fjcrnOaZ/ZbMZgMIiyoNRJQ4cO5fHHH2fNmjU+dYT37+Li4ggMDOTo0aN4PB4cDgdutxuDwUDNmjUpKSmhqqqKrl27Mm7cOEpKSnzyRUGn0xEbG8stt9zC/fffzzfffHPOfPHm5ptvZv/+/ezbt0/cU/F95MiRDB48WNSV+fn56HQ6nE4nACaTiXbt2jF+/HgsFgtPPfUUW7duFfliMpmoqqoSz2BYWBht2rTh6NGjHDlyBIPBgNPpFGmamJjIU089RWpqKm+88QZffPEF5eXleDwegoKCcLvdVFZW8tBDD/Hdd99x+vRpTCYTZWVlIs/OxGis7jbp9XocDgd6vR6z2SzKXOvWrcnMzCQjIwOj0YjT6cTtdhMYGEibNm0YP348derUEfac+ZxHRESQlpaG2Wzm8OHDTJgwgW3btuHxeDCZTAwYMIDnn3/ep83Jz88HEHWLyWQCOKvNUdJGyZfWrVvz8ssvizbnueeeo6ioyCf/o6KiRDsKMHnyZL755huRZ1D9zHXs2JExY8aQlJQknvP09HQ8Hg8GgwG9Xk9YWBjx8fFkZWVRWFhITEwMBQUFonwnJSXx5ptvYrFYePnll1m1ahVVVVU+eWE0GuncuTMOh4Nt27bhcrlEfptMJgICAqisrBRtoPczWVJSclZeXnPNNdx33328++677Ny5U5Q1JY+Dg4Pp06cPzz77LO+++y5paWlkZWWJZ0pph7zrRqUe++qrr5g1axbHjh3z8cFsNjNs2DAef/xx3n33XXbs2MGuXbuorKwUekq7esstt/D444/z66+/nrPtSE5OFmmm9E2qqqoIDg6muLhYlHW9Xk9VVZXomxw7duyc5Ruq24XatWuLvknz5s0JDw9n48aNlJeXYzQaKS8vF2mk0+kICwujfv367Nu3T3ynPPtKHRgQEEB4eDg2m42SkhIaNGiAyWTi4MGD6PV68TwqZQ+q28WkpCTy8/PJycnBZDKJZ9hkMmGxWLDb7cTExBAaGkpeXp7ow1RVVWGxWIiKiiI3N5fS0lJCQkKw2+0iH9xu91llzLtcX3vttRQVFbFr1y6fPqV3/avT6QgPDycgIICCggLRLqempjJ58mQWLVok2iHFL4PBQPfu3RkxYgSvv/46O3bsEDbo9XqCgoKA6jqsQYMGbNq0ifz8fDwejyjvMTExANhsNho2bIjH4+HAgQO4XC6hFRgYSEREBLm5ubjdbkwmE8HBwZSUlGA0GtHr9VgsFuLi4igpKSE7O5tatWpRt25ddu/e7VMfKCh92vPRokULAgMD2bVrF1VVVT51hTcRERH/M8282yyHwyHyOygoCJ1OR0FBwXntALjvvvv44osvznr2vVm3bh0TJ05k5cqVwi+z2XxJde2hQ4fYvn07brdb5IHBYOCaa65h4sSJ561rofo5ufXWWxkzZgyATzoofarevXvz5JNPEhUVhdls5vTp0+j1elHmveta77rf7XbjcrlwOp0+da1iz5NPPnnOPidU9wFfeOEFH3t0Oh1ms5nKykruvfde9u3bx/bt2wkMDPTJF4vFQmxsLCdPnhR1ZGBgIHv37vV57hSbXnzxRVF/lpSU8Oyzz/rkC1Q/H02bNmXcuHG0atUKt9vNmDFj+Omnn3z
q76CgIPr378+zzz5LVlYW9913n8g/b2rXrs2oUaO4+eab+fzzz/nll1/Ys2ePqD+90el0NGzYkEmTJpGdnc2PP/7I1q1bz3pHMZlM9OvXjwkTJoj3xs2bN4v2VyEqKooxY8Zw8803c/z4cSZNmsTevXvFs+59X+/0WbhwIW+//fZZ5d+73IaEhPj0372JjIzkySef5OabbxaflZSUMHnyZH744Qef+srj8WA0Gunbty8dOnTgq6++4vDhw9jtdtG+63Q6goODuf766xk+fDiDBg06R0mqJjAwEJfLJdqoM9HpdJhMJoKCgqioqCAgIACDwUBRUZFPedHr9URERDBo0CBGjRrFO++8w6JFi86ZbwozZ85k3rx5bN68WdRLer2eNm3aMHPmTJFX27dvJzg4GL1eL8pMYGAgDz30EA8++CBZWVn07NnzvD4mJSXx0ksvibK8bNkyvv76azZt2uRTH3qX5ejo6PNqepfl3Nxcxo4de8589S7LANu2bePFF188q2+vpGHDhg0pKio653OhoJTlwYMHs3jxYrZs2UJ2djZut9unLAOEhIRw66238vjjj1NZWcnkyZNZunSpT1q3bNmSV155hQYNGoh69fTp0xgMBtF3MJvNREdHU1hYSFBQEBaLhezs7LPandq1azN+/Hi6d+/OypUrefPNNzl06NBZdlmtVqZPn067du148803+fbbb88qJ3FxcTRu3Jj9+/dz+vRpzGYzFRUVPu0pVNexERERoh9hMBjE+975UJ6j82E0Gn36d8r1Y8aM4YEHHhDv91u2bKGqqkr0PS0WC5GRkRw7doynn34aq9XKrFmzOH78OIBPOgQHB9OgQQPy8vJEfeXxeER78Z///IfHH3+cgoIChgwZ4lMmlGc8MTFRjDksWrSISZMmiXRQ7DYajVgsFmw2G263W/TtoDr/AwIC0Ov1NGzYkAceeIBevXqxf/9+HnzwQXJycs6bRjt37iQ9PZ2HH374LNsGDBjAhAkTAPj4449ZvXo1hw4dorKbDUD5AACB/0lEQVSyUtzbu66dMWPGeccvoqKi+OSTT0hISDjvdUajkeHDhzN69GgxPlheXs4rr7zCd9995zPuoNfr0ev1PuMcSUlJ57y3TqdDp9P5tG/ffvst8+bN4+DBg6Is6nQ6AgMD6devH3Fxcfz++++i3AcHB2Oz2cSzFBcXx6hRo3juuefOm7bKvc8c91i8eDHvvfeeyBdlDOKxxx5j3rx5fPLJJ+Tl5QGI8m00GkU7n5mZKfoe5+sztmzZkgcffJDp06dz6NAhPB6PyDOj0YjH4yEiIoL27dtTs2ZNFi9eTGFhoU/ZNpvNdOjQgaeeeoqkpCS+/fZbPvzwQzIyMs55z6SkJKKiotiyZQuAeA/wHoNISEgAwOFwMH36dDZs2MDevXvFM6Pk0/PPP0+rVq1E32HHjh2iPtDpdAQFBdGvXz8xvtajRw8xvujNmXW3YtfHH3/MzJkzRVsZHR3NggULhH3Dhw9n06ZNZ+nddNNNjBs3jtDQ0HOmwdWGznOhGlQiuYooLi6mf//+xMfHM3LkSHJzc5kyZQoDBw4UDdn5+PDDD3nrrbcIDAykdu3aBAcHs2fPHkwmEzfddBMTJkzA6XQyePBgAJ544gkqKir473//S3JyMtdccw1vv/02Tz75JACvvvoqBoMBi8WCxWIhJyeHW2+9la+++orQ0FCmTJnC/Pnz2bBhA40bN+app57igw8+4Pjx40yePJn7778fnU7Hf/7zH5YuXUplZSVGo5GAgAAqKiro1q0bJSUlbNmyhaCgILp168bPP/8sKtCmTZvyzTffMHPmTMaMGUNRURE6nY727duzdetWn47R008/zY4dO9iwYYOoEE0mEwaDgcrKShITE8nNzaVOnTqYzWYOHDhAeXm5mAxyOp14PB7eeustDhw4wLvvvntW+kZERFBYWEhkZCTLli3jzjvvxOl0cuTIETHBYLVaKSoqwmg00rVrV4q
Li9myZQuNGjUiIyNDNHahoaGUlpbi8Xi46aab+OGHH3A6nbRt25a9e/diNBopKyujqqqKm266if/85z+sXr2aOXPmEBkZSWFhoWhYTSaT6EhbrVaio6PJyMggIiICi8VCfHw869atE9/37NmTxYsX43K5CAoKory8HIPBgNVqxeVyYbPZROerpKSEkpISmjRpwp49e2jcuDH33Xcfs2fPJi8vj5EjR/Lf//6X5s2bs2PHDjHQ+uCDD7Jnzx7WrFlDREQE8fHx7NmzB4AGDRqQnp6OxWLh9ddfZ9WqVSKfn3nmGeLj44mKiuKnn34SA9I6nY6dO3ficDhE+WncuDEHDhxAr6/eZPvoo4/yxRdfkJyczMyZM0W+ffTRR7z22msAtGrVirp167JkyRICAgIYNGgQX375JSkpKZjNZnbs2IFOpyM5OZnKykoyMzPp3Lkzs2fPFs/n9ddfT1FREQ0aNODo0aPodDpcLhctWrTg8OHDLF68mJ07dzJ69GgxsGaz2cRgZ0BAAHa7nWHDhnH48GE2bdpEzZo1ycnJEb4FBwdTVlZGixYt2LlzJ7Vq1SInJ0d8rvybnJxMQUEBoaGhZGdnU15eTrNmzdi9e7foYCsTTB07dmTFihUYjUYcDodP3ns8HurXr8/x48e57rrrWLduHZWVlXTt2pXNmzeLwfi2bduyZcsWoqKicDqdlJeXiwE5l8tFSEgIbreb8vJyLBYLAwcO5LPPPhMDAddccw1r164lMDCQ1NRUfv31VzHhFxgYKAbkxo4dy1tvvUVFRQUtW7Zk586dADRr1oxDhw5RXl5OUlISjz32GNOnT8fj8ZCfny/yJTMzE6jufCYkJJCVlQVAmzZt2LBhg8gXpa4wm80YjUYqKyu59tpr2bRpE5WVlWLip3Xr1uzfv5/ExET27NlD//79+fnnn+nXrx+LFy/2eVHT6/U0btxY5EtBQQGRkZE0a9aM77//HrPZLMqxTqejQYMG7N27lwYNGoiJxKKiIpGmDRo0EPmSlpaGy+Vi4MCBLF68WHQ6XS4XJpOJUaNGMWfOHFG3NG7cmPT0dAIDA0lJSRETg+Hh4eTk5Ii/lQHhnJwcevbsyYoVK0hISBADdMrL3Jtvvsmnn37K8ePHSU1NZenSpVRVVYlJNu+Jj9tvv53HHnuMfv36YbfbiYyMpHv37nz99dfo9Xrat28v2hwlHZX0U/QAnzYnLi6OQ4cO4XQ6qV27NgAFBQVYrVbR5kD1S215ebl4IR49ejQffvghAwcOxOFw8PXXXwN/dsTtdjt6vZ6oqChKS0uZO3cuI0eOpKioiM6dO/PHH3+ICQblmWzTpg3t27fngw8+AGDo0KHk5uaSlpZGREQEkZGRFBcXU1hYiMFgEM+JwWAgNjaW48ePi3YgMDCQiooKoqKiOHXqFAEBASQnJ5OcnMw333zj80zWqlWLEydOiPJRr1498WKvtNd2u12UR6vVKl4MU1JSiI2NpVatWnz88cdYrVbxW71eT7169cjMzCQgIIAffviBnTt3MmbMGJKTk9m3b5+oU5U2SK/XM3jwYH7++WdSUlJEu9ypUyeaNGnCggULcDgcDBgwgGXLlolBAO8Jc6UuU9LMYDAwcuRInn/+eSoqKsSzpSzIaNq0Kdu2bcNoNPLII4+wbt06Nm/eTLt27cjMzKSgoACXy0WzZs1Em/XUU08xevRoSktLue+++5g1a5ZoO3U6HZWVlWKSPDMzk+DgYBo2bCgGpAFq1apFRUUFnTp14scff8RoNDJ27FimTp1KVVUVvXv3Zvny5bjdbuLi4igoKKCyspLrrrsOs9nMzz//TL169aiqqiInJwePx0PXrl1Zs2YNOp1O1LXl5eUivWJjY8nPzxcTXDVr1uTkyZMA9O3bl82bN4vJJmViWRkgtVgsPPzwwwC8/vrrhIaGMm3aNNGn7Nixo6h/b731VtLT09m2bRsGg4EpU6awZcsWvvnmG7p168aqVauIiorCbrd
TUVGB2+2mZs2aFBcXYzKZRP3gcrlEu6zw8MMPs2bNGnbt2kXr1q3Zvn27sDc2NpasrCyCg4P573//y7PPPktpaSm1a9fm9OnTVFVVERMTIwYawsLC6NatG0uWLBEDws2aNWP79u3079+fpUuXYjAYmDRpEgsWLGDv3r0kJyezf/9+wsLCcLvdBAQEcOrUKYKCgqhRowYul4uCggLRZiUlJbFt2zaxuGLatGlMnjyZY8eOcc0117B582YAevXqRX5+Pn/88cd50ywmJoahQ4cye/Zs0d/Mz8/H4XCI/mR5eTlxcXG0a9eOH374gaqqKp544gk6duzI66+/zt69e7Hb7RgMBp8JJGUAyWQy0a1bN4qLi9m4cSMmk0m0w8XFxQQFBZ23rj0Tpa41mUzUqVMHj8fDkSNHiImJEXVkcHCwqGuVut77FTwgIAC3280tt9yCw+Hgm2++EemwbNkyMjMzRd+8vLycxx57jFmzZgl9ZQLJ4/H41P1DhgwR/bC6devSoEED0tLSiIyMZOrUqeJ949prr2Xt2rU4nU6MRiOdOnVizZo1dOvWjdjYWGFPSEgIx44dExNOFouFJk2a8P/+3//jmWeeEfnSsmVLfvzxR1GWGzVqxPPPP094eLhYmKVMzttsNkwmE2azmcWLF1O3bl1GjBgh8sV7MDsgIIDQ0FDKy8tZvHgxzz//PBs3bhTtsbKgTGmvmzZtyr59+ygoKPCZbNXpdFitVqqqqigrK2P69OlMmTKF9u3b89tvv6HT6UT/U9FWJpwCAgK45ppryM3N5ejRo1RWVqLX67FardjtdjHw2LZtWw4cOEBoaKjP4JlSdyt2TJ8+nfj4eBYsWMDx48fFQh2lHdbr9RiNRoxGIy+++CJPPfUUAQEBVFVViUHMkJAQsaCnffv2TJ06ldTUVPR6vUgPg8FAZGSk6L9Mnz6dPn36AHDvvfeydetWPB4PDRs2ZPfu3aL/WbduXfbv30/Tpk3p1KkTX331lZiItNlsGAwGOnbsyOHDhwkMDCQrK0uUn6ioKJ8JvejoaE6dOkVoaCglJSUEBgaKPqSy8Ke4uJjg4GAAUUd6PB5iY2PJzc0V7249evRg8+bNwhaA0tJSTCYTLpdL5Pf06dOxWCw888wzREZGkp6eTlBQEC6XS/QpkpKSKCgoEGMIr732Gunp6eh0OiIjIzGbzeTk5DBlyhSSk5O56aabiIuLIy8vT7R1kZGRYiI6KChIlOWHHnqI1atXi7pe6bsbDAZRlj/44AOGDx8u0ikwMBBATEYrZXn//v1iolKn0/lMBgQFBYmynJKSwqBBg8T7aklJibjWuyy73W4iIyMJDg7m5MmToj8aFhbmU5Zr1KhBjRo1SEhI4KuvvjpnPaj4PXjwYI4ePcrWrVvFwi6LxSIWg0VERHDLLbcwc+ZM0fc+cyGcUift27eP3Nxcn4kPk8kkFm/o9XomTJjASy+9RFJSEnv37hUaShkzm83UqlWLDh06sGzZMiwWC06nk4KCAsxmsyhrRUVFXHfddWzfvh2Xy0VpaSnh4eEUFRWJd3Sl7MXFxZGdnY3RaMTtdpOUlER6ejp169aloKBAvEso9ir1alVVlah3IiMjxaIDZRGc0t+tqqqiRYsWfPTRR/Tv35+oqCjS09MxGo24XC7q1q3L0aNHha833ngjS5cuJSUlhSNHjuBwOHzeLR0OB1VVVQQEBBAUFERpaSkul4saNWqId5UBAwawfPlyTp06RXx8PFarVbxDAnTo0IGtW7fy3HPP8dJLL2EwGMT7mN1uF31oqH43rFGjBllZWaLfoix2uvfeeykpKeHLL79kwoQJTJ06VdRR3pPSSl/ltddeo0WLFvTs2VM818p7OCDaqzFjxnDPPffQuXNnvv/+e7GwUSn7Sl3bqlUrTp48yaxZs1i1ahV6vZ7mzZuzdetWDAYDUVFRLFu2jA8++ICPPvpIvOt7axkMBm655RYxvjhmzBh++OEHjEYjkZGR6HQ
6Tp48idVqpW/fvhQUFLB+/XoWL15Mr169GD58OOvXr+f48eN4PB4xjnX48GHR3t1yyy18+eWXDBgwgC+//FIs1OncuTPLly8Xz+gdd9xB586d+fTTT1m3bp0YV6moqODEiRPiuRs+fDjXXXcdTz75pBizUvpTyvPVtm1b0tPTadGiBevWrRP1f2xsrBiDaNy4MXv27KF9+/asW7dOvJvDnwvsKysriY6ORq/Xi0ktpZ4yGAw0b96csWPHkpmZyXPPPce1117Lb7/9JsqA8qyYTCaefvppvvjiC3Jzc2ncuLFYxGk0GomKiiIvL4/Q0FCcTidDhw7l66+/xu12i7ozOjqaoqIiqqqqeOihh/jiiy9ITEzE7Xazd+9eUY6GDh3KkSNHOH78OD/88AOhoaHYbDZ69OhBRUWFaEt79erFunXriI6OpqCggMWLFzN8+HDq1avHxo0bCQ0Nxe12i/ozMjKS5s2bM3PmTKHldDopKSmhRYsWYgFAQUGBT5s8btw4lixZIvJOr9fz7bffotfrhX3Dhw8nNzeXFi1a0KhRI0JCQjh+/DjffvstTZs2Zc6cOeetq68mZEhAyb+Gzz//HLvdzjvvvEPXrl0ZMmQITz31FJ9//jm5ubnn/V1lZSUzZ86kTZs2uN1u5s+fz/z584mJiaFJkybi9z///DMHDx7krbfeokePHvTr14/JkyezatUq3nvvPe655x7uuusu1qxZQ8uWLUXn7LvvvmPAgAH89ttvuN1uSkpKqFGjBps2bWL48OGkp6cTFhbGu+++S0lJCS+99BI6nY7777+f+vXrYzAY6NWrl1h9/NRTT7F69Wq2bt0q7N+zZw/XX389ZrOZ3bt3M2TIEJo3b87LL79McXExer2eBx54gPnz54tVFspkBcAzzzyDzWYTq38BMUB86NAhbrvtNg4cOECHDh2orKykRYsW/PDDD7zyyiuiczFt2jRmzZoldHU6HRMmTBCdveTkZIqKinj55Zc5ePAgL7zwAlDdME2ZMoWCggIGDRrEuHHjWLVqFR6PB7PZLF7iBwwYQOvWrSkpKSEgIIDmzZuTk5MjXj63bdvGjz/+yPLly8VnY8eOpWPHjoSHh6PX6yksLCQ0NBS9Xs/DDz+M0+nEarXSoEEDbDabaFwKCwuZNWsWs2fPpmbNmkD1hMuPP/7INddcIzpdTZo0oWbNmrRv3140/Dt37uTEiRN8/vnnpKSksHv3bpHP8fHxzJs3j4qKChYuXEjLli1JSEgQK46uv/56li9fLgaclV1d48aNY9y4caSnp9OtWzc8Hg+HDx/mpZdeEvlst9uZNm2a2LU2ZMgQtm/fzpQpU0Q6R0ZGMmDAAEaOHAlUr95o1KgR27dvF2VZ6Zw6HA7efvttUSauv/56Xn/9dfr164fL5RIvX1OnTmXnzp3odDoeeeQRDhw4wNtvv43JZGLt2rVC7/PPP8dms9GsWTPy8/O5//77eeGFF9DpdBQVFREeHs7s2bN544030Ol0DBo0iOLiYgICAkhJScHpdHL99dfjcrlYuHChmDypW7curVq1YtiwYUD1Kqfu3buze/duunbtSk5ODl26dKGiooKwsDAcDgfR0dFERERQVFTE4cOHARg5ciTXX3+9KG+hoaFkZWVx++23s3z5cuLi4kSHLDo6mri4OGrUqIHb7ebo0aMkJCTw66+/ioGahx56iLKyMgICAmjUqBFbt27llltu4dSpU2JCVRlcUSZ3lMnfsrIy0tLSSE5Oxmw2Y7fbefTRR2nZsiW1a9dm+fLloo7YtWsXUVFRhIWFAbB06VL0ej1JSUns2LEDgEceeYQ9e/ZQXl6OyWQiPT2dmJgY3nrrLQ4ePEhxcbHIlwceeIAXX3wRnU4nBj0bNmzIhg0bfPJFr9eLARKlTKxatYrbbrsNk8kknpv33nuPZ599ln379nHdddexbNky7rnnHnbs2EHr1q1FOgA0adLEJ1/ef/99li1bRmJiohiMMxqNVFR
UcOedd7J3714xUK4MnCu7HQCOHTsm8kWn03HfffcxZcoU1q9fLwZJAG677TaxSt3tdhMTEyMGzxwOB9u3bycrK4sPP/yQL7/8kuTkZBITE4HqlfZKHb9+/XrxEjl//nwqKiqoU6cOAB07duTdd9+luLiYRYsWicEsk8nEL7/8QmBgICaTicDAQD7//HM++ugjbDYbFRUVfPTRR4wfP57nnnuOiooKnzZnx44dxMTE0LJlS0JCQvB4PDRv3pyYmBifNmffvn24XC7uvPNOTpw4wVNPPUV5eTmFhYU+bc7IkSMJDAwUaZidnc1TTz3FwoUL+frrr8Xk8W+//cbKlSvF86LsTnv11Vex2Wy0aNGCjh07inoYYNeuXXTt2pUdO3aIgd/w8HCMRiOzZs2iefPmIu/Lysq47rrrcDgcYvApLCxM2FVSUkLt2rWpqKjgkUce4dSpU8TGxhIZGcmOHTsYMmQISUlJ4pls3rw5xcXFjBgxQkxqKZNAShtWWlrKTTfdJNovZVdH165dWbVqFUOGDEGv1+N0OqlZsyZ6vZ7Y2FgxMdqjRw9cLhezZ8/m7bffpkePHmJlc2BgIM2bNycpKUnsyPvmm29YsmQJqampos164403ePrppxk3bhwAP/zwA23btsVoNIpdN+Hh4dSqVYuysjKSkpJEmr311lsMGDCAhx56SAxmAtxxxx08//zzbNu2jSZNmohBv/z8fOLi4jh69CinTp3CZDIRERHh02YpA5JKOa1ZsyYBAQE4HA48Hg/XX389YWFhZGZmotPpKC8vZ8+ePUyYMIEBAwYA8Morr1BRUcGvv/7KXXfdhcViERMkNWrUYOXKlQQHB9OlSxdycnKoqKggJiaGmjVrkp6eTocOHTh27BjZ2dmi7d+9ezeJiYl4PB7WrFlDeXk5PXr0YMWKFbRs2VIs2rjvvvvo27ev6AM2adKEZ555hoKCArEbRGlvlQH4srIyBgwYIF6Wi4uLCQsLE31K7/p34sSJFBQUUKtWLXQ6Hdu2bRMDZytXrsTj8dC/f38xOGgymTh58iTDhw+npKRErDxt0aIFS5YsYezYsaIPtXTpUo4cOUKdOnXYtWsXOp2O8ePHExMTQ25uLjExMVRUVPDTTz9RWlpKdHS0GCCYNm2az2plm83GyZMnRZpVVVUxffp0+vfvL9IsNDSUTZs2kZ6eTtOmTcWu/LFjx+J0Ojl16pTYxf/ll1/yzTff+KRZdnY28fHxPmkWExNDYmIimzZtQqfT8cADD/DOO+9QVFR0wTSbO3cuXbp0EW3WoUOHqKioYN68eWRnZ1NRUUHt2rXJzc1lzJgxTJgwAZ1Ox6JFi8RgioLRaKRdu3aibfB4PAQHB9OsWTOWL1/Ohg0bxET/p59+ysKFC/F4PJSXl59V14aFhYlV4lC9w9G7rnU6nUydOpWjR48yfPhw8vLyxOBccXGxqGvbtWsnnunw8HB0Oh0tWrTA5XLx2Wefibp27ty5PProoyxcuBCo7u9XVlaKPq2y0EZpi8eOHYtOp2PFihWi7l+2bBk6nY4777yT48eP8+ijj4q61rvub926NSaTiV69egGQlZVFUFAQa9eu9bFn6dKlrF27Vqwsr6ys5J133hETN0q+bN++nS5duqDT6fjhhx/EBL+C2Wyma9eu2O12kpOTxeT67Nmz2b59u8gXZUfXrbfeClT3TU+dOkVwcDCvvPIKGzduJCIigvr161NZWcl3331H69atiYmJISgoiHXr1lFcXCz6WzVq1BA2KM9gUlISb7/9NosWLSIxMRGHw0FJSYmYmA8ODhYDycHBwSKqwoABA0T74Xa7WbhwIePHj6eqqorKykrWrVuHzWbD6XSKXaB169alsrKSZs2aERMTg8Vi4e233yY5OZlHH31U7MQymUwicoWyuMZgMPD+++8TFxfHQw89JHab16hRg9LSUu644w7RR5g+fbqYHImJiaF169a88MILFBQU0LRpU6Kjo3n77bcB2L5
9O2vXrhXvmAcOHBCr251OJ3379iU8PJy9e/eKNvD//b//R0lJCcOGDUOn07FhwwYee+wxMjMzufHGGzl+/DgDBgwgKSlJ9LV0Oh2NGjUS73JQPSHz+OOPizqyc+fOuN1u/u///o+ysjJq1qwp0njQoEEYDAbi4uKwWCysXbuW++67j0OHDtGlSxdKS0u59dZbad68OVA9iO92u1m6dCl79uzxWZCyaNEixo0bR3Z2Nq1btyY9PR2bzcY777xDo0aNxOQAVC9e+e677zAYDEydOpUlS5YA0LBhQzEJoCy0fOihh8TguDJR3LJlS5/J0g4dOoiFsEpZViaBQkNDRVn+9ttvzyrL3jvNGjRoQN26dcXElcPhEGV59uzZmEwm9Hq9z45a70VAykRNZGQkQ4cOFe97Ho/nrLJ8/PhxnnnmGTZv3izKstIfa9iwoZjMDgkJYeHChSJii8fjYfz48SxcuFC8QxUUFPDhhx9yzz33oNfrxSSHshtAmZBavXq1GOx2u90EBQXRoUMHnE4nwcHBREdHY7FYmD59uliUp9ChQwdiYmIIDw/H6XRy9OhRvv76a/r06UNeXh5BQUF0794do9FI9+7dKSoqIiEhgQ0bNuBwOMRizHr16mE2m3E6nTRr1kz0/7Kzs+nSpYuY9HvppZdo2bIlcXFxYvJG4cYbb8RgMIj3NJvNRvfu3cnKyqJHjx6UlZXx5ZdfsmrVKoKCgkQ/dM+ePXz22WfY7XbCwsKoWbOmGMBX6mcliklaWho9evQgPT2d1NRUsZigefPmeDweUWYqKyvp1KmTeE9r27Ytubm59OnTh4ULF3Lq1CkGDRrEzz//jM1mo127dkD1zqPCwkKaN2/OtGnTAMQ7hd1uJykpSeSjMvmWlZVFRESEuParr77CYDDw7bff8vLLL5OamsqMGTOoqKjAZDKJRSNt2rQR0QGMRiO///47Y8eOFc+P2+3mq6++4oUXXhALFFatWkVRURHLly/n9OnThISE0Lp1a3777TesVivx8fGirq1ZsyY1a9Zk5cqVuN1uxo0bR7169QgNDcVkMlFYWMjnn3/O7t27Rb0VEBDATz/9JCaonE6nGB90u9389NNPoq5dvHgxpaWl1KhRA5vNxqhRo3jjjTfEOAdUL+R66KGHxA5qqN4t8/rrr3Pq1CnMZjNz587lzTff5MSJE9SrV4/y8nK+/vpr3nnnHQYMGEBQUBBVVVUMGDCA5ORk1q1bR3x8PAEBAURFRbFgwQI8Ho/Y0VWrVi0xFqBM5A4bNgyz2SzGIrZu3crIkSNZt26deM+dP38+s2bNomXLltSqVYs//viDIUOGsGPHDh544AFq1aoldpsr72t16tTh1KlT3H333aJMKHlXv359tm3bhl6vZ8mSJWJyTafT0b9/f3Q6HbVq1aJfv354PB52797NkSNHeOCBB8Rk1YIFCzCbzQwZMgSz2SzGMBcsWMANN9yAw+EgOTkZgPHjx4s+4sKFC6moqOCdd97h9ddfx+FwMHDgQKB6bEcZi/38889F3X/fffeJPAJ4+eWXefbZZzlx4gShoaHMnj2bRYsWkZubS+vWrSktLeXrr78mNTWVgIAAatWqJcbXKisrOX36tBi7+vLLL5k/fz4FBQW0bt1atMm///672NH+/PPP89RTTzFmzBiWLl3qY5/SDrz++uuMHDmSO+64g2eeeYYxY8awbt26C45/X03ICSvJv4a0tDQ6deokOlBQvZLW7XaLXTLnYtu2bZSWllJaWip+bzab6d27N1lZWeL3aWlpJCUliW2YAKmpqVgsFsrLy+nbty8Oh4ONGzfSr18/0dCGh4fTr18/8vLyiI+Px+Px8Nlnn+F2uxk1ahTh4eGsXr2a8PBwOnXqxLFjx3C73fTt21f4NHjwYLGi4+abbxarRRMSEvB4PGRlZTFo0CC6du2KwWBg9erV9OvXj8zMTLG9tm/fvgA+nT+F9evXi1V1APXr16d+/fpCT9H4+eefxQA
MVA/+KBw7dgyHwyHC1AAMGDCAvn37Cm2Px8OGDRtISkqibt26QPXW3OjoaKD6BbZv3764XC7++OMPWrZsyZEjR/B4PPTt21c0iG3atGHgwIEijJuymmH9+vWiA35m2VDS3ul0EhkZSVRUFAApKSli1Xpubi7h4eEEBweLfFZe2pR8NplMNG7cGKhuJHr37i1CIMbExJCXl0dCQgIJCQnEx8cDcP/995+Vz0ePHqVXr14sX76c0NBQOnfuzE033cThw4fJyMgQL6Jut5s+ffrQokULAOrVq0dqaippaWkA9OvXj6NHj9KhQwcyMjJEh1EJlbJp0yaxLT87O1uUK8W/wMBAfv/9d9q1aydshOrQGd5hD73ztKqqivz8fAwGAzt37hT5e+edd+J2u8XAtFIWATEJ2bx5c0pLS+nbt68oG8eOHaNjx46sXLlS7OhRwv6kpqYyePBgsWLZarWKl8HOnTuzbds2+vTpw+7du8WAvbKi+tprrwX+HIhs0qQJVVVV9O/fny1bthAZGUlAQIB4ftPS0khNTSU8PJy4uDgAkU7Kah69Xs/x48e58cYbycnJEavXlEmXo0eP0qdPH9LS0rBarXTr1g273Y7b7aZHjx4EBwcTHByMx+MRk6dKGQsODhbhs7KysnA6ncIe5ZlWVukqz3RWVhaZmZki9MiBAwfo1KmTeJYTExNFvgQGBnLttdeKfElISBD3u1C+eIcPU/LFarWKgQTvfFm+fDkdOnTwWfmn1MNKvii7UZTJwpYtWwLQu3dvkS/BwcG0bt1aPL+pqalih413vjRr1gyPxyNWunbu3Fms5vXOl8rKSlEHRkRE0KVLFxGyrGfPnmLFeXJysnimFHuVFXcJCQlUVVWxceNGhg4dClR3ZsPDw+nduzfl5eVcf/31YuDK7XaTmpoKQE5ODuHh4cLuyspK0ebUr19f2KOEmvvll1+IjIz0aXOUdFTCSSrhBwsKCnzanIEDB1JQUCDaHLfbLcqY0uYcOnSIJk2aiHAibrebfv36iTYnJiYGj8fDypUrxX09Ho/Y4VezZk3Cw8Pp2rWrWJlntVpFfd2/f3+hpYQ48Hg8oi3LyclBp9OJ5w5g4MCBYqFCWVkZzZo1AxCLDuLi4khPTxf5pryIKWF0UlJSyMnJITQ0lNWrV4v69+jRozRp0oTS0lL+85//iAmijIwMMegaExMjVsQ1atRI6CckJHD06FHxDKalpdGhQwe2bduG2+2mbt26YgdK7dq1qaqqYvny5WRmZornQ0kzpc1SVr0rbZZSN3qjlFWPx0NBQQGtWrUSuxL69u0rQk4oExBBQUGinKxZs4bU1FThW8+ePUUaKW3WypUrxcBiXl6emPxISUkB/myzvvvuO6xWK126dOHo0aMYjUZSU1MJDg4mKiqKm266SYRnUeo1pc3q2rUrAPv376dJkyY4HA4GDRoknv/OnTvTsmVLnE4npaWlPPvss3g8HvGiqtg4fPhwTCaTuHe/fv0oLCxk8ODBWK1WkXbXX3895eXldOrUiYCAANFfaNy4sbgmODiYtWvXivpXKUtQPWin1IdKn8+7/lXSUqlb+vbty/Hjx8nMzGTgwIFUVVWxcuVKAFH2EhMT2b17N3FxcVitVlH/KgNIygCh0qcaMmSIsPXYsWOUlpaSkpIiBlkHDBhA8+bNcTqd4nnZsGEDycnJDBgwgH379tGpUyduuOEGsTNFKUfbtm1j6NChoq5et26dqLdSU1OpV68eBw4cwO1206VLF2GHxWIRZSs0NFT0a8PDw4mKihJppkyQnZlmSl/rYtIsISFBtKFKO64sBlHud8sttwg/lDbr6NGjfPXVV5SVlYm+kFLXKuGjoXoXgNK/1+l0REdHi7o2ISGBpKQkURd517XFxcX06tVLTFgpK6cv1L9XdrM0btxY1LXKgGbnzp1FXav015X/GjZsKNJd2emjTFyEhoaKeuPGG28Udf/NN9+Mx+MRK+S988X7fWPgwIG43e7
zvm9UVVWRmZnJDTfcIOp+xR4lbJDyfCttoGKPki8nT54Uz/TRo0dZvny5qMeUfBk5ciRut1v0V4KCgkhLSyMtLQ2DwUB0dDR169alrKyM2267TeRLUFAQcXFx4t3OZrMRGhoq8rBfv34UFBSIsOxGo5EGDRrgdrtFfijpnJiYiMvlEgsl0tLSxLXKxLyyGywxMVG8Y6xZs0b0GwCx+0RpL5UJ8ZiYGI4fP87p06ex2+0MGTIEt9tNfHy82Ilx+PBhsrKyWLt2rUjvyspKMenfuHFjQkNDCQoKEv3ONWvWEBkZSVhYmCiP7du3FwP7q1atIiQkhIqKCgoKCujTp4+wTbm3ct+0tDQxoVezZk0cDgdt2rShSZMmREVF+Txjv/zyC506dWLr1q0kJSWJcuZ2u8WEgcPhIDMzk169erF582YxaN2wYUO2bNniM/gHiBDSVquV7du3Exoayu7du8U7m8Ivv/xC06ZNOXr0KC1btsThcIj+nrI79bbbbhO7HI4fP05AQAC///67aLszMjLOyqumTZuKPAwPD2fRokUA4t2jrKyM8PBwmjVrRkFBAWvXrhV2n1mWlfpOr9eLvo2SV97vvcq9lbK8fv16oLqPfaGyrGgrz5V3mxEcHCzK8sqVKwkMDPQpy0p4rjPLcnZ29v8sywEBAXz//fdkZmaKsqz0r1u3bi3Kcn5+vk+f0+Px0KdPHxISEmjSpAnR0dEEBgbicDjo27cvq1evxuOpjmqgvOe2bdtWPKPeIa3Ky8sZPnw44eHhxMTEiInlwsJCkpOTxYRYSEgII0aMICMjg65du+LxeEQ/TenjZWdnc91115Gamip2ZcXFxVFRUUFycrIYg9i/fz+dOnUS5QAQYxZKeD6r1Sre0TZv3uxzrcVi4dixYzRt2lSUVaPRKCbwld19CQkJREZGkpqaKt6fXC4Xa9asEX1OZSfRzTffLMJqKn3B4uJiatasKeq4sLAw4uLi2LJlC8nJyT4h2bKysujcuTM33HADf/zxBykpKT4hYe+++27RTiuLvEpKSsjLy+P666/HbreLcp6ZmYnVauX//u//AMTuPGXR4rBhw0Q52LVrl3iGsrKySElJETsSld1lQUFBop1WyntaWhrbtm0jJCQEq9Uq0uvM8rlx40YMBgMbNmzAbrfTp08fLBYL9erVIzw8/Lx1bY8ePVi+fDl9+/ala9euhISEkJaWJt59Q0JC6Ny5M+Hh4aIeBET/w+PxiMUBt9xyC/v376e0tFS8lyu7kHr37i3qBKVeqFu3rtjhA3/2n5TJ5k6dOoldO97vg/369ePkyZPifWft2rW4XC6OHj0qQj+Gh4f7LIgrLi4W7ZvH46Fjx45s3ryZTp06AX++Tyi73Ww2m8+4ar9+/Thy5Ih4FktLS0VYfu++ldFoFHX+3r17adSokUgzZTIzPDyclStXsnHjRnr16sXGjRtxu90UFRWRlJTE8ePH6dSpE1VVVWJBkNL3a9SoEe3btyc1NZXNmzeL8Sbl/spxCD169BDpquRbUVERrVq1Ijw8XIRLfuyxx0RZVtLszHxSogedqZeYmEhaWhp2u13sglfyKSUlBZ1OJyIVrF69WuzSPHDggNhJpVyrHAWQlZXFl19+KdpW5TrgnPadCyXPzheS+2pDTlhJ/jVkZGT4TCZB9YtUTEzMeeOYKr+D6gFF798r5yIpYeLOpa/T6USlnJCQIELkJSQkUFpaSkVFBRUVFaIij4mJISYmhoMHDxIVFUV4eDgNGjQQNigdAEVPuafye2UXgbLCLzk5WazYadCgAQ0bNkSn05GRkSF+462XnZ3N/PnzsVqt4nOn0ynOW1EG6hQ7FL0TJ04QExMjzq5S0kHZ5eU9Sei9qqhTp078+uuv4j5KY+GdjgcOHBArMNatWyfCE7hcLpKTk8Wgw7PPPivitBYXF4uXzIiICBo2bEhAQAAZGRlnnRei5LFiozIAuG7dOrG6WglFqOSpd8hEpTOlrGIyGo2
iYXS5XCQmJopJQO+wEMr3gDivwzufPR4PJSUllJaWijNXlDwrKioSE4oej4fbbruNb775RpSBxMREoaV0MJRyqjB16lQ8Hg8vvviiOEcHEHYkJyeLlXdOp5OsrCzxXXZ2Nh999BHnQrFRefF56aWXgOrn56effhLPm3dZhOrzq7wH6BISErBarSKtgoODRTqGh4eTlZWFy+US6eLxeES5VlbqKOH1lGclNjYWo9EowmV4/+sdlqRTp044nU4RIkGxR9Fv0KCBWP2plGclvIOSjh9++CFQ3TlSOvNKGVC0GjRoQGJiorDD5XJRVVWFw+EgKipK7LRStq0XFxdTUVEhXgJPnTol7FFs837pUO6j5CtUP2f16tUT1ykDpUoYnTPzRfH/fPmiTOafmS9BQUEilIESvkbZPRAVFeVjp1IPK+ngHb85KipKTA7GxcWdlS+A8F2n04m0UupdJZa0on3nnXf6aHufXXJm/a683NasWVOEHVXKW0lJiVjx5XK5KCoq4ssvvxR1/Jn1q7L7JSQkxKeOV3ZYKaEWlUF08G1zEhMTRXif6OhoTp48KezxTscaNWqIwSkljZXrlDanbt26PufcKIP3Z7Y5CQkJPrHWvdscxZ+8vDyfNsftdvvY1LBhQ7Hj4OTJkyL8n7eW1WoVAyMBAQGizQoPD6dJkyZix4T3rjWoHrzX6XQiJK3yQquU1dLSUhISEkS9oTw3NWrU8Akj633GnTIgrUyyKPW7MvielZVFo0aNiImJITg4GJPJxJEjR0SaKSFjFe0dO3YwYsQIYmJixMuMMiCqhOVT0iwxMVGstFfSLCMjQ0zceuOdZkpYEagO7aacQeR9Pt65nhelzapfv77w88w2SxkYUnZGKPdR2iylXVDaLKX9rqqqoqqqyuc5sNvtYld0Xl4e8+fPx2w2U1BQIMpTdHQ0Bw8epKKigoSEBPE8WK1W8Yzk5OSwZMkSsTJQuU5JR8Wv0NBQEhMTRYib77//HqiuA6dNmyZ2hHrXRdu3b2fFihVERkZSWlqK0+kUC1Dy8vIoLy8XO3W82wOlvvQ+d8i7/lXSUTlDy7v+9a6jvftU3iHhlGdKqSuVsgxw8OBBoLqMhoWFiTRr1aqVT54odVq9evVEKB2lvQoLCxN5r6RZRkaGyBe3283BgwdFiBXvemrixIliQK+wsFD0a5Vy53K5xI7VoqKis9Js+/btQuvll1/mp59++p9pBog2VJnMUJ5hpaw3adJE9De826wlS5aIPqziX05ODkajkX379uHxeMjNzRVnogFn1bWNGjUSYbiV7xVuvvlm4b9yJhycv3+v7DJXQrEq9ijPgGLjqVOniI6Oxmg0ip0oCpMnTxaTyVDdjih9Ku+632w2i3SIiYnxqWu96/5zvR+c+b7h8XgYMGCAqPu97QFEvigo9jRp0oTg4GACAgLIzMwU/bXdu3eL9ljxuUWLFsTExIhBdOUctYMHD4q2V6mTEhISRL4oYbiVOtflcrFnzx4OHDjAvffei8ViEfmiTHqYTCbCw8PFMxEdHU1QUBAmk0nU2Uodr5yP510elJ3Xdrsdu91OdnY2hw8fFvdR0l1pp5W0V8qrshNDKbdnnuuh3Fs5swfgs88+o1atWrRv356Kigrsdjvl5eUcOnSIvXv3ilCmp0+fpkmTJnTt2pUaNWpgtVo5deqUz7vKa6+9xsMPP0x4eLjPvZX7hoaGivP+oHoxUEJCgljcoSxQzMnJIT4+nkOHDhEeHs68efO4/fbbiYmJETuKlUlSZUemEkI6ISEBp9N51plJ33zzjaiz8vPzqVGjhjgDUykzoaGhnDx5UqyaV9pHJby50qYpz3FRURGbN2+msrKSkpIS0tPTRTtWv359kVfKxAf8Obj3xRdfANWLqJTJFUA8g8rCOm8/Fi1aRHR0tAhBXl5eLvo2Sn9EKctRUVGinChl2Xuh1e7du8nIyGDcuHHExsb6lGWXyyXOg6yqqqJJkybUqFFD7BJVynJ
+fr4Ij+hdlqG6n6qUZY/Hg91uZ8uWLSKsonK9d1kODg4WE3RKWVbaw9atW/uUZWW3TFBQkE8ECKU8Kc9tQkKCqJO7desmdj/Gx8dTo0YNEbrVOyJMQkICDRo0oLy83OccKmWsQFmAMHnyZKB6saSSdkajkaysLOrXry8WJCQmJor89D7D2+l0ikmwhIQEn3cYpT0sKCggOjpavJMr/Xjvd0WlblXKLVTXCUq7cPjwYfH+VlVVJRaOQPV7zpEjR8S7rtLWl5aWotPpxISIghIe+9ixY7Rq1YqMjAyf93+drvrswRMnToi6Pjs7m/r165OVlSXS2btvk5CQQI0aNXA4HMTHx4v+V61atejSpQvr16+nVq1aot32eDwiEg1ULzCuUaMGQUFBZGRkiGcoIyNDjCF59zdMJhM///wzgIh8k52dLZ7NsrIy0tPTRdhTpZ0LDg4WY0DKRGJCQgI2m42DBw/6pL93XRsVFcXWrVspLS1lwIABJCYmimfWO9T8ypUr6devnxjnUBaYZmRkYDAYxM6isrIyJk2aBFRPsBuNRrFbXUlvqB5DWLx48VnnGSnh7pWQjZMnT8bpdLJ7925OnjwpIqco7bTyvqOMc1VVVYmF08p1Sp3wySefsGXLFmFP7dq1ycjIoHHjxqLfqdfrRZ/TZrOd9d6slCWlrlCee+VdV+mLKCE+09PTadSokWgflYUypaWlfPvttzidTkJCQnzaI6VPrqSjzWYjLCxM9OOUvojy3Cn96rKyMmJjY9mzZw8ej0eEnR83bhwdOnQQfij9j4yMDJ/3dOX59h5jU647czG80nYoi8GVts97jHPr1q1iIaPyLqD0mZ1OJ2+88Qb3338/6enpJCQkiPo0IyNDHBUSEBBAz549adGiBffeey9Hjhw5y75NmzbRqlUrmjVrxu23386XX37Ju+++S48ePYRvVztywkryr8Fms/lMxCgo8bAv9Duz2UxJSYnP75WVuyEhIRQXF4sVfGeihIwICAgQ97FarWLFRHFxsdBVtoSXlJQILW/7lAZeOTxe8cnbruLiYnFdaGioeKFWrnO5XD739NZ79dVX6d27t1gdBPg0LIpN3pqKnhJOTfm8oKCAGTNm0LNnT9EQwZ8TOzExMbz33ntiRZnSQCqrQ5XwXX369OHmm29Gp9ORk5PDXXfd5TO4pfDSSy+Jv/fs2SNi3QYFBREWFobBYKC4uPiceW2z2USaORwOevbsyW+//SZWHcOfk27KpIKCsupIGRhPTEwUK0yUkIJKh/nMw+aVDpaSfmfm89atW4mNjaW8vNwnn51OJ926dRMdopSUFBYsWCDyy2q1+pQ15Z5KWb7tttuYNGkStWvXFg2/gnJegLJSWnlx8Lbx1VdfFYNJ3h1J7/tBdciFDh06iLj448ePx+12i/KnlB1ATHq4XC4xqOmd7vBn7GolbrCSvso9lU6Lcq13uA7FJ6XTAn8OLirnkigDw0qnXOnIK/Yoz1tQUJB4aVcGyZTVicrWeiUsgLKa0Hvljbc9VqtVfKeE/VOeAaW8PPjgg8TFxYkVicrnpaWlWK1WkS/eaa8MeCvp652O3mezKfmnxEg/M1+U67zzpaqqSnTclBeHM/Olfv363HPPPUJbecFTwvadSVhYmMgXpcPepEkTkd5n5ov3M6jki3JmSs+ePUU94z0RDr6Dgt5pr9SB3t8pHeTKykrhl5Jn3vmi7IIbP348n376qfg9/LlT1ftf7zpe8U2pY5Uyfmabo8S0h+o6tLKyUthzZjoqE7/eKyK92xzvl3uDwSD+PrPNCQsL8ykn3s+A984M7zZHeSbPTEflJd37/CfvNllJeyVMpmKPUn96l2/lmVQGLPLy8ti9e7cYLFFCDTocDkJDQ0VZ9k7b4uJin5Vl3uV7zZo1orx4LwxQyndoaChhYWFissm7blRCOiqMGzeOPn36EBYW5lMHKPZ5p5nin3IGgdJmne/Abu80UygqKhIhRWrWrOkTzk5BSXc
l373riTPbrFq1aonfDBgw4Kw2q7y8XLQVii1WqxWn03lWWahduzYOh4OKigoGDBhAWFgYdevWpbi4WNSjSpjZM+2oqKjgueeew2AwUKtWLZ/BceU6ZSWzd+hipe2/7rrrxOr+WbNm8dVXXzFr1izq1q3rUw7uv/9+fvvtN06fPi2el7vuuotHH30Ui8Uizl/avHkzhYWFPvWvki5wdv2rpKNSlr2fK5vNJup87/rXe2GNdzp6T0rp9Xoxkac8v8rvvCdTvOstZWedsno4MTFR/FbxRyl7SvlPS0tDp9NRp04dcRYAVC/umDRpkngBV8qZMriqlAOHw0FcXJyo05Q0a9eunVhEoNNVn3n25ptv/s80A3zaUMVP5Z7evpzZBu7du1fs2jWbzaKubdSoEffdd5+wRdFSJjS869qwsDAxoaj0TRWUEFNK/iicq38fFBREeno6PXv2FIssTCaTCDd6Zl0bEhIibFM0Vq5cyfbt2+nRo4fIL71eL+qNM983lHRQzhfx1jpXX+JC7xvKBJG3hsKZbaC3Pcrg+XPPPSf8huqwsN75oixgUNJTCSFWWFgoBnWU87mUa5Uznc5cOaxEL8jJyRFlTLFPeRaU9lixU6njlUkR5V1PGcxW8vf+++8XOzSUtPZ4PKLfAPj0L5TQkWfuEFLu6+2zgnJvp9MpBt/37NnDxx9/LBZEKAtCJk+eTFlZGadPnxaLW2bNmiXKoJI+3osgR40aJcIAe0+0KPfV6/WiDYTqwXLFTu8y43A4mD17Nnv27GHDhg107NiR5557TpQtJZyud9lQ2kJl0E9p05TFVLVq1RLhoJUBTOW5Vn6rnB+i+KT44H3WpVIvt2vXjl69emEwGMSAXUlJiWj3lfcI5f+VtlvZ8aloW61WMRANf76TlpeX+9QxBoOBX375BZvNRmFhIcHBwaJP5J2vSnlSninvvPJuDyIjI0lJSeHHH3/kjTfeEJ8r6amc4eNdnpRzj7wXaSnPk2KjsvjIbDb7lGUlfZX8OXr0qNgBpNiolBVvlIVGZ75nKnWYckaXd1p79++LiopEWQgLCxM+mUwmkUfeCy8Uf5UxCG+8n8usrCwRJl6JyqCUI5vNRmRkJOHh4ezcuVPUn8riKPiznVPyOy8vj7y8PFF+rrnmGuG/0l88s15V0iEsLAybzSbKrbKLTJkcsNvtHDx4kKqqKpo2bcrs2bN93u+9xy+U/terr74qwi96o/SJbTYbbdq0EWfTKpMcyiJr77re46mOAqDc81zjWUr6e4eNDAwMZMaMGRgMBtLT0/nPf/4j7GjZsqX4jVI+jUYjxcXFIk1/++03tm/fjsfjEee5mc1mevbsyfjx432eUeXZUNqYa6+9lieeeIKffvqJ8vJyn/LpXUatViuvvfYaOp2O2267TXyuPJNK+Vy6dCmxsbG0a9dOvDcWFxdTWVkpztK66aabSElJEeMcyplcyv2UOuLDDz8UfdGuXbvidrtZsWKFsMfj8XDDDTcwceJEYmNjxU40JX8Bn/ZB+W1ERARGo5F77rmH06dP+/T3FH+U56eoqIi77rpL3FOpEwYPHuwTqWTp0qWi3Crtm9vtFhN1Sh/FOz0VSktLRTQM+LN/4D3eAb79OGVXZ2hoKPXq1ROLDJS0UxaPLF++HKhetKig7KT0vpcyFub9/nzy5Elhw0MPPUR0dDROp5O3335b9EU2bdqE0+k8a1xXqUO8x9gUH4CzxjbCwsJEPa9M4JWVlREaGsqSJUvYvn07gwcPBhDPWFhYmBinuPfeezl27Bi33347BoNBtPvFxcXk5+eLRRuvv/66CPN97733+pwj3q5dO55//nk++ugjgoOD2bp1K+PHjycgIECE7vw3ICesJBKNsHbtWtauXcuYMWMuW6uqqorRo0cD1atvvVEq4sDAQK677jpGjBgBcFb8emVFWMOGDcULRePGjdm3b59PJ1qhbdu2YtWxcgD0peDxeFi1ahVNmjTBaDSKVaJnHvh
6PoYOHSrsq6ys9Hnx8x5I+ysoB56fifcZB0ajkWnTpomY7GfuHjsXEydOpFevXgQHB/usKvorFBYWsnbtWp/O3flo2bIlsbGx6HQ6EYaooKDA5+XCX1BWK/0vXC6Xz6G9Z6IM3Clxvb0H9y6V2267jeDgYDGp5D3w8U9gNBrFajll4vJMUlNTRXgsZaWf946BC6GsuPcOH/q/UFZgw9l1DuCzI+FKoeSL8tJ5ww038O23317x+1ztKKt+r7nmmouuAy/E6dOncblchIeH07x5c9FObNy48bJ0u3Tp4jPQDBcXNqF3795A9QDPpEmTLngA+ZVCSdfY2FgxMHDmKvXLQdlNcTnk5OSI3U7//e9/OXr0KDk5OT51gnLI9pnlxOFw8Mgjj4iD4/9K++Nt+7p160R9MnDgQDp06MBDDz101u61uXPn0qRJE3F+ElTvyBk1apTY2VKjRg0RRvB8k4l/F263+7x18IVQJpEAcSbRhdi7dy8TJkw4axXpzTffTK9evcTAmJJmZ4Yjcblc1KtX7yzd//u//+O6664Tv12wYME5F5ZdSVwulwgn602PHj3o1q2bsOViWLp06UXb4XQ6RV/iXG3WX6GyspJXXnmFRx999JzhrtXAu8xfTn3udrt55JFHxMCj9/mSahAQEECdOnV45513xIryy0E5U0mv1/+lZ+hKoeyGNxqNxMTEcPfdd5/1fjJp0iR0Op3Y0e3xeHjggQfEDhEF74n/zp07s2DBAoxGo1h0d6nccccdxMbGijNinn322Qtef76yGxsbi8FgIDAwkLCwsP+vvfOOr7q+/v/r7tzkJiEJGWSQkAAJK8jeCSsqIkNAoiB0WmmlCmKp8KXQAoKigKyCVFtBFBSihBE2DkgAQYYIBLJDhtn7Zt3c/P7I4xw+nyTgaJWf7Xk+Hn1Ybu79jPc47/N+n8VRXMpIVuCOQ54yXRU5sLQ2Rp977jmEh4ezkxZBNWnuRmNjI1asWMERL/di9OjRbDzV6XRYvXo1amtrOVrn38FoNCIsLAyrV69mh4UfG4q+Icgx5tv4d+bGqlWr2CBwNygKhigpKUFKSkqLsa6kffv2fNhN6c6U2QQAYNq0afjoo49U+z2lY4ISLy8vBAQE8D7jwoUL3/Jmd4cMnTRuKFq2uLgYe/bswbRp01g/bU2Opaen4/Tp0yqH3ta4desW3nnnHQBQRc3Sof93JTk5mduF0sASCxYsQHV1NUJDQ7Fp0yb+nKJN7kVMTAwmTZrUou8XLVqE8ePHszO4EpIV3t7eePLJJ/HnP/8ZlZWVKoctJZ988gnrnMrahUrsdjs+++wzjB07tsW+gJwxNRoNgoODsXr1aj7naO06QFPU9eOPPw6dTocLFy7A0dGR0yvSGcSCBQswZswYODg4wMPDgw0hlBZUCRkdqU5eY2MjduzY0eq7kGycPHkyp1RX0rNnT45ABO5kjvkp0Ov16NOnD6f3bt++PTu0kywH7uwnAagM4Q0NDaryBErIEcZgMKgMmBERETh48CB0Oh2uX7/OBquKigo2iv0YlJaWYsmSJZg0aRL69++v+ltwcDA/R2RkJDujNz93ojSa3t7eGD58OKKiorBx40bk5uaqvvvcc89hypQp6Nu3L7Zv34533nkHbdq0QWJiImbNmvWdzr1+DojBSvivQelFoIQs2vf6HXmlKH9PBfwqKyvh6uoKFxeXVg8vyDOqtraW70PpegCwdw0A9qx1dnZW5R1unjqCIhXonZSbFbLoA+B0cnRP8hhR3pOut3TpUsycORNms1mlsGi1WlbC6JmU+WvpemVlZazMbtiwAV999RX+8Y9/wMvLS7W5UKbBo2soP9fpdCpPTPLIbWxsRPv27eHo6MieGUVFRaywVFRU8KaHCs4D4LQADQ0NcHV1bbWvld4XjY2NCAsLw8CBA2G32/ngipRZyndP0D3JQ9PPz4/z3Z4+fZpDloE70WVKbxHqs9b62WazYdy4cS362cnJCRUVFZx
2CbijdNtsNlWkkbJ9aSw3j5pSQu1NSh4ZR+gZMzIyMHPmTFZo6CCXolCU48pms/FmkOofUU0I5dhRto1er1c9o3LsKPuaPBuV7aL0eqF0McrvV1RUoLGxkfuMDkfMZjPKysrYuEaKJaWaoOdxcXHBgQMHUF5ejg4dOnDhT6BpA0mFgIE74fB0L6Wiq2zj8vJybmNKMabX61FZWamSERTFokw5Sam/qB+VbU9RXdS+Sm8gm83GY0+ZjoY8gpX9Qt9r3i+0GVSmISotLb1rv1RWVnIbtKYglZWVcVuR0knjhK5DhmBqI4L6BWjaDCjT7ZBHEkV5KsPklTKLZCpB7QA0bQTovajPyEhFHsNarRZjxozhZyT5Ru+s/K9Sxis9tuidAbRYc5RelFRvhZ6neTuSYVB5CKRcc5QGYzp0o7WMrkHvRv3ffM2hPqR2oHFEfU/QRkGn06Fnz548z5VzErizpjg6OrZYs5ReqNSGdXV12LJlCzQaDUaNGsV1c6i/gKY5Setz87ZVRqkA6vGt1Wr5N3StkpISjmisrKxEWVkZbDYbf5fazNXVlZ/DbrcjOjoar7zyCsrKyloYmunfzeeLzWbjAuyurq4tvGQJZZuR3KHaItSftA42ny8VFRU8DpRygiJ+qK1IjpnNZnTu3LnFmmU2m1UHAiSTDAYDFzomPDw8eE5NnDgRr732GqxWKyorKzki53e/+x0ee+wxfkblgcTAgQM5okV5XfoepbClttBoNCgrK+MUos8++yyAptR7a9euRZs2bfD3v/9dNQ6ioqIwcOBA2Gw2nj9nzpxBfHw8y52ysjKWexkZGap1lP7bXP5SO9JYVspfijgh2ULyV/nuyjlF70TMmDFD9R36HY0P5bVpjCxcuJDnvbKeBq0nNPauXbsGoKn2DfULpZGi/qZ7ajQa/rcyxSn9LTQ0lPv1bm2m0+nYw/xebQZAtYbSe9I9le9C1yddwt/fnyOhKY0oyVq6FtUXIrndXNZSBBy1H6VvAppqndIzKJ2rmuv3CxcuRHl5OQYMGAAvLy+Vfk9ZBprLWuUBUmVlJbZt2watVouxY8eioKCAn9dms3Harub7DWX6T6WspfdqbX+glP0HDx7kvzXfbyhRymsAKtlPvxk4cCA/j5OTEx88U7+QJzu1J/WLm5sbH+ZS/R36LkVbNo/+d3Bw4DSxyqgucgRSphSm5ycZr4xAowhSZZQQ3ZsM6tRfpDcAUOkXFL1Iae6U0Lht/jndm3Q1m82Gv//971zHhpwBgKbsBm5ubpyS2s/PD9evX0dsbCzfW6mv0n1JB1XKG7ovGehJVyktLeXnVI4Zo9EIJycneHl5wdfXFy+//DL27t2LoqIijgCn9qT9AxmzSQ6RHKG2pjlD65hyXptMJo6QBoB//vOfAMDp0CgFNUUNKfevFPFIz00Hzcrxr1y7KUqLHP3y8vL4s/Lyco4IoD0aPVtdXR0GDhzIke9Wq5UdIKh9lZFCND+UfaVcD2gsR0ZGqvQKmmt2u53HslKm6/V6VcSA2WzmiGZ6TqvVyu90t7FsNpt5bVBG4jbfT5Ju0tygSrKzoaGhhVxV6veHDx9mWVhYWKjaC5Dc0Gq1eOaZZ/ga48aNQ2FhITp06KCqQamMniQ9R4ky4qeyshLPPPMMoqKicODAAZ5zZOClNie9haKDadyS0Yn6qTW5SvOPopmaGzmo3piTkxMaGxtx48YN9OjRAy4ud9LbVlVV8ffoN8ePH8fMmTM5LaASWkNcXFxw7NgxdtCgfiM92Gw2q/TvmpoaVfYSpW7zwgsvqPRCkikFBQU4fPgw+vTpA7PZrEoR+8knn/BYpfFps9ng6urK9bk6dOiApUuX8hyi8amM0Gke0UKRgjSmKNsCyZnmZ0Bbt27FH/7wBzz22GMtztEo2r2oqAh1dXUYN24cAPCenb6jjNIG7tQSJr3T1dUVt27d4nZ5+OGH0aVLFzQ0NOA3v/kNP+tDDz3Ejt4vvvgigDvRayQ
PyLFbmdWCHJYoNWnXrl2RnJys0vcoIovG2EMPPcTvqowioncinY0i36iPaK2k9VP53nQtwmKxoK6ujucEfU953kHvqJwjJKfKysp4XSCDKK0F9LlyfivXb2pTiio7fPgwR4i2adOG37eiooJTzycnJ7NM0el0uHbtWotzXWVGFOU4ojWxuWGU+kmj0XCknMFgwKefforw8HAsXbqU24zmGJ3T0fN5eXmhT58+KCgoYJ2Ovufm5qbSMYODg+Hj44OcnJxWzzrDwsIwaNAgPPLII3BwcMC5c+d+VMPcT4kYrIT/GpQ5d4mKigoUFBS0qD3V/HdAkxGkec5SHx8fFBYWct2L5tdXequkpaWhffv2MBgMSE1NhcVigYODAxwcHPh3hYWFKCgoQKdOnVBYWIjS0lKkpaXxMygVGvqcctMCTQtBfX09rFYrGhsbkZiYyAfVyloYrT1rRkYGtmzZgn79+qkMVps2bcK7774Lm83GeWvpOeh6fn5+KCgoYIH8xRdfYNOmTQgLC0NFRYXquUmhp82O0tOGPCyVuZGVtcHatGmDxsZG1NTUQKfTITExkb1iqHYIAFXqK/K2ojpQVL+heR8rDSN2ux2+vr6w2+2or69Xed4oU83RMwF3Fsi0tDT23IqIiMDUqVM5vJoWaGpfZSqV1vrZw8MDXbt2bdHPQUFBuHXrFqxWK2pqalSpBoxGo6rNUlNTodFoUFRUxJ+lpaWpxr5yY0y/TUxM5BRLBoMB/v7+SEtLQ1VVFbZs2YLo6GhVG65btw79+vXj1I4GgwGZmZlczyQpKYn7xd/fXzUWAXANIlLq6BmpLaqrq7kfSktL4e/vD51Ox+1CHk6pqam88SgqKuL5FhwcjLy8PNhsNj4Uof9Seg4fHx/4+voiKyuLD1ypj9LS0mAwGJCcnAxHR0dYLBb4+vrymLdYLNDpdKyUkYcdzT9SNPR6PT9PWloaUlJSePNBBikKe3d0dOQc/wUFBTAYDNDpdLypd3NzQ0pKikoWKBU45Zgij1nqF3qvnJwcVFRUcB5/SlVAv6Pv3atfyHhZXl5+z35paGiAVqtFUVGR6jlpLFJ/0L327duHoqIiNkYtWrSI0/8o5yD1C/2N2ga4My8pDzgV/qXxoZR1Sq/i1NRUvlZeXh6Cg4NV70X1tPLz81uk9dHr9S0iN5SKtFLGk7dtUFAQALXCq1xzqI6gTqdDYWEhfHx8+HmU7Zifn8/KLrURfY/WnKysLG7/wsJCTi/YfM1JS0tTHYYoxxnNZS8vL9Wao3ymy5cvcz0aZf0vjUbDcyA1NRUVFRU8puvq6njOlJaW4saNG/D19VWtk/ReX3/9NSwWC9LT05GWlga9Xg+j0cgOFhaLhQuoK/uA5B5dh1Jt0jumpqbymFamsSgsLIS/vz+SkpJQUFDAKTw7dOjAbRMcHKwa323btuU+p+eia1N6TeW4orlPhwLKmjFKlG3Wrl07ljvUZhRNRO+snC/BwcFISUlhY25mZibPF5JP1GaUWpfWruZrFq0L5DxC6zeNVeX4JNlFaxZ5T1ZWVrIHJUUU09hQzquqqirYbDZYrVakpqayTE1JSeF0SqmpqfxeFRUVLHd8fHxUBgGdTofQ0FBkZmaqZFFQUBCCg4NZ/tL4J5mp0+lQUFDAkeK5ubkqOas8CFd+TuuBl5cX13MCmuSvUkaT/PXz81PVdKR2JFmpPPymcUU6FbXZV199peoTkmknTpxAXFwcv8+tW7dQXFwMnU6HlJQUpKamcupWyvFPhyLUNtSW9B6U9qT5ONHpdHzY17FjRx5792oz2mjfq83oWZRtRtD3rl+/znNduWY9/PDDfG+SYyRrac1ydnbmAvX0HsqxnJyczPUmAbUTwHPPPcefL1q0iPtQqd8XFBTg4MGDcHR05IN1pZ7crl07lnn0jG3btuW2b2xsRFJSElJTU5GRkYFBgwbhX//6Fz9HWloaH4QoZb/y0KqwsFAla5Wyv/n+QPn5oUOHADR
FvyhlP+0PiOZrIMn+69ev8ztVVVXx8wQFBXG70d+//vprFBQU8KGPs7MzfH19uUi7UueisUBpz0iXov6j9QBQ1y2kyJH6+nqUlJSooheqq6tRX1/PMpv2EKmpqSpjDz0npasj/TAkJITlC70nrdPUP83TMtO4VTon0b2Dg4O5mD3QZNSlesoODg5sFOzWrRvrLNQWbm5uSE5ORn5+PioqKtC2bVtuOxrfFRUVLVKX030rKipQWFjI+5mvv/6a113aM9DYVa7v5MlfUlLChh5y9KK6w5Tui4rS0/pFdR1pzlD/VVVVITQ0lA0pQJPuEBAQwMZoaovw8HDY7XZe00jPoxp4+fn5nLaNjIh0aE7yVpleNSMjg3XS5557DrW1tSgtLUW/fv04AiIoKIjPCEgef/3115zW02q1wmw2s26j7CsaKzROqP/o+ZuPZXpmGss6nQ41NTUoKSmBXq/H9evXkZ+fz0Y7Gsuenp5wcnJCfX19C2Mz1W+821imd1KO5erqapUDm1arhYeHB8tV5VimVLHV1dUqOUTjidqbHDBramqwbds2bvfdu3dzunWr1aqKUli8eDEcHBzg5OQEHx8fbhul4092djbLp+ZOlv7+/khNTYWDgwNWr16N4cOHc00ziuKn2lmJiYm85y4oKODzAIrgdXZ25nFL84Geh76r3HMDTbLPYDDwGYQyeoy+r4zwa9u2Le9127Rpg7KyMmzZsgUFBQXYu3ev6rf79u1DYWEhfH19uR6ycv9P/erm5qba62RmZiIgIEC1vtC+lWqlOjs7w8HBgfUv0oN79uypWqf1ej3XZwaaMmpQfVBvb29OM//aa6/BYDCo5hDdm/Rfs9kMi8XC46W5vkTjmmr7kfwmGd+jRw92MFauec1lbWBgIEd30tig7zQ/xyP5WVNTw/ekd6fnI/1DaUR94YUXMG7cOHh6emLZsmX8veaO9o2NjUhLS2PnjU6dOsFgMKhkQm1tLf//goICuLu7s2GCdHxCud+hdyJH+7q6Ojg5OeHWrVsoKChATU0N7HY7Rza5uLi0uBa1Lcl40sdoD066CMnt0NBQ3lN16NABNTU1MJvNSEtLQ7du3QA0yXaSH76+vqwn0BpMDhW0pyJd5OLFi6isrES7du2g1Wr5mZT7UEKpi9B1Sd+lZ1c6virPjWmsKKG1o7GxEb6+vrz2kfFt48aNfD6l1+u5ZnVwcDBycnL4b0RtbS3LjODgYHTs2BEmk0klP4mSkpJ7nmsDYIefu0Wl/dwQg5XwX0NERAQSEhJUyglZ3YcMGXLX3/Xu3ZuVNvp9fX09jh49ioCAAP59REQEEhMTVeH5Z86cQVVVFcxmMw4dOgSj0YgBAwZwXl1SxuLi4uDl5YW0tDRoNBoO+d68eTNKS0sRGRmJsrIyJCQk8GJ76NAhfqePP/4YWq0WDQ0N/P81Gg1SUlK47sC+ffvw+eefw2azITIyEnFxcQgKCmIv0vHjx2P79u2qujMA8MQTT+CNN96ARqPhRSA9PR1ZWVl8PaqDQgp1QEAAR/wcPnwYGo0G7du3h9FoxLVr1zid2KFDh9hbk55j4MCB3I70fnSonJmZyeG8DzzwAC5fvoyRI0ciKCgIhw8fZqUmKSkJWq0Wffv2Zc836qfW0glGREQgPT2dn+HmzZtswLpx4wYvUu7u7pxTm/qZDuGV/Xzy5EkATYtGfHw8L7q5ubnw8vJCSkoK0tPT+Rpvv/22qp/j4+MB3PFEoXaIjY1FSEgIoqKikJCQwM979OhRxMXFAWg6eDh9+jR7TsXFxSEwMBBnz55Fx44dYbFYcOjQIR77ykKT7u7uOHz4MCIiIliZr66uxqBBg/Dll19yGPP27duxfft2dO3aVTVOtm/fjiNHjkCv16NLly5ISEjgEPOkpCS89dZbAJrCnJVjEQCGDx8OjUbDh9D0jDR2zpw5oyoQabFYUF1djfj4eB7zwcHB7AXU2NiIhIQE9OnTB0eOHEH37t3Z44z6jNI
WKY3KERERiIuLQ9++fVFcXMxecytWrGDP6crKSmRnZyMiIoL7WqvVorq6GhcvXkRAQAD279+Pdu3a8YExKUBBQUE4cuQIIiIiUFZWhlOnTsFisUCr1eLkyZOwWq2cL508kT/++GP2zKTxq9FoYDabcfr0aR47cXFxrKSQjAgICEBQUBAuX74MoCmEPiEhgfs3OTkZO3bs4Of/7LPPuF/S0tL4We7VL8qw/Hv1C9B0EHv27FneAJKMIPml0Wgwfvx4+Pj4cAo5mgfBwcEIDw9HcXExz8GtW7dyv9jtduTk5KC8vJz7pbCwkK/h5eWFtm3bstKn1Wp5bptMJj6Io36hzdnJkycRERGB4uJi3LhxA3FxcYiIiFA9d1FREXbv3g0XFxf0798fe/bsAQCOEDt27BjMZjOOHz8OrVbLMp4OONq1a4eysjLuF5PJxGtOVlYWTp06xQcOWq0WDz74IIqLi1VrDj1PbW0tzGYzLl26hKCgIJ7XtObs27cPHh4evOZQjQSNRsNrTqdOnXDt2jUUFxe3uuYUFBRAo9Fg5MiRfF+NRsN9f/nyZTz99NO8QSP5S/Oc2pDWLxrTNGbatWuHxsZG/h7QtNmmaFdHR0fePNEcyM7ORlhYGKdJ0Wq1SExM5KLaiYmJaNeuHcrLyxEZGcntFhQUhBs3bsBisWDv3r04ffo0H8KRYYDWHYvFwgWvacMTFBTEczAiIgJnz55Fnz59WDabTCZotVpkZ2dDr9cjKioKQUFByMvLU7XZvn370LdvX5w+fZpllLLNlJAhkKINLl26BF9fX24zMrQVFxfzPeh9IyIicPr0aX63kydP8nzJyMjA0aNHeV3dv38/X6e1NWvixIm8ZgUGBqKhoQHx8fGwWq0oLi5GbGwsHz7m5+er1qxt27YBAM6fP8+HxjQHIyIiEB8fj6+++gp6vR5arRbbtm2DRtNULyguLo6jB3bs2MF1L+Lj4xEXFwc3Nzd8/PHHHHFYWlqKI0eOwGw249y5c2hoaEBiYiL8/PyQlJSkOugYOnSoSv4CTYdLVNeKHEA0Gg1u3LjBfU9tSWNPKX/37dsHvV6PkSNHArgTgZScnIwePXogNzcXZWVlLH91Op3KgYPW99jYWP7Mz88PFosFly5d4nXx0KFDuHr1KgwGA2JjY1mnIrnl7u6OzMxMPPXUUxyhHR8fD61Wi969e2PPnj28hr733ntwd3fnNgPupMuk31y8eFElt0hvCgkJQVlZmWoukp6sXLOat9n+/ftx4cIFmM3me7YZ6YfUZkCTIevq1avcR7t37+b70nwBmtKVBQQEcJspZS2ttXR4RhQVFbGsTUtLQ2JiInvVOzg48JrVpk0bdOrUiZ0dAgMD4ebmptLvycA4ZMgQjpJort9bLBZUVVUhPj6eZS1FddP/kpKS8Mgjj2D79u3YvHmzyvDh7OyMxx9/HFqtFgcPHmTZT+/Xtm1bfh46xFHuN/bt2wedTtdC9sfHx+PmzZvQarUtZH9SUhL3UVlZGetXtAaSHKN+0ev1PKcDAwMRFRWF7Oxsfg+TyYQ333wTWq0WycnJ3C8RERGIiIjgdTczMxOOjo7YuXMn94vVakV2dja6d+/ORtuqqiokJibi888/R3l5OSwWC9q1a8c1FtPT06HVavnwjto5JSUFer0eISEhcHNzQ25uLurq6lhmGY1GvPnmm/zdqqoqNDY28nPSIVh+fj7S09O5zajeVEFBAQICAuDh4QGLxYKYmBhotVqkpaXxZyEhIfD39+f5RzrNvn37WAbRIa6LiwuuX7/O9y4rK4PVakVJSQkb5KxWK4YPH45Lly4hICAA7u7uOHLkCGJjYwE0Ob55eHjwfSMiInhe5efnw2g04ssvv8S1a9dQVFSEIUOG4ODBg9BoNHjwwQeRkJCAfv36ITExUbVWka5oMpkQFBTE62NSUhLCwsKQlJSEfv36qaIKgSYDHEWnA00H6+Hh4dBoNCoDx5gxY5CcnIygoCCcPXs
Wnp6ebKggQ9auXbtQVlaG06dPo3379qitrUX//v25nlNISAjy8vJUfUXRRA0NDdi8eTPeeOMNAHccWnx8fLB582bk5ubC3d2d92iku5hMJqxYsQIAVJEq9HfqKzo8vHnzJt+bxjKtye7u7jyWP/jgA446oLEM3DFiBQUFISYmhtuRolJCQkIwcuRI1NTUsA5IuiWtPfcay3V1dQgMDFSN5ZqaGr6/s7MzLBYLGhoakJCQwDonpahU6pwajQZHjx5FWloarl27hsLCQtTU1MBoNGL8+PF4/PHH2RlJmYmDxsc333zDOp7ZbMbRo0dRWlqK9PR0jihzc3PDzZs3WbZUVlZi+/btCA4O5vWDZKfFYmFZX1ZWhi+++IJrUt28eRN+fn64evUqevXqhcuXLyMsLIz34+SwQc4WZrOZx21kZCQOHjzI89dgMHCEemBgIL7++msEBQXh1KlTXAeY+pLalMYtObgBTVFEpHM2NDTAwcEB06dPh1arRZ8+fXi8OTk5sV5KjpYJCQno27cvbty4oTIo+vv7Iz4+HkeOHEGvXr1w/fp1dhACgB07duCvf/0rG1Lr6ur4zOfo0aNwcnLiNPuBgYEoKyvDxo0beQ4BdyKod+7cyeNg165dqKurg5ubG8sKSrFNsvbQoUO8Z6CaSpRBhjLqUP/961//AtAkL2pqahAZGYnMzExef5TvHBcX16qsBe6kTKS9YWVlJc+JqqoqVFZW8vkgpXCj8TRkyBB2cNJoNDh9+jSfKyrThUdFReHixYsYPXo07+MjIiKQmZnJ8i0wMBBnzpxBaWkpp8FPSUnBgAEDWCZcvXoV3bp1Q1xcHO93jh49ys6kdAZB70P7HaWsJ93AarXy+AbAZ4qkw7i4uKjOVePi4tiYSsbES5cuITAwkHXEmpoa2Gw21hW7deuGW7du8XVtNhuMRiNKS0t57BYWFmLAgAHQaptSuGZlZbGM1+v17FDVtm1b1kXi4uLwxRdfICAgAEVFRbDZbBg7dixKS0vRtWtX3oeWlJTg2rVrHMXq5OQEu92OkJAQHgPr168H0LRXpDYj2U39dOPGDZWDJ52TJCcnIyIiAu7u7hzlW1dXx7pxXFwcunTpotpDlpeXo3Pnzjhy5Ajy8vJw/vx5dp6hfhoxYgQKCwt5P0X9k5ubi7y8PNXzKbFarfj0008RGBiI+vp61oN/7mgaf6rklYLwI1NWVoaxY8eiQ4cOeOaZZ5CXl4dXXnkF48aNw+LFi/l7v/jFL5CTk6MKk9y6dSvWr1/PqRUsFguuXr0Ko9GICRMmYPHixaivr8ekSZNw+/ZtWCwWLFy4EKtWrUJoaCj69OmDDRs2cJjvihUroNPp4OTkBIvFgpycHERHR2P37t1wdnbGq6++im3btuHs2bMs9IOCgpCRkYGXX36ZDwInT56Mffv2sTesyWTiRbm8vJzz4w4bNgxHjhxhhYpCZ7ds2YIXX3wRpaWl0Gg0GDBgAC5cuMCp4GprazF//nxERkZi2bJlOHfuHEcYURqBTp06IS8vD927d0dCQgIMBgMr0FQUub6+Hm+88QaSkpKwadMmODk5sReOEnd3d8TFxWHEiBGora3FsGHDuPghhdZrNBoMHz4clZWVOH/+PFxcXNCjRw828tD9gaa0Q3FxcZyOYcCAATh48CAyMzNRV1cHk8mEp59+Grm5uYiJiYGHhweKi4vZs1mv1/O1HB0d4ePjw5FcVquVDyyApoV79OjR7NEUFBSElJQU9pqkBTsqKgo3btxARUUFKioq0K1bN3z99dfo3Lkz5s+fjzfffJP/rtVqER0djYiICMybNw9WqxWzZs3C3r178c0336BNmzbw8/PDjRs3YLfb4e3tjby8PDg5OWH16tVYvHgx8vPz8dZbb2HBggXo0KEDSkpKkJycDL1ez0YcMkQ6ODigpqYGoaGhfCgBAH/84x/x4Ycforq6mlMJAE1K+VNPPYXGxkb06tUL7du3R2xsLM+T/Px8+Pn
5cX5goOlg3tHREWlpaRg8eDBsNhtycnKwZ88ePPjggygtLUWHDh1449bQ0ICePXsiOTkZsbGxuHLlCubNmwej0Qiz2cx5600mE0wmE6xWK6Kjo5GSkoIvvviCw6MJSinZs2dPXLlyhaMjKAWXxWJBZWUlunTpgqKiIjg7O7OXo6+vL3JycrgA7JQpU7B//34MHDgQJ06cUBW7r6mp4QPHtm3borS0FMOHD0dCQgKsVisX1KVx1LdvX1y4cAEeHh7seUiHIvTcyiLIAwYMwLlz51jRNxqNnMph8ODBOH78ODQaDSIjI5GRkcGeOhMnTsTBgwdRX1+Pnj17ciqjHj16cBQcbezpML6goID7hRTZxsZGBAcH4/bt2zAYDAgICMDNmzd5U0GebZTegw78n3/+ebz99tvQarUoLS3lw+jAwEAkJydj7NixOHr0KMaMGYPY2FguKgw0Kf+hoaHcL1arFbm5ufweJNucnZ35ea9fv47g4GA+uC8pKeG2Cg4ORmZmJoYPH45Tp06xN1mbNm14M2Wz2aDX6/Hwww/j008/ZW9yFxcXWK1WaLVadOzYERkZGaisrMSDDz4IrVbLhzTk+VZSUoLRo0fj+PHj6NSpE8aPH48PPviAI6ycnZ3h7e2NkpIS3thS6kZKCUHja+jQoXjkkUfw+uuvw2q1wm63w2g0std4//79ec2hdqT2o2L3AFRrjp+fH5KTk1FfX8/XMhqNcHFxUa05AwYMwPnz5/lejz76KI4fP47x48fDbrdjz549KuMKpfkaPXo0fv3rX6OxsRGzZs1CaWkpTCYTdDodH9zRnOzTpw/69evH0R1eXl7sZevm5gZ3d3eUlZWhpKRElUpWp9PB19cX6enpfOBLMs3DwwOFhYVwcHBAaGgocnNzkZ+fj9GjRyMhIQE1NTVwcXFBaWkpy4KAgABkZWXxmufk5MRRVcCdgycHBwf+/MUXX8TGjRthsVi4b2l8U8TawYMHceXKFbz44ovo0qWLKt84rUFarRZPPPEEIiMjUVRUhCVLlqCurg4GgwHDhg3jQ41HH30UBw4c4MMcZZQGreHUZjk5ObBYLBgzZgzeffddGAwGTv2j0+nQuXNnXLt2DXq9Hq+88gqOHDmCY8eOwdXVlVOmtbZmzZ07F5WVlXj66aexdetWHve0PhqNRrRp0wb5+flwcnJCSEgIrl69yuOZ5sioUaMQExMDi8WC559/HqtWrYLNZsODDz6II0eOcEqe27dvsyPAqFGjcOLECbRv3x42m41rYg0dOhSnT5+GRtNU7/H8+fNch+vAgQPcx0OHDkV8fDw6derEm+Y+ffpwehAay+7u7hwdRu02ePBgnD17FhqNBh4eHpgzZw5eeeUVDBgwgOXvk08+icTERHz55ZfQ6XR49dVXceHCBezZswcRERH49NNP0bZtW/YAttvt8PHx4dRIpCc1NDTAbDarHG7mzJmDvLw87Nq1i9cc6ktvb29kZWXB0dERq1atwksvvcSHj3TYFxgYiKKiIk7lOmLECMTGxvJ6Qp7QU6ZMwe7du2E2m7F48WLs2LED165dg6enJ4qKiuDu7s73Juegjh078mad0gu++uqrKCoqwl/+8heW0z4+PkhPT0e3bt1w8eJF1r1CQ0ORmJh41zbz8vLC1KlT8Y9//APV1dXo2LEjCgoK+LCrpKQE1dXV8PX1ZaNOXV0dHBwc4OXlhWPHjuHAgQOYN28eR4EBTYfIdKBIqbi6du2KK1eusCymiDGz2XxXWas0NiplLckK8oo3Go1wdHREu3btUFBQgJUrV3JNCqX+SfPEbrdjypQpsNvtiImJgV6vh9lsZgPU4MGD2bFi7ty5ePPNN1FVVcUH0rW1tWhsbFTJ/ilTpnA9GpPJxPsDd3d3rFq1SrXf+Oijjzhlz5w5c7B161aMGzcOdXV1iImJgZeXF0aPHo1jx45x9LGDgwP8/f0xatQobN++nfvF3d2dawb5+fmhpKQEVqsV7dq1Q25
uLrch6QAU9d65c2cUFRXB19cX586dYx2DIk6pxlN1dTXef/99/PKXv2SnJJPJhLq6Ou6bNm3aoFu3bkhMTOQ1idYjcgqirA4dO3ZESkoK7HY715BSGrXNZjNsNhvsdjtMJhPWr1+PgoICrFixgtOWUspbGmN9+/bFrVu34OLioopIU6Y+12g06NixI/Lz8zFx4kQkJSXhiy++4LFIawy1w8yZM7F582YMGjQIly9fZkOMMr3zgAEDMG3aNK4h1rzeIcm9tWvXYteuXcjJyUH79u15jnbs2BFXr17lAz5/f38kJibC3d0dM2fOxL/+9S94e3sjJSWFnS8GDx6MlJQUODg4sAHm8OHDrHP7+/sjKyuLIwkp6o2MAEqDCNCkMzU2NqKqqorna5cuXfjwvaGhAc7OztBomuoO5uXlcVpDMihTPZTg4GCkp6ezHpaYmAiz2cyRWbdv30ZoaCiKi4v5DOG1115DYmIiR/OYTCbk5OSwwYsie2/fvq2KSKU+ozS7RUVFPM9qa2tZ79doNJwetrq6GmPGjEFBQQEbWEiPJZRjmfQAahcaI2azGVarlXVW0vccHBxUaYJpLJMT2a9+9SsAwLvvvst7XrPZzJHNGk1TJoUbN26gd+/euHjxIj+XVqtVRaCazWZMmjQJGRkZuHjxImpra9nARtHubm5umDp1KrZu3YrZs2fjrbfe4kgTZcrfyMhI3LhxA/n5+fD19eVICNp3k/PHX/7yFyxbtgxhYWFsfKQxRNclR9tDhw6htraWs3GQkY+iSWfOnInY2FjY7XZUVFTw+CVdk84gPD09OTMG7S1pL0P7l+7du+Orr75inUIp8+rr6zl1ITkhmEwmNtrW1tbCy8sLc+fOxSuvvAJfX18kJiZCr9dzzcj09HTWGx599FEcPHgQXbt2xfXr11XRKRQtTHsOMjhR+3l6esJqtWLs2LE4duwYOwJ27doV165dg0ajwezZs5GYmMg165YtWwaNRsPp6KqqqritSBbT3NJomurtUV9Q1h43NzesWLECzz77LK9dzWUU0OQouWTJErz44os8Ho1GIxvZjUYjBg0ahBUrVmDSpEkcFQkAffr0gaOjIz7//HNoNBqsXbsWY8aMAdDkiHvp0iWYTCZMmzYNCQkJfAYUFxeHp556ivVRkrG1tbVskHz88cexePFizJw5ExcvXuR9XFhYGMrLy/lsYtCgQXBxccHp06c5deSyZctQXFyMdevW8bihiFC9Xs/RgNXV1ZgwYQI2btzIc23UqFE4fvw42rRpwzrE66+/jpiYGHZW69OnDyorK3H79m1UVVVh8ODBePzxx2EymTB//nzuJ9KnSKbSmkXnbqRzOjk5obS0FE5OTujUqROuX7+OwYMH49SpU7wnJxlI54eenp7QaDTslENnTqSH1dfXIygoCLdv38aIESNw4sQJ/juNa4PBAG9vb9bXQ0ND8eWXX/JY8fHxQX5+Pnr06IGXXnoJS5YsYYc6q9XKRiQAvO+12+3o1asXxo8fj3feeYfX5sGDByM/Px+FhYV89nns2DHExcVhyZIlaGhoQFVVFcLDw3k9rKioQGxsLJYsWYIzZ85w5iIHBwf4+vriypUrcHd3R48ePTBu3Dh88sknuHXrFnJzczn9P0VXlZaW8visrKzE+PHjeV8yceJEnDx5kg19cXFxuHnzJt566y3k5eWha9eucHZ2xokTJ5CVlQUXFxd4e3tjz549qtqEP1fEYCX8V5GSkoJly5bh0qVLcHJywoQJEzB37lzVZJ0xYways7PZ6xNoEo5bt27F9u3bOWexi4sLJk+erPp9Xl4eJkyYgNLSUjg7OyMqKgoLFy6Ek5MTtm7divfffx/FxcXw9fVFQ0MDcnNzWblwcXFBVFQUCgsLkZCQAJ1OBx8fH6SlpcFms2Hw4MFYtGgRQkJCcPz4cSxZsoQt9CaTiQW4o6MjSkpK4OjoCE9PT06ro9VqebOs1+uxceNGjBgxAikpKZgzZw5
7jQNN3jWvv/46nnjiCcyfPx9Wq5W9Y5rj7u6OiRMnoqioiDfrzZk4cSJeffVVNDY24qGHHuJ0C0rCwsKwZs0ahISEIDo6GteuXeP6KgR5WdlsNnTp0gVRUVF48803W4RMGwwGVgxMJhOngyspKWm1gKSrqyt+9atf4ezZs2ywU0KHoX379kW/fv2wZ88eVowdHR0xdOhQlJSU4KuvvoJW21RfRVmnym63w9/fnxX57OxsTs9iNBo5tWR1dTV69+6NgoICmM1mzJo1C2+88QbS0tLg6ekJs9mM7Oxs2O12NgIojWtarZYVGEopYjabcebMGR7758+fZ88cer7Ro0fjoYcewpIlS1ShxZQb22QyISoqCunp6fjmm29Uc+P48eNcGwRoUvpfe+01bNu2DV9++SWHRJNySWlcRo0ahZUrV+L3v/89z7eUlBTMnTuX0wrShq1bt25YsGABp1nYuXMn1qxZ0yIHuclkwtSpUzF//nzU1tZiwYIFOHnypMpbk5QRFxcXBAUFIT09nVMX0EEUAJ5vFosF0dHRdy22O2zYMPz973/HqVOn8Kc//alVQ2ynTp0wb948DBkyBGvXrsW2bdt4I+jg4MDt4+fnh7KyMpSWlrKyrRz/QNN8o0M+KvhNBxvkVT137lx8/vnnKhnRHA8PDz7ArK+vZ8OCt7c3KioqUFVVBaPRiKNHj6KyslLVL4RWq0VAQACmTZuGadOmISYmptV+AZrC+F9//XX86U9/4rmjxNXVFfPnz8fkyZNZVpL3EG3KDQYD+vXrx/3yxBNPqIyRzZ+tTZs26N27NzIyMjjVCOXqB5oitubPn48hQ4ZgzZo12LZtW4v2bn5NBwcHnnf0mcFg4JQuN2/eVB0yNm/zXr16ISMjo0X6JGL69Ol46aWXsGbNGnzwwQfct62xcuVK7N69mw8HjEYjxo4di0WLFqnWHBoDyjpTAFqsOdQ21AZ9+/bF0qVLec1ZuHBhi9QDxOHDh+Hn54fly5djz549dy3k2r9/f/z1r3/F5MmTVamHyHMuKCgIWVlZKCkpgaenJx9iAnfWCIvFgmXLluHTTz9VGfKApjWif//+aGhowMWLF1XvQweetbW17N362WefYe3atdi9e3eLdYQ2uTNnzuS6jMrxodU25UR/6KGHkJSUhKSkJLi7u3PaW/Igpucj2UhybPfu3di6dStu376tegej0Yjo6GjMnz8fDz/8cKvzxWQy4f/+7/8wdepUTJs2DZcvX2517CrbjHQTAK2ugzqdDgEBAWhoaMA333wDT09P5OTkwN3dHeXl5Xdds8LDw+Hq6opz586hpqaG1yZqI6BprFGqqHsVQyfIG99gMCApKYkNrTS36DDRZDKxsSI3N5flJh0eODo6oqqqCh4eHqwblZeX8xqq0+lU6fWUODg4sNMNQYd+5EQzcOBAnDlzBhUVFfDw8GCdsrn8dXNzg8lkQlFRETp06IAXXngBQ4YMwfLly/Hxxx+zjKN1UqfTYfjw4Zg5cybWrFmDK1eutNpnzfsPuOPB7OnpyTUy6MD7busBjVODwQCTydRqPVb6XkBAAB9eK+WTwWBgw4Oyvk9YWBj8/f2RkJAAvV6vajM6ZK+vr2ddWJmW6NvaTLlm0eE2RbuTgwIAdpIKDAyEzWZjHWbnzp1YvXp1i7lP0Qnr1q2Dr68vFixYgBMnTqhkyaOPPvq9Ze1nn312V11i7ty5mDVr1veStR9++GELnWrZsmU4deoUDAYD6zY055SyVin7yUud2l6pjzffb9BhnLOzM493AC3Gcmtjh2qzKQ1I5OCi0+mwYMECHD9+vIWeSvLgb3/7G9asWYPs7GzExsa26BegaY527doVixYtQq9evXD79m08++yzLfQXBwcHjB07FgsXLuS9W2trt7OzM1566SXExsZyCru7odFoEBISguXLl+P06dN33TcZDAY88sgjWLx4MfLy8rBs2TLeeyjnuYeHB1544QXExsYiOTkZHTt2xM2
bN9lRS3lfap8HHngAmzdvxrp161q999ChQ7Fu3ToUFRXhpZdewtWrV1u8t6enJ+bMmYMpU6bwnjg2NhbLly9nJ0C6LxnBH374Ybi5uXFUOBk8APBe5MEHH8SCBQvw/vvv46233mJDJBlBGxoa+N908N/82eierq6uHIlDhiiC9HmNRoNf/vKXePbZZ7Fp0ybExMS0aDtHR0f069cPs2fPhpOTE++TaA5rNBr07t2bo/npDMHR0RHOzs68n3VwcMCsWbOQkJCAW7dusRG5NV2oc+fOWLp0KY/lxx577K5jpXv37li0aBGSk5Oxc+dOJCcnq9YsirZTjuU///nPXD9Uia+vL5599lnExsbi8uXLcHFxgYeHR4u5Qdf18/Pj9L93W7dpLDc0NHDKvrvh5OSE6OhozJ07F7W1tVi+fDk70AFNczc8PBwrVqxAcHAwy1VKeUfrDUUAlpSUwGw2o6GhodV9h06nw6ZNm/iwe+3atUhOTm6xloaHh2Pjxo1wc3NT7dGU7zho0CB06NABR44cQVFRERu3lftGAJx+k+oNUipk5Z577969yMjIwNq1a5GSkoLGxsYWz6TVatGhQwe0bdsWly9fvqueAjQ5dYWEhODChQuqurSOjo6Ijo7GP//5T8yfPx8uLi7YunVrq2nAHB0dERgYiIKCAjZ4klx1c3PDY489hrlz56K4uJijo1tjy5YtGDFiBGJiYrB8+XKVjkBnFY6OjjwPmxs0WyM2NhbPPPMMOxIq26h///5YsmQJgoODERMTg2XLlqkce5Sy9tq1a5g5c2ar99Dr9fjb3/7GJR0A4LHHHkNeXh6qqqrYKYfGZ0hICObMmaM65yBMJhOefPJJdq6dMWMGMjMz0bNnT5w8eVI13qlGV5cuXbBgwQL85S9/QXp6uqqNmuPk5IS3334bgYGBWLlyJT755BMulUHjxGQyYfjw4aro8uZQhPWoUaNw9epVLvvg7OwMnU6H4uJi7hsyKLm6umLixImYO3cuYmNjsXTpUtV68Ktf/Qpz5szBO++8g/feew95eXmt3tvDw4PnCNUoa419+/YhKysLb7zxBqdWVD6TyWRCZGQkXnzxRezcuRN79+6961kf0JSuMyAgAMePH2+xv27Tpg0mTJiAcePGYfXq1ap1QImXlxeCgoL47GrkyJGt7tO8vLywfv169OrVix2BW2Py5MlYuHAhkpOTsXr1aty6dYvrcdJ7+vr64g9/+INqfN6+fRvLli1DfHw819vs1asXli1bhpCQEGRkZGDp0qW4dOkSn01ptVr4+flh3Lhx+M1vfnPXGsk/N8RgJQiCIAiCIAiCIAiCIAiCIAiCINxXpIaVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIgiAIgiAIgiAIgnBfEYOVIAiCIAiCIAiCIAiCIAiCIAiCcF8Rg5UgCIIgCIIgCIIgCIIgCIIgCIJwXxGDlSAIgiAIgiAIws+A0NBQbNiw4Xv95ty5cwgNDcW5c+d+pKcSBEEQBEEQBEH4zyAGK0EQBEEQBEEQBEH4AVy8eBEbNmxAeXn5/X4UQRAEQRAEQfjZo2lsbGy83w8hCIIgCIIgCIIg3Jva2lrodDro9frv/Bu73Y76+noYDAZoteKv+J/m7bffxqpVq3DixAn4+/vf78cRBEEQBEEQhJ81smMRBEEQBEEQBEH4/xS73Y7a2loAgMlk+l7GKgDQarUwmUw/a2NVY2Mjampq7vdjCIIgCIIgCILwI/Pz3bUIgiAIgiAIgiD8TNiwYQNCQ0ORkpKC559/Hr1798aAAQOwfPlyNkgBTXWqli5din379mHs2LHo0aMHTp06xX9rXsMqLy8PCxcuxNChQ9G9e3eMHDk
SS5YsQV1dHYDWa1jNmDEDjz76KJKTkzFjxgz07NkTw4YNwz/+8Y8Wz52dnY1Zs2bhgQcewKBBg7BixQqcOnXqe9fF+uijjxAaGorz589j8eLFGDBgAHr37o358+ejrKxM9d2RI0fimWeewalTpzBp0iSEh4dj165dAIDbt2/jueeeQ//+/dGzZ09MnToVn376qer39M5xcXHYuHEjhg0bhl69euG5555DRUUF6urq8PLLL2PQoEHo1asXFixYwO3VWj889NBD6NGjByZNmoTz58/zdzZs2IBVq1YBAEaNGoXQ0FCEhoYiKyvrO7eLIAiCIAiCIAh3+H7ueYIgCIIgCIIgCMIPZs6cOfDz88O8efNw+fJlvPvuuygvL2fDBwCcPXsWhw4dwvTp0+Hm5gY/P79Wr5WXl4cpU6agoqICU6dORXBwMPLy8nDkyBHU1NTAaDTe9TnKysrw29/+FlFRURgzZgyOHDmC119/HZ07d0ZkZCQAwGq14he/+AUKCgowc+ZMtG3bFgcOHPhehqrmLF26FC4uLpg9ezbS0tKwc+dO5OTk4N1334VGo+HvpaWlYd68eYiOjsbUqVPRoUMHFBYW4oknnkB1dTVmzJgBNzc3fPzxx/j973+P9evXIyoqSnWvrVu3wsHBAb/73e+QkZGBHTt2QK/XQ6PRoLy8HLNnz8aVK1fw0Ucfwc/PD7Nnz1b9/vz584iLi8OMGTNgNBqxc+dO/Pa3v8Xu3bvRuXNnREVFIT09HQcOHMCCBQvg5uYGAHB3d//B7SMIgiAIgiAI/8uIwUoQBEEQBEEQBOEnwt/fH5s3bwYATJ8+HRaLBe+//z5+/etfIywsDECTsWb//v3o2LHjPa+1Zs0aFBYW4sMPP0SPHj348+effx7fVqo4Pz8fr776KiZOnAgAmDJlCkaOHImYmBg2WH3wwQe4ffs2Nm3ahNGjRwMAnnjiCf7ND8FgMOCdd96BwWAAAPj6+uK1117DyZMnMWrUKP5eRkYG3nrrLQwbNow/W7FiBQoLC/Hee++hb9++AIDHH38c48ePx8qVKzFq1ChV6sOGhga8++67fK+SkhIcPHhQFU02ffp0ZGZm4qOPPmphsLp16xZiYmLQvXt3AMDYsWPx8MMPY/369di4cSPCwsLQtWtXHDhwAKNHj5YaVoIgCIIgCILwbyIpAQVBEARBEARBEH4ipk+frvr3U089BQD4/PPP+bN+/fp9q7HKbrfj+PHjGDFihMpYRSijlVrD0dEREyZM4H8bjUb06NEDt2/f5s9OnToFb29vlSHJZDJh6tSp97z2vYiOjmYDEgA8+eST0Ov1+Oyzz1Tf8/f3VxmrAOCzzz5DeHg4G6sAwMnJCdHR0cjOzkZycrLq+xMmTFDdKzw8HI2NjZg8ebLqe+Hh4cjNzYXNZlN93qtXLzZWAU3GtVGjRuH06dNoaGj4nm8uCIIgCIIgCMK3IQYrQRAEQRAEQRCEn4jAwEDVv9u3bw+tVquqe/RdInWKi4tRWVmJTp06/aDn8PHxaWHUcnV1VdWTys7ORvv27Vt8r3379j/onkDL93dycoKnpyeys7NVn7fWBjk5OejQoUOLz4ODg/nvSnx9fVX/dnZ2BgC0a9euxed2ux0VFRX3fFYACAoKQnV1NYqLi1v8TRAEQRAEQRCEfw8xWAmCIAiCIAiCINwnWouEcnBw+NHvq9PpfvR7/Dv8J9pAmR7wu3z+bWkUBUEQBEEQBEH4cRGDlSAIgiAIgiAIwk9ERkZGi3/b7fbvXf/I3d0dFosFSUlJ/8nHU+Hn54fMzMwWhpzMzMwffM3m719VVYWCggL4+fl96299fX2RlpbW4vPU1FT++3+S5s8KAOnp6TCbzXB3dwfw7akXBUEQBEEQBEH47ojBShAEQRAEQRAE4SfivffeU/17x44dAICIiIjvdR2tVovRo0fjk08+wdWrV1v8/T8RLTR06FDk5eXhxIkT/FltbS0+/PDDH3zNDz74APX
19fzvnTt3wmazfaf3j4yMxFdffYVLly7xZ1arFR9++CH8/Py+te7X9+XSpUu4du0a/zs3NxcnTpzAkCFDOELNbDYDQIt0goIgCIIgCIIgfH/09/sBBEEQBEEQBEEQ/lfIysrCrFmzMGzYMFy+fBn79u3Do48+irCwsO99rRdeeAHx8fGYMWMGpk6dipCQEBQUFODw4cN4//334eLi8m89a3R0NHbs2IF58+Zh5syZ8PT0xP79+2EymQD8sOii+vp6/PKXv8SYMWOQlpaG999/H3369MGoUaO+9be/+93vcPDgQTz99NOYMWMGXF1dsXfvXmRlZWHDhg13TfX3Q+ncuTN+85vfYMaMGTAajdi5cycA4I9//CN/p1u3bgCAtWvX4pFHHoHBYMCIESPg6Oj4H30WQRAEQRAEQfhfQAxWgiAIgiAIgiAIPxFvvPEG1q1bh9WrV0Ov1+Opp57C/Pnzf9C1vL298eGHH2LdunXYv38/Kisr4e3tjYiIiP9IDSgnJyds27YNy5cvx/bt2+Ho6IiJEyeiV69e+OMf/8iGq+/D4sWLsX//fqxfvx719fUYO3YsFi1a9J2MX23btsWuXbvw2muvYceOHaitrUVoaCi2bNmC4cOH/4A3vDf9+vXDAw88gE2bNiEnJwcdO3bEypUrVcbF8PBwPP/889i1axdOnToFu92OEydOiMFKEARBEARBEH4AmkapLCsIgiAIgiAIgvCjsmHDBmzcuBFnzpzh+kc/V9555x2sXLkSn3/+Oby9vb/Tbz766CMsWLAAe/bsQY8ePX7kJ/z3CQ0NxfTp07F48eL7/SiCIAiCIAiC8D+D1LASBEEQBEEQBEEQWqWmpkb179raWnzwwQcICgr6zsYqQRAEQRAEQRCE74KkBBQEQRAEQRAEQRBaZfbs2fD19UVYWBgqKyuxb98+pKam4vXXXwfQZNCqqKi45zVcXV1/ikcVBEEQBEEQBOFnjhisBEEQBEEQBEEQhFYZOnQo9uzZg/3796OhoQEdO3bE2rVr8cgjjwAA4uLisGDBgnteY/v27T/FowqCIAiCIAiC8DNHalgJgiAIgiAIgiAIP4j8/HwkJyff8zvdunWTKCtBEARBEARBEL4VMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xXt/X4AQRAEQRAEQRAEQRAEQRAEQRAE4X8bMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI9xUxWAmCIAiCIAiCIAiCIAiCIAiCIAj3FTFYCYIgCIIgCIIgCIIgCIIgCIIgCPcVMVgJgiAIgiAIgiAIgiAIgiAIgiAI95X/B/V7Nk7OqfW/AAAAAElFTkSuQmCC", + "text/plain": [ + "<Figure size 2000x500 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df[[\"pricing_prompt\", \"pricing_completion\"]].plot.scatter(x=\"pricing_prompt\", y=\"pricing_completion\")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for /: 'str' and 'str'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/computation/expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b)\u001b[0m\n\u001b[1;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprice_ratio\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpricing_prompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer.<locals>.new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/arraylike.py:210\u001b[0m, in \u001b[0;36mOpsMixin.__truediv__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 208\u001b[0m 
\u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__truediv__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__truediv__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruediv\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/series.py:6135\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 6133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[1;32m 6134\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[0;32m-> 6135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/base.py:1382\u001b[0m, in \u001b[0;36mIndexOpsMixin._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, 
rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[0;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:227\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, 
is_cmp)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 222\u001b[0m ):\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n\u001b[0;32m--> 227\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43m_masked_arith_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", + "File \u001b[0;32m/venv/lib/python3.12/site-packages/pandas/core/ops/array_ops.py:163\u001b[0m, in \u001b[0;36m_masked_arith_op\u001b[0;34m(x, y, op)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# See GH#5284, GH#5035, GH#19448 for historical reference\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m--> 163\u001b[0m result[mask] \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43myrav\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(y):\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'str' and 'str'" + ] + } + ], + "source": [ + "df[\"price_ratio\"] = df[\"pricing_completion\"] / df[\"pricing_prompt\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"total_price\"] = " + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/helpers/notebooks/hopenai_tutorial.py b/helpers/notebooks/hopenai_tutorial.py new file mode 100644 index 000000000..118947e3c --- /dev/null +++ b/helpers/notebooks/hopenai_tutorial.py @@ -0,0 +1,120 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.7 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: 
python +# name: python3 +# --- + +# %% [markdown] +# CONTENTS: +# - [Description](#description) + +# %% [markdown] +# <a name='description'></a> +# # Description +# +# This notebook examines ... + +# %% +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet jupyterlab-vim)" +# #!jupyter labextension enable + +# %% +# %load_ext autoreload +# %autoreload 2 + +import logging + +import helpers.hdbg as hdbg +import helpers.henv as henv +import helpers.hprint as hprint + +# %% +print(henv.get_system_signature()[0]) + +hprint.config_notebook() + +# %% +# hdbg.init_logger(verbosity=logging.DEBUG) +hdbg.init_logger(verbosity=logging.INFO) +# hdbg.test_logger() +_LOG = logging.getLogger(__name__) + +# %% +# !sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet openai requests)" + +# %% +import helpers.hopenai as hopenai + +# %% +val = hopenai.get_model_stats() + +# %% +import pprint +pprint.pprint(val[0]) + +# %% +import pandas as pd +import numpy as np + +# %% +# Normalize the nested JSON +df = pd.json_normalize(val, sep='_') +df +# View the resulting DataFrame +#print(df.T) # Transpose just for readable vertical inspection + +# %% +df.iloc[0].T + +# %% +col_names = ["id", "context_length", "pricing_prompt", "pricing_completion"] + +# %% +df.dtypes + +# %% [markdown] +# # + +# %% +for col in df.columns: + print(hopenai.infer_column_types(df[col])) + +# %% +df.apply(lambda x: pd.Series(infer_column_types(x))).T + +# %% +hopenai.infer_column_types_df(df) + + +# %% +def find_ + + +# %% +pd.to_numeric(df["pricing_request"], errors='coerce').notna() + +# %% +df["pricing_completion"] + +# %% +df.sort_values("pricing_prompt")[col_names] + +# %% +df[["pricing_prompt", "pricing_completion"]].plot.scatter(x="pricing_prompt", y="pricing_completion") + +# %% +df["price_ratio"] = df["pricing_completion"] / df["pricing_prompt"] + +# %% + +# %% +df["total_price"] = diff --git a/helpers/notebooks/notebook_template.ipynb b/notebook_template.ipynb similarity 
index 61% rename from helpers/notebooks/notebook_template.ipynb rename to notebook_template.ipynb index afa106af6..050d87daf 100644 --- a/helpers/notebooks/notebook_template.ipynb +++ b/notebook_template.ipynb @@ -20,7 +20,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!sudo /bin/bash -c \"(source /venv/bin/activate; pip install --quiet jupyterlab-vim)\"\n", + "#!jupyter labextension enable" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-04-02T18:11:14.828251Z", @@ -33,7 +43,7 @@ "%autoreload 2\n", "\n", "import logging\n", - "\n", + " \n", "import helpers.hdbg as hdbg\n", "import helpers.henv as henv\n", "import helpers.hprint as hprint" @@ -41,38 +51,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-04-02T18:11:24.635995Z", "start_time": "2021-04-02T18:11:18.239237Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# Packages\n", - " python: 3.7.10\n", - " gluonnlp: 0.10.0\n", - " gluonts: 0.6.7\n", - " joblib: 1.0.1\n", - " mxnet: 1.8.0\n", - " numpy: 1.20.1\n", - " pandas: 1.2.3\n", - " pyarrow: 3.0.0\n", - " scipy: 1.6.1\n", - " seaborn: 0.11.1\n", - " sklearn: 0.24.1\n", - " statsmodels: 0.12.2\n", - "# Last commits:\n", - " * 9c84f6d4 saggese Lint (68 minutes ago) Fri Apr 2 17:03:37 2021 (HEAD -> AmpTask1179_Audit_IM_03, origin/AmpTask1179_Audit_IM_03)\n", - " * 0a999a51 saggese Improve comments ( 2 hours ago) Fri Apr 2 16:40:50 2021 \n", - " * 199ec5ae saggese Improve comments ( 2 hours ago) Fri Apr 2 16:26:43 2021 \n" - ] - } - ], + "outputs": [], "source": [ "print(henv.get_system_signature()[0])\n", "\n", @@ -81,22 +67,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2021-04-02T18:11:24.668793Z", "start_time": 
"2021-04-02T18:11:24.638503Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0mWARNING: Running in Jupyter\n" - ] - } - ], + "outputs": [], "source": [ "# hdbg.init_logger(verbosity=logging.DEBUG)\n", "hdbg.init_logger(verbosity=logging.INFO)\n", @@ -124,7 +102,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.12.3" }, "toc": { "base_numbering": 1, @@ -141,5 +119,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/helpers/notebooks/notebook_template.py b/notebook_template.py similarity index 86% rename from helpers/notebooks/notebook_template.py rename to notebook_template.py index 688984623..1b39dfbcc 100644 --- a/helpers/notebooks/notebook_template.py +++ b/notebook_template.py @@ -23,12 +23,16 @@ # # This notebook examines ... +# %% +# #!sudo /bin/bash -c "(source /venv/bin/activate; pip install --quiet jupyterlab-vim)" +# #!jupyter labextension enable + # %% # %load_ext autoreload # %autoreload 2 import logging - + import helpers.hdbg as hdbg import helpers.henv as henv import helpers.hprint as hprint From 10fdf206ecc7ac2b68558801ec865ba331806c3e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 10 May 2025 13:37:41 -0400 Subject: [PATCH 088/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- helpers/hopenai.py | 15 +++++----- template_unit_test.py | 65 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 template_unit_test.py diff --git a/helpers/hopenai.py b/helpers/hopenai.py index 2a9be2562..0d5ba4d03 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -274,23 +274,24 @@ def get_completion( call 
:return: completion text """ - get_models_stats() - assert 0 # model = _MODEL if model is None else model # model = "anthropic/claude-3-5-sonnet" # model = "openai/gpt-4o" # model="meta-llama/llama-3-70b-instruct" - model = "deepseek/deepseek-r1-distill-qwen-1.5b" + #model = "deepseek/deepseek-r1-distill-qwen-1.5b" print("OpenAI API call ... ") # client = OpenAI() # print(openai.api_base) # assert 0 # openai.api_base ="https://openrouter.ai/api/v1" # openai.api_key = os.environ.get("OPENROUTER_API_KEY") - client = OpenAI( - base_url="https://openrouter.ai/api/v1", # Important: Use OpenRouter's base URL - api_key=os.environ.get("OPENROUTER_API_KEY"), - ) + if False: + client = OpenAI( + base_url="https://openrouter.ai/api/v1", # Important: Use OpenRouter's base URL + api_key=os.environ.get("OPENROUTER_API_KEY"), + ) + else: + client = OpenAI() memento = htimer.dtimer_start(logging.DEBUG, "OpenAI API call") if not report_progress: completion = client.chat.completions.create( diff --git a/template_unit_test.py b/template_unit_test.py new file mode 100644 index 000000000..94f47b353 --- /dev/null +++ b/template_unit_test.py @@ -0,0 +1,65 @@ +import logging +import os +import pprint +from typing import Any, List, Tuple + +import helpers.hmarkdown as hmarkdo +import helpers.hprint as hprint +import helpers.hunit_test as hunitest + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# Test_format_compressed_markdown1 +# ############################################################################# + + +class Test_format_compressed_markdown1(hunitest.TestCase): + + def test1(self) -> None: + # Prepare inputs. + # ... + # Evaluate the function. + # ... + # Check output. + # ... + pass + + def test2(self) -> None: + """ + Test basic case with single first level bullet. 
+ """ + text = """ + Some text + - First bullet + More text""" + expected = """ + Some text + + - First bullet + More text""" + self._format_and_compare_markdown(text, expected) + + def test3(self) -> None: + """ + Test multiple first level bullets. + """ + text = """ + - First bullet + - Second bullet + - Third bullet""" + expected = """ + - First bullet + + - Second bullet + + - Third bullet""" + self._format_and_compare_markdown(text, expected) + + def _format_and_compare_markdown(self, text: str, expected: str) -> None: + text = hprint.dedent(text) + expected = hprint.dedent(expected) + # + actual = hmarkdo.format_compressed_markdown(text) + self.assert_equal(actual, expected) From 4abfea7411a5345de78b3f3e59705d4d54bc79ed Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 11 May 2025 12:51:11 -0400 Subject: [PATCH 089/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 33 ++++++++++++++--------- dev_scripts_helpers/llms/llm_transform.py | 14 ++++++++++ helpers/hmarkdown.py | 4 +++ 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index f56aaabbe..b34d7c0f0 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -659,7 +659,10 @@ def md_clean_up_explanation_doc() -> _PROMPT_OUT: # ############################################################################# -def slide_improve() -> _PROMPT_OUT: +def slide_to_bullet_points() -> _PROMPT_OUT: + """ + Convert the markdown text into bullet points. 
+ """ system = _MD_CONTEXT system += r""" I will give you markdown text @@ -680,20 +683,19 @@ def slide_improve() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def slide_improve2() -> _PROMPT_OUT: +def slide_add_example_picture() -> _PROMPT_OUT: + """ + """ system = _MD_CONTEXT system += r""" I will give you markdown text - You will: - - Maintain the structure of the text and keep the content of the existing - text - - Remove all the words that are not needed, minimizing the changes to the - text - - Add bullet points to the text that are important or missing - - Add examples to clarify the text and help intuition - - Print only the markdown without any explanation. + You will + - Select the most important concepts in the text + - Print a TODO comment of less than 30 words suggesting what example picture + to add to give an intuition of the text + - The TODO is in the format `// TODO: <suggestion>` + - Suggest what tool to use e.g., (mermaid, tikz, graphviz dot) """ pre_transforms: Set[str] = set() post_transforms = { @@ -701,18 +703,22 @@ def slide_improve2() -> _PROMPT_OUT: "remove_end_of_line_periods", "remove_empty_lines", } - post_container_transforms = ["format_markdown"] + post_container_transforms = ["append_text"] return system, pre_transforms, post_transforms, post_container_transforms -def slide_elaborate() -> _PROMPT_OUT: +def slide_expand() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" I will give you markdown text You will: + - Maintain the structure of the text and keep the content of the existing + text - Add bullet points to the text that are important or missing - Add examples to clarify the text and help intuition + - Not bold or italicize the text + - Use `E.g.,` instead of `Example` Print only the markdown without any explanation. 
""" @@ -737,6 +743,7 @@ def slide_reduce() -> _PROMPT_OUT: - Make sure that the text is clean and readable - Remove all the words that are not needed - Minimize the changes to the text + - Use `E.g.,` instead of `Example` Print only the markdown without any explanation. """ diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 6e4c37cec..585902b04 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -271,13 +271,27 @@ def _main(parser: argparse.ArgumentParser) -> None: out_txt = hio.from_file(tmp_out_file_name) if dshlllpr.to_run("prettier_markdown", post_container_transforms): out_txt = hmarkdo.prettier_markdown(out_txt) + # if dshlllpr.to_run("format_markdown", post_container_transforms): # Note that we need to run this outside the `llm_transform` container to # avoid to do docker-in-docker in the `llm_transform` container (which # doesn't support that). + out_txt = hmarkdo.md_clean_up(out_txt) out_txt = hmarkdo.format_markdown(out_txt) if args.bold_first_level_bullets: out_txt = hmarkdo.bold_first_level_bullets(out_txt) + # + if dshlllpr.to_run("append_text", post_container_transforms): + out_txt_tmp = [] + # Append the original text. + txt = hio.from_file(tmp_in_file_name) + txt = hmarkdo.format_markdown(txt) + txt = hmarkdo.md_clean_up(txt) + out_txt_tmp.append(txt) + # Append the transformed text. + out_txt_tmp.append(out_txt) + out_txt = "\n".join(out_txt_tmp) + # Check that all post-transforms were run. hdbg.dassert_eq( len(post_container_transforms), 0, diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 668855f67..81a3a337e 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -315,6 +315,10 @@ def md_clean_up(txt: str) -> str: txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) # Remove dot at the end of each line. 
txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) + # Transform `Example: Training a deep` into `E.g., training a deep`, + # converting the word after `Example:` to lower case. + txt = re.sub(r'\bExample:', 'E.g.,', txt) + txt = re.sub(r'\bE.g.,\s+(\w)', lambda m: 'E.g., ' + m.group(1).lower(), txt) return txt From ddcdc03badb6a3d92eea61bc71b1ef02e8a09d41 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 11 May 2025 20:20:47 -0400 Subject: [PATCH 090/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../dockerized_tikz_to_bitmap.py | 2 +- .../documentation/render_images.py | 33 +++-- helpers/hdocker.py | 132 +++++++++++++----- helpers/hio.py | 12 +- helpers/test/test_hdocker.py | 12 +- 5 files changed, 138 insertions(+), 53 deletions(-) diff --git a/dev_scripts_helpers/documentation/dockerized_tikz_to_bitmap.py b/dev_scripts_helpers/documentation/dockerized_tikz_to_bitmap.py index ad619bb22..4440dd854 100755 --- a/dev_scripts_helpers/documentation/dockerized_tikz_to_bitmap.py +++ b/dev_scripts_helpers/documentation/dockerized_tikz_to_bitmap.py @@ -36,7 +36,7 @@ def _main(parser: argparse.ArgumentParser) -> None: hdbg.init_logger( verbosity=args.log_level, use_exec_path=True, force_white=False ) - hdocker.dockerized_tikz_to_bitmap( + hdocker.run_dockerized_tikz_to_bitmap( args.input, cmd_opts, args.output, diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index 156082815..cc49be4b2 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -233,15 +233,26 @@ def _render_image_code( if not image_code_txt.endswith("@enduml"): image_code_txt = f"{image_code_txt}\n@enduml" elif 
image_code_type == "tikz": - image_code_tmp = r""" - \documentclass[tikz, border=10pt]{standalone} - \usepackage{tikz} - \begin{document} - """ - image_code_tmp = hprint.dedent(image_code_tmp) - image_code_tmp += image_code_txt - image_code_tmp += r"\end{document}" - image_code_txt = image_code_tmp + if False: + start_tag = r""" + \documentclass[tikz, border=10pt]{standalone} + \usepackage{tikz} + \begin{document} + """ + else: + start_tag = r""" + \documentclass{standalone} + \usepackage{tikz} + \usepackage{amsmath} + \begin{document} + \begin{tikzpicture} + """ + start_tag = hprint.dedent(start_tag) + end_tag = hprint.dedent(r""" + \end{tikzpicture} + \end{document} + """) + image_code_txt = "\n".join([start_tag, image_code_txt, end_tag]) # Get paths for rendered files. # TODO(gp): The fact that we compute the image file path here makes it # not possible to use a decorator to implement the caching. @@ -285,8 +296,8 @@ def _render_image_code( hdocker.run_dockerized_mermaid(in_code_file_path, out_img_file_path, force_rebuild=force_rebuild, use_sudo=use_sudo) elif image_code_type == "tikz": - cmd_opts: List[str] = [] - hdocker.dockerized_tikz_to_bitmap(in_code_file_path, cmd_opts, out_img_file_path, + cmd_opts: List[str] = ["-density 300", "-quality 10"] + hdocker.run_dockerized_tikz_to_bitmap(in_code_file_path, cmd_opts, out_img_file_path, force_rebuild=force_rebuild, use_sudo=use_sudo) elif image_code_type == "graphviz": cmd_opts: List[str] = [] diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 43c6b2516..fa45c03ea 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1186,37 +1186,87 @@ def run_dockerized_latex( """ _LOG.debug(hprint.func_signature_to_str()) container_image = "tmp.latex" - dockerfile = r""" - # Use a lightweight base image. - FROM debian:bullseye-slim - - # Set environment variables to avoid interactive prompts. - ENV DEBIAN_FRONTEND=noninteractive - - # Update. 
- RUN apt-get update - - # Install only the minimal TeX Live packages. - RUN apt-get install -y --no-install-recommends \ - texlive-latex-base \ - texlive-latex-recommended \ - texlive-fonts-recommended \ - texlive-latex-extra \ - lmodern \ - tikzit - - RUN rm -rf /var/lib/apt/lists/* \ - && apt-get clean - - # Verify LaTeX is installed. - RUN latex --version - - # Set working directory. - WORKDIR /workspace - - # Default command. - CMD [ "bash" ] - """ + # Doesn't exist. + if False: + dockerfile = r""" + # Use a lightweight base image. + # FROM debian:bullseye-slim + FROM ubuntu:22.04 + """ + if False: + dockerfile = r""" + # Use minimal multi-arch TeX Live image (includes ARM support) + FROM ghcr.io/xu-cheng/texlive:latest + """ + if True: + dockerfile = r""" + FROM mfisherman/texlive-full + + # Verify LaTeX is installed. + RUN latex --version + + # Default command. + CMD [ "bash" ] + """ + # Doesn't work. + if False: + dockerfile = r""" + # Use a lightweight base image. + # FROM debian:bullseye-slim + FROM ubuntu:22.04 + + # Set environment variables to avoid interactive prompts. + ENV DEBIAN_FRONTEND=noninteractive + + # Update. + RUN apt-get update && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + apt-get update + + # Install only the minimal TeX Live packages. + RUN apt-get install -y --no-install-recommends \ + texlive-latex-base \ + texlive-latex-recommended \ + texlive-fonts-recommended \ + texlive-latex-extra \ + lmodern \ + tikzit \ + || apt-get install -y --fix-missing + """ + # + if False: + dockerfile = r""" + # Use a lightweight base image. + # FROM debian:bullseye-slim + FROM ubuntu:22.04 + + # Set environment variables to avoid interactive prompts. + ENV DEBIAN_FRONTEND=noninteractive + + RUN rm -rf /var/lib/apt/lists/* + # Update. + RUN apt-get clean && \ + apt-get update + + # Install texlive-full. + RUN apt install -y texlive-full + """ + # Clean up. 
+ if False: + dockerfile += r""" + RUN rm -rf /var/lib/apt/lists/* \ + && apt-get clean + + # Verify LaTeX is installed. + RUN latex --version + + # Set working directory. + WORKDIR /workspace + + # Default command. + CMD [ "bash" ] + """ container_image = build_container_image( container_image, dockerfile, force_rebuild, use_sudo ) @@ -1326,8 +1376,9 @@ def run_basic_latex( force_rebuild=force_rebuild, use_sudo=use_sudo, ) - # Get the path of the output file created by Latex. - file_out = os.path.basename(in_file_name).replace(".tex", ".pdf") + # Latex writes the output file in the current working directory. + file_out = os.path.basename(in_file_name) + file_out = hio.change_filename_extension(file_out, "", "pdf") _LOG.debug("file_out=%s", file_out) hdbg.dassert_path_exists(file_out) # Move to the proper output location. @@ -1424,7 +1475,7 @@ def run_dockerized_imagemagick( return ret -def dockerized_tikz_to_bitmap( +def run_dockerized_tikz_to_bitmap( in_file_path: str, cmd_opts: List[str], out_file_path: str, @@ -1432,8 +1483,19 @@ def dockerized_tikz_to_bitmap( force_rebuild: bool = False, use_sudo: bool = False, ) -> None: - """ + r""" Convert a TikZ file to a PDF file. + + It expects the input file to be a TikZ including the Latex preamble like: + ``` + \documentclass[tikz, border=10pt]{standalone} + \usepackage{tikz} + \begin{document} + \begin{tikzpicture}[scale=0.8] + ... + \end{tikzpicture} + \end{document} + ``` """ _LOG.debug(hprint.func_signature_to_str()) # Convert tikz file to PDF. diff --git a/helpers/hio.py b/helpers/hio.py index e6bab41ff..66fef7cba 100644 --- a/helpers/hio.py +++ b/helpers/hio.py @@ -515,7 +515,7 @@ def get_size_as_str(file_name: str) -> str: def is_valid_filename_extension(ext: str) -> bool: """ - By convention extensions are the initial `.`. + By convention extensions don't include the initial `.`. E.g., "tgz" is valid, but not ".tgz". 
""" @@ -528,10 +528,16 @@ def change_filename_extension(filename: str, old_ext: str, new_ext: str) -> str: Change extension of a filename (e.g. "data.csv" to "data.json"). :param filename: the old filename (including extension) - :param old_ext: the extension of the old filename - :param new_ext: the extension to replace the old extension + :param old_ext: the extension of the old filename (e.g., "csv") + - If empty, it is extracted from the filename + :param new_ext: the extension to replace the old extension (e.g., "json") :return: a filename with the new extension """ + # If the old extension is empty, extract it from the filename. + if old_ext == "": + _, old_ext = os.path.splitext(filename) + # Remove the leading dot. + old_ext = old_ext.lstrip('.') hdbg.dassert( is_valid_filename_extension(old_ext), "Invalid extension '%s'", old_ext ) diff --git a/helpers/test/test_hdocker.py b/helpers/test/test_hdocker.py index 08ad13718..a7aff5525 100644 --- a/helpers/test/test_hdocker.py +++ b/helpers/test/test_hdocker.py @@ -153,7 +153,10 @@ def test1(self) -> None: check_if_exists = False # - Prepare outputs. helpers_root_path = hgit.find_helpers_root() - exp_docker_file_path = f"{helpers_root_path}/helpers/test/outcomes/Test_convert_to_docker_path1.test1/input/tmp.llm_transform.in.txt" + exp_docker_file_path = os.path.join(helpers_root_path, + "helpers/test/outcomes", + "Test_convert_to_docker_path1.test1/input", + "tmp.llm_transform.in.txt") exp_mount = "type=bind,source=/app,target=/app" self.helper( in_file_path, @@ -180,7 +183,10 @@ def test2(self) -> None: check_if_exists = True # - Prepare outputs. 
helpers_root_path = hgit.find_helpers_root() - exp_docker_file_path = f"{helpers_root_path}/helpers/test/outcomes/Test_convert_to_docker_path1.test2/input/tmp.input.md" + exp_docker_file_path = os.path.join(helpers_root_path, + "helpers/test/outcomes", + "Test_convert_to_docker_path1.test2/input", + "tmp.input.md") exp_mount = "type=bind,source=/app,target=/app" self.helper( in_file_path, @@ -664,7 +670,7 @@ def test_dockerized1(self) -> None: force_rebuild = False use_sudo = hdocker.get_use_sudo() # Run function. - hdocker.dockerized_tikz_to_bitmap( + hdocker.run_dockerized_tikz_to_bitmap( in_file_path, cmd_opts, out_file_path, From 06060221bb6729e41ac672fea9aa5e57b8dfb3f5 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 11 May 2025 20:24:28 -0400 Subject: [PATCH 091/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/render_images.py | 92 +++++++++++++------ dev_scripts_helpers/llms/llm_prompts.py | 6 +- helpers/hio.py | 2 +- helpers/hmarkdown.py | 4 +- helpers/hopenai.py | 34 +++---- helpers/hsql_test.py | 1 - helpers/notebooks/hopenai_tutorial.py | 11 +-- 7 files changed, 91 insertions(+), 59 deletions(-) diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index cc49be4b2..53483f7fb 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -22,12 +22,12 @@ """ import argparse +import hashlib import logging import os import re import tempfile -import hashlib -from typing import List, Tuple, Optional +from typing import List, Optional, Tuple import helpers.hdbg as hdbg import helpers.hdocker as hdocker @@ -67,7 +67,9 @@ def _get_rendered_file_paths( """ sub_dir = "figs" 
# E.g., "docs/readme.md" -> "/usr/docs", "readme.md". - out_file_dir, out_file_name = os.path.split(os.path.abspath(template_out_file)) + out_file_dir, out_file_name = os.path.split( + os.path.abspath(template_out_file) + ) # E.g., "readme". out_file_name_body = os.path.splitext(out_file_name)[0] # Create the name for the image file, e.g., "readme.1.png". @@ -83,6 +85,8 @@ def _get_rendered_file_paths( # ############################################################################# +# ImageHashCache +# ############################################################################# # TODO(gp): This can be generalized to compute the hash of a general computation. @@ -135,7 +139,15 @@ def __init__(self, cache_file: str): _LOG.debug("No cache file found at %s", self.cache_file) self.cache = {} - def compute_hash(self, image_code: str, image_code_type: str, out_file: str) -> Tuple[str, dict]: + def __contains__(self, entry_key: str) -> bool: + """ + Check if an entry is in the cache. + """ + return entry_key in self.cache + + def compute_hash( + self, image_code: str, image_code_type: str, out_file: str + ) -> Tuple[str, dict]: """ Compute a hash of the needed computation inputs. @@ -158,26 +170,23 @@ def compute_hash(self, image_code: str, image_code_type: str, out_file: str) -> cache_key = out_file return cache_key, cache_value - def update_cache(self, cache_key: str, cache_value: str, - *, assert_no_key: bool = False) -> bool: + def update_cache( + self, cache_key: str, cache_value: str, *, assert_no_key: bool = False + ) -> bool: """ Update the cache with a new entry. """ if assert_no_key: hdbg.dassert_not_in(cache_key, self.cache) - cache_updated = cache_key not in self.cache or self.cache[cache_key] != cache_value + cache_updated = ( + cache_key not in self.cache or self.cache[cache_key] != cache_value + ) if cache_updated: self.cache[cache_key] = cache_value # Save the cache to the file. 
self._save() return cache_updated - def __contains__(self, entry_key: str) -> bool: - """ - Check if an entry is in the cache. - """ - return entry_key in self.cache - def _load(self) -> dict: """ Load the hash cache from a file. @@ -248,16 +257,18 @@ def _render_image_code( \begin{tikzpicture} """ start_tag = hprint.dedent(start_tag) - end_tag = hprint.dedent(r""" + end_tag = hprint.dedent( + r""" \end{tikzpicture} \end{document} - """) + """ + ) image_code_txt = "\n".join([start_tag, image_code_txt, end_tag]) # Get paths for rendered files. # TODO(gp): The fact that we compute the image file path here makes it # not possible to use a decorator to implement the caching. - in_code_file_path, abs_img_dir_path, out_img_file_path = _get_rendered_file_paths( - out_file, image_code_idx, dst_ext + in_code_file_path, abs_img_dir_path, out_img_file_path = ( + _get_rendered_file_paths(out_file, image_code_idx, dst_ext) ) cache_hit = False if use_cache: @@ -266,7 +277,9 @@ def _render_image_code( _LOG.debug(hprint.to_str("cache_file")) cache = ImageHashCache(cache_file) # Compute hash of inputs. - cache_key, cache_value = cache.compute_hash(image_code_txt, image_code_type, out_img_file_path) + cache_key, cache_value = cache.compute_hash( + image_code_txt, image_code_type, out_img_file_path + ) # Check if the image is cached. if cache_key in cache: # The image is cached, return the path. 
@@ -289,20 +302,37 @@ def _render_image_code( else: if image_code_type == "plantuml": hdocker.run_dockerized_plantuml( - in_code_file_path, abs_img_dir_path, dst_ext, - force_rebuild=force_rebuild, use_sudo=use_sudo + in_code_file_path, + abs_img_dir_path, + dst_ext, + force_rebuild=force_rebuild, + use_sudo=use_sudo, ) elif image_code_type == "mermaid": - hdocker.run_dockerized_mermaid(in_code_file_path, out_img_file_path, - force_rebuild=force_rebuild, use_sudo=use_sudo) + hdocker.run_dockerized_mermaid( + in_code_file_path, + out_img_file_path, + force_rebuild=force_rebuild, + use_sudo=use_sudo, + ) elif image_code_type == "tikz": cmd_opts: List[str] = ["-density 300", "-quality 10"] - hdocker.run_dockerized_tikz_to_bitmap(in_code_file_path, cmd_opts, out_img_file_path, - force_rebuild=force_rebuild, use_sudo=use_sudo) + hdocker.run_dockerized_tikz_to_bitmap( + in_code_file_path, + cmd_opts, + out_img_file_path, + force_rebuild=force_rebuild, + use_sudo=use_sudo, + ) elif image_code_type == "graphviz": cmd_opts: List[str] = [] - hdocker.run_dockerized_graphviz(in_code_file_path, cmd_opts, out_img_file_path, - force_rebuild=force_rebuild, use_sudo=use_sudo) + hdocker.run_dockerized_graphviz( + in_code_file_path, + cmd_opts, + out_img_file_path, + force_rebuild=force_rebuild, + use_sudo=use_sudo, + ) else: raise ValueError(f"Invalid type: {image_code_type}") # Remove the temp file. @@ -444,7 +474,9 @@ def _render_images( image_code_idx += 1 # E.g., "plantuml" or "mermaid". 
image_code_type = m.group(2) - hdbg.dassert_in(image_code_type, ["plantuml", "mermaid", "tikz", "graphviz"]) + hdbg.dassert_in( + image_code_type, ["plantuml", "mermaid", "tikz", "graphviz"] + ) if m.group(3): hdbg.dassert_eq(user_rel_img_path, "") user_rel_img_path = m.group(4) @@ -469,7 +501,7 @@ def _render_images( force_rebuild=force_rebuild, use_sudo=use_sudo, dry_run=dry_run, - cache_file=cache_file + cache_file=cache_file, ) _ = is_cache_hit # Override the image name if explicitly set by the user. @@ -600,7 +632,9 @@ def _main(parser: argparse.ArgumentParser) -> None: in_lines = hio.from_file(in_file).split("\n") # Get the updated file lines after rendering. out_lines = _render_images( - in_lines, out_file, dst_ext, + in_lines, + out_file, + dst_ext, force_rebuild=args.dockerized_force_rebuild, use_sudo=args.dockerized_use_sudo, dry_run=args.dry_run, diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index b34d7c0f0..e6a356ad1 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -495,6 +495,9 @@ def code_transform_apply_csfy_style() -> _PROMPT_OUT: system += rf""" Apply the style described below to the Python code + ``` + {file_content} + ``` Do not remove any code, just format the existing code using the style. Do not change the behavior of the code. @@ -684,8 +687,7 @@ def slide_to_bullet_points() -> _PROMPT_OUT: def slide_add_example_picture() -> _PROMPT_OUT: - """ - """ + """ """ system = _MD_CONTEXT system += r""" I will give you markdown text diff --git a/helpers/hio.py b/helpers/hio.py index 66fef7cba..23ee74263 100644 --- a/helpers/hio.py +++ b/helpers/hio.py @@ -537,7 +537,7 @@ def change_filename_extension(filename: str, old_ext: str, new_ext: str) -> str: if old_ext == "": _, old_ext = os.path.splitext(filename) # Remove the leading dot. 
- old_ext = old_ext.lstrip('.') + old_ext = old_ext.lstrip(".") hdbg.dassert( is_valid_filename_extension(old_ext), "Invalid extension '%s'", old_ext ) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 81a3a337e..ba1dc1bf5 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -317,8 +317,8 @@ def md_clean_up(txt: str) -> str: txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) # Transform `Example: Training a deep` into `E.g., training a deep`, # converting the word after `Example:` to lower case. - txt = re.sub(r'\bExample:', 'E.g.,', txt) - txt = re.sub(r'\bE.g.,\s+(\w)', lambda m: 'E.g., ' + m.group(1).lower(), txt) + txt = re.sub(r"\bExample:", "E.g.,", txt) + txt = re.sub(r"\bE.g.,\s+(\w)", lambda m: "E.g., " + m.group(1).lower(), txt) return txt diff --git a/helpers/hopenai.py b/helpers/hopenai.py index 0d5ba4d03..d1b4292f6 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -146,13 +146,15 @@ def _calculate_cost( def convert_to_type(col, type_): if type_ == "is_bool": - return col.map(lambda x: isinstance(x, bool) or - x in ["True", "False", "true", "false"] or - x in [1, 0, "1", "0"]) + return col.map( + lambda x: isinstance(x, bool) + or x in ["True", "False", "true", "false"] + or x in [1, 0, "1", "0"] + ) elif type_ == "is_int": - return pd.to_numeric(col, errors='coerce') + return pd.to_numeric(col, errors="coerce") elif type_ == "is_numeric": - return pd.to_numeric(col, errors='coerce') + return pd.to_numeric(col, errors="coerce") elif type_ == "is_string": return col.map(lambda x: isinstance(x, str)) else: @@ -161,10 +163,10 @@ def convert_to_type(col, type_): def infer_column_types(col): vals = { - 'is_numeric': pd.to_numeric(col, errors='coerce').notna(), + "is_numeric": pd.to_numeric(col, errors="coerce").notna(), #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), - 'is_bool': col.map(lambda x: isinstance(x, bool)), - 'is_string': col.map(lambda x: isinstance(x, str)), + "is_bool": col.map(lambda x: 
isinstance(x, bool)), + "is_string": col.map(lambda x: isinstance(x, str)), } vals = {k: float(v.mean()) for k, v in vals.items()} # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", @@ -184,7 +186,9 @@ def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: return df.apply(lambda x: pd.Series(infer_column_types(x))).T -def convert_df(df: pd.DataFrame, *, print_invalid_values: bool = False) -> pd.DataFrame: +def convert_df( + df: pd.DataFrame, *, print_invalid_values: bool = False +) -> pd.DataFrame: types = df.apply(lambda x: pd.Series(infer_column_types(x))).T df_out = [] for col in df.columns: @@ -199,7 +203,6 @@ def convert_df(df: pd.DataFrame, *, print_invalid_values: bool = False) -> pd.Da return df_out - def get_model_stats() -> Dict[str, Any]: url = "https://openrouter.ai/api/v1/models" response = requests.get(url) @@ -241,16 +244,13 @@ def get_model_stats() -> Dict[str, Any]: hdbg.dassert_eq(list(response_json.keys()), ["data"]) response_json = response_json["data"] return response_json - - import pprint pprint.pprint(response.json()) # - #import pandas as pd - - #df = pd.read_json(response.json()) - #print(df) + # import pandas as pd + # df = pd.read_json(response.json()) + # print(df) @functools.lru_cache(maxsize=1024) @@ -278,7 +278,7 @@ def get_completion( # model = "anthropic/claude-3-5-sonnet" # model = "openai/gpt-4o" # model="meta-llama/llama-3-70b-instruct" - #model = "deepseek/deepseek-r1-distill-qwen-1.5b" + # model = "deepseek/deepseek-r1-distill-qwen-1.5b" print("OpenAI API call ... ") # client = OpenAI() # print(openai.api_base) diff --git a/helpers/hsql_test.py b/helpers/hsql_test.py index 462543efc..0d28262e3 100644 --- a/helpers/hsql_test.py +++ b/helpers/hsql_test.py @@ -212,7 +212,6 @@ def _create_docker_files(cls) -> None: host_port = 5432 + idx txt = f"""version: '3.5' - services: # Docker container running Postgres DB. 
{service_name}: diff --git a/helpers/notebooks/hopenai_tutorial.py b/helpers/notebooks/hopenai_tutorial.py index 118947e3c..a918b9b0a 100644 --- a/helpers/notebooks/hopenai_tutorial.py +++ b/helpers/notebooks/hopenai_tutorial.py @@ -59,11 +59,12 @@ # %% import pprint + pprint.pprint(val[0]) # %% -import pandas as pd import numpy as np +import pandas as pd # %% # Normalize the nested JSON @@ -82,7 +83,7 @@ df.dtypes # %% [markdown] -# # +# # # %% for col in df.columns: @@ -95,10 +96,6 @@ hopenai.infer_column_types_df(df) -# %% -def find_ - - # %% pd.to_numeric(df["pricing_request"], errors='coerce').notna() @@ -117,4 +114,4 @@ def find_ # %% # %% -df["total_price"] = +#df["total_price"] = From 3f997b76959f662f08e6427abd8180dca0bc18f3 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 12 May 2025 07:53:10 -0400 Subject: [PATCH 092/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/documentation/transform_notes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev_scripts_helpers/documentation/transform_notes.py b/dev_scripts_helpers/documentation/transform_notes.py index 55aad8f38..a4b41eb0f 100755 --- a/dev_scripts_helpers/documentation/transform_notes.py +++ b/dev_scripts_helpers/documentation/transform_notes.py @@ -115,6 +115,9 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = hmarkdo.format_markdown(txt) elif cmd == "md_only_format": txt = hmarkdo.format_markdown(txt) + elif cmd == "md_bold_bullets": + txt = hmarkdo.bold_first_level_bullets(txt) + txt = hmarkdo.format_markdown(txt) elif cmd == "md_colorize_bold_text": txt = hmarkdo.colorize_bold_text(txt) txt = hmarkdo.format_markdown(txt) From 37acad40afcaa576368fd3b1888792099ffc9a99 Mon Sep 17 00:00:00 2001 From: GP Saggese 
<saggese@gmail.com> Date: Tue, 13 May 2025 15:11:41 -0400 Subject: [PATCH 093/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../documentation/preprocess_notes.py | 22 ++++++---- dev_scripts_helpers/llms/llm_prompts.py | 43 +++++++++++++++++++ helpers/hmarkdown.py | 4 +- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/dev_scripts_helpers/documentation/preprocess_notes.py b/dev_scripts_helpers/documentation/preprocess_notes.py index de060fb0b..310b3d56b 100755 --- a/dev_scripts_helpers/documentation/preprocess_notes.py +++ b/dev_scripts_helpers/documentation/preprocess_notes.py @@ -205,12 +205,15 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: # 2) Remove code block. if _TRACE: _LOG.debug("# 2) Process code block.") - do_continue, in_code_block, out_tmp = hmarkdo.process_code_block( - line, in_code_block, i, lines - ) - out.extend(out_tmp) - if do_continue: - continue + # TODO(gp): Not sure why this is needed. For sure the extra spacing + # creates a problem with the Python code blocks rendered by pandoc beamer. + if False: + do_continue, in_code_block, out_tmp = hmarkdo.process_code_block( + line, in_code_block, i, lines + ) + out.extend(out_tmp) + if do_continue: + continue # 3) Remove single line comment. if _TRACE: _LOG.debug("# 3) Process single line comment.") @@ -255,6 +258,7 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: # It's a line in an answer. out.append(" " * _NUM_SPACES + line) else: + assert 0 # Empty line. 
prev_line_is_verbatim = ((i - 1) > 0) and lines[i - 1].startswith( "```" @@ -277,7 +281,9 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: or prev_line_is_verbatim or next_line_is_verbatim ): - out.append(" " * _NUM_SPACES + line) + #out.append(" " * _NUM_SPACES + line) + assert 0 + pass # c) Clean up. _LOG.debug("Clean up") # Remove all the lines with only spaces. @@ -367,7 +373,7 @@ def _parse() -> argparse.ArgumentParser: ) # TODO(gp): Unclear what it doesn. parser.add_argument( - "--qa", action="store_true", default=None, help="The input file is QA" + "--qa", action="store_true", default=False, help="The input file is QA" ) hparser.add_verbosity_arg(parser) return parser diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index e6a356ad1..144a43884 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -780,6 +780,49 @@ def slide_bold() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def slide_smart_colorize() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + + You will: + + - Not change the text or the structure of the text + - Use the \red{...}, \green{...}, \blue{...}, \violet{} to highlight common + chunks of the expression and text + - Consider that \Pr(.) is a single token and so it should not be highlighted + independently + - Make the chunks as big as possible + + Print only the markdown without any explanation. 
+# <input> +# - Bayes' theorem states: +# $$\Pr(A_i|B) = \frac{\Pr(B|A_i) \cdot \Pr(A_i)}{\Pr(B)}$$ +# where: +# - $\Pr(A_i|B)$ = posterior probability of $A_i$ +# - $\Pr(B|A_i)$ = conditional (inverted) probability +# - $\Pr(A_i)$ = prior probability of $A_i$ +# - $\Pr(B)$ = probability of $B$ +# </input> + +# <output> +# - Bayes' theorem states: +# $$ +# \red{\Pr(A_i|B)} = \frac{\green{\Pr(B|A_i)} \cdot \blue{\Pr(A_i)}}{\violet{\Pr(B)}} +# $$ +# where: +# - \red{$\Pr(A_i|B)$} = posterior probability of \blue{$A_i$} +# - \green{$\Pr(B|A_i)$} = conditional (inverted) probability +# - \blue{$\Pr(A_i)$} = prior probability of \blue{$A_i$} +# - \violet{$\Pr(B)$} = probability of \violet{$B$} +# </output> + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + # ############################################################################# diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index ba1dc1bf5..874fdb0f1 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -312,13 +312,15 @@ def md_clean_up(txt: str) -> str: txt = re.sub(r"→", r"$\\rightarrow$", txt) # Remove empty spaces at beginning / end of Latex equations $...$. # E.g., $ \text{Student} $ becomes $\text{Student}$ - txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) + #txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) # Remove dot at the end of each line. txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) # Transform `Example: Training a deep` into `E.g., training a deep`, # converting the word after `Example:` to lower case. txt = re.sub(r"\bExample:", "E.g.,", txt) txt = re.sub(r"\bE.g.,\s+(\w)", lambda m: "E.g., " + m.group(1).lower(), txt) + # Replace \mid with `|`. 
+ txt = re.sub(r"\\mid", r"|", txt) return txt From 5200d6784ab2edf74b7a824f4fa4eb6181018e0e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 13 May 2025 20:55:05 -0400 Subject: [PATCH 094/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/documentation/preprocess_notes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/documentation/preprocess_notes.py b/dev_scripts_helpers/documentation/preprocess_notes.py index 310b3d56b..f571f3f93 100755 --- a/dev_scripts_helpers/documentation/preprocess_notes.py +++ b/dev_scripts_helpers/documentation/preprocess_notes.py @@ -42,7 +42,9 @@ def _process_abbreviations(in_line: str) -> str: line = in_line for x, y in [ (r"=>", r"\implies"), - (r"->", r"\rightarrow"), + # TODO(gp): This collides with the arrow in graphviz commands. We + # should skip this transformation if we are in a graphviz block. 
+ #(r"->", r"\rightarrow"), (r"-^", r"\uparrow"), (r"-v", r"\downarrow"), ]: From 50d8486eb0afc1889288930f115f6b1682f38a07 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 15 May 2025 20:24:13 -0400 Subject: [PATCH 095/193] Merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- helpers/hopenai.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/helpers/hopenai.py b/helpers/hopenai.py index d1b4292f6..de4e00564 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -274,25 +274,24 @@ def get_completion( call :return: completion text """ - # model = _MODEL if model is None else model - # model = "anthropic/claude-3-5-sonnet" - # model = "openai/gpt-4o" - # model="meta-llama/llama-3-70b-instruct" - # model = "deepseek/deepseek-r1-distill-qwen-1.5b" - print("OpenAI API call ... ") - # client = OpenAI() - # print(openai.api_base) - # assert 0 - # openai.api_base ="https://openrouter.ai/api/v1" - # openai.api_key = os.environ.get("OPENROUTER_API_KEY") - if False: + provider_name = "openai" + #provider_name = "openrouter" + print(f"Calling {provider_name} API call ... 
") + memento = htimer.dtimer_start(logging.DEBUG, "OpenAI API call") + if provider_name == "openai": + model = _MODEL if model is None else model + client = OpenAI() + elif provider_name == "openrouter": + #model = "anthropic/claude-3-5-sonnet" + #model = "openai/gpt-4o" + model = "meta-llama/llama-3-70b-instruct" + #client = OpenAI() client = OpenAI( base_url="https://openrouter.ai/api/v1", # Important: Use OpenRouter's base URL - api_key=os.environ.get("OPENROUTER_API_KEY"), - ) + api_key=os.environ.get("OPENROUTER_API_KEY") + ) else: - client = OpenAI() - memento = htimer.dtimer_start(logging.DEBUG, "OpenAI API call") + raise ValueError(f"Unknown provider: {provider_name}") if not report_progress: completion = client.chat.completions.create( model=model, @@ -329,10 +328,11 @@ def get_completion( # Report the time taken. msg, _ = htimer.dtimer_stop(memento) print(msg) - # Calculate and accumulate the cost - # cost = _calculate_cost(completion, model, print_cost) - # Accumulate the cost. - # _accumulate_cost_if_needed(cost) + if provider_name == "openai": + # Calculate and accumulate the cost + cost = _calculate_cost(completion, model, print_cost) + # Accumulate the cost. 
+ _accumulate_cost_if_needed(cost) return response From bfee258646c3079992782772a21bf087d6eb468c Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 15 May 2025 20:29:09 -0400 Subject: [PATCH 096/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 88 +++++++++++++++---------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 144a43884..d69702160 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -100,10 +100,13 @@ def test() -> _PROMPT_OUT: # ############################################################################# -# Fix. +# Code. # ############################################################################# +# Fix + + def code_fix_existing_comments() -> _PROMPT_OUT: """ Fix the already existing comments in the Python code. @@ -287,7 +290,13 @@ def code_fix_log_string() -> _PROMPT_OUT: ``` For instance, convert: + ``` + hdbg.dassert_in(env_var, os.environ, f"env_var='{str(env_var)}' is not in env_vars='{str(os.environ.keys())}''") + ``` to + ``` + hdbg.dassert_in(env_var, os.environ, "env_var='%s' is not in env_vars='%s'", env_var, str(os.environ.keys())) + ``` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -304,12 +313,18 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: system = _CODING_CONTEXT system += r""" Fix statements like: + ``` + raise ValueError(f"Unsupported data_source='{data_source}'") + ``` by using f-strings (formatted string literals) instead of % formatting and format strings. Do not print any comment, but just the converted code. 
For instance, convert: + ``` + "Hello, %s. You are %d years old." % (name, age) + ``` to """ pre_transforms: Set[str] = set() @@ -320,9 +335,7 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. - - You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. """ system = _CODING_CONTEXT system += r""" @@ -465,9 +478,7 @@ def code_review_refactoring() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -# ############################################################################# -# Transform the code. -# ############################################################################# +# Transform code. def code_transform_remove_redundancy() -> _PROMPT_OUT: @@ -530,9 +541,7 @@ def code_transform_apply_linter_instructions() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -# ############################################################################# # Unit tests. -# ############################################################################# # TODO(gp): Probably obsolete since Cursor can do it. @@ -567,6 +576,8 @@ def code_write_1_unit_test() -> _PROMPT_OUT: # ############################################################################# +# Markdown. +# ############################################################################# _MD_CONTEXT = r""" @@ -600,7 +611,8 @@ def md_summarize_short() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def md_clean_up_how_to_guide_doc() -> _PROMPT_OUT: + +def md_clean_up_how_to_guide() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" Format the text passed as a how-to guide. 
@@ -620,46 +632,56 @@ def md_clean_up_how_to_guide_doc() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["prettier_markdown"] + post_container_transforms = ["format_markdown"] return system, pre_transforms, post_transforms, post_container_transforms -def md_clean_up_explanation_doc() -> _PROMPT_OUT: +# ############################################################################# +# Doc. +# ############################################################################# + + +def doc_create_bullets() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - Rewrite the provided markdown to transform it into an explanation document - that clearly explains a concept or idea. Follow this structure: - - - Abstract - Provide a clear and concise summary of the document in approximately 200 words. - - Introduction - Briefly introduce the topic and its relevance or context. + I will give you markdown text - - Core Concepts - List and explain the key ideas necessary to understand the topic. + You will: + - Convert the following markdown text into bullet points + - Use multiple levels of bullets, if needed + - Not modify the text, just convert it into bullet points - - How It Works - Describe the mechanics or process in a step-by-step or logical manner. + Print only the markdown without any explanation. + """ + pre_transforms: Set[str] = set() + post_transforms = { + "remove_end_of_line_periods", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms - - Design Rationale - Explain the reasoning behind the approach, design, or structure. - - (Optional) Trade-offs and Alternatives - Discuss other possible approaches, including their pros and cons. +def doc_summarize_short() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text You will: - - Maintain clarity and conciseness throughout. 
- - Use bullet points and indentation to enhance readability everywhere - - Preserve all information from the original content — do not omit or - summarize unless it improves clarity. + - Write 3 bullet points that summarize the text + + Print only the markdown without any explanation. """ pre_transforms: Set[str] = set() - post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["prettier_markdown"] + post_transforms = { + "remove_end_of_line_periods", + } + post_container_transforms = ["format_markdown"] return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# +# Slide. +# ############################################################################# def slide_to_bullet_points() -> _PROMPT_OUT: From 18eb1e5ffaff136a964031303bbebc8bb591d83f Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 15 May 2025 21:06:14 -0400 Subject: [PATCH 097/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 19 ++++++++++++++----- helpers/hmarkdown.py | 17 +++++++++++++++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index d69702160..2cada9a65 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -589,10 +589,9 @@ def code_write_1_unit_test() -> _PROMPT_OUT: def md_rewrite() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - Rewrite the text passed as if you were writing a technical document to - increase clarity and readability. - Maintain the structure of the text as much as possible, in terms of bullet - points and their indentation + - Rewrite the text passed to increase clarity and readability. 
+ - Maintain the structure of the text as much as possible, in terms of bullet + points and their indentation """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -611,7 +610,6 @@ def md_summarize_short() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms - def md_clean_up_how_to_guide() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" @@ -668,6 +666,7 @@ def doc_summarize_short() -> _PROMPT_OUT: You will: - Write 3 bullet points that summarize the text + - Each bullet point should be at most 30 words Print only the markdown without any explanation. """ @@ -679,6 +678,16 @@ def doc_summarize_short() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def doc_rewrite() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + - Rewrite the text passed to increase clarity and readability. + - Maintain the structure of the text as much as possible, in terms of bullet + points and their indentation + """ + return md_rewrite() + + # ############################################################################# # Slide. # ############################################################################# diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 874fdb0f1..7d6cddb4a 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -865,7 +865,7 @@ def colorize_bold_text( return result -def remove_empty_lines_from_markdown(markdown_text: str) -> str: +def remove_empty_lines_from_markdown_excluding_first_level(markdown_text: str) -> str: """ Remove all empty lines from markdown text and add empty lines only before first level bullets. @@ -875,7 +875,7 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: """ # Split into lines and remove empty ones. lines = [line for line in markdown_text.split("\n") if line.strip()] - # Remove all empty lines. + # Add empty lines only before first level bullets. 
result = [] for i, line in enumerate(lines): # Check if current line is a first level bullet (no indentation). @@ -887,6 +887,18 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: return "\n".join(result) +def remove_empty_lines_from_markdown(markdown_text: str) -> str: + """ + Remove all empty lines from markdown text. + + :param markdown_text: Input markdown text + :return: Formatted markdown text + """ + # Split into lines and remove empty ones. + result = [line for line in markdown_text.split("\n") if line.strip()] + return "\n".join(result) + + def prettier_markdown(txt: str) -> str: txt = dshdlino.prettier_on_str(txt) return txt @@ -895,4 +907,5 @@ def prettier_markdown(txt: str) -> str: def format_markdown(txt: str) -> str: txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) + #txt = remove_empty_lines_from_markdown_excluding_first_level(txt) return txt From 8d763934589f4894596e32f569d16285fbe0a459 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 16 May 2025 21:01:52 -0400 Subject: [PATCH 098/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/documentation/render_images.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index 53483f7fb..9a837e667 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -253,6 +253,8 @@ def _render_image_code( \documentclass{standalone} \usepackage{tikz} \usepackage{amsmath} + \usepackage{pgfplots} + \pgfplotsset{compat=1.17} \begin{document} \begin{tikzpicture} """ From 711e8878dedd48795531adc61221d9ba8d232897 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 19 May 2025 06:32:56 -0400 Subject: [PATCH 099/193] Improve MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/render_images.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index 9a837e667..c1ef3af1a 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -361,7 +361,7 @@ def _get_comment_prefix_postfix(extension: str) -> Tuple[str, str]: return comment_prefix, comment_postfix -def _insert_image_code(extension: str, rel_img_path: str) -> str: +def _insert_image_code(extension: str, rel_img_path: str, user_img_size: str) -> str: """ Insert the code to display the image in the output file. """ @@ -369,7 +369,10 @@ def _insert_image_code(extension: str, rel_img_path: str) -> str: if extension in (".md", ".txt"): # Use the Markdown syntax. txt = f"![]({rel_img_path})" - # f"![]({rel_img_path})" + "{height=60%}" + # Add the size, if specified. + if user_img_size: + # E.g., "![](path/to/image.png){ height=100% }" + txt += "{ " + user_img_size + " }" elif extension == ".tex": # Use the LaTeX syntax. # We need to leave it on a single line to make it easy to find and @@ -434,6 +437,8 @@ def _render_images( image_code_idx = 0 # Image name explicitly set by the user with `plantuml(...)` syntax. user_rel_img_path = "" + # Image size explicitly set by the user with `plantuml[...]` syntax. + user_img_size = "" # Store the state of the parser. state = "search_image_code" # The code should look like: @@ -447,7 +452,8 @@ def _render_images( ({comment}\s*)? # Optional comment prefix ``` # Opening backticks for code block (plantuml|mermaid|tikz|graphviz*) # Image code type - (\((.*)\))? # Optional user-specified image name in parentheses + (\((.*)\))? # Optional user-specified image name as (...) + (\[(.*)\])? # Optional user-specified image size as [...] 
\s*$ # Any trailing whitespace and end of the line """, re.VERBOSE, @@ -483,6 +489,10 @@ def _render_images( hdbg.dassert_eq(user_rel_img_path, "") user_rel_img_path = m.group(4) _LOG.debug(hprint.to_str("user_rel_img_path")) + if m.group(5): + hdbg.dassert_eq(user_img_size, "") + user_img_size = m.group(6) + _LOG.debug(hprint.to_str("user_img_size")) # Comment out the beginning of the image code. out_lines.append( _comment_if_needed(state, line, comment_prefix, comment_postfix) @@ -516,7 +526,8 @@ def _render_images( state, line, comment_prefix, comment_postfix ) ) - out_lines.append(_insert_image_code(extension, rel_img_path)) + out_lines.append(_insert_image_code(extension, rel_img_path, user_img_size)) + user_img_size = "" # Set the parser to search for a new image code block. if state == "found_image_code": state = "search_image_code" From 649eee9b2411243570e09c4381087af5b613c607 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 19 May 2025 19:41:42 -0400 Subject: [PATCH 100/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 305 ++++++++++++++-------- dev_scripts_helpers/llms/llm_transform.py | 19 +- helpers/hio.py | 1 + helpers/hmarkdown.py | 107 +++++++- helpers/hparser.py | 1 - 5 files changed, 311 insertions(+), 122 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 2cada9a65..703d4cc30 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -107,34 +107,14 @@ def test() -> _PROMPT_OUT: # Fix -def code_fix_existing_comments() -> _PROMPT_OUT: +def code_fix_from_imports() -> _PROMPT_OUT: """ - Fix the already existing comments in the Python code. + Fix code to use imports instead of "from import" statements. 
""" system = _CODING_CONTEXT system += r""" - Make sure that comments in the code are: - - in imperative form - - a correct English phrase - - end with a period `.` - - clear - - Comments should be before the code that they refer to - E.g., - ``` - dir_name = self.directory.name # For example, "helpers". - ``` - should become - ``` - # E.g., "helpers". - dir_name = self.directory.name - ``` - - Variables should be enclosed in a back tick, like `bar`. - Functions should be reported as `foo()`. - - Do not change the code. - Do not add any empty line. + Replace any Python "from import" statement like `from X import Y` with the + form `import X` and then replace the uses of `Y` with `X.Y` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -142,25 +122,24 @@ def code_fix_existing_comments() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def code_fix_improve_comments() -> _PROMPT_OUT: +def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: """ - Add comments to Python code. + Fix code missing the star before optional parameters. """ system = _CODING_CONTEXT system += r""" - - Add comments for the parts of the code that are not properly commented - - E.g., every chunk of 4 or 5 lines of code add comment explaining the - code - - Comments should go before the logical chunk of code they describe - - Comments should be in imperative form, a full English phrase, and end with a - period `.` - - Do not comment every single line of code and especially logging statements - - Add examples of the values of variables, when you are sure of the types - and values of variables. If you are not sure, do not add any information. - - Do not change the code. - Do not remove any already existing comment. - Do not add any empty line. 
+ When you find a Python function with optional parameters, add a star after + the mandatory parameters and before the optional parameters, and make sure + that the function is called with the correct number of arguments. + + For example, convert: + ``` + def process_data(data, threshold=0.5): + ``` + to: + ``` + def process_data(data: List[float], threshold: float = 0.5) -> List[float]: + ``` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -168,34 +147,30 @@ def code_fix_improve_comments() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def code_fix_logging_statements() -> _PROMPT_OUT: - """ - Add logging statements to Python code. - """ +def code_fix_function_type_hints() -> _PROMPT_OUT: system = _CODING_CONTEXT - system += r''' - When a variable `foobar` is important for debugging the code in case of - failure, add statements like: + system += r""" + Add type hints only to the function definitions, if they are missing. + + For example, convert: ``` - _LOG.debug(hprint.to_str("foobar")) + def process_data(data, threshold=0.5): + results = [] + for item in data: + if item > threshold: + results.append(item) + return results ``` - - At the beginning of an important function, after the docstring, add code - like + to: ``` - def get_text_report(self) -> str: - """ - Generate a text report listing each module's dependencies. - - :return: Text report of dependencies, one per line. - """ - _LOG.debug(hprint.func_signature_to_str()) + def process_data(data: List[float], threshold: float = 0.5) -> List[float]: + results: List[float] = [] + for item in data: + if item > threshold: + results.append(item) + return results ``` - - Do not change the code. - Do not remove any already existing comment. - Do not add any empty line. 
- ''' + """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} post_container_transforms: List[str] = [] @@ -215,11 +190,13 @@ def code_fix_docstrings() -> _PROMPT_OUT: """ system = _CODING_CONTEXT system += r''' - Make sure each function as a REST docstring + Make sure each function has a REST docstring - The first comment should be in imperative mode and fit in a single line of less than 80 characters - - To describe the parameters use the REST style, which requires each + - Describe the parameters using the REST style, which requires each parameter to be prepended with :param + - Describe the return value using the REST style, which requires the return + value to be prepended with :return An example of a correct docstring is: ``` @@ -239,29 +216,95 @@ def _format_greeting(name: str, *, greeting: str = "Hello") -> str: return system, pre_transforms, post_transforms, post_container_transforms -def code_fix_type_hints() -> _PROMPT_OUT: +def code_fix_complex_assignments() -> _PROMPT_OUT: system = _CODING_CONTEXT system += r""" - Add type hints to the Python code passed. + Convert complex assignments into if-then-else statements. 
For example, convert: ``` - def process_data(data, threshold=0.5): - results = [] - for item in data: - if item > threshold: - results.append(item) - return results + capitalized_parts = [ + w.capitalize() if is_first_or_last or w.lower() not in small_words else w.lower() + for w in parts + ] ``` to: ``` - def process_data(data: List[float], *, threshold: float = 0.5) -> List[float]: - results: List[float] = [] - for item in data: - if item > threshold: - results.append(item) - return results + capitalized_parts = [] + for w in parts: + if is_first_or_last or w.lower() not in small_words: + w_out = w.capitalize() + else: + w_out = w.lower() + capitalized_parts.append(w_out) + ``` + + For example, convert: + ``` + is_first_or_last = (i == 0 or i == len(tokens) - 1 or + (i > 0 and not re.search(r'\w', tokens[i - 1])) or + (i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]))) + ``` + to: + ``` + if i == 0: + is_first_or_last = True + elif i == len(tokens) - 1: + is_first_or_last = True + elif i > 0 and not re.search(r'\w', tokens[i - 1]): + is_first_or_last = True + elif i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]): + is_first_or_last = True + else: + is_first_or_last = False + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms + + +def code_fix_comments() -> _PROMPT_OUT: + """ + Fix the comments in the Python code. + """ + system = _CODING_CONTEXT + system += r""" + Make sure that comments in the code are: + - in imperative form + - a clear and correct English phrase + - end with a period `.` + + Comments should go before the code that they refer to + E.g., + ``` + dir_name = self.directory.name # For example, "helpers". + ``` + becomes + ``` + # E.g., "helpers". + dir_name = self.directory.name ``` + E.g., + ``` + if re.search(r'\w', token): # Check if the token is a word. 
+ ``` + becomes: + ``` + # Check if the token is a word. + if re.search(r'\w', token): + ``` + + - Add comments for the parts of the code that are not properly commented + - E.g., every chunk of 4 or 5 lines of code add comment explaining the + code + - Do not comment every single line of code and especially logging statements + - Add examples of the values of variables, when you are sure of the types + and values of variables. If you are not sure, do not add any information. + + Classes should be reported as `ClassName`. + Variables should be enclosed in a back tick, like `bar`. + Functions should be reported as `foo()`. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -269,6 +312,66 @@ def process_data(data: List[float], *, threshold: float = 0.5) -> List[float]: return system, pre_transforms, post_transforms, post_container_transforms +# def code_fix_improve_comments() -> _PROMPT_OUT: +# """ +# Add comments to Python code. +# """ +# system = _CODING_CONTEXT +# system += r""" +# - Add comments for the parts of the code that are not properly commented +# - E.g., every chunk of 4 or 5 lines of code add comment explaining the +# code +# - Comments should go before the logical chunk of code they describe +# - Comments should be in imperative form, a full English phrase, and end with a +# period `.` +# - Do not comment every single line of code and especially logging statements +# - Add examples of the values of variables, when you are sure of the types +# and values of variables. If you are not sure, do not add any information. + +# Do not change the code. +# Do not remove any already existing comment. +# Do not add any empty line. 
+# """ +# pre_transforms: Set[str] = set() +# post_transforms = {"remove_code_delimiters"} +# post_container_transforms: List[str] = [] +# return system, pre_transforms, post_transforms, post_container_transforms + + +def code_fix_logging_statements() -> _PROMPT_OUT: + """ + Add logging statements to Python code. + """ + system = _CODING_CONTEXT + system += r''' + When a variable `foobar` is important for debugging the code in case of + failure, add statements like: + ``` + _LOG.debug(hprint.to_str("foobar")) + ``` + + At the beginning of an important function, after the docstring, add code + like + ``` + def get_text_report(self) -> str: + """ + Generate a text report listing each module's dependencies. + + :return: Text report of dependencies, one per line. + """ + _LOG.debug(hprint.func_signature_to_str()) + ``` + + Do not change the code. + Do not remove any already existing comment. + Do not add any empty line. + ''' + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms + + def code_fix_log_string() -> _PROMPT_OUT: """ Fix the log statements to use % formatting. @@ -353,37 +456,6 @@ def code_fix_by_using_perc_strings() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def code_fix_from_imports() -> _PROMPT_OUT: - """ - Fix code to use imports instead of "from import" statements. 
- """ - system = _CODING_CONTEXT - system += r""" - Replace any Python "from import" statement like `from X import Y` with the - form `import X` and then replace the uses of `Y` with `X.Y` - """ - pre_transforms: Set[str] = set() - post_transforms = {"remove_code_delimiters"} - post_container_transforms: List[str] = [] - return system, pre_transforms, post_transforms, post_container_transforms - - -def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: - """ - Fix code missing the star before optional parameters. - """ - system = _CODING_CONTEXT - system += r""" - When you find a Python function with optional parameters, add a star after - the mandatory parameters and before the optional parameters, and make sure - that the function is called with the correct number of arguments. - """ - pre_transforms: Set[str] = set() - post_transforms = {"remove_code_delimiters"} - post_container_transforms: List[str] = [] - return system, pre_transforms, post_transforms, post_container_transforms - - def code_fix_unit_test() -> _PROMPT_OUT: """ """ system = _CODING_CONTEXT @@ -395,21 +467,22 @@ def code_fix_unit_test() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def code_fix_csfy_style() -> _PROMPT_OUT: +def code_fix_code() -> _PROMPT_OUT: """ Apply all the transformations required to write code according to the Causify conventions. 
""" # > grep "def code_fix" ./dev_scripts_helpers/llms/llm_prompts.py | awk '{print $2 }' function_names = [ - "code_fix_existing_comments", - "code_fix_docstrings", - "code_fix_type_hints", - "code_fix_log_string", - "code_fix_by_using_f_strings", - "code_fix_by_using_perc_strings", "code_fix_from_imports", "code_fix_star_before_optional_parameters", + "code_fix_function_type_hints", + "code_fix_docstrings", + "code_fix_complex_assignments", + "code_fix_comments", + # "code_fix_log_string", + # "code_fix_by_using_f_strings", + # "code_fix_by_using_perc_strings", ] system_prompts = [] for function_name in function_names: diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 585902b04..a93fa9770 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -63,6 +63,12 @@ def _parse() -> argparse.ArgumentParser: ) hparser.add_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) + parser.add_argument( + "-d", + "--diff_compare", + action="store_true", + help="Compare the original and the transformed with vimdiff", + ) parser.add_argument( "-c", "--compare", @@ -273,9 +279,9 @@ def _main(parser: argparse.ArgumentParser) -> None: out_txt = hmarkdo.prettier_markdown(out_txt) # if dshlllpr.to_run("format_markdown", post_container_transforms): - # Note that we need to run this outside the `llm_transform` container to - # avoid to do docker-in-docker in the `llm_transform` container (which - # doesn't support that). + # Note that we need to run this outside the `llm_transform` + # container to avoid to do docker-in-docker in the `llm_transform` + # container (which doesn't support that). 
out_txt = hmarkdo.md_clean_up(out_txt) out_txt = hmarkdo.format_markdown(out_txt) if args.bold_first_level_bullets: @@ -298,6 +304,13 @@ def _main(parser: argparse.ArgumentParser) -> None: "Not all post_transforms were run: %s", post_container_transforms, ) + if args.diff_compare: + txt = hio.from_file(tmp_in_file_name) + hio.to_file("original.txt", txt) + hio.to_file("transformed.txt", out_txt) + cmd = "vimdiff original.txt transformed.txt" + hio.create_executable_script("tmp.llm_diff.sh", cmd) + if args.compare: out_txt_tmp = [] out_txt_tmp.append("#### Original ####") diff --git a/helpers/hio.py b/helpers/hio.py index 3b4de5a20..c5b00eb1d 100644 --- a/helpers/hio.py +++ b/helpers/hio.py @@ -553,6 +553,7 @@ def change_filename_extension(filename: str, old_ext: str, new_ext: str) -> str: # TODO(gp): @all Use msg in all uses of this script `jackpyc "create_executable"` +# TODO(gp): `file_name` should go last. def create_executable_script( file_name: str, content: str, *, msg: str = "" ) -> None: diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 7d6cddb4a..72eb6fc58 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -769,6 +769,109 @@ def selected_navigation_to_str( # ############################################################################# +import re + + + +import re # Required for regular expression operations. + +def capitalize_slide_titles(text: str) -> str: + """ + Capitalize slide titles according to specific rules. + + :param text: string of text to be processed (e.g., "a title on a slide") + :return: processed string with capitalized slide titles + """ + # Define small words that should not be capitalized unless they are the first or last word. 
+ small_words = { + 'a', 'an', 'and', 'as', 'at', 'but', 'by', 'for', 'in', 'nor', 'of', + 'off', 'on', 'or', 'per', 'so', 'the', 'to', 'up', 'via', 'with', 'yet' + } + + def capitalize_word(word: str, is_first_or_last: bool) -> str: + """ + Capitalize a word based on its position and predefined rules. + + :param word: the word to potentially capitalize (e.g., "and") + :param is_first_or_last: boolean indicating if the word is first or last in sentence + :return: word with applied capitalization rules + """ + # Split compound words into parts. + parts = word.split('-') + capitalized_parts = [] + # Process each part of the compound word based on its position. + for word in parts: + if is_first_or_last or word.lower() not in small_words: + word_out = word.capitalize() + else: + word_out = word.lower() + capitalized_parts.append(word_out) + ret = '-'.join(capitalized_parts) + return ret + + # Split into words while preserving punctuation. + tokens = re.findall(r"\b[\w'-]+\b|[^\w\s]", text, re.UNICODE) + + # Filter out tokens that are not words. + words = [token for token in tokens if re.search(r'\w', token)] + + result = [] + # Iterate over tokens and apply capitalization rules. + for i, token in enumerate(tokens): + # Check if token is a word. + if re.search(r'\w', token): + if i == 0: + is_first_or_last = True + elif i == len(tokens) - 1: + is_first_or_last = True + elif i > 0 and not re.search(r'\w', tokens[i - 1]): + is_first_or_last = True + elif i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]): + is_first_or_last = True + else: + is_first_or_last = False + + result.append(capitalize_word(token, is_first_or_last or token.lower() not in small_words)) + else: + # Keep punctuation as-is. + result.append(token) + + # Join words into a single string while preserving spacing and trimming surplus whitespace. 
+ return ''.join( + [word if re.match(r'\W', word) else ' ' + word for word in result] + ).strip() + + +# In this adjusted code, I replaced the complex inline assignments with `if-then-else` structures, incorporated informative docstrings into the function definitions using REST style for clarity, added comments explaining significant code sections, and ensured comments were in imperative form and grammatically correct. + + +def capitalize_first_level_bullets(markdown_text: str) -> str: + """ + Make first-level bullets bold in markdown text. + + :param markdown_text: Input markdown text + :return: Formatted markdown text with first-level bullets in bold + """ + # **Subject-Matter Experts (SMEs)** -> **Subject-Matter Experts (SMEs)** + # Business Strategists -> Business strategists + # Establish a Phased, Collaborative Approach -> Establish a phased, collaborative approach + lines = markdown_text.split("\n") + result = [] + for line in lines: + # Check if this is a first-level bullet point. + if re.match(r"^\s*- ", line): + # Check if the line has bold text it in it. + if not re.search(r"\*\*", line): + # Bold first-level bullets. + indentation = len(line) - len(line.lstrip()) + if indentation == 0: + # First-level bullet, add bold markers. + line = re.sub(r"^(\s*-\s+)(.*)", r"\1**\2**", line) + result.append(line) + else: + result.append(line) + return "\n".join(result) + # These are the colors that are supported by Latex / markdown, are readable on # white, and form an equidistant color palette. _ALL_COLORS = [ @@ -865,7 +968,7 @@ def colorize_bold_text( return result -def remove_empty_lines_from_markdown_excluding_first_level(markdown_text: str) -> str: +def format_first_level_bullets(markdown_text: str) -> str: """ Remove all empty lines from markdown text and add empty lines only before first level bullets. 
@@ -907,5 +1010,5 @@ def prettier_markdown(txt: str) -> str: def format_markdown(txt: str) -> str: txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) - #txt = remove_empty_lines_from_markdown_excluding_first_level(txt) + #txt = format_first_level_bullets(txt) return txt diff --git a/helpers/hparser.py b/helpers/hparser.py index cacedc32e..1cb6d391e 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -661,7 +661,6 @@ def add_prompt_arg( Add common command line arguments for `*llm_transform.py` scripts. """ parser.add_argument( - "-d", "--debug", action="store_true", help="Print before/after the transform", From d5b17c44834cd81f67cb0b6b6f3c2a17acea9b0a Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 19 May 2025 20:21:09 -0400 Subject: [PATCH 101/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hopenai.py | 867 +++++++++++++++++++---------------- helpers/test/test_hopenai.py | 8 +- 2 files changed, 465 insertions(+), 410 deletions(-) diff --git a/helpers/hopenai.py b/helpers/hopenai.py index a1abeeda9..a6377b1f0 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -21,6 +21,7 @@ import tqdm import helpers.hdbg as hdbg +import helpers.hgit as hgit import helpers.hprint as hprint import helpers.htimer as htimer @@ -30,18 +31,8 @@ # _LOG.debug = _LOG.info -# FALLBACK: checks the response in cache, if doesn't exist make a call to OPENAI. -_CACHE_MODE = "FALLBACK" # gpt-4o-mini is Openai Model, its great for most tasks. _MODEL = "openai/gpt-4o-mini" -# File for saving get_completion() cache. -_CACHE_FILE = "cache.get_completion.json" -# File for storing openrouter models information. -_MODELS_INFO_FILE = "openrouter_models_info.csv" -# Temperature adjusts an LLM’s sampling diversity: -# lower values make it more deterministic, while higher values foster creative variation. 
-# 0 < Temperature <= 2, 0.1 is default value in openai models. -_TEMPERATURE = 0.1 # ############################################################################# # Utility Functions @@ -90,35 +81,119 @@ def _extract( # OpenAI API Helpers # ############################################################################# +# TODO(gp): There are a lot of functions that share state (e.g., provider_name). +# We should refactor them to use a class `LlmResponse`. -_CURRENT_OPENAI_COST = None + +# TODO(*): Select the provider from command line together with the model. +_PROVIDER_NAME = "openai" + + +def get_openai_client(provider_name: str = _PROVIDER_NAME) -> openai.OpenAI: + """ + Get an OpenAI compatible client. + """ + if provider_name == "openai": + base_url = "https://api.openai.com/v1" + api_key = os.environ.get("OPENAI_API_KEY") + elif provider_name == "openrouter": + base_url = "https://openrouter.ai/api/v1" + api_key = os.environ.get("OPENROUTER_API_KEY") + else: + raise ValueError(f"Unknown provider: {provider_name}") + _LOG.debug(hprint.to_str("provider_name", "base_url")) + client = openai.OpenAI(base_url=base_url, api_key=api_key) + return client -def get_openai_client() -> openai.OpenAI: - base_url = "https://openrouter.ai/api/v1" - api_key = os.environ.get("OPENROUTER_API_KEY") - return openai.OpenAI(base_url=base_url, api_key=api_key) +def _get_default_model(provider_name: str = _PROVIDER_NAME) -> str: + """ + Get the default model for a provider. + """ + if provider_name == "openai": + model = "gpt-4o" + elif provider_name == "openrouter": + model = "openai/gpt-4o" + else: + raise ValueError(f"Unknown provider: {provider_name}") + return model -def _get_models_info() -> list[dict]: - # Get all openrouter models info. +def _get_models_info_file() -> str: + """ + Get the path to the file for storing OpenRouter models info. 
+ """ + helpers_root = hgit.find_helpers_root() + file_path = os.path.join(helpers_root, "tmp.openrouter_models_info.csv") + return file_path + + +# TODO(*): Return a pandas DataFrame. +def _retrieve_openrouter_models_info() -> List[Dict[str, Any]]: + """ + Retrieve OpenRouter models info from the OpenRouter API. + """ response = requests.get("https://openrouter.ai/api/v1/models").json() - return response["data"] + # {'architecture': {'input_modalities': ['text', 'image'], + # 'instruct_type': None, + # 'modality': 'text+image->text', + # 'output_modalities': ['text'], + # 'tokenizer': 'Mistral'}, + # 'context_length': 131072, + # 'created': 1746627341, + # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' + # 'language model designed to deliver frontier-level ' + # ... + # 'broad compatibility across cloud environments.', + # 'id': 'mistralai/mistral-medium-3', + # 'name': 'Mistral: Mistral Medium 3', + # 'per_request_limits': None, + # 'pricing': {'completion': '0.000002', + # 'image': '0', + # 'internal_reasoning': '0', + # 'prompt': '0.0000004', + # 'request': '0', + # 'web_search': '0'}, + # 'supported_parameters': ['tools', + # 'tool_choice', + # 'max_tokens', + # 'temperature', + # 'top_p', + # 'stop', + # 'frequency_penalty', + # 'presence_penalty', + # 'response_format', + # 'structured_outputs', + # 'seed'], + # 'top_provider': {'context_length': 131072, + # 'is_moderated': False, + # 'max_completion_tokens': None}} + response_json = response.json() + # There is only one key in the response. + hdbg.dassert_eq(list(response_json.keys()), ["data"]) + response_json = response_json["data"] + model_info_df = pd.DataFrame(response_json) + return model_info_df -def _save_models_to_csv( - models_info: list, file_name: str = _MODELS_INFO_FILE +def _save_models_info_to_csv( + model_info_df: pd.DataFrame, file_name: str, ) -> pd.DataFrame: - models_info_obj = pd.DataFrame(models_info) + """ + Save models info to a CSV file. 
+ """ + hdbg.dassert_isinstance(file_name, str) + hdbg.dassert_ne(file_name, "") + # TODO(*): Save all the data. # Extract prompt, completion pricing from pricing column. - models_info_obj["prompt_pricing"] = models_info_obj["pricing"].apply( + model_info_df["prompt_pricing"] = model_info_df["pricing"].apply( lambda x: x["prompt"] ) - models_info_obj["completion_pricing"] = models_info_obj["pricing"].apply( + model_info_df["completion_pricing"] = model_info_df["pricing"].apply( lambda x: x["completion"] ) # Take only relevant columns. - models_info_obj = models_info_obj[ + model_info_df = model_info_df[ [ "id", "name", @@ -129,20 +204,89 @@ def _save_models_to_csv( ] ] # Save to CSV file. - models_info_obj.to_csv(file_name, index=False) - return models_info_obj + model_info_df.to_csv(file_name, index=False) + return model_info_df + + +import pandas as pd + + +# TODO(gp): This is general enough to be moved in hpandas.py +def convert_to_type(col, type_): + if type_ == "is_bool": + return col.map( + lambda x: isinstance(x, bool) + or x in ["True", "False", "true", "false"] + or x in [1, 0, "1", "0"] + ) + elif type_ == "is_int": + return pd.to_numeric(col, errors="coerce") + elif type_ == "is_numeric": + return pd.to_numeric(col, errors="coerce") + elif type_ == "is_string": + return col.map(lambda x: isinstance(x, str)) + else: + raise ValueError(f"Unknown column type: {type_}") -def _construct_messages( +def infer_column_types(col): + vals = { + "is_numeric": pd.to_numeric(col, errors="coerce").notna(), + #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), + "is_bool": col.map(lambda x: isinstance(x, bool)), + "is_string": col.map(lambda x: isinstance(x, str)), + } + vals = {k: float(v.mean()) for k, v in vals.items()} + # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", + # (vals["is_numeric"] >= vals["is_string"], "is_numeric", + # "is_string")) + if vals["is_bool"] >= vals["is_numeric"]: + type_ = "is_bool" + elif vals["is_numeric"] >= 
vals["is_string"]: + type_ = "is_numeric" + else: + type_ = "is_string" + vals["type"] = type_ + return vals + + +def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: + return df.apply(lambda x: pd.Series(infer_column_types(x))).T + + +def convert_df( + df: pd.DataFrame, *, print_invalid_values: bool = False +) -> pd.DataFrame: + types = df.apply(lambda x: pd.Series(infer_column_types(x))).T + df_out = [] + for col in df.columns: + if types[col]["type"] == "is_bool": + df_out[col] = df[col].astype(bool) + elif types[col]["type"] == "is_numeric": + df_out[col] = df[col].astype(float) + elif types[col]["type"] == "is_string": + df_out[col] = df[col] + else: + raise ValueError(f"Unknown column type: {types[col]['type']}") + return df_out + + +# ############################################################################# + + +def _build_messages( system_prompt: str, user_prompt: str ) -> List[Dict[str, str]]: """ Construct the standard messages payload for the chat API. """ - return [ + hdbg.dassert_isinstance(system_prompt, str) + hdbg.dassert_isinstance(user_prompt, str) + ret = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] + return ret def _call_api_sync( @@ -155,8 +299,9 @@ def _call_api_sync( """ Make a non-streaming API call and return (response, raw_completion). - return str: Model's response in openai's completion object. - return Any: openai's completion object. 
+ return: a tuple with + - model response in OpenAI's completion object + - raw completion """ completion = client.chat.completions.create( model=model, @@ -164,7 +309,18 @@ def _call_api_sync( temperature=temperature, **create_kwargs, ) - return completion.choices[0].message.content, completion + model_response = completion.choices[0].message.content + return model_response, completion + + +# ############################################################################# +# Cost tracking +# ############################################################################# + +# TODO(*): Convert this into a class to track costs? + + +_CURRENT_OPENAI_COST = None def start_logging_costs() -> None: @@ -191,159 +347,50 @@ def get_current_cost() -> float: def _calculate_cost( completion: openai.types.chat.chat_completion.ChatCompletion, model: str, - models_info_file: str = _MODELS_INFO_FILE, + models_info_file: str = "", ) -> float: """ Calculate the cost of an OpenAI API call. :param completion: The completion response from OpenAI :param model: The model used for the completion - :param print_cost: Whether to print the cost details :return: The calculated cost in dollars """ prompt_tokens = completion.usage.prompt_tokens completion_tokens = completion.usage.completion_tokens - # Models info are saved in the CSV file. - # Ensure file exist, if not create the file. + # If the model info file doesn't exist, download one. + if models_info_file == "": + models_info_file = _get_models_info_file() if not os.path.isfile(models_info_file): - _save_models_to_csv(_get_models_info()) - # Ensure model info present in the file. 
- models_info_obj: pd.Data = pd.read_csv(models_info_file) - if model not in models_info_obj["id"].values: - # Refresh CSV and reload - models_info_obj = _save_models_to_csv(_get_models_info()) + models_info_df = _retrieve_openrouter_models_info() + _save_models_info_to_csv(models_info_df, file_name=models_info_file) + else: + model_info_df: pd.DataFrame = pd.read_csv(models_info_file) # Extract pricing for this model. - row = models_info_obj.loc[models_info_obj["id"] == model].iloc[0] + hdbg.dassert_in(model, model_info_df["id"].values) + row = model_info_df.loc[model_info_df["id"] == model].iloc[0] prompt_price = row["prompt_pricing"] completion_price = row["completion_pricing"] # Compute cost. - cost = (prompt_tokens) * prompt_price + (completion_tokens) * completion_price + cost = prompt_tokens * prompt_price + completion_tokens * completion_price return cost -import pandas as pd - - -def convert_to_type(col, type_): - if type_ == "is_bool": - return col.map( - lambda x: isinstance(x, bool) - or x in ["True", "False", "true", "false"] - or x in [1, 0, "1", "0"] - ) - elif type_ == "is_int": - return pd.to_numeric(col, errors="coerce") - elif type_ == "is_numeric": - return pd.to_numeric(col, errors="coerce") - elif type_ == "is_string": - return col.map(lambda x: isinstance(x, str)) - else: - raise ValueError(f"Unknown column type: {type_}") - - -def infer_column_types(col): - vals = { - "is_numeric": pd.to_numeric(col, errors="coerce").notna(), - #'is_datetime': pd.to_datetime(col, errors='coerce').notna(), - "is_bool": col.map(lambda x: isinstance(x, bool)), - "is_string": col.map(lambda x: isinstance(x, str)), - } - vals = {k: float(v.mean()) for k, v in vals.items()} - # type_ = np.where(vals["is_bool"] >= vals["is_numeric"], "is_bool", - # (vals["is_numeric"] >= vals["is_string"], "is_numeric", - # "is_string")) - if vals["is_bool"] >= vals["is_numeric"]: - type_ = "is_bool" - elif vals["is_numeric"] >= vals["is_string"]: - type_ = "is_numeric" - else: - 
type_ = "is_string" - vals["type"] = type_ - return vals - - -def infer_column_types_df(df: pd.DataFrame) -> pd.DataFrame: - return df.apply(lambda x: pd.Series(infer_column_types(x))).T - - -def convert_df( - df: pd.DataFrame, *, print_invalid_values: bool = False -) -> pd.DataFrame: - types = df.apply(lambda x: pd.Series(infer_column_types(x))).T - df_out = [] - for col in df.columns: - if types[col]["type"] == "is_bool": - df_out[col] = df[col].astype(bool) - elif types[col]["type"] == "is_numeric": - df_out[col] = df[col].astype(float) - elif types[col]["type"] == "is_string": - df_out[col] = df[col] - else: - raise ValueError(f"Unknown column type: {types[col]['type']}") - return df_out - - -def get_model_stats() -> Dict[str, Any]: - url = "https://openrouter.ai/api/v1/models" - response = requests.get(url) - # {'architecture': {'input_modalities': ['text', 'image'], - # 'instruct_type': None, - # 'modality': 'text+image->text', - # 'output_modalities': ['text'], - # 'tokenizer': 'Mistral'}, - # 'context_length': 131072, - # 'created': 1746627341, - # 'description': 'Mistral Medium 3 is a high-performance enterprise-grade ' - # 'language model designed to deliver frontier-level ' - # ... 
- # 'broad compatibility across cloud environments.', - # 'id': 'mistralai/mistral-medium-3', - # 'name': 'Mistral: Mistral Medium 3', - # 'per_request_limits': None, - # 'pricing': {'completion': '0.000002', - # 'image': '0', - # 'internal_reasoning': '0', - # 'prompt': '0.0000004', - # 'request': '0', - # 'web_search': '0'}, - # 'supported_parameters': ['tools', - # 'tool_choice', - # 'max_tokens', - # 'temperature', - # 'top_p', - # 'stop', - # 'frequency_penalty', - # 'presence_penalty', - # 'response_format', - # 'structured_outputs', - # 'seed'], - # 'top_provider': {'context_length': 131072, - # 'is_moderated': False, - # 'max_completion_tokens': None}} - response_json = response.json() - hdbg.dassert_eq(list(response_json.keys()), ["data"]) - response_json = response_json["data"] - return response_json - import pprint - - pprint.pprint(response.json()) - # - # import pandas as pd - # df = pd.read_json(response.json()) - # print(df) +# ############################################################################# +# TODO(gp): CAPTURE seems redundant. 
@functools.lru_cache(maxsize=1024) def get_completion( user_prompt: str, *, system_prompt: str = "", - model: Optional[str] = None, + model: str = "", report_progress: bool = False, print_cost: bool = False, - cache_mode: str = _CACHE_MODE, - cache_file: str = _CACHE_FILE, - temperature: float = _TEMPERATURE, + cache_mode: str = "FALLBACK", + cache_file: str = "cache.get_completion.json", + temperature: float = 0.1, **create_kwargs, ) -> str: """ @@ -351,23 +398,28 @@ def get_completion( :param user_prompt: user input message :param system_prompt: system instruction - :param model: OpenAI model to use - :param create_kwargs: additional params for the API call + :param model: model to use or empty string to use the default model :param report_progress: whether to report progress running the API call :param cache_mode : "DISABLED","CAPTURE", "REPLAY", "FALLBACK" - - "DISABLED" : No caching - - "CAPTURE" : Make API calls and save responses to cache - - "REPLAY" : Uses cached responses, fail if not in cache - - "FALLBACK" : Use cached responses if available, otherwise make API call + - "DISABLED": No caching + - "CAPTURE": Make API calls and save responses to cache + - "REPLAY": Use cached responses, fail if not in cache + - "FALLBACK": Use cached responses if available, otherwise make API call + :param cache_file: file to save/load completioncache + :param temperature: adjust an LLM's sampling diversity: lower values make it + more deterministic, while higher values foster creative variation. + 0 < temperature <= 2, 0.1 is default value in OpenAI models. + :param create_kwargs: additional params for the API call :return: completion text """ hdbg.dassert_in(cache_mode, ("REPLAY", "FALLBACK", "CAPTURE", "DISABLED")) - model = _MODEL if model is None else model + if model == "": + model = _get_default_model() # Construct messages in OpenAI API request format. 
- messages = _construct_messages(system_prompt, user_prompt) + messages = _build_messages(system_prompt, user_prompt) + # Initialize cache. cache = CompletionCache(cache_file=cache_file) - # Dictionary makes easy to reuse it. request_params = { "model": model, "messages": messages, @@ -446,246 +498,249 @@ def get_completion( return response -def file_to_info(file: openai.types.file_object.FileObject) -> Dict[str, Any]: - """ - Convert a file object to a dictionary with selected attributes. - - :param file: file object - :return: dictionary with file metadata - """ - hdbg.dassert_isinstance(file, openai.types.file_object.FileObject) - keys = ["id", "created_at", "filename"] - file_info = _extract(file, keys) - file_info["created_at"] = datetime.datetime.fromtimestamp( - file_info["created_at"] - ) - return file_info - - -def files_to_str(files: List[openai.types.file_object.FileObject]) -> str: - """ - Generate a string summary of a list of file objects. - - :param files: list of file objects - :return: string summary - """ - txt: List[str] = [] - txt.append("Found %s files" % len(files)) - for file in files: - txt.append("Deleting file %s" % file_to_info(file)) - txt = "\n".join(txt) - return txt - - -def delete_all_files(*, ask_for_confirmation: bool = True) -> None: - """ - Delete all files from OpenAI's file storage. - - :param ask_for_confirmation: whether to prompt for confirmation - before deletion - """ - client = get_openai_client() - files = list(client.files.list()) - # Print. - _LOG.info(files_to_str(files)) - # Confirm. - if ask_for_confirmation: - hdbg.dfatal("Stopping due to user confirmation.") - # Delete. 
- for file in files: - _LOG.info("Deleting file %s", file) - client.files.delete(file.id) - - -# ############################################################################# -# Assistants -# ############################################################################# - - -def assistant_to_info(assistant: OAssistant.Assistant) -> Dict[str, Any]: - """ - Extract metadata from an assistant object. - - :param assistant: assistant object - :return: dictionary with assistant metadata - """ - hdbg.dassert_isinstance(assistant, OAssistant.Assistant) - keys = ["name", "created_at", "id", "instructions", "model"] - assistant_info = _extract(assistant, keys) - assistant_info["created_at"] = datetime.datetime.fromtimestamp( - assistant_info["created_at"] - ) - return assistant_info - - -def assistants_to_str(assistants: List[OAssistant.Assistant]) -> str: - """ - Generate a string summary of a list of assistants. - - :param assistants: list of assistants - :return: a string summary - """ - txt = [] - txt.append("Found %s assistants" % len(assistants)) - for assistant in assistants: - txt.append("Deleting assistant %s" % assistant_to_info(assistant)) - txt = "\n".join(txt) - return txt - - -def delete_all_assistants(*, ask_for_confirmation: bool = True) -> None: - """ - Delete all assistants from OpenAI's assistant storage. - - :param ask_for_confirmation: whether to prompt for confirmation - before deletion. 
- """ - client = get_openai_client() - assistants = client.beta.assistants.list() - assistants = assistants.data - _LOG.info(assistants_to_str(assistants)) - if ask_for_confirmation: - hdbg.dfatal("Stopping due to user confirmation.") - for assistant in assistants: - _LOG.info("Deleting assistant %s", assistant) - client.beta.assistants.delete(assistant.id) - - -def get_coding_style_assistant( - assistant_name: str, - instructions: str, - vector_store_name: str, - file_paths: List[str], - *, - model: Optional[str] = None, -) -> OAssistant.Assistant: - """ - Create or retrieve a coding style assistant with vector store support. - - :param assistant_name: name of the assistant - :param instructions: instructions for the assistant - :param vector_store_name: name of the vectore store - :param file_paths: list of file paths to upload - :param model: OpenAI model to use - :return: created or updated assistant object - """ - model = _MODEL if model is None else model - client = get_openai_client() - # Check if the assistant already exists. - existing_assistants = list(client.beta.assistants.list().data) - for existing_assistant in existing_assistants: - if existing_assistant.name == assistant_name: - _LOG.debug("Assistant '%s' already exists.", assistant_name) - return existing_assistant - # Cretae the assistant. - _LOG.info("Creating a new assistant: %s", assistant_name) - assistant = client.beta.assistants.create( - name=assistant_name, - instructions=instructions, - model=model, - tools=[{"type": "file_search"}], - ) - # Check if the vector store already exists. - vector_stores = list(client.beta.vector_stores.list().data) - vector_store = None - for store in vector_stores: - if store.name == vector_store_name: - _LOG.debug( - "Vector store '%s' already exists. Using it", vector_store_name - ) - vector_store = store - break - if not vector_store: - _LOG.debug("Creating vector store ...") - # Create a vector store. 
- vector_store = client.beta.vector_stores.create(name=vector_store_name) - # Upload files to the vector store (if provided). - if file_paths: - file_streams = [open(path, "rb") for path in file_paths] - _LOG.debug("Uploading files to vector store ...") - try: - file_batch = client.beta.vector_stores.file_batches.upload_and_poll( - vector_store_id=vector_store.id, files=file_streams - ) - _LOG.info( - "File batch uploaded successfully with status: %s", - file_batch.status, - ) - except Exception as e: - _LOG.error("Failed to upload files to vector store: %s", str(e)) - raise - # Associate the assistant with the vector store. - assistant = client.beta.assistants.update( - assistant_id=assistant.id, - tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}, - ) - return assistant - - -def get_query_assistant( - assistant: OAssistant.Assistant, question: str -) -> List[OMessage.Message]: - """ - Query an assistant with sepecific question. - - :param assistant: assistant to query - :param question: user question - :return: list of messages containing the assistant's response - """ - client = get_openai_client() - # Create a thread and attach the file to the message. - thread = client.beta.threads.create( - messages=[ - { - "role": "user", - "content": question, - } - ] - ) - # The thread now has a vector store with that file in its tool resources. - _LOG.debug("thread=%s", thread.tool_resources.file_search) - run = client.beta.threads.runs.create_and_poll( - thread_id=thread.id, assistant_id=assistant.id - ) - messages = list( - client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id) - ) - return messages - - -# import os -# import requests -# -# -# def get_openai_usage(): -# # Define the API endpoint. -# endpoint = "https://api.openai.com/v1/organization/costs" -# start_date = datetime.datetime.now() - datetime.timedelta(days=10) -# start_date = int(start_date.timestamp()) -# # Request headers. 
-# #api_key = os.environ.get("OPENAI_API_KEY") -# headers = { -# "Authorization": f"Bearer {api_key}", -# } -# # Query parameters -# params = { -# "start_time": start_date, -# #"end_date": end_date, -# } -# # Send the request -# response = requests.get(endpoint, headers=headers, params=params) -# if response.status_code == 200: -# data = response.json() -# import pprint -# pprint.pprint(data) -# total_spent = data.get("total_usage", 0) / 100 # Convert cents to dollars -# #print(f"Total spent from {start_date} to {end_date}: " -# # f"${total_spent:.2f}") -# return total_spent -# else: -# print(f"Failed to fetch usage: {response.status_code}, {response.text}") -# return None -# +# # ############################################################################# + + +# def file_to_info(file: openai.types.file_object.FileObject) -> Dict[str, Any]: +# """ +# Convert a file object to a dictionary with selected attributes. + +# :param file: file object +# :return: dictionary with file metadata +# """ +# hdbg.dassert_isinstance(file, openai.types.file_object.FileObject) +# keys = ["id", "created_at", "filename"] +# file_info = _extract(file, keys) +# file_info["created_at"] = datetime.datetime.fromtimestamp( +# file_info["created_at"] +# ) +# return file_info + + +# def files_to_str(files: List[openai.types.file_object.FileObject]) -> str: +# """ +# Generate a string summary of a list of file objects. + +# :param files: list of file objects +# :return: string summary +# """ +# txt: List[str] = [] +# txt.append("Found %s files" % len(files)) +# for file in files: +# txt.append("Deleting file %s" % file_to_info(file)) +# txt = "\n".join(txt) +# return txt + + +# def delete_all_files(*, ask_for_confirmation: bool = True) -> None: +# """ +# Delete all files from OpenAI's file storage. + +# :param ask_for_confirmation: whether to prompt for confirmation +# before deletion +# """ +# client = get_openai_client() +# files = list(client.files.list()) +# # Print. 
+# _LOG.info(files_to_str(files)) +# # Confirm. +# if ask_for_confirmation: +# hdbg.dfatal("Stopping due to user confirmation.") +# # Delete. +# for file in files: +# _LOG.info("Deleting file %s", file) +# client.files.delete(file.id) + + +# # ############################################################################# +# # Assistants +# # ############################################################################# + + +# def assistant_to_info(assistant: OAssistant.Assistant) -> Dict[str, Any]: +# """ +# Extract metadata from an assistant object. + +# :param assistant: assistant object +# :return: dictionary with assistant metadata +# """ +# hdbg.dassert_isinstance(assistant, OAssistant.Assistant) +# keys = ["name", "created_at", "id", "instructions", "model"] +# assistant_info = _extract(assistant, keys) +# assistant_info["created_at"] = datetime.datetime.fromtimestamp( +# assistant_info["created_at"] +# ) +# return assistant_info + + +# def assistants_to_str(assistants: List[OAssistant.Assistant]) -> str: +# """ +# Generate a string summary of a list of assistants. + +# :param assistants: list of assistants +# :return: a string summary +# """ +# txt = [] +# txt.append("Found %s assistants" % len(assistants)) +# for assistant in assistants: +# txt.append("Deleting assistant %s" % assistant_to_info(assistant)) +# txt = "\n".join(txt) +# return txt + + +# def delete_all_assistants(*, ask_for_confirmation: bool = True) -> None: +# """ +# Delete all assistants from OpenAI's assistant storage. + +# :param ask_for_confirmation: whether to prompt for confirmation +# before deletion. 
+# """ +# client = get_openai_client() +# assistants = client.beta.assistants.list() +# assistants = assistants.data +# _LOG.info(assistants_to_str(assistants)) +# if ask_for_confirmation: +# hdbg.dfatal("Stopping due to user confirmation.") +# for assistant in assistants: +# _LOG.info("Deleting assistant %s", assistant) +# client.beta.assistants.delete(assistant.id) + + +# def get_coding_style_assistant( +# assistant_name: str, +# instructions: str, +# vector_store_name: str, +# file_paths: List[str], +# *, +# model: Optional[str] = None, +# ) -> OAssistant.Assistant: +# """ +# Create or retrieve a coding style assistant with vector store support. + +# :param assistant_name: name of the assistant +# :param instructions: instructions for the assistant +# :param vector_store_name: name of the vectore store +# :param file_paths: list of file paths to upload +# :param model: OpenAI model to use +# :return: created or updated assistant object +# """ +# model = _MODEL if model is None else model +# client = get_openai_client() +# # Check if the assistant already exists. +# existing_assistants = list(client.beta.assistants.list().data) +# for existing_assistant in existing_assistants: +# if existing_assistant.name == assistant_name: +# _LOG.debug("Assistant '%s' already exists.", assistant_name) +# return existing_assistant +# # Cretae the assistant. +# _LOG.info("Creating a new assistant: %s", assistant_name) +# assistant = client.beta.assistants.create( +# name=assistant_name, +# instructions=instructions, +# model=model, +# tools=[{"type": "file_search"}], +# ) +# # Check if the vector store already exists. +# vector_stores = list(client.beta.vector_stores.list().data) +# vector_store = None +# for store in vector_stores: +# if store.name == vector_store_name: +# _LOG.debug( +# "Vector store '%s' already exists. 
Using it", vector_store_name +# ) +# vector_store = store +# break +# if not vector_store: +# _LOG.debug("Creating vector store ...") +# # Create a vector store. +# vector_store = client.beta.vector_stores.create(name=vector_store_name) +# # Upload files to the vector store (if provided). +# if file_paths: +# file_streams = [open(path, "rb") for path in file_paths] +# _LOG.debug("Uploading files to vector store ...") +# try: +# file_batch = client.beta.vector_stores.file_batches.upload_and_poll( +# vector_store_id=vector_store.id, files=file_streams +# ) +# _LOG.info( +# "File batch uploaded successfully with status: %s", +# file_batch.status, +# ) +# except Exception as e: +# _LOG.error("Failed to upload files to vector store: %s", str(e)) +# raise +# # Associate the assistant with the vector store. +# assistant = client.beta.assistants.update( +# assistant_id=assistant.id, +# tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}, +# ) +# return assistant + + +# def get_query_assistant( +# assistant: OAssistant.Assistant, question: str +# ) -> List[OMessage.Message]: +# """ +# Query an assistant with sepecific question. + +# :param assistant: assistant to query +# :param question: user question +# :return: list of messages containing the assistant's response +# """ +# client = get_openai_client() +# # Create a thread and attach the file to the message. +# thread = client.beta.threads.create( +# messages=[ +# { +# "role": "user", +# "content": question, +# } +# ] +# ) +# # The thread now has a vector store with that file in its tool resources. +# _LOG.debug("thread=%s", thread.tool_resources.file_search) +# run = client.beta.threads.runs.create_and_poll( +# thread_id=thread.id, assistant_id=assistant.id +# ) +# messages = list( +# client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id) +# ) +# return messages + + +# # import os +# # import requests +# # +# # +# # def get_openai_usage(): +# # # Define the API endpoint. 
+# # endpoint = "https://api.openai.com/v1/organization/costs" +# # start_date = datetime.datetime.now() - datetime.timedelta(days=10) +# # start_date = int(start_date.timestamp()) +# # # Request headers. +# # #api_key = os.environ.get("OPENAI_API_KEY") +# # headers = { +# # "Authorization": f"Bearer {api_key}", +# # } +# # # Query parameters +# # params = { +# # "start_time": start_date, +# # #"end_date": end_date, +# # } +# # # Send the request +# # response = requests.get(endpoint, headers=headers, params=params) +# # if response.status_code == 200: +# # data = response.json() +# # import pprint +# # pprint.pprint(data) +# # total_spent = data.get("total_usage", 0) / 100 # Convert cents to dollars +# # #print(f"Total spent from {start_date} to {end_date}: " +# # # f"${total_spent:.2f}") +# # return total_spent +# # else: +# # print(f"Failed to fetch usage: {response.status_code}, {response.text}") +# # return None +# # def apply_prompt_to_dataframe( diff --git a/helpers/test/test_hopenai.py b/helpers/test/test_hopenai.py index 2d120cad5..8f357c7c3 100644 --- a/helpers/test/test_hopenai.py +++ b/helpers/test/test_hopenai.py @@ -41,7 +41,7 @@ def _get_completion_parameters1() -> dict: def _get_openai_request_parameters1() -> dict: - messages = hopenai._construct_messages( + messages = hopenai._build_messages( user_prompt=_USER_PROMPT1, system_prompt=_SYSTEM_PROMPT1 ) data = {"messages": messages, "temperature": _TEMPERATURE1, "model": _MODEL1} @@ -59,7 +59,7 @@ def _get_completion_parameters2() -> dict: def _get_openai_request_parameters2() -> dict: - messages = hopenai._construct_messages( + messages = hopenai._build_messages( user_prompt=_USER_PROMPT2, system_prompt=_SYSTEM_PROMPT1 ) data = {"messages": messages, "temperature": _TEMPERATURE1, "model": _MODEL1} @@ -77,7 +77,7 @@ def _get_completion_parameters3() -> dict: def _get_openai_request_parameters3() -> dict: - messages = hopenai._construct_messages( + messages = hopenai._build_messages( 
user_prompt=_USER_PROMPT1, system_prompt=_SYSTEM_PROMPT1 ) data = {"messages": messages, "temperature": _TEMPERATURE2, "model": _MODEL1} @@ -95,7 +95,7 @@ def _get_completion_parameters4() -> dict: def _get_openai_request_parameters4() -> dict: - messages = hopenai._construct_messages( + messages = hopenai._build_messages( user_prompt=_USER_PROMPT1, system_prompt=_SYSTEM_PROMPT1 ) data = {"messages": messages, "temperature": _TEMPERATURE2, "model": _MODEL2} From 8db05a8b27ed61cb155a7ea3c9c2d7a7a3114ae1 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 19 May 2025 20:41:22 -0400 Subject: [PATCH 102/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hopenai.py | 89 +++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/helpers/hopenai.py b/helpers/hopenai.py index a6377b1f0..39854661a 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -21,7 +21,6 @@ import tqdm import helpers.hdbg as hdbg -import helpers.hgit as hgit import helpers.hprint as hprint import helpers.htimer as htimer @@ -101,7 +100,7 @@ def get_openai_client(provider_name: str = _PROVIDER_NAME) -> openai.OpenAI: api_key = os.environ.get("OPENROUTER_API_KEY") else: raise ValueError(f"Unknown provider: {provider_name}") - _LOG.debug(hprint.to_str("provider_name", "base_url")) + _LOG.debug(hprint.to_str("provider_name base_url")) client = openai.OpenAI(base_url=base_url, api_key=api_key) return client @@ -123,17 +122,15 @@ def _get_models_info_file() -> str: """ Get the path to the file for storing OpenRouter models info. """ - helpers_root = hgit.find_helpers_root() - file_path = os.path.join(helpers_root, "tmp.openrouter_models_info.csv") + file_path = "tmp.openrouter_models_info.csv" return file_path -# TODO(*): Return a pandas DataFrame. 
-def _retrieve_openrouter_models_info() -> List[Dict[str, Any]]: +def _retrieve_openrouter_model_info() -> pd.DataFrame: """ Retrieve OpenRouter models info from the OpenRouter API. """ - response = requests.get("https://openrouter.ai/api/v1/models").json() + response = requests.get("https://openrouter.ai/api/v1/models") # {'architecture': {'input_modalities': ['text', 'image'], # 'instruct_type': None, # 'modality': 'text+image->text', @@ -347,7 +344,7 @@ def get_current_cost() -> float: def _calculate_cost( completion: openai.types.chat.chat_completion.ChatCompletion, model: str, - models_info_file: str = "", + models_info_file: str ) -> float: """ Calculate the cost of an OpenAI API call. @@ -358,21 +355,43 @@ def _calculate_cost( """ prompt_tokens = completion.usage.prompt_tokens completion_tokens = completion.usage.completion_tokens - # If the model info file doesn't exist, download one. - if models_info_file == "": - models_info_file = _get_models_info_file() - if not os.path.isfile(models_info_file): - models_info_df = _retrieve_openrouter_models_info() - _save_models_info_to_csv(models_info_df, file_name=models_info_file) + # TODO(gp): This should be shared in the class. + if _PROVIDER_NAME == "openai": + # Get the pricing for the selected model. + # https://openai.com/api/pricing/ + # https://gptforwork.com/tools/openai-chatgpt-api-pricing-calculator + # Cost per 1M tokens. + pricing = { + "gpt-3.5-turbo": {"prompt": 0.5, "completion": 1.5}, + "gpt-4o-mini": {"prompt": 0.15, "completion": 0.60}, + "gpt-4o": {"prompt": 5, "completion": 15}, + } + hdbg.dassert_in(model, pricing) + model_pricing = pricing[model] + # Calculate the cost. + cost = (prompt_tokens / 1e6) * model_pricing["prompt"] + ( + completion_tokens / 1e6 + ) * model_pricing["completion"] + elif _PROVIDER_NAME == "openrouter": + # If the model info file doesn't exist, download one. 
+ if models_info_file == "": + models_info_file = _get_models_info_file() + _LOG.debug(hprint.to_str("models_info_file")) + if not os.path.isfile(models_info_file): + model_info_df = _retrieve_openrouter_model_info() + _save_models_info_to_csv(model_info_df, models_info_file) + else: + model_info_df = pd.read_csv(models_info_file) + # Extract pricing for this model. + hdbg.dassert_in(model, model_info_df["id"].values) + row = model_info_df.loc[model_info_df["id"] == model].iloc[0] + prompt_price = row["prompt_pricing"] + completion_price = row["completion_pricing"] + # Compute cost. + cost = prompt_tokens * prompt_price + completion_tokens * completion_price else: - model_info_df: pd.DataFrame = pd.read_csv(models_info_file) - # Extract pricing for this model. - hdbg.dassert_in(model, model_info_df["id"].values) - row = model_info_df.loc[model_info_df["id"] == model].iloc[0] - prompt_price = row["prompt_pricing"] - completion_price = row["completion_pricing"] - # Compute cost. - cost = prompt_tokens * prompt_price + completion_tokens * completion_price + raise ValueError(f"Unknown provider: {_PROVIDER_NAME}") + _LOG.debug(hprint.to_str("prompt_tokens completion_tokens cost")) return cost @@ -419,7 +438,7 @@ def get_completion( # Construct messages in OpenAI API request format. messages = _build_messages(system_prompt, user_prompt) # Initialize cache. - cache = CompletionCache(cache_file=cache_file) + cache = _CompletionCache(cache_file) request_params = { "model": model, "messages": messages, @@ -477,8 +496,10 @@ def get_completion( # Report the time taken. msg, _ = htimer.dtimer_stop(memento) print(msg) - # Calculate and accumulate the cost - cost = _calculate_cost(completion, model, print_cost) + # Calculate the cost. + # TODO(gp): This should be shared in the class. + models_info_file = "" + cost = _calculate_cost(completion, model, models_info_file) # Accumulate the cost. _accumulate_cost_if_needed(cost) # Convert OpenAI completion object to DICT. 
@@ -489,12 +510,8 @@ def get_completion( cache.save_response_to_cache( hash_key, request=request_params, response=completion_obj ) - _LOG.debug(hprint.to_str("prompt_tokens completion_tokens cost")) if print_cost: - print( - f"cost=${cost:.2f} / " - + hprint.to_str("prompt_tokens completion_tokens") - ) + print(f"cost=${cost:.2f}") return response @@ -791,14 +808,14 @@ def apply_prompt_to_dataframe( # ############################################################################# -class CompletionCache: +# TODO(gp): we can't use hcache_simple.simple_cache() because it uses a different cache +# format and does not support conditions required by get_completion(). +class _CompletionCache: """ - 1. Manage the cache for get_completion(). - 2. Do not use hcache_simple.simple_cache() because it uses a different cache format - and does not support conditions required by get_completion(). + Cache for get_completion(). """ - def __init__(self, cache_file: str = _CACHE_FILE): + def __init__(self, cache_file: str): self.cache_file = cache_file # Load the existing file(may not exist or may be invalid JSON) try: @@ -806,7 +823,7 @@ def __init__(self, cache_file: str = _CACHE_FILE): self.cache = json.load(f) except (FileNotFoundError, json.JSONDecodeError): self.cache = None - # Validates structure + # Validates structure. 
if ( not isinstance(self.cache, dict) or "version" not in self.cache From f745ea8734f3c384c9666884311c8ba725d22283 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 20 May 2025 16:15:01 -0400 Subject: [PATCH 103/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/render_images.py | 56 ++++++++++--------- dev_scripts_helpers/llms/llm_prompts.py | 23 ++++++++ dev_scripts_helpers/llms/llm_transform.py | 14 ++--- helpers/hmarkdown.py | 24 +++++++- 4 files changed, 82 insertions(+), 35 deletions(-) diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index c1ef3af1a..10a3d8034 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -242,29 +242,35 @@ def _render_image_code( if not image_code_txt.endswith("@enduml"): image_code_txt = f"{image_code_txt}\n@enduml" elif image_code_type == "tikz": - if False: - start_tag = r""" - \documentclass[tikz, border=10pt]{standalone} - \usepackage{tikz} - \begin{document} - """ - else: - start_tag = r""" - \documentclass{standalone} - \usepackage{tikz} - \usepackage{amsmath} - \usepackage{pgfplots} - \pgfplotsset{compat=1.17} - \begin{document} - \begin{tikzpicture} - """ - start_tag = hprint.dedent(start_tag) - end_tag = hprint.dedent( - r""" + # \documentclass[tikz, border=10pt]{standalone} + # \usepackage{tikz} + # \begin{document} + start_tag = hprint.dedent(r""" + \documentclass{standalone} + \usepackage{tikz} + \usepackage{amsmath} + \usepackage{pgfplots} + \pgfplotsset{compat=1.17} + \begin{document} + \begin{tikzpicture} + """) + end_tag = hprint.dedent(r""" \end{tikzpicture} \end{document} - """ - ) + """) + image_code_txt = "\n".join([start_tag, image_code_txt, end_tag]) + elif image_code_type == "latex": + start_tag = hprint.dedent(r""" + 
\documentclass[border=1pt]{standalone} % No page, tight margins + \usepackage{tabularx} + \usepackage{enumitem} + \usepackage{booktabs} % Optional: For nicer tables + \begin{document} + + """) + end_tag = hprint.dedent(r""" + \end{document} + """) image_code_txt = "\n".join([start_tag, image_code_txt, end_tag]) # Get paths for rendered files. # TODO(gp): The fact that we compute the image file path here makes it @@ -317,8 +323,8 @@ def _render_image_code( force_rebuild=force_rebuild, use_sudo=use_sudo, ) - elif image_code_type == "tikz": - cmd_opts: List[str] = ["-density 300", "-quality 10"] + elif image_code_type in ("tikz", "latex"): + cmd_opts: List[str] = ["-density 300", "-quality 20"] hdocker.run_dockerized_tikz_to_bitmap( in_code_file_path, cmd_opts, @@ -451,7 +457,7 @@ def _render_images( ^\s* # Start of the line and any leading whitespace ({comment}\s*)? # Optional comment prefix ``` # Opening backticks for code block - (plantuml|mermaid|tikz|graphviz*) # Image code type + (plantuml|mermaid|tikz|graphviz|latex*) # Image code type (\((.*)\))? # Optional user-specified image name as (...) (\[(.*)\])? # Optional user-specified image size as [...] \s*$ # Any trailing whitespace and end of the line @@ -483,7 +489,7 @@ def _render_images( # E.g., "plantuml" or "mermaid". 
image_code_type = m.group(2) hdbg.dassert_in( - image_code_type, ["plantuml", "mermaid", "tikz", "graphviz"] + image_code_type, ["plantuml", "mermaid", "tikz", "graphviz", "latex"] ) if m.group(3): hdbg.dassert_eq(user_rel_img_path, "") diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 703d4cc30..0d2642a87 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -863,6 +863,29 @@ def slide_reduce() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def slide_reduce_bullets() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + + You will: + - Maintain the structure of the text + - Keep all the figures + - Keep only the bullet points that are important and clear + - Remove all the bullet points that are redundant or not clear + + Print only the markdown without any explanation. + """ + pre_transforms: Set[str] = set() + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + def slide_bold() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index a93fa9770..5a4376d4b 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -304,13 +304,13 @@ def _main(parser: argparse.ArgumentParser) -> None: "Not all post_transforms were run: %s", post_container_transforms, ) - if args.diff_compare: - txt = hio.from_file(tmp_in_file_name) - hio.to_file("original.txt", txt) - hio.to_file("transformed.txt", out_txt) - cmd = "vimdiff original.txt transformed.txt" - hio.create_executable_script("tmp.llm_diff.sh", cmd) - + # Save the original and transformed text on file and a script to 
compare them. + txt = hio.from_file(tmp_in_file_name) + hio.to_file("original.txt", txt) + hio.to_file("transformed.txt", out_txt) + cmd = "vimdiff original.txt transformed.txt" + hio.create_executable_script("tmp.llm_diff.sh", cmd) + # if args.compare: out_txt_tmp = [] out_txt_tmp.append("#### Original ####") diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 72eb6fc58..857d74ef4 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -970,8 +970,8 @@ def colorize_bold_text( def format_first_level_bullets(markdown_text: str) -> str: """ - Remove all empty lines from markdown text and add empty lines only before - first level bullets. + Add empty lines only before first level bullets and remove all empty lines + from markdown text. :param markdown_text: Input markdown text :return: Formatted markdown text @@ -1003,12 +1003,30 @@ def remove_empty_lines_from_markdown(markdown_text: str) -> str: def prettier_markdown(txt: str) -> str: + """ + Format markdown text using `prettier`. + """ txt = dshdlino.prettier_on_str(txt) return txt def format_markdown(txt: str) -> str: + """ + Format markdown text. + """ txt = dshdlino.prettier_on_str(txt) txt = remove_empty_lines_from_markdown(txt) - #txt = format_first_level_bullets(txt) + return txt + + +def format_markdown_slide(txt: str) -> str: + """ + Format markdown text for a slide. + """ + # Split the text into title and body. 
+ + txt = dshdlino.prettier_on_str(txt) + txt = format_first_level_bullets(txt) + #txt = capitalize_slide_titles(txt) + #txt = bold_first_level_bullets(txt) return txt From 36b821bff4df3779eaef24060887af003de8b72d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 20 May 2025 19:16:57 -0400 Subject: [PATCH 104/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 10 +++++----- dev_scripts_helpers/llms/llm_transform.py | 7 +++++++ helpers/hmarkdown.py | 18 +++++++++++------- helpers/hopenai.py | 2 +- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 0d2642a87..039d6d612 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -834,7 +834,7 @@ def slide_expand() -> _PROMPT_OUT: "remove_end_of_line_periods", "remove_empty_lines", } - post_container_transforms = ["format_markdown"] + post_container_transforms = ["format_slide"] return system, pre_transforms, post_transforms, post_container_transforms @@ -859,7 +859,7 @@ def slide_reduce() -> _PROMPT_OUT: "remove_end_of_line_periods", "remove_empty_lines", } - post_container_transforms = ["format_markdown"] + post_container_transforms = ["format_slide"] return system, pre_transforms, post_transforms, post_container_transforms @@ -882,7 +882,7 @@ def slide_reduce_bullets() -> _PROMPT_OUT: "remove_end_of_line_periods", "remove_empty_lines", } - post_container_transforms = ["format_markdown"] + post_container_transforms = ["format_slide"] return system, pre_transforms, post_transforms, post_container_transforms @@ -903,7 +903,7 @@ def slide_bold() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["format_markdown"] + post_container_transforms = 
["format_slide"] return system, pre_transforms, post_transforms, post_container_transforms @@ -946,7 +946,7 @@ def slide_smart_colorize() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["format_markdown"] + post_container_transforms = ["format_slide"] return system, pre_transforms, post_transforms, post_container_transforms diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 5a4376d4b..049e4046f 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -287,6 +287,13 @@ def _main(parser: argparse.ArgumentParser) -> None: if args.bold_first_level_bullets: out_txt = hmarkdo.bold_first_level_bullets(out_txt) # + if dshlllpr.to_run("format_slide", post_container_transforms): + # Note that we need to run this outside the `llm_transform` + # container to avoid to do docker-in-docker in the `llm_transform` + # container (which doesn't support that). + out_txt = hmarkdo.md_clean_up(out_txt) + out_txt = hmarkdo.format_markdown_slide(out_txt) + # if dshlllpr.to_run("append_text", post_container_transforms): out_txt_tmp = [] # Append the original text. diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 857d74ef4..124d9aa46 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -889,11 +889,13 @@ def capitalize_first_level_bullets(markdown_text: str) -> str: ] -def bold_first_level_bullets(markdown_text: str) -> str: +def bold_first_level_bullets(markdown_text: str, *, max_length: int = 30) -> str: """ Make first-level bullets bold in markdown text. :param markdown_text: Input markdown text + :param max_length: Max length of the bullet text to be bolded. -1 means no + limit. 
:return: Formatted markdown text with first-level bullets in bold """ lines = markdown_text.split("\n") @@ -901,16 +903,18 @@ def bold_first_level_bullets(markdown_text: str) -> str: for line in lines: # Check if this is a first-level bullet point. if re.match(r"^\s*- ", line): - # Check if the line has bold text it in it. + # Check if the line has already bold text it in it. if not re.search(r"\*\*", line): # Bold first-level bullets. indentation = len(line) - len(line.lstrip()) if indentation == 0: # First-level bullet, add bold markers. - line = re.sub(r"^(\s*-\s+)(.*)", r"\1**\2**", line) - result.append(line) - else: - result.append(line) + m = re.match(r"^(\s*-\s+)(.*)", line) + hdbg.dassert(m, "Can't parse line='%s'", line) + bullet_text = m.group(2) + if max_length > -1 and len(bullet_text) <= max_length: + line = m.group(1) + "**" + bullet_text + "**" + result.append(line) return "\n".join(result) @@ -1025,8 +1029,8 @@ def format_markdown_slide(txt: str) -> str: """ # Split the text into title and body. 
+ txt = bold_first_level_bullets(txt) txt = dshdlino.prettier_on_str(txt) txt = format_first_level_bullets(txt) #txt = capitalize_slide_titles(txt) - #txt = bold_first_level_bullets(txt) return txt diff --git a/helpers/hopenai.py b/helpers/hopenai.py index 39854661a..eb8174c33 100644 --- a/helpers/hopenai.py +++ b/helpers/hopenai.py @@ -407,7 +407,7 @@ def get_completion( model: str = "", report_progress: bool = False, print_cost: bool = False, - cache_mode: str = "FALLBACK", + cache_mode: str = "DISABLED", cache_file: str = "cache.get_completion.json", temperature: float = 0.1, **create_kwargs, From b8c65d6acdcbfd81619280e2b9ac8b94415ae211 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 22 May 2025 11:34:14 -0400 Subject: [PATCH 105/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- linters/dockerized_pydeps.py | 108 ----------------------------------- 1 file changed, 108 deletions(-) delete mode 100755 linters/dockerized_pydeps.py diff --git a/linters/dockerized_pydeps.py b/linters/dockerized_pydeps.py deleted file mode 100755 index b4be012d5..000000000 --- a/linters/dockerized_pydeps.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python - -""" -Run pydeps as a dockerized executable. 
-""" - -import argparse -import logging - -import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hparser as hparser -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem - -_LOG = logging.getLogger(__name__) - - -# ############################################################################# - - -def _parse() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - hparser.add_dockerized_script_arg(parser) - hparser.add_verbosity_arg(parser) - return parser - - -def _run_dockerized_pydeps( - in_file_path: str, - *, - force_rebuild: bool = False, - use_sudo: bool = False, -) -> None: - """ - Run `graphviz` in a Docker container. - - :param in_file_path: path to the code of the image to render - :param out_file_path: path to the image to be created - :param force_rebuild: whether to force rebuild the Docker container - :param use_sudo: whether to use sudo for Docker commands - """ - _LOG.debug(hprint.func_signature_to_str()) - # Get the container image. - container_image = "tmp.pydeps" - dockerfile = rf""" - FROM alpine:latest - - RUN pip install pydeps - """ - container_image = hdocker.build_container_image( - container_image, dockerfile, force_rebuild, use_sudo - ) - # Convert files to Docker paths. 
- is_caller_host = not hserver.is_inside_docker() - use_sibling_container_for_callee = False - caller_mount_path, callee_mount_path, mount = hdocker.get_docker_mount_info( - is_caller_host, use_sibling_container_for_callee - ) - in_file_path = hdocker.convert_caller_to_callee_docker_path( - in_file_path, - caller_mount_path, - callee_mount_path, - check_if_exists=True, - is_input=True, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - out_file_path = hdocker.convert_caller_to_callee_docker_path( - out_file_path, - caller_mount_path, - callee_mount_path, - check_if_exists=True, - is_input=False, - is_caller_host=is_caller_host, - use_sibling_container_for_callee=use_sibling_container_for_callee, - ) - cmd = ["pydeps" f"{in_file_path}"] - cmd = " ".join(cmd) - executable = hdocker.get_docker_executable(use_sudo) - docker_cmd = ( - f"{executable} run --rm --user $(id -u):$(id -g)" - f" --workdir {callee_mount_path} --mount {mount}" - f" {container_image}" - f" {cmd}" - ) - hsystem.system(docker_cmd) - - -def _main(parser: argparse.ArgumentParser) -> None: - # Parse everything that can be parsed and returns the rest. 
- args, cmd_opts = parser.parse_known_args() - hdbg.init_logger( - verbosity=args.log_level, use_exec_path=True, force_white=False - ) - hdocker.run_dockerized_graphviz( - args.input, - force_rebuild=args.dockerized_force_rebuild, - use_sudo=args.dockerized_use_sudo, - ) - _LOG.info("Output written to '%s'", args.output) - - -if __name__ == "__main__": - _main(_parse()) From f6e3d3bf878edd87149e46dafc44efdbfba7b8be Mon Sep 17 00:00:00 2001 From: GP Saggese <33238329+gpsaggese@users.noreply.github.com> Date: Thu, 22 May 2025 17:44:54 -0400 Subject: [PATCH 106/193] CmampTask11862_Allow_dind_unit_tests_to_run_on_server_and_CI_7 (#752) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve Pre-commit checks: All checks passed ✅ * Improve Pre-commit checks: All checks passed ✅ * Improve Pre-commit checks: All checks passed ✅ * Improve Pre-commit checks: All checks passed ✅ * Improve Pre-commit checks: All checks passed ✅ --- .../github/sync_gh_issue_labels.py | 4 +- .../notebooks/extract_notebook_images.py | 4 +- dev_scripts_helpers/system_tools/i | 4 + dev_scripts_helpers/system_tools/il | 4 + dev_scripts_helpers/system_tools/it | 4 + dev_scripts_helpers/thin_client/setenv.sh | 0 .../thin_client/thin_client_utils.sh | 58 +++++-- devops/docker_run/docker_setenv.sh | 7 + devops/docker_run/entrypoint.sh | 2 +- helpers/hdocker.py | 75 ++------- helpers/lib_tasks.py | 2 + helpers/lib_tasks_bash.py | 54 +++++++ helpers/test/test_hmarkdown.py | 150 +++++++++--------- tasks.py | 7 + 14 files changed, 223 insertions(+), 152 deletions(-) create mode 100755 dev_scripts_helpers/system_tools/i create mode 100755 dev_scripts_helpers/system_tools/il create mode 100755 dev_scripts_helpers/system_tools/it mode change 100755 => 100644 dev_scripts_helpers/thin_client/setenv.sh create mode 100644 helpers/lib_tasks_bash.py diff --git a/dev_scripts_helpers/github/sync_gh_issue_labels.py b/dev_scripts_helpers/github/sync_gh_issue_labels.py index 
989c0ba1f..7dd20ffa0 100755 --- a/dev_scripts_helpers/github/sync_gh_issue_labels.py +++ b/dev_scripts_helpers/github/sync_gh_issue_labels.py @@ -109,7 +109,9 @@ def _run_dockerized_sync_gh_issue_labels( # Build the container image, if needed. container_image = "tmp.sync_gh_issue_labels" dockerfile = r""" - FROM python:3.10-slim + # This seems to be flaky on ARM64 architectures. + #FROM python:3.10-slim + FROM python:3.10 # Install required packages. RUN apt-get update && apt-get install -y git && \ diff --git a/dev_scripts_helpers/notebooks/extract_notebook_images.py b/dev_scripts_helpers/notebooks/extract_notebook_images.py index e75623a51..8afa53dd9 100755 --- a/dev_scripts_helpers/notebooks/extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/extract_notebook_images.py @@ -68,7 +68,9 @@ def _run_dockerized_extract_notebook_images( # Build the container image, if needed. container_image = "tmp.extract_notebook_images" dockerfile = r""" - FROM python:3.10-slim + # This seems to be flaky on ARM64 architectures. + #FROM python:3.10-slim + FROM python:3.10 # Install required system libraries for Chromium and Playwright. RUN apt-get update && apt-get install -y \ diff --git a/dev_scripts_helpers/system_tools/i b/dev_scripts_helpers/system_tools/i new file mode 100755 index 000000000..00a1ebbdd --- /dev/null +++ b/dev_scripts_helpers/system_tools/i @@ -0,0 +1,4 @@ +#!/bin/bash +# Equivalent to `alias i = "invoke"`. + +invoke $* diff --git a/dev_scripts_helpers/system_tools/il b/dev_scripts_helpers/system_tools/il new file mode 100755 index 000000000..713bf45d7 --- /dev/null +++ b/dev_scripts_helpers/system_tools/il @@ -0,0 +1,4 @@ +#!/bin/bash +# Equivalent to `alias il = "invoke --list"`. 
+ +invoke --list $* diff --git a/dev_scripts_helpers/system_tools/it b/dev_scripts_helpers/system_tools/it new file mode 100755 index 000000000..1742b680d --- /dev/null +++ b/dev_scripts_helpers/system_tools/it @@ -0,0 +1,4 @@ +#!/bin/bash +# Equivalent to `alias it="invoke traceback"`. + +invoke traceback $* diff --git a/dev_scripts_helpers/thin_client/setenv.sh b/dev_scripts_helpers/thin_client/setenv.sh old mode 100755 new mode 100644 diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index df3cbd127..01a9fa7d2 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -13,6 +13,9 @@ INFO="${GREEN}INFO${NC}" WARNING="${YELLOW}WARNING${NC}" ERROR="${RED}ERROR${NC}" +#_VERB_LEVEL=2 +_VERB_LEVEL=0 + echo -e -n $NC @@ -83,16 +86,42 @@ dassert_is_git_root() { } +# TODO(gp): -> dassert_not_empty? dassert_var_defined() { + # Check if a variable is defined. + # It needs to be called as `dassert_var_defined GIT_ROOT` and not + # `dassert_var_defined "$GIT_ROOT"`. local var_name="$1" # Use indirect expansion to check the value of the variable. if [[ -z "${!var_name}" ]]; then echo -e "${ERROR}: Var '${var_name}' is not defined or is empty." - exit 1 + kill -INT $$ fi; } +dassert_eq_num_args() { + # Check if the number of arguments passed matches expected count. + local actual_args=$1 + local expected_args=$2 + local func_name=$3 + if [[ $actual_args -ne $expected_args ]]; then + echo -e "${ERROR}: Function '$func_name' requires exactly $expected_args arguments, but got $actual_args" + kill -INT $$ + fi +} + + +dtrace() { + # Print a debug message if _VERB_LEVEL > 1. + dassert_eq_num_args $# 1 "dtrace" + local msg="$1" + if [[ "${_VERB_LEVEL:-0}" -gt 1 ]]; then + echo -e "$msg" + fi +} + + remove_dups() { # Remove duplicates. 
local vars="$1" @@ -198,13 +227,25 @@ set_csfy_env_vars() { set_path() { - echo "# set_path()" + # Process interface. + dassert_eq_num_args $# 1 "set_path" local dev_script_dir=$1 + # dassert_dir_exists $dev_script_dir + dtrace "dev_script_dir=$dev_script_dir" + # export PATH=$(pwd):$PATH + dtrace "GIT_ROOT=$GIT_ROOT" + dassert_var_defined "GIT_ROOT" + # export PATH=$GIT_ROOT_DIR:$PATH + # Avoid ./.mypy_cache/3.12/app/dev_scripts_helpers + DEV_SCRIPT_HELPER_DIR=$(find . -name dev_scripts_helpers -type d -not -path "*.mypy_cache*") + dassert_dir_exists $DEV_SCRIPT_HELPER_DIR + dtrace "DEV_SCRIPT_HELPER_DIR=$DEV_SCRIPT_HELPER_DIR" # Add to the PATH all the first level directory under `dev_scripts`. - export PATH_TMP="$(find $dev_script_dir -maxdepth 1 -type d -not -path "$(pwd)" | tr '\n' ':' | sed 's/:$//')" + export PATH_TMP="$(find $DEV_SCRIPT_HELPER_DIR -maxdepth 1 -type d -not -path "$(pwd)" | tr '\n' ':' | sed 's/:$//')" + dtrace "PATH_TMP=$PATH_TMP" export PATH=$PATH_TMP:$PATH # Remove duplicates. export PATH=$(remove_dups $PATH) @@ -292,20 +333,11 @@ configure_specific_project() { export DEV2="172.30.2.128" # Print some specific env vars. - printenv | egrep "AM_|CK|AWS_" | sort + printenv | egrep "AM_|CK_|AWS_|CSFY_" | sort # Set up custom path to the alembic.ini file. # See https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file export ALEMBIC_CONFIG="alembic/alembic.ini" - - alias i="invoke" - alias it="invoke traceback" - alias itpb="pbpaste | traceback_to_cfile.py -i - -o cfile" - alias ih="invoke --help" - alias il="invoke --list" - - # Add autocomplete for `invoke`. - #source $AMP/dev_scripts/invoke_completion.sh } diff --git a/devops/docker_run/docker_setenv.sh b/devops/docker_run/docker_setenv.sh index afa8f059e..1193a48d0 100644 --- a/devops/docker_run/docker_setenv.sh +++ b/devops/docker_run/docker_setenv.sh @@ -25,12 +25,19 @@ source $SOURCE_PATH # - Activate venv. activate_docker_venv +# - Configure environment variables. 
+ # Check that the required environment vars are defined and non-empty. dassert_var_defined "CSFY_USE_HELPERS_AS_NESTED_MODULE" dassert_var_defined "CSFY_HOST_GIT_ROOT_PATH" dassert_var_defined "CSFY_GIT_ROOT_PATH" dassert_var_defined "CSFY_HELPERS_ROOT_PATH" +# TODO(gp): The variable `GIT_ROOT` is used by many scripts inside Docker but it +# should be defined in the environment. +export GIT_ROOT=$CSFY_GIT_ROOT_PATH +dassert_dir_exists $GIT_ROOT + # Check that helpers_root path exists. dassert_dir_exists $CSFY_HELPERS_ROOT_PATH diff --git a/devops/docker_run/entrypoint.sh b/devops/docker_run/entrypoint.sh index 9e5adfdc9..9a9eeaa83 100755 --- a/devops/docker_run/entrypoint.sh +++ b/devops/docker_run/entrypoint.sh @@ -89,4 +89,4 @@ echo "PATH=$PATH" echo "PYTHONPATH=$PYTHONPATH" echo "# entrypoint.sh: '$@'" -eval "$@" +eval "$@" \ No newline at end of file diff --git a/helpers/hdocker.py b/helpers/hdocker.py index fa45c03ea..bb28cba1e 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1186,34 +1186,9 @@ def run_dockerized_latex( """ _LOG.debug(hprint.func_signature_to_str()) container_image = "tmp.latex" - # Doesn't exist. - if False: - dockerfile = r""" - # Use a lightweight base image. - # FROM debian:bullseye-slim - FROM ubuntu:22.04 - """ - if False: - dockerfile = r""" - # Use minimal multi-arch TeX Live image (includes ARM support) - FROM ghcr.io/xu-cheng/texlive:latest - """ - if True: - dockerfile = r""" - FROM mfisherman/texlive-full - - # Verify LaTeX is installed. - RUN latex --version - - # Default command. - CMD [ "bash" ] - """ - # Doesn't work. - if False: - dockerfile = r""" - # Use a lightweight base image. - # FROM debian:bullseye-slim - FROM ubuntu:22.04 + dockerfile = r""" + # Use a lightweight base image. + FROM debian:bullseye-slim # Set environment variables to avoid interactive prompts. 
ENV DEBIAN_FRONTEND=noninteractive @@ -1224,39 +1199,17 @@ def run_dockerized_latex( rm -rf /var/lib/apt/lists/* && \ apt-get update - # Install only the minimal TeX Live packages. - RUN apt-get install -y --no-install-recommends \ - texlive-latex-base \ - texlive-latex-recommended \ - texlive-fonts-recommended \ - texlive-latex-extra \ - lmodern \ - tikzit \ - || apt-get install -y --fix-missing - """ - # - if False: - dockerfile = r""" - # Use a lightweight base image. - # FROM debian:bullseye-slim - FROM ubuntu:22.04 - - # Set environment variables to avoid interactive prompts. - ENV DEBIAN_FRONTEND=noninteractive - - RUN rm -rf /var/lib/apt/lists/* - # Update. - RUN apt-get clean && \ - apt-get update - - # Install texlive-full. - RUN apt install -y texlive-full - """ - # Clean up. - if False: - dockerfile += r""" - RUN rm -rf /var/lib/apt/lists/* \ - && apt-get clean + # Install only the minimal TeX Live packages. + RUN apt-get install -y --no-install-recommends \ + texlive-latex-base \ + texlive-latex-recommended \ + texlive-fonts-recommended \ + texlive-latex-extra \ + lmodern \ + tikzit + + RUN rm -rf /var/lib/apt/lists/* \ + && apt-get clean # Verify LaTeX is installed. RUN latex --version diff --git a/helpers/lib_tasks.py b/helpers/lib_tasks.py index 7333344d2..12c9903fd 100644 --- a/helpers/lib_tasks.py +++ b/helpers/lib_tasks.py @@ -12,6 +12,7 @@ # Import this way to avoid complexity in propagating the refactoring in all # the repos downstream. 
from helpers.lib_tasks_aws import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import +from helpers.lib_tasks_bash import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.lib_tasks_docker import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.lib_tasks_docker_release import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.lib_tasks_find import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import @@ -23,6 +24,7 @@ from helpers.lib_tasks_print import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.lib_tasks_pytest import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import from helpers.lib_tasks_utils import * # isort:skip # noqa: F401,F403 # pylint: disable=unused-import,unused-wildcard-import,wildcard-import + _LOG = logging.getLogger(__name__) diff --git a/helpers/lib_tasks_bash.py b/helpers/lib_tasks_bash.py new file mode 100644 index 000000000..5f3c205d4 --- /dev/null +++ b/helpers/lib_tasks_bash.py @@ -0,0 +1,54 @@ +""" +Import as: + +import helpers.lib_tasks_find as hlitafin +""" + +import functools +import glob +import logging +import os +import re +from typing import Iterator, List, Optional, Tuple + +from invoke import task + +# We want to minimize the dependencies from non-standard Python packages since +# this code needs to run with minimal dependencies and without Docker. +import helpers.hsystem as hsystem + +_LOG = logging.getLogger(__name__) + +# TODO(gp): GFI: Unit test. +@task +def bash_print_path(ctx): # type: ignore + """ + Print the bash path. 
+ """ + _ = ctx + cmd = r"echo $PATH | sed 's/:/\n/g'" + _, ret = hsystem.system_to_string(cmd) + paths = ret.split("\n") + paths.sort() + # + all_paths = [] + # Remove empty lines. + for path in paths: + if path.strip() == "": + _LOG.error("Empty path: '%s'", path) + continue + if not os.path.exists(path): + _LOG.error("Dir doesn't exist: '%s'", path) + continue + if not os.path.isdir(path): + _LOG.error("Not a dir: '%s'", path) + continue + # TODO(gp): Make it efficient. + if paths.count(path) > 1: + _LOG.error("Duplicate path: '%s'", path) + continue + all_paths.append(path) + # Print the paths. + _LOG.info("Valid paths:") + for path in all_paths: + print(path) diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index ee141074b..ee96a2cfd 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1567,84 +1567,84 @@ def test7(self) -> None: self.assert_equal(actual, expected) def test8(self) -> None: - text = r""" -- **\red{Objective}** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - -- **\orange{Key Components}** - - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - Utility update: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ - -- **\blue{Learning Process}** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - -- **\violet{Advantages}** - - More sample-efficient than direct utility estimation - - Leverages structure of the MDP to generalize better - -- **\pink{Challenges}** - - Requires accurate model estimation - - Computational cost of solving Bellman equations repeatedly - -- **\olive{Example}** - - A thermostat estimates room 
temperature dynamics and uses them to predict - comfort level under a fixed heating schedule - -- **\darkgray{Use Case}** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ + text = hprint.dedent(r""" + - **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **\orange{Key Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **\blue{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **\violet{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + + - **\pink{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + + - **\olive{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + + - **\darkgray{Use Case}** + - Suitable when environment dynamics are stationary and can be learned from + interaction + """) actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = r""" -- **\red{Objective}** - - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated - model of the environment - -- **\yellow{Key Components}** - - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and - reward function $R(s,a)$ from experience - - Utility update: Solve the Bellman equations for the fixed policy: - - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) 
U^\pi(s')$ - -- **\green{Learning Process}** - - Collect transitions $(s, \pi(s), r, s')$ during execution - - Update model estimates: - - $\Pr(s'|s,a) \approx$ empirical frequency - - $R(s,a) \approx$ average observed reward - - Use dynamic programming to compute $U^\pi(s)$ - -- **\blue{Advantages}** - - More sample-efficient than direct utility estimation - - Leverages structure of the MDP to generalize better - -- **\violet{Challenges}** - - Requires accurate model estimation - - Computational cost of solving Bellman equations repeatedly - -- **\brown{Example}** - - A thermostat estimates room temperature dynamics and uses them to predict - comfort level under a fixed heating schedule - -- **\gray{Use Case}** - - Suitable when environment dynamics are stationary and can be learned from - interaction - """ + expected = hprint.dedent(r""" + - **\red{Objective}** + - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated + model of the environment + + - **\orange{Key Components}** + - Model learning: Estimate transition probabilities $\Pr(s'|s,a)$ and + reward function $R(s,a)$ from experience + - Utility update: Solve the Bellman equations for the fixed policy: + - $U^\pi(s) = R(s, \pi(s)) + \gamma \sum_{s'} \Pr(s'|s, \pi(s)) U^\pi(s')$ + + - **\olive{Learning Process}** + - Collect transitions $(s, \pi(s), r, s')$ during execution + - Update model estimates: + - $\Pr(s'|s,a) \approx$ empirical frequency + - $R(s,a) \approx$ average observed reward + - Use dynamic programming to compute $U^\pi(s)$ + + - **\green{Advantages}** + - More sample-efficient than direct utility estimation + - Leverages structure of the MDP to generalize better + + - **\cyan{Challenges}** + - Requires accurate model estimation + - Computational cost of solving Bellman equations repeatedly + + - **\blue{Example}** + - A thermostat estimates room temperature dynamics and uses them to predict + comfort level under a fixed heating schedule + + - **\darkgray{Use Case}** + 
- Suitable when environment dynamics are stationary and can be learned from + interaction + """) self.assert_equal(actual, expected) # ############################################################################# -# Test_format_compressed_markdown1 +# Test_format_first_level_bullets1 # ############################################################################# -class Test_format_compressed_markdown1(hunitest.TestCase): +class Test_format_first_level_bullets1(hunitest.TestCase): def test1(self) -> None: """ @@ -1812,7 +1812,7 @@ def test9(self) -> None: self._format_and_compare_markdown(text, expected) def test10(self) -> None: - text = r""" + text = hprint.dedent(r""" - **Objective** - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated @@ -1836,8 +1836,8 @@ def test10(self) -> None: - **Use Case** - Suitable when environment dynamics are stationary and can be learned from interaction - """ - expected = r""" + """) + expected = hprint.dedent(r""" - **Objective** - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated model of the environment @@ -1858,12 +1858,12 @@ def test10(self) -> None: - **Use Case** - Suitable when environment dynamics are stationary and can be learned from interaction - """ + """) self._format_and_compare_markdown(text, expected) def _format_and_compare_markdown(self, text: str, expected: str) -> None: text = hprint.dedent(text) expected = hprint.dedent(expected) # - actual = hmarkdo.format_compressed_markdown(text) + actual = hmarkdo.format_first_level_bullets(text) self.assert_equal(actual, expected) diff --git a/tasks.py b/tasks.py index 72f35fb6c..c208032b9 100644 --- a/tasks.py +++ b/tasks.py @@ -12,7 +12,11 @@ set_default_params, ) +# TODO(gp): Remove the lib_tasks import and import directly from lib_tasks_*.py files. +# TODO(gp): How to automatically discovery the paths? 
+ from helpers.lib_tasks import ( # isort: skip # noqa: F401 # pylint: disable=unused-import + # docker_bash, docker_build_local_image, docker_build_multi_arch_prod_image, @@ -38,6 +42,9 @@ docker_tag_local_image_as_dev, # TODO(gp): -> docker_release_... docker_tag_push_multi_arch_prod_image, docker_update_prod_task_definition, + # + bash_print_path, + # find, find_check_string_output, find_dependency, From 8872bb0f43e1b46d2b4ac2e12217573e31e19d8a Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 22 May 2025 20:31:49 -0400 Subject: [PATCH 107/193] Merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/lint_notes.py | 1 + dev_scripts_helpers/llms/llm_prompts.py | 23 +++++++++++++++++++ dev_scripts_helpers/llms/llm_transform.py | 9 ++++++++ helpers/hdocker.py | 21 +++++++++++------ helpers/hmarkdown.py | 11 ++++++++- 5 files changed, 57 insertions(+), 8 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index 8f22ca726..feee2038e 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -32,6 +32,7 @@ _LOG = logging.getLogger(__name__) + # ############################################################################# diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 039d6d612..af0a27258 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -706,6 +706,29 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT: post_container_transforms = ["format_markdown"] return system, pre_transforms, post_transforms, post_container_transforms +# ############################################################################# +# Latex +# ############################################################################# + + +_LATEX_CONTEXT = r""" 
+ You are a proficient technical writer. + I will pass you a chunk of Latex code. + """ + + +def latex_rewrite() -> _PROMPT_OUT: + system = _LATEX_CONTEXT + system += r""" + - Rewrite the text passed to increase clarity and readability. + - Maintain the structure of the text as much as possible, in terms of bullet + points and their indentation + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = [] + return system, pre_transforms, post_transforms, post_container_transforms + # ############################################################################# # Doc. diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 049e4046f..293af0b4b 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -287,6 +287,15 @@ def _main(parser: argparse.ArgumentParser) -> None: if args.bold_first_level_bullets: out_txt = hmarkdo.bold_first_level_bullets(out_txt) # + if dshlllpr.to_run("format_latex", post_container_transforms): + # Note that we need to run this outside the `llm_transform` + # container to avoid to do docker-in-docker in the `llm_transform` + # container (which doesn't support that). + out_txt = hmarkdo.md_clean_up(out_txt) + out_txt = hmarkdo.format_markdown(out_txt) + if args.bold_first_level_bullets: + out_txt = hmarkdo.bold_first_level_bullets(out_txt) + # if dshlllpr.to_run("format_slide", post_container_transforms): # Note that we need to run this outside the `llm_transform` # container to avoid to do docker-in-docker in the `llm_transform` diff --git a/helpers/hdocker.py b/helpers/hdocker.py index bb28cba1e..b6737ad48 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -571,14 +571,18 @@ def run_dockerized_prettier( Run `prettier` in a Docker container. 
From host: + ``` > ./dev_scripts_helpers/documentation/dockerized_prettier.py \ --input /Users/saggese/src/helpers1/test.md --output test2.md > ./dev_scripts_helpers/documentation/dockerized_prettier.py \ --input test.md --output test2.md + ``` From dev container: + ``` docker> ./dev_scripts_helpers/documentation/dockerized_prettier.py \ --input test.md --output test2.md + ``` :param in_file_path: Path to the file to format with Prettier. :param out_file_path: Path to the output file. @@ -591,16 +595,19 @@ def run_dockerized_prettier( # Build the container, if needed. container_image = "tmp.prettier" dockerfile = r""" - # Use a Node.js image - FROM node:18 + # Use a Node.js image. + FROM node:20-slim - # Install Prettier globally + # Install Prettier globally. RUN npm install -g prettier + # The last version is broken + # npm warn deprecated @unified-latex/unified-latex-prettier@2.4.2: Incorrect version number + RUN npm install -g @unified-latex/unified-latex-prettier@1.8.2 - # Set a working directory inside the container + # Set a working directory inside the container. WORKDIR /app - # Run Prettier as the entry command + # Run Prettier as the entry command. ENTRYPOINT ["prettier"] """ container_image = build_container_image( @@ -1208,8 +1215,8 @@ def run_dockerized_latex( lmodern \ tikzit - RUN rm -rf /var/lib/apt/lists/* \ - && apt-get clean + RUN rm -rf /var/lib/apt/lists/* && \ + apt-get clean # Verify LaTeX is installed. 
RUN latex --version diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 124d9aa46..5f518ad33 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -842,7 +842,11 @@ def capitalize_word(word: str, is_first_or_last: bool) -> str: ).strip() -# In this adjusted code, I replaced the complex inline assignments with `if-then-else` structures, incorporated informative docstrings into the function definitions using REST style for clarity, added comments explaining significant code sections, and ensured comments were in imperative form and grammatically correct. +# In this adjusted code, I replaced the complex inline assignments with +# `if-then-else` structures, incorporated informative docstrings into the +# function definitions using REST style for clarity, added comments explaining +# significant code sections, and ensured comments were in imperative form and +# grammatically correct. def capitalize_first_level_bullets(markdown_text: str) -> str: @@ -1023,6 +1027,11 @@ def format_markdown(txt: str) -> str: return txt +def format_latex(txt: str) -> str: + txt = dshdlino.prettier_on_str(txt) + return txt + + def format_markdown_slide(txt: str) -> str: """ Format markdown text for a slide. 
From 1915eff59b014cf9b0723f7ca37b67941a89cd8f Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 23 May 2025 07:12:47 -0400 Subject: [PATCH 108/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../chatgpt/run_simple_chatgpt.py | 3 +- .../documentation/lint_notes.py | 89 +++++++++---------- .../documentation/test/test_lint_notes.py | 2 +- .../llms/dockerized_llm_apply_cfile.py | 2 +- .../llms/dockerized_llm_transform.py | 2 +- dev_scripts_helpers/llms/llm_apply_cfile.py | 2 +- dev_scripts_helpers/llms/llm_prompts.py | 1 + dev_scripts_helpers/llms/llm_transform.py | 7 +- helpers/hmarkdown.py | 21 +++-- helpers/hparser.py | 5 +- 10 files changed, 73 insertions(+), 61 deletions(-) diff --git a/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py b/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py index d8b0e81d6..09439253a 100755 --- a/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py +++ b/dev_scripts_helpers/chatgpt/run_simple_chatgpt.py @@ -54,13 +54,14 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hparser.init_logger_for_input_output_transform(args) # + hparser.init_logger_for_input_output_transform(args) in_file_name, out_file_name = hparser.parse_input_output_args( args, clear_screen=True ) txt = hparser.read_file(in_file_name) txt = "\n".join(txt) + # hdbg.dassert_in(args.instruction, hchainst.instructions) instruction = hchainst.instructions[args.instruction] result = _process_text(txt, instruction) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index feee2038e..dd2010cb6 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -101,6 +101,7 @@ def _preprocess(txt: str) -> str: def prettier( in_file_path: str, out_file_path: str, + file_type: str, 
*, print_width: int = 80, use_dockerized_prettier: bool = True, @@ -110,21 +111,33 @@ def prettier( """ Format the given text using Prettier. + :param in_file_path: The path to the input file. + :param out_file_path: The path to the output file. + :param file_type: The type of file to be formatted, e.g., `md` or `tex`. :param print_width: The maximum line width for the formatted text. If None, the default width is used. :param use_dockerized_prettier: Whether to use a Dockerized version of Prettier. :return: The formatted text. """ + hdbg.dassert_in(file_type, ["md", "tex", "txt"]) + # Build command options. cmd_opts: List[str] = [] - cmd_opts.append("--parser markdown") - cmd_opts.append("--prose-wrap always") tab_width = 2 - cmd_opts.append(f"--tab-width {tab_width}") - if print_width is not None: - hdbg.dassert_lte(1, print_width) - cmd_opts.append(f"--print-width {print_width}") - # + if file_type == "tex": + cmd_opts.append("--plugin=prettier-plugin-latex") + elif file_type in ("md", "txt"): + cmd_opts.append("--parser markdown") + else: + raise ValueError(f"Invalid file type: {file_type}") + hdbg.dassert_lte(1, print_width) + cmd_opts.extend([ + f"--print-width {print_width}", + "--prose-wrap always", + f"--tab-width {tab_width}", + "--use-tabs false", + ]) + # Run prettier. if use_dockerized_prettier: # Run `prettier` in a Docker container. force_rebuild = False @@ -153,6 +166,7 @@ def prettier( # TODO(gp): Convert this into a decorator to adapt operations that work on # files to passing strings. +# TODO(gp): Move this to `hmarkdown.py`. def prettier_on_str( txt: str, *args: Any, @@ -163,6 +177,7 @@ def prettier_on_str( """ _LOG.debug("txt=\n%s", txt) # Save string as input. + # TODO(gp): Use a context manager. curr_dir = os.getcwd() tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name hio.to_file(tmp_file_name, txt) @@ -345,6 +360,7 @@ def _process( :return: The processed text. 
""" is_md_file = in_file_name.endswith(".md") + extension = os.path.splitext(in_file_name)[1] # Pre-process text. action = "preprocess" if _to_execute_action(action, actions): @@ -352,7 +368,7 @@ def _process( # Prettify. action = "prettier" if _to_execute_action(action, actions): - txt = prettier_on_str(txt, **kwargs) + txt = prettier_on_str(txt, file_type=extension, **kwargs) # Post-process text. action = "postprocess" if _to_execute_action(action, actions): @@ -388,23 +404,12 @@ def _parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) + hparser.add_input_output_args(parser) parser.add_argument( - "-i", - "--infile", - nargs="?", - type=argparse.FileType("r"), - default=sys.stdin, - ) - parser.add_argument( - "-o", - "--outfile", - nargs="?", - type=argparse.FileType("w"), - default=sys.stdout, - ) - parser.add_argument( - "--in_place", - action="store_true", + "--type", + action="store", + type=str, + default="", ) parser.add_argument( "-w", @@ -422,27 +427,27 @@ def _parser() -> argparse.ArgumentParser: action="store_true", ) hparser.add_action_arg(parser, _VALID_ACTIONS, _DEFAULT_ACTIONS) + hparser.add_dockerized_script_arg(parser) hparser.add_verbosity_arg(parser) return parser -def _main(args: argparse.Namespace) -> None: - in_file_name = args.infile.name - from_stdin = in_file_name == "<stdin>" - hdbg.init_logger( - verbosity=args.log_level, use_exec_path=False, force_white=from_stdin +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hparser.init_logger_for_input_output_transform(args) + # + in_file_name, out_file_name = hparser.parse_input_output_args( + args, clear_screen=True ) + # If the input is stdin, then user needs to specify the type. + if in_file_name != "-": + hdbg.dassert_ne(args.type, "") # Read input. 
+ txt = hparser.read_file(in_file_name) + txt = "\n".join(txt) _LOG.debug("in_file_name=%s", in_file_name) - if not from_stdin: - hdbg.dassert( - in_file_name.endswith(".txt") or in_file_name.endswith(".md"), - "Invalid extension for file name '%s'", - in_file_name, - ) - txt = args.infile.read() # Process. - txt = _process( + out_txt = _process( txt, in_file_name, actions=args.action, @@ -451,14 +456,8 @@ def _main(args: argparse.Namespace) -> None: use_dockerized_markdown_toc=args.use_dockerized_markdown_toc, ) # Write output. - if args.in_place: - hdbg.dassert_ne(in_file_name, "<stdin>") - hio.to_file(in_file_name, txt) - else: - args.outfile.write(txt) + hparser.write_file(out_txt, out_file_name) if __name__ == "__main__": - parser_ = _parser() - args_ = parser_.parse_args() - _main(args_) + _main(_parser()) diff --git a/dev_scripts_helpers/documentation/test/test_lint_notes.py b/dev_scripts_helpers/documentation/test/test_lint_notes.py index 3333a963c..58ffc644c 100644 --- a/dev_scripts_helpers/documentation/test/test_lint_notes.py +++ b/dev_scripts_helpers/documentation/test/test_lint_notes.py @@ -240,7 +240,7 @@ def test_process_prettier_bug1(self) -> None: For some reason prettier replaces - with * when there are 2 empty lines. 
""" txt = self._get_text_problematic_for_prettier1() - act = dshdlino.prettier_on_str(txt) + act = dshdlino.prettier_on_str(txt, file_type="txt") exp = r""" - Python formatting diff --git a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py index bf8d32955..48bb36292 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/dockerized_llm_apply_cfile.py @@ -151,7 +151,7 @@ def _parse() -> argparse.ArgumentParser: required=True, help="Path to the cfile", ) - hparser.add_prompt_arg(parser) + hparser.add_llm_prompt_arg(parser) hparser.add_verbosity_arg(parser, log_level="CRITICAL") return parser diff --git a/dev_scripts_helpers/llms/dockerized_llm_transform.py b/dev_scripts_helpers/llms/dockerized_llm_transform.py index b27a6d528..03b0246d0 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_transform.py +++ b/dev_scripts_helpers/llms/dockerized_llm_transform.py @@ -27,7 +27,7 @@ def _parse() -> argparse.ArgumentParser: formatter_class=argparse.RawDescriptionHelpFormatter, ) hparser.add_input_output_args(parser) - hparser.add_prompt_arg(parser) + hparser.add_llm_prompt_arg(parser) hparser.add_verbosity_arg(parser, log_level="CRITICAL") return parser diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py index f4d194af1..0e834a0e7 100755 --- a/dev_scripts_helpers/llms/llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -44,7 +44,7 @@ def _parse() -> argparse.ArgumentParser: required=True, help="Path to the cfile", ) - hparser.add_prompt_arg(parser) + hparser.add_llm_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) # Use CRITICAL to avoid logging anything. 
hparser.add_verbosity_arg(parser, log_level="CRITICAL") diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index af0a27258..179080404 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -706,6 +706,7 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT: post_container_transforms = ["format_markdown"] return system, pre_transforms, post_transforms, post_container_transforms + # ############################################################################# # Latex # ############################################################################# diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 293af0b4b..83defdd9e 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -48,6 +48,7 @@ _LOG = logging.getLogger(__name__) +# TODO(gp): -> _parser() or _get_parser() everywhere. def _parse() -> argparse.ArgumentParser: """ Use the same argparse parser for `dockerized_llm_transform.py`. @@ -61,7 +62,7 @@ def _parse() -> argparse.ArgumentParser: in_default="-", in_required=False, ) - hparser.add_prompt_arg(parser) + hparser.add_llm_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) parser.add_argument( "-d", @@ -228,15 +229,17 @@ def _convert_file_names(in_file_name: str, out_file_name: str) -> None: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() hparser.init_logger_for_input_output_transform(args) + # if args.prompt == "list": print("# Available prompt tags:") print("\n".join(dshlllpr.get_prompt_tags())) return # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) + tag = "llm_transform" tmp_in_file_name, tmp_out_file_name = ( hparser.adapt_input_output_args_for_dockerized_scripts( - in_file_name, "llm_transform" + in_file_name, tag ) ) # TODO(gp): We should just automatically pass-through the options. 
diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 5f518ad33..f7a0223fd 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -876,6 +876,7 @@ def capitalize_first_level_bullets(markdown_text: str) -> str: result.append(line) return "\n".join(result) + # These are the colors that are supported by Latex / markdown, are readable on # white, and form an equidistant color palette. _ALL_COLORS = [ @@ -1014,7 +1015,8 @@ def prettier_markdown(txt: str) -> str: """ Format markdown text using `prettier`. """ - txt = dshdlino.prettier_on_str(txt) + file_type = "md" + txt = dshdlino.prettier_on_str(txt, file_type) return txt @@ -1022,16 +1024,12 @@ def format_markdown(txt: str) -> str: """ Format markdown text. """ - txt = dshdlino.prettier_on_str(txt) + file_type = "md" + txt = dshdlino.prettier_on_str(txt, file_type) txt = remove_empty_lines_from_markdown(txt) return txt -def format_latex(txt: str) -> str: - txt = dshdlino.prettier_on_str(txt) - return txt - - def format_markdown_slide(txt: str) -> str: """ Format markdown text for a slide. @@ -1039,7 +1037,14 @@ def format_markdown_slide(txt: str) -> str: # Split the text into title and body. txt = bold_first_level_bullets(txt) - txt = dshdlino.prettier_on_str(txt) + file_type = "md" + txt = dshdlino.prettier_on_str(txt, file_type) txt = format_first_level_bullets(txt) #txt = capitalize_slide_titles(txt) return txt + + +def format_latex(txt: str) -> str: + file_type = "tex" + txt = dshdlino.prettier_on_str(txt, file_type) + return txt diff --git a/helpers/hparser.py b/helpers/hparser.py index 1cb6d391e..8421bf879 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -372,6 +372,9 @@ def parse_input_output_args( def init_logger_for_input_output_transform(args: argparse.Namespace) -> None: + """ + Initialize the logger when input/output transformation is used. 
+ """ verbosity = args.log_level # If the input is stdin, we don't want to print the command line or any # other log messages, unless the user specified a more verbose log level. @@ -654,7 +657,7 @@ def add_dockerized_script_arg( return parser -def add_prompt_arg( +def add_llm_prompt_arg( parser: argparse.ArgumentParser, ) -> argparse.ArgumentParser: """ From ce73b113e9ac4564d5afdb9824cb8dd8cf2784f1 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 23 May 2025 17:28:49 -0400 Subject: [PATCH 109/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/lint_notes.py | 10 +++-- helpers/hdocker.py | 5 ++- helpers/hsystem.py | 44 ++++++++++++------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index dd2010cb6..bc6cf3793 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -120,6 +120,7 @@ def prettier( of Prettier. :return: The formatted text. """ + _LOG.debug(hprint.func_signature_to_str()) hdbg.dassert_in(file_type, ["md", "tex", "txt"]) # Build command options. cmd_opts: List[str] = [] @@ -179,7 +180,7 @@ def prettier_on_str( # Save string as input. # TODO(gp): Use a context manager. curr_dir = os.getcwd() - tmp_file_name = tempfile.NamedTemporaryFile(dir=curr_dir).name + tmp_file_name = tempfile.NamedTemporaryFile(prefix="tmp.prettier_on_str.", dir=curr_dir).name hio.to_file(tmp_file_name, txt) # Call `prettier` in-place. prettier(tmp_file_name, tmp_file_name, *args, **kwargs) @@ -361,6 +362,9 @@ def _process( """ is_md_file = in_file_name.endswith(".md") extension = os.path.splitext(in_file_name)[1] + # Remove the . from the extenstion (e.g., ".txt"). 
+ hdbg.dassert(extension.startswith("."), "Invalid extension='%s'", extension) + extension = extension[1:] # Pre-process text. action = "preprocess" if _to_execute_action(action, actions): @@ -416,7 +420,7 @@ def _parser() -> argparse.ArgumentParser: "--print-width", action="store", type=int, - default=None, + default=80, ) parser.add_argument( "--use_dockerized_prettier", @@ -440,7 +444,7 @@ def _main(parser: argparse.ArgumentParser) -> None: args, clear_screen=True ) # If the input is stdin, then user needs to specify the type. - if in_file_name != "-": + if in_file_name == "-": hdbg.dassert_ne(args.type, "") # Read input. txt = hparser.read_file(in_file_name) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index b6737ad48..a2a397d5f 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -598,11 +598,14 @@ def run_dockerized_prettier( # Use a Node.js image. FROM node:20-slim + RUN npm install -g npm@latest + RUN npm cache clean --force + # Install Prettier globally. RUN npm install -g prettier # The last version is broken # npm warn deprecated @unified-latex/unified-latex-prettier@2.4.2: Incorrect version number - RUN npm install -g @unified-latex/unified-latex-prettier@1.8.2 + #RUN npm install @unified-latex/unified-latex-prettier@1.8.2 --legacy-peer-deps # Set a working directory inside the container. WORKDIR /app diff --git a/helpers/hsystem.py b/helpers/hsystem.py index 45ed5e464..a307c9b9e 100644 --- a/helpers/hsystem.py +++ b/helpers/hsystem.py @@ -243,23 +243,35 @@ def _system( _LOG.error("error=%s", str(e)) _LOG.debug(" ==> rc=%s", rc) if abort_on_error and rc != 0: - msg = ( - "\n" - + hprint.frame(f"cmd='{cmd}' failed with rc='{rc}'") - + f"\nOutput of the failing command is:\n{hprint.line('>')}" - + f"\n{output}\n{hprint.line('<')}" - ) - _LOG.error("%s", msg) - # Report the first `num_error_lines` of the output. 
+ # msg = ( + # "\n" + # + hprint.frame(f"cmd='{cmd}' failed with rc='{rc}'") + # + f"\nOutput of the failing command is:\n{hprint.line('>')}" + # + f"\n{output}\n{hprint.line('<')}" + # ) + # _LOG.error("%s", msg) + # Report the last `num_error_lines` of the output. num_error_lines = num_error_lines or 30 - output_error = "\n".join(output.split("\n")[:num_error_lines]) - msg = f"_system failed: cmd='{cmd}'" - msg = ( - "\n" - + hprint.frame(msg, char1="%", thickness=2) - + "\n" - + f"truncated output=\n{output_error}" - ) + output_error = "\n".join(output.split("\n")[-num_error_lines:]) + msg = [] + msg.append("\n" + hprint.frame("_system() failed", thickness=2)) + msg.append(hprint.func_signature_to_str()) + msg.append(hprint.frame(f"cmd='{cmd}'", char1="%", thickness=1)) + msg.append(f"- rc='{rc}'") + msg.append(f"- output='\n{output_error}'") + # Save the output in a file. + file_name = "tmp.system_output.txt" + with open(file_name, "w") as f: + f.write(output) + msg.append(f"- Output saved in '{file_name}'") + # Save the command in an executable file. 
+ file_name = "tmp.system_cmd.sh" + msg.append(f"- Command saved in '{file_name}'") + with open(file_name, "w") as f: + f.write(cmd) + os.chmod(file_name, 0o755) + # + msg = "\n".join(msg) raise RuntimeError(msg) # hdbg.dassert_type_in(output, (str, )) return rc, output From 4f77152ff962c786479473b62ce83e3934f9a8e0 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 23 May 2025 18:54:01 -0400 Subject: [PATCH 110/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/documentation/lint_notes.py | 5 ++++- helpers/hdocker.py | 13 ++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index bc6cf3793..f82994438 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -126,7 +126,8 @@ def prettier( cmd_opts: List[str] = [] tab_width = 2 if file_type == "tex": - cmd_opts.append("--plugin=prettier-plugin-latex") + #cmd_opts.append("--plugin=prettier-plugin-latex") + cmd_opts.append("--plugin=@unified-latex/unified-latex-prettier") elif file_type in ("md", "txt"): cmd_opts.append("--parser markdown") else: @@ -153,6 +154,7 @@ def prettier( else: # Run `prettier` installed on the host directly. executable = "prettier" + #executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" cmd = [executable] + cmd_opts if in_file_path == out_file_path: cmd.append("--write") @@ -181,6 +183,7 @@ def prettier_on_str( # TODO(gp): Use a context manager. curr_dir = os.getcwd() tmp_file_name = tempfile.NamedTemporaryFile(prefix="tmp.prettier_on_str.", dir=curr_dir).name + tmp_file_name += ".tex" hio.to_file(tmp_file_name, txt) # Call `prettier` in-place. 
prettier(tmp_file_name, tmp_file_name, *args, **kwargs) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index a2a397d5f..494c31079 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -596,16 +596,11 @@ def run_dockerized_prettier( container_image = "tmp.prettier" dockerfile = r""" # Use a Node.js image. - FROM node:20-slim + FROM node:18-slim - RUN npm install -g npm@latest - RUN npm cache clean --force - - # Install Prettier globally. - RUN npm install -g prettier - # The last version is broken - # npm warn deprecated @unified-latex/unified-latex-prettier@2.4.2: Incorrect version number - #RUN npm install @unified-latex/unified-latex-prettier@1.8.2 --legacy-peer-deps + RUN npm install -g prettier@2.7.0 + RUN npm install -g @unified-latex/unified-latex-prettier@1.7.1 + RUN npm install -g prettier-plugin-latex@2.0.1 # Set a working directory inside the container. WORKDIR /app From 1d9d40ae119bd87e9801620a7df3e6ad3e2cf89e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 23 May 2025 19:16:02 -0400 Subject: [PATCH 111/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hdocker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 494c31079..7598abda9 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -650,7 +650,9 @@ def run_dockerized_prettier( # tmp.prettier \ # --parser markdown --prose-wrap always --write --tab-width 2 \ # ./test.md - bash_cmd = f"/usr/local/bin/prettier {cmd_opts_as_str} {in_file_path}" + executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" + #bash_cmd = f"/usr/local/bin/prettier {cmd_opts_as_str} {in_file_path}" + bash_cmd = f"NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier {cmd_opts_as_str} {in_file_path}" if out_file_path != in_file_path: bash_cmd += f" > {out_file_path}" # Build the Docker command. 
From 52f34ebfc28fba2fe8d23f40c9a05cff65ee8ba2 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 23 May 2025 19:43:19 -0400 Subject: [PATCH 112/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../output/test.txt | 14 ++ .../output/test.txt | 32 ++++ .../documentation/test/test_lint_notes.py | 169 ++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test1/output/test.txt create mode 100644 dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test2/output/test.txt diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test1/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test1/output/test.txt new file mode 100644 index 000000000..1f0c52a70 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test1/output/test.txt @@ -0,0 +1,14 @@ +<!-- toc --> + +- [Header1](#header1) + +<!-- tocstop --> + +# Header1 + +"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu +fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in +culpa qui officia deserunt mollit anim id est laborum." 
\ No newline at end of file diff --git a/dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test2/output/test.txt b/dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test2/output/test.txt new file mode 100644 index 000000000..9ed56c7b9 --- /dev/null +++ b/dev_scripts_helpers/documentation/test/outcomes/Test_lint_notes_cmd_line1.test2/output/test.txt @@ -0,0 +1,32 @@ +\documentclass{article} + +\title{Simple \LaTeX{} Example} +\author{Your Name} +\date{\today} + +\begin{document} + \maketitle + + \section{Introduction} + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor + incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis + nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis + aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat + nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui + officia deserunt mollit anim id est laborum. 
+ + \section{Math Example} + Here is an inline equation: $E = mc^{2}$.\\ And a displayed equation: + \[ + \int_{0}^{\infty}e^{-x^2}\, dx = \frac{\sqrt{\pi}}{2} + \] + + \section{Lists} + \begin{itemize} + \item Item 1 + + \item Item 2 + + \item Item 3 + \end{itemize} +\end{document} \ No newline at end of file diff --git a/dev_scripts_helpers/documentation/test/test_lint_notes.py b/dev_scripts_helpers/documentation/test/test_lint_notes.py index 58ffc644c..1f3a31f19 100644 --- a/dev_scripts_helpers/documentation/test/test_lint_notes.py +++ b/dev_scripts_helpers/documentation/test/test_lint_notes.py @@ -9,6 +9,20 @@ import helpers.hserver as hserver import helpers.hunit_test as hunitest +import logging +import os +from typing import Optional, Tuple + +import pytest + +import helpers.hdbg as hdbg +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import helpers.hunit_test as hunitest + _LOG = logging.getLogger(__name__) @@ -337,3 +351,158 @@ def _helper_process( exp = hprint.dedent(exp, remove_lead_trail_empty_lines_=True) self.assert_equal(act, exp) return act + + +# ############################################################################# +# Test_lint_notes_cmd_line1 +# ############################################################################# + + +@pytest.mark.skipif( + hserver.is_inside_ci() or hserver.is_dev_csfy(), + reason="Disabled because of CmampTask10710", +) +class Test_lint_notes_cmd_line1(hunitest.TestCase): + + def create_md_input_file(self) -> str: + txt = """ + # Header1 + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + in_file = os.path.join(self.get_scratch_space(), "input.md") + hio.to_file(in_file, txt) + return in_file + + def create_tex_input_file(self) -> str: + txt = r""" + \documentclass{article} + + \title{Simple \LaTeX{} Example} + \author{Your Name} + \date{\today} + + \begin{document} + + \maketitle + + \section{Introduction} + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + \section{Math Example} + Here is an inline equation: \( E = mc^2 \).\\ + And a displayed equation: + \[ + \int_{0}^{\infty} e^{-x^2} \, dx = \frac{\sqrt{\pi}}{2} + \] + + \section{Lists} + \begin{itemize} + \item Item 1 + \item Item 2 + \item Item 3 + \end{itemize} + + \end{document} + """ + txt = hprint.dedent(txt, remove_lead_trail_empty_lines_=True) + in_file = os.path.join(self.get_scratch_space(), "input.tex") + hio.to_file(in_file, txt) + return in_file + + # TODO(gp): Run this calling directly the code and not executing the script. + def run_lint_notes( + self, in_file: str, type_: str, cmd_opts: str + ) -> Optional[str]: + """ + Run the `lint_notes.py` script with the specified options. + + :param in_file: Path to the input file containing the notes. + :param type_: The output format, either 'md' or 'tex'. + :param cmd_opts: Additional command-line options to pass to the + script. + :returns: A tuple containing the script content and the output + content. 
+ """ + # lint_notes.py \ + # -i papers/DataFlow_stream_computing_framework/DataFlow_stream_computing_framework.tex \ + # --use_dockerized_prettier \ + cmd = [] + exec_path = hgit.find_file_in_git_tree("lint_notes.py") + hdbg.dassert_path_exists(exec_path) + cmd.append(exec_path) + cmd.append(f"--in_file_name {in_file}") + cmd.append("--use_dockerized_prettier") + # Save a script file to store the commands. + hdbg.dassert_in(type_, ["md", "tex"]) + out_dir = self.get_scratch_space() + out_file = os.path.join(out_dir, f"output.{type_}") + cmd.append(f"--out_file_name {out_file}") + cmd.append(cmd_opts) + cmd = " ".join(cmd) + hsystem.system(cmd) + # Check the content of the file, if needed. + output_txt: Optional[str] = None + if os.path.exists(out_file): + output_txt = hio.from_file(out_file) + return output_txt + + # /////////////////////////////////////////////////////////////////////////// + + def test1(self) -> None: + """ + Run lint_to_notes.py on a markdown file. + """ + # Prepare inputs. + in_file = self.create_md_input_file() + type_ = "md" + cmd_opts = "" + # Run the script. + output_txt = self.run_lint_notes(in_file, type_, cmd_opts) + # Check. + self.check_string(output_txt) + + def test2(self) -> None: + """ + Run lint_to_notes.py on a latex file. + """ + # Prepare inputs. + in_file = self.create_tex_input_file() + type_ = "tex" + cmd_opts = "" + # Run the script. + output_txt = self.run_lint_notes(in_file, type_, cmd_opts) + # Check. + self.check_string(output_txt) + + # def test2(self) -> None: + # """ + # Run: + # > notes_to_pdf.py --input input.md -t pdf + # """ + # # Prepare inputs. + # in_file = self.create_input_file() + # type_ = "pdf" + # cmd_opts = "" + # # Run the script. + # script_txt, output_txt = self.run_notes_to_pdf(in_file, type_, cmd_opts) + # # Check. 
+ # txt = "script_txt:\n%s\n" % script_txt + # txt += "output_txt:\n%s\n" % output_txt + # self.check_string(txt, purify_text=True) + + # def test3(self) -> None: + # """ + # Run: + # > notes_to_pdf.py --input input.md -t pdf --filter_by_header Header2 + # """ + # # Prepare inputs. + # in_file = self.create_input_file() + # type_ = "pdf" + # cmd_opts = "--filter_by_header Header2" + # # Run the script. + # script_txt, output_txt = self.run_notes_to_pdf(in_file, type_, cmd_opts) + # # Check. + # txt = "script_txt:\n%s\n" % script_txt + # txt += "output_txt:\n%s\n" % output_txt + # self.check_string(txt, purify_text=True) \ No newline at end of file From bb31902127f3122d28187026f93a6c2f2b6b1c04 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 23 May 2025 20:23:14 -0400 Subject: [PATCH 113/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/notes_to_pdf.py | 6 +- .../test/test_dockerized_prettier.py | 2 +- .../documentation/test/test_lint_notes.py | 12 +- dev_scripts_helpers/llms/llm_apply_cfile.py | 11 +- dev_scripts_helpers/llms/llm_transform.py | 17 +- helpers/hdocker.py | 178 ++++++++++++------ helpers/hparser.py | 2 +- 7 files changed, 133 insertions(+), 95 deletions(-) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index 3bb1e881d..ab3da8951 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -282,7 +282,7 @@ def _run_pandoc_to_pdf( cmd = hdocker.run_dockerized_pandoc( cmd, container_type, - return_cmd=True, + mode="return_cmd", force_rebuild=dockerized_force_rebuild, use_sudo=dockerized_use_sudo, ) @@ -317,7 +317,7 @@ def _run_pandoc_to_pdf( ) _LOG.debug("%s", "before: " + hprint.to_str("cmd")) if not use_host_tools: - cmd = hdocker.run_dockerized_latex(cmd, return_cmd=True, use_sudo=False) + 
cmd = hdocker.run_dockerized_latex(cmd, mode="return_cmd", use_sudo=False) _LOG.debug("%s", "after: " + hprint.to_str("cmd")) _ = _system(cmd) # - Run latex again. @@ -403,7 +403,7 @@ def _build_pandoc_cmd( cmd = hdocker.run_dockerized_pandoc( cmd, container_type, - return_cmd=True, + mode="return_cmd", force_rebuild=dockerized_force_rebuild, use_sudo=dockerized_use_sudo, ) diff --git a/dev_scripts_helpers/documentation/test/test_dockerized_prettier.py b/dev_scripts_helpers/documentation/test/test_dockerized_prettier.py index 39e6bdbbb..5659835fd 100644 --- a/dev_scripts_helpers/documentation/test/test_dockerized_prettier.py +++ b/dev_scripts_helpers/documentation/test/test_dockerized_prettier.py @@ -43,7 +43,7 @@ def test1(self) -> None: input_file_path, cmd_opts, output_file_path, - return_cmd=False, + mode="system", force_rebuild=False, use_sudo=False, ) diff --git a/dev_scripts_helpers/documentation/test/test_lint_notes.py b/dev_scripts_helpers/documentation/test/test_lint_notes.py index 1f3a31f19..b28453951 100644 --- a/dev_scripts_helpers/documentation/test/test_lint_notes.py +++ b/dev_scripts_helpers/documentation/test/test_lint_notes.py @@ -376,11 +376,11 @@ def create_md_input_file(self) -> str: def create_tex_input_file(self) -> str: txt = r""" - \documentclass{article} + \documentclass{article} \title{Simple \LaTeX{} Example} \author{Your Name} - \date{\today} + \date{\today} \begin{document} @@ -392,15 +392,15 @@ def create_tex_input_file(self) -> str: \section{Math Example} Here is an inline equation: \( E = mc^2 \).\\ And a displayed equation: - \[ + \[ \int_{0}^{\infty} e^{-x^2} \, dx = \frac{\sqrt{\pi}}{2} \] - \section{Lists} + \section{Lists} \begin{itemize} \item Item 1 - \item Item 2 - \item Item 3 + \item Item 2 + \item Item 3 \end{itemize} \end{document} diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py index 0e834a0e7..052d9e33e 100755 --- a/dev_scripts_helpers/llms/llm_apply_cfile.py +++ 
b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -55,7 +55,7 @@ def _run_dockerized_llm_apply_cfile( in_file_path: str, cmd_opts: List[str], *, - return_cmd: bool = False, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, suppress_output: bool = False, @@ -140,12 +140,7 @@ def _run_dockerized_llm_apply_cfile( ] ) docker_cmd = " ".join(docker_cmd) - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd, suppress_output=suppress_output) - ret = None + ret = _process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret @@ -178,7 +173,7 @@ def _main(parser: argparse.ArgumentParser) -> None: _run_dockerized_llm_apply_cfile( args.cfile, cmd_line_opts, - return_cmd=False, + mode="system", force_rebuild=args.dockerized_force_rebuild, use_sudo=args.dockerized_use_sudo, suppress_output=suppress_output, diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 83defdd9e..cd648009f 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -64,12 +64,6 @@ def _parse() -> argparse.ArgumentParser: ) hparser.add_llm_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) - parser.add_argument( - "-d", - "--diff_compare", - action="store_true", - help="Compare the original and the transformed with vimdiff", - ) parser.add_argument( "-c", "--compare", @@ -98,7 +92,7 @@ def _run_dockerized_llm_transform( cmd_opts: List[str], out_file_path: str, *, - return_cmd: bool = False, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, suppress_output: bool = False, @@ -191,12 +185,7 @@ def _run_dockerized_llm_transform( ] ) docker_cmd = " ".join(docker_cmd) - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
- hsystem.system(docker_cmd, suppress_output=suppress_output) - ret = None + ret = _process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret @@ -263,7 +252,7 @@ def _main(parser: argparse.ArgumentParser) -> None: tmp_in_file_name, cmd_line_opts, tmp_out_file_name, - return_cmd=False, + mode="system", force_rebuild=args.dockerized_force_rebuild, use_sudo=args.dockerized_use_sudo, suppress_output=suppress_output, diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 7598abda9..e6eebf825 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -47,10 +47,50 @@ def get_use_sudo() -> bool: # TODO(gp): use_sudo should be set to None and the correct value inferred from # the repo config. def get_docker_executable(use_sudo: bool) -> str: + """ + Get the Docker executable with / without sudo, if needed. + """ executable = "sudo " if use_sudo else "" executable += "docker" return executable + +def process_docker_cmd(docker_cmd: str, + container_image: str, + dockerfile: str, + mode: str) -> str: + """ + Process a Docker command according to the mode. + + :param docker_cmd: The Docker command to process. + :param container_image: The name of the Docker container. + :param dockerfile: The content of the Dockerfile. + :param mode: The mode to process the Docker command. + - "return_cmd": return the command as is. + - "system": execute the command. + - "save_to_file": save the command to a file. + :return: The output of the Docker command. + """ + _LOG.debug(hprint.func_signature_to_str()) + if mode == "return_cmd": + ret = docker_cmd + elif mode == "system": + # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
+ hsystem.system(docker_cmd) + ret = "" + elif mode == "save_to_file": + file_name = f"tmp.process_docker_cmd.{container_image}.txt" + txt = [] + txt.append(f"docker_cmd={docker_cmd}") + txt.append(f"container_image={container_image}") + txt.append(f"dockerfile={dockerfile}") + txt = "\n".join(txt) + hio.to_file(file_name, txt) + ret = "" + else: + raise ValueError(f"Invalid mode='{mode}'") + return ret + def container_exists(container_name: str, use_sudo: bool) -> Tuple[bool, str]: """ @@ -562,11 +602,12 @@ def run_dockerized_prettier( in_file_path: str, cmd_opts: List[str], out_file_path: str, + file_type: str, *, - return_cmd: bool = False, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> Optional[str]: +) -> str: """ Run `prettier` in a Docker container. @@ -592,22 +633,41 @@ def run_dockerized_prettier( """ _LOG.debug(hprint.func_signature_to_str()) hdbg.dassert_isinstance(cmd_opts, list) + hdbg.dassert_in(file_type, ["md", "tex"]) # Build the container, if needed. - container_image = "tmp.prettier" - dockerfile = r""" - # Use a Node.js image. - FROM node:18-slim + # TODO(gp): -> container_image_name + container_image = f"tmp.prettier.{file_type}" + if file_type == "md": + dockerfile = r""" + FROM node:20-slim - RUN npm install -g prettier@2.7.0 - RUN npm install -g @unified-latex/unified-latex-prettier@1.7.1 - RUN npm install -g prettier-plugin-latex@2.0.1 + RUN npm install -g prettier - # Set a working directory inside the container. - WORKDIR /app + # Set a working directory inside the container. + WORKDIR /app - # Run Prettier as the entry command. - ENTRYPOINT ["prettier"] - """ + # Run Prettier as the entry command. + ENTRYPOINT ["prettier"] + """ + elif file_type == "tex": + # For Latex we need to pin down the dependencies since the latest + # version of prettier is not compatible with the latest version of + # prettier-plugin-latex. 
+ dockerfile = r""" + FROM node:18-slim + + RUN npm install -g prettier@2.7.0 + RUN npm install -g @unified-latex/unified-latex-prettier@1.7.1 + RUN npm install -g prettier-plugin-latex@2.0.1 + + # Set a working directory inside the container. + WORKDIR /app + + # Run Prettier as the entry command. + ENTRYPOINT ["prettier"] + """ + else: + raise ValueError(f"Invalid file_type='{file_type}'") container_image = build_container_image( container_image, dockerfile, force_rebuild, use_sudo ) @@ -650,9 +710,13 @@ def run_dockerized_prettier( # tmp.prettier \ # --parser markdown --prose-wrap always --write --tab-width 2 \ # ./test.md - executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" - #bash_cmd = f"/usr/local/bin/prettier {cmd_opts_as_str} {in_file_path}" - bash_cmd = f"NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier {cmd_opts_as_str} {in_file_path}" + if file_type == "md": + executable = "/usr/local/bin/prettier" + elif file_type == "tex": + executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" + else: + raise ValueError(f"Invalid file_type='{file_type}'") + bash_cmd = f"{executable} {cmd_opts_as_str} {in_file_path}" if out_file_path != in_file_path: bash_cmd += f" > {out_file_path}" # Build the Docker command. @@ -666,12 +730,7 @@ def run_dockerized_prettier( ] ) docker_cmd = " ".join(docker_cmd) - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd) - ret = None + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret @@ -822,10 +881,10 @@ def run_dockerized_pandoc( cmd: str, container_type: str, *, - return_cmd: bool = False, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> Optional[str]: +) -> str: """ Run `pandoc` in a Docker container. 
""" @@ -1005,13 +1064,7 @@ def run_dockerized_pandoc( f"{pandoc_cmd}", ] ) - docker_cmd = " ".join(docker_cmd) - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd) - ret = None + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret @@ -1024,9 +1077,10 @@ def run_dockerized_markdown_toc( in_file_path: str, cmd_opts: List[str], *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> None: +) -> str: """ Run `markdown-toc` in a Docker container. """ @@ -1081,9 +1135,8 @@ def run_dockerized_markdown_toc( ] ) docker_cmd = " ".join(docker_cmd) - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd) - + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) + return ret # ############################################################################# # Dockerized Latex. @@ -1184,10 +1237,10 @@ def convert_latex_arguments_to_cmd( def run_dockerized_latex( cmd: str, *, - return_cmd: bool = False, + mode: str= "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> Optional[str]: +) -> str: """ Run `latex` in a Docker container. """ @@ -1286,13 +1339,7 @@ def run_dockerized_latex( ] ) docker_cmd = " ".join(docker_cmd) - # TODO(gp): Factor this out. - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
- hsystem.system(docker_cmd) - ret = None + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret @@ -1302,6 +1349,7 @@ def run_basic_latex( run_latex_again: bool, out_file_name: str, *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, ) -> None: @@ -1327,12 +1375,14 @@ def run_basic_latex( ) run_dockerized_latex( cmd, + mode=mode, force_rebuild=force_rebuild, use_sudo=use_sudo, ) if run_latex_again: run_dockerized_latex( cmd, + mode=mode, force_rebuild=force_rebuild, use_sudo=use_sudo, ) @@ -1357,10 +1407,10 @@ def run_dockerized_imagemagick( cmd_opts: List[str], out_file_path: str, *, - return_cmd: bool = False, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> Optional[str]: +) -> str: """ Run `ImageMagick` in a Docker container. """ @@ -1425,13 +1475,7 @@ def run_dockerized_imagemagick( ] ) docker_cmd = " ".join(docker_cmd) - # TODO(gp): Factor this out. - if return_cmd: - ret = docker_cmd - else: - # TODO(gp): Note that `suppress_output=False` seems to hang the call. - hsystem.system(docker_cmd) - ret = None + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret @@ -1440,6 +1484,7 @@ def run_dockerized_tikz_to_bitmap( cmd_opts: List[str], out_file_path: str, *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, ) -> None: @@ -1467,6 +1512,7 @@ def run_dockerized_tikz_to_bitmap( latex_cmd_opts, run_latex_again, file_out, + mode=mode, force_rebuild=force_rebuild, use_sudo=use_sudo, ) @@ -1475,6 +1521,7 @@ def run_dockerized_tikz_to_bitmap( file_out, cmd_opts, out_file_path, + mode=mode, force_rebuild=force_rebuild, use_sudo=use_sudo, ) @@ -1488,9 +1535,10 @@ def run_dockerized_plantuml( out_file_path: str, dst_ext: str, *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> None: +) -> str: """ Run `plantUML` in a Docker container. 
@@ -1549,7 +1597,8 @@ def run_dockerized_plantuml( ] ) docker_cmd = " ".join(docker_cmd) - hsystem.system(docker_cmd) + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) + return ret # ############################################################################# @@ -1559,9 +1608,10 @@ def run_dockerized_mermaid( in_file_path: str, out_file_path: str, *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> None: +) -> str: """ Run `mermaid` in a Docker container. @@ -1609,8 +1659,8 @@ def run_dockerized_mermaid( ] ) docker_cmd = " ".join(docker_cmd) - _LOG.debug(hprint.to_str("docker_cmd")) - hsystem.system(docker_cmd) + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) + return ret # TODO(gp): Factor out the common code with `run_dockerized_mermaid()`. @@ -1618,6 +1668,7 @@ def run_dockerized_mermaid2( in_file_path: str, out_file_path: str, *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, ) -> None: @@ -1708,7 +1759,8 @@ def run_dockerized_mermaid2( ] ) docker_cmd = " ".join(docker_cmd) - hsystem.system(docker_cmd) + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) + return ret # ############################################################################# @@ -1719,9 +1771,10 @@ def run_dockerized_graphviz( cmd_opts: List[str], out_file_path: str, *, + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, -) -> None: +) -> str: """ Run `graphviz` in a Docker container. 
@@ -1787,4 +1840,5 @@ def run_dockerized_graphviz( ] ) docker_cmd = " ".join(docker_cmd) - hsystem.system(docker_cmd) + ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) + return ret diff --git a/helpers/hparser.py b/helpers/hparser.py index 8421bf879..c10231feb 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -296,7 +296,7 @@ def mark_action(action: str, actions: List[str]) -> Tuple[bool, List[str]]: # tmp_in_file_name, # cmd_line_opts, # tmp_out_file_name, -# return_cmd=False, +# mode="system", # force_rebuild=args.dockerized_force_rebuild, # use_sudo=args.dockerized_use_sudo, # suppress_output=suppress_output, From 226c00365fda758c5ceb3ef304a481b8d4a6c7d3 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 24 May 2025 12:10:18 -0400 Subject: [PATCH 114/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/lint_notes.py | 23 ++--- .../documentation/preprocess_notes.py | 5 +- .../documentation/test/test_lint_notes.py | 12 +-- .../llms/dockerized_llm_transform.py | 1 - dev_scripts_helpers/llms/llm_prompts.py | 2 +- dev_scripts_helpers/llms/llm_transform.py | 4 +- dev_scripts_helpers/old/linter/linter.py | 4 +- helpers/hdocker.py | 16 ++-- helpers/hmarkdown.py | 87 +------------------ helpers/test/test_hmarkdown.py | 24 +++-- linters/amp_lint_md.py | 11 +-- 11 files changed, 52 insertions(+), 137 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index f82994438..2e9b64d41 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -19,7 +19,6 @@ import logging import os import re -import sys import tempfile from typing import Any, List, Optional @@ -126,19 +125,21 @@ def prettier( cmd_opts: List[str] = [] tab_width = 2 if file_type == "tex": - 
#cmd_opts.append("--plugin=prettier-plugin-latex") + # cmd_opts.append("--plugin=prettier-plugin-latex") cmd_opts.append("--plugin=@unified-latex/unified-latex-prettier") elif file_type in ("md", "txt"): cmd_opts.append("--parser markdown") else: raise ValueError(f"Invalid file type: {file_type}") hdbg.dassert_lte(1, print_width) - cmd_opts.extend([ - f"--print-width {print_width}", - "--prose-wrap always", - f"--tab-width {tab_width}", - "--use-tabs false", - ]) + cmd_opts.extend( + [ + f"--print-width {print_width}", + "--prose-wrap always", + f"--tab-width {tab_width}", + "--use-tabs false", + ] + ) # Run prettier. if use_dockerized_prettier: # Run `prettier` in a Docker container. @@ -154,7 +155,7 @@ def prettier( else: # Run `prettier` installed on the host directly. executable = "prettier" - #executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" + # executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" cmd = [executable] + cmd_opts if in_file_path == out_file_path: cmd.append("--write") @@ -182,7 +183,9 @@ def prettier_on_str( # Save string as input. # TODO(gp): Use a context manager. curr_dir = os.getcwd() - tmp_file_name = tempfile.NamedTemporaryFile(prefix="tmp.prettier_on_str.", dir=curr_dir).name + tmp_file_name = tempfile.NamedTemporaryFile( + prefix="tmp.prettier_on_str.", dir=curr_dir + ).name tmp_file_name += ".tex" hio.to_file(tmp_file_name, txt) # Call `prettier` in-place. diff --git a/dev_scripts_helpers/documentation/preprocess_notes.py b/dev_scripts_helpers/documentation/preprocess_notes.py index 6ef9ad488..683cd2c91 100755 --- a/dev_scripts_helpers/documentation/preprocess_notes.py +++ b/dev_scripts_helpers/documentation/preprocess_notes.py @@ -44,7 +44,7 @@ def _process_abbreviations(in_line: str) -> str: (r"=>", r"\implies"), # TODO(gp): This collides with the arrow in graphviz commands. We # should skip this transformation if we are in a graphviz block. 
- #(r"->", r"\rightarrow"), + # (r"->", r"\rightarrow"), (r"-^", r"\uparrow"), (r"-v", r"\downarrow"), ]: @@ -283,9 +283,8 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: or prev_line_is_verbatim or next_line_is_verbatim ): - #out.append(" " * _NUM_SPACES + line) + # out.append(" " * _NUM_SPACES + line) assert 0 - pass # c) Clean up. _LOG.debug("Clean up") # Remove all the lines with only spaces. diff --git a/dev_scripts_helpers/documentation/test/test_lint_notes.py b/dev_scripts_helpers/documentation/test/test_lint_notes.py index b28453951..30d8bf473 100644 --- a/dev_scripts_helpers/documentation/test/test_lint_notes.py +++ b/dev_scripts_helpers/documentation/test/test_lint_notes.py @@ -5,16 +5,6 @@ import pytest import dev_scripts_helpers.documentation.lint_notes as dshdlino -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hunit_test as hunitest - -import logging -import os -from typing import Optional, Tuple - -import pytest - import helpers.hdbg as hdbg import helpers.hgit as hgit import helpers.hio as hio @@ -505,4 +495,4 @@ def test2(self) -> None: # # Check. 
# txt = "script_txt:\n%s\n" % script_txt # txt += "output_txt:\n%s\n" % output_txt - # self.check_string(txt, purify_text=True) \ No newline at end of file + # self.check_string(txt, purify_text=True) diff --git a/dev_scripts_helpers/llms/dockerized_llm_transform.py b/dev_scripts_helpers/llms/dockerized_llm_transform.py index 03b0246d0..d9f45acb9 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_transform.py +++ b/dev_scripts_helpers/llms/dockerized_llm_transform.py @@ -12,7 +12,6 @@ import logging import dev_scripts_helpers.llms.llm_prompts as dshlllpr -import helpers.hdbg as hdbg import helpers.hparser as hparser _LOG = logging.getLogger(__name__) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 179080404..3807fe91c 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -131,7 +131,7 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: When you find a Python function with optional parameters, add a star after the mandatory parameters and before the optional parameters, and make sure that the function is called with the correct number of arguments. - + For example, convert: ``` def process_data(data, threshold=0.5): diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index cd648009f..4689dc2f4 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -227,9 +227,7 @@ def _main(parser: argparse.ArgumentParser) -> None: in_file_name, out_file_name = hparser.parse_input_output_args(args) tag = "llm_transform" tmp_in_file_name, tmp_out_file_name = ( - hparser.adapt_input_output_args_for_dockerized_scripts( - in_file_name, tag - ) + hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) ) # TODO(gp): We should just automatically pass-through the options. 
cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] diff --git a/dev_scripts_helpers/old/linter/linter.py b/dev_scripts_helpers/old/linter/linter.py index 2041468e0..d1c6d97fe 100755 --- a/dev_scripts_helpers/old/linter/linter.py +++ b/dev_scripts_helpers/old/linter/linter.py @@ -1380,8 +1380,8 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: # cmd = [] cmd.append(exec_path) - cmd.append("-i %s" % file_name) - cmd.append("--in_place") + cmd.append(f"-i {file_name}") + cmd.append(f"-o {file_name}") cmd_as_str = " ".join(cmd) _, output = _tee(cmd_as_str, executable, abort_on_error=True) # Remove cruft. diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 8b68a8a74..116a1bf68 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -54,11 +54,10 @@ def get_docker_executable(use_sudo: bool) -> str: executable += "docker" return executable - -def process_docker_cmd(docker_cmd: str, - container_image: str, - dockerfile: str, - mode: str) -> str: + +def process_docker_cmd( + docker_cmd: str, container_image: str, dockerfile: str, mode: str +) -> str: """ Process a Docker command according to the mode. @@ -713,7 +712,9 @@ def run_dockerized_prettier( if file_type == "md": executable = "/usr/local/bin/prettier" elif file_type == "tex": - executable = "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" + executable = ( + "NODE_PATH=/usr/local/lib/node_modules /usr/local/bin/prettier" + ) else: raise ValueError(f"Invalid file_type='{file_type}'") bash_cmd = f"{executable} {cmd_opts_as_str} {in_file_path}" @@ -1138,6 +1139,7 @@ def run_dockerized_markdown_toc( ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret + # ############################################################################# # Dockerized Latex. 
# ############################################################################# @@ -1237,7 +1239,7 @@ def convert_latex_arguments_to_cmd( def run_dockerized_latex( cmd: str, *, - mode: str= "system", + mode: str = "system", force_rebuild: bool = False, use_sudo: bool = False, ) -> str: diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index f7a0223fd..93d8294e6 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -312,7 +312,7 @@ def md_clean_up(txt: str) -> str: txt = re.sub(r"→", r"$\\rightarrow$", txt) # Remove empty spaces at beginning / end of Latex equations $...$. # E.g., $ \text{Student} $ becomes $\text{Student}$ - #txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) + # txt = re.sub(r"\$\s+(.*?)\s\$", r"$\1$", txt) # Remove dot at the end of each line. txt = re.sub(r"\.\s*$", "", txt, flags=re.MULTILINE) # Transform `Example: Training a deep` into `E.g., training a deep`, @@ -769,84 +769,6 @@ def selected_navigation_to_str( # ############################################################################# -import re - - - -import re # Required for regular expression operations. - -def capitalize_slide_titles(text: str) -> str: - """ - Capitalize slide titles according to specific rules. - - :param text: string of text to be processed (e.g., "a title on a slide") - :return: processed string with capitalized slide titles - """ - # Define small words that should not be capitalized unless they are the first or last word. - small_words = { - 'a', 'an', 'and', 'as', 'at', 'but', 'by', 'for', 'in', 'nor', 'of', - 'off', 'on', 'or', 'per', 'so', 'the', 'to', 'up', 'via', 'with', 'yet' - } - - def capitalize_word(word: str, is_first_or_last: bool) -> str: - """ - Capitalize a word based on its position and predefined rules. 
- - :param word: the word to potentially capitalize (e.g., "and") - :param is_first_or_last: boolean indicating if the word is first or last in sentence - :return: word with applied capitalization rules - """ - # Split compound words into parts. - parts = word.split('-') - capitalized_parts = [] - # Process each part of the compound word based on its position. - for word in parts: - if is_first_or_last or word.lower() not in small_words: - word_out = word.capitalize() - else: - word_out = word.lower() - capitalized_parts.append(word_out) - ret = '-'.join(capitalized_parts) - return ret - - # Split into words while preserving punctuation. - tokens = re.findall(r"\b[\w'-]+\b|[^\w\s]", text, re.UNICODE) - - # Filter out tokens that are not words. - words = [token for token in tokens if re.search(r'\w', token)] - - result = [] - # Iterate over tokens and apply capitalization rules. - for i, token in enumerate(tokens): - # Check if token is a word. - if re.search(r'\w', token): - if i == 0: - is_first_or_last = True - elif i == len(tokens) - 1: - is_first_or_last = True - elif i > 0 and not re.search(r'\w', tokens[i - 1]): - is_first_or_last = True - elif i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]): - is_first_or_last = True - else: - is_first_or_last = False - - result.append(capitalize_word(token, is_first_or_last or token.lower() not in small_words)) - else: - # Keep punctuation as-is. - result.append(token) - - # Join words into a single string while preserving spacing and trimming surplus whitespace. - return ''.join( - [word if re.match(r'\W', word) else ' ' + word for word in result] - ).strip() - - -# In this adjusted code, I replaced the complex inline assignments with -# `if-then-else` structures, incorporated informative docstrings into the -# function definitions using REST style for clarity, added comments explaining -# significant code sections, and ensured comments were in imperative form and -# grammatically correct. 
def capitalize_first_level_bullets(markdown_text: str) -> str: @@ -899,8 +821,8 @@ def bold_first_level_bullets(markdown_text: str, *, max_length: int = 30) -> str Make first-level bullets bold in markdown text. :param markdown_text: Input markdown text - :param max_length: Max length of the bullet text to be bolded. -1 means no - limit. + :param max_length: Max length of the bullet text to be bolded. -1 + means no limit. :return: Formatted markdown text with first-level bullets in bold """ lines = markdown_text.split("\n") @@ -1035,12 +957,11 @@ def format_markdown_slide(txt: str) -> str: Format markdown text for a slide. """ # Split the text into title and body. - txt = bold_first_level_bullets(txt) file_type = "md" txt = dshdlino.prettier_on_str(txt, file_type) txt = format_first_level_bullets(txt) - #txt = capitalize_slide_titles(txt) + # txt = capitalize_slide_titles(txt) return txt diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index ee96a2cfd..faad555b8 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1567,7 +1567,8 @@ def test7(self) -> None: self.assert_equal(actual, expected) def test8(self) -> None: - text = hprint.dedent(r""" + text = hprint.dedent( + r""" - **\red{Objective}** - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated model of the environment @@ -1600,9 +1601,11 @@ def test8(self) -> None: - **\darkgray{Use Case}** - Suitable when environment dynamics are stationary and can be learned from interaction - """) + """ + ) actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = hprint.dedent(r""" + expected = hprint.dedent( + r""" - **\red{Objective}** - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated model of the environment @@ -1635,7 +1638,8 @@ def test8(self) -> None: - **\darkgray{Use Case}** - Suitable when environment dynamics are stationary and can be learned from interaction - """) + """ + ) 
self.assert_equal(actual, expected) @@ -1812,7 +1816,8 @@ def test9(self) -> None: self._format_and_compare_markdown(text, expected) def test10(self) -> None: - text = hprint.dedent(r""" + text = hprint.dedent( + r""" - **Objective** - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated @@ -1836,8 +1841,10 @@ def test10(self) -> None: - **Use Case** - Suitable when environment dynamics are stationary and can be learned from interaction - """) - expected = hprint.dedent(r""" + """ + ) + expected = hprint.dedent( + r""" - **Objective** - Learn utility estimates $U^\pi(s)$for a fixed policy$\pi$ using an estimated model of the environment @@ -1858,7 +1865,8 @@ def test10(self) -> None: - **Use Case** - Suitable when environment dynamics are stationary and can be learned from interaction - """) + """ + ) self._format_and_compare_markdown(text, expected) def _format_and_compare_markdown(self, text: str, expected: str) -> None: diff --git a/linters/amp_lint_md.py b/linters/amp_lint_md.py index 4df617a40..890026166 100644 --- a/linters/amp_lint_md.py +++ b/linters/amp_lint_md.py @@ -22,9 +22,6 @@ _LOG = logging.getLogger(__name__) -# ############################################################################# - - def _check_readme_is_capitalized(file_name: str) -> str: """ Check if all readme markdown files are named README.md. 
@@ -37,9 +34,6 @@ def _check_readme_is_capitalized(file_name: str) -> str: return msg -# ############################################################################# - - # ############################################################################# # _LintMarkdown # ############################################################################# @@ -48,7 +42,8 @@ def _check_readme_is_capitalized(file_name: str) -> str: class _LintMarkdown(liaction.Action): def __init__(self) -> None: - executable = "$(find -wholename '*dev_scripts_helpers/documentation/lint_notes.py')" + cmd = "find -wholename '*dev_scripts_helpers/documentation/lint_notes.py'" + executable = hsystem.system_to_one_line(cmd) super().__init__(executable) def check_if_possible(self) -> bool: @@ -111,4 +106,4 @@ def _main(parser: argparse.ArgumentParser) -> None: if __name__ == "__main__": - _main(_parse()) \ No newline at end of file + _main(_parse()) From f162c7289717cdc7d3b2a19f20ab17f751da0cf2 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 24 May 2025 14:23:56 -0400 Subject: [PATCH 115/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/documentation/notes_to_pdf.py | 3 +++ helpers/hdocker.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index ab3da8951..7899f8a0f 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -57,6 +57,7 @@ def _report_phase(phase: str) -> None: def _log_system(cmd: str) -> None: + hdbg.dassert_isinstance(cmd, str) print("> " + cmd) _append_script(cmd) @@ -408,6 +409,8 @@ def _build_pandoc_cmd( use_sudo=dockerized_use_sudo, ) _LOG.debug("%s", "after: " + hprint.to_str("cmd")) + hdbg.dassert_isinstance(cmd, str) + hdbg.dassert_isinstance(file_out, str) return cmd, 
file_out diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 116a1bf68..b1c77f03c 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -71,6 +71,7 @@ def process_docker_cmd( :return: The output of the Docker command. """ _LOG.debug(hprint.func_signature_to_str()) + hdbg.dassert_isinstance(docker_cmd, str) if mode == "return_cmd": ret = docker_cmd elif mode == "system": @@ -1065,6 +1066,7 @@ def run_dockerized_pandoc( f"{pandoc_cmd}", ] ) + docker_cmd = " ".join(docker_cmd) ret = process_docker_cmd(docker_cmd, container_image, dockerfile, mode) return ret From 38cfd5e00487f2f105efbc74382e7af690806f74 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 24 May 2025 14:34:04 -0400 Subject: [PATCH 116/193] Revert dead files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/preprocess_notes.py | 4 +- ....replace_common_files_with_script_links.md | 89 ++-- helpers/create_links.py | 409 ++++++++---------- helpers/stage_linked_file.py | 104 +++-- helpers/test/test_hmarkdown.py | 6 +- 5 files changed, 268 insertions(+), 344 deletions(-) diff --git a/dev_scripts_helpers/documentation/preprocess_notes.py b/dev_scripts_helpers/documentation/preprocess_notes.py index 683cd2c91..f77258a3f 100755 --- a/dev_scripts_helpers/documentation/preprocess_notes.py +++ b/dev_scripts_helpers/documentation/preprocess_notes.py @@ -260,7 +260,6 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: # It's a line in an answer. out.append(" " * _NUM_SPACES + line) else: - assert 0 # Empty line. prev_line_is_verbatim = ((i - 1) > 0) and lines[i - 1].startswith( "```" @@ -283,8 +282,7 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: or prev_line_is_verbatim or next_line_is_verbatim ): - # out.append(" " * _NUM_SPACES + line) - assert 0 + out.append(" " * _NUM_SPACES + line) # c) Clean up. 
_LOG.debug("Clean up") # Remove all the lines with only spaces. diff --git a/docs/tools/dev_system/all.replace_common_files_with_script_links.md b/docs/tools/dev_system/all.replace_common_files_with_script_links.md index 32b676040..d50147a15 100644 --- a/docs/tools/dev_system/all.replace_common_files_with_script_links.md +++ b/docs/tools/dev_system/all.replace_common_files_with_script_links.md @@ -1,15 +1,18 @@ + + <!-- toc --> - [Managing Symbolic Links Between Directories](#managing-symbolic-links-between-directories) - * [Summary](#summary) + * [Define](#define) * [Why Do We Need This Approach?](#why-do-we-need-this-approach) - * [Nomenclature](#nomenclature) * [Workflow and Commands](#workflow-and-commands) + [Step 1: Replace Files with Symbolic Links](#step-1-replace-files-with-symbolic-links) + [Step 2: Stage Files for Modification](#step-2-stage-files-for-modification) + [Step 3: Restore Symbolic Links After Modifications](#step-3-restore-symbolic-links-after-modifications) + [Workflow Summary](#workflow-summary) + [Example Directory Structure](#example-directory-structure) + + [Notes and Best Practices](#notes-and-best-practices) + + [Conclusion](#conclusion) <!-- tocstop --> @@ -18,16 +21,16 @@ ## Summary - This document describes two scripts, `create_links.py` and - `stage_linked_file.py` used to manage symbolic links between a source - directory and a destination directory + `stage_linked_file.py` used to manage symbolic links between a + source directory and a destination directory - These tools simplify workflows where you want to create read-only symbolic links for files, stage modifications, and later restore the links ## Why Do We Need This Approach? -- In our codebases, it is common to have files that are identical between two - directories. Maintaining these files manually can lead to inefficiencies and - errors: +- In our codebases, it is common to have duplicate files or files + that are identical between two directories. 
Maintaining these files manually + can lead to inefficiencies and errors: - Synchronization: If changes are made in one location, they may not reflect in the other, leading to inconsistencies - Accidental Modifications: Directly modifying files that should remain @@ -35,31 +38,12 @@ - With our approach: - We avoid file duplication by creating links that point to the original files - - Links in the destination directory are marked as read-only, reducing the - risk of accidental changes + - Links in the destination directory remain read-only, reducing the risk of + accidental changes - If modifications are needed, the "staging process" ensures you can work safely on copies without altering the original source files - - After the code has been developed, one can then convert copies of files, - back to links - -## Nomenclature - -- Links are often confusing since it's not clear what is linked to and what is - linked from, e.g., - - `ln -s foo bar` creates a symbolic link named `foo` that points to `bar` - ```bash - foo -> bar - ``` - - This convention seems the opposite of `cp foo bar` where a new file called - `bar` is created with the content of `foo` - -- Also referring to "source" and "destination" is confusing since it is unclear - if "destination" is the "destination" of the link (i.e., the head of the - arrow) or the "destination" of the operation of copy (the tail of the arrow) - -- In the rest of this document we will refer to the file being created as - "destination" - - E.g., `ln -s new_file old_file` + - After the code has been developed, one can then convert copies of files, back + to links ## Workflow and Commands @@ -71,8 +55,7 @@ links to the corresponding files in `src_dir` Command: - - ```bash + ``` > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -93,20 +76,16 @@ - If you want to edit the files in `dst_dir` (which are currently symbolic links), use `stage_linked_file.py` to stage them. 
Staging replaces the symbolic links with writable copies of the original files -- At this point, you can just modify the files in `dst_dir` to achieve the - desired goal, without worries of altering the source files - - Often you don't know which files need to be changed and how to change files - so all the files are staged for modification - Command: - - ```bash + ``` > stage_linked_file.py --dst_dir /path/to/dst ``` - What it does: - Finds all the symbolic links in `dst_dir` - - Replaces each symbolic link with a writable copy of the file it points to + - Replaces each symbolic link with a writable copy of the file it points + to - Sets file permissions to `644` (writable) - Why it is important: @@ -116,12 +95,11 @@ ### Step 3: Restore Symbolic Links After Modifications -- Once you've finished modifying the files, you can restore the symbolic links +- Once you’ve finished modifying the files, you can restore the symbolic links by running `create_links.py` again with the `--replace_links` flag - Command: - - ```bash + ``` > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -137,23 +115,22 @@ ### Workflow Summary -1. Set up symbolic links: - - ```bash - > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links - ``` +- Set up `symbolic links`: + ``` + > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links + ``` -2. Stage symbolic links for modification: - ``` - > stage_linked_file.py --dst_dir /path/to/dst - ``` +- Stage `symbolic links` for modification: + ``` + > stage_linked_file.py --dst_dir /path/to/dst + ``` -3. Modify files as required +- Modify files as required -4. 
After modifications, restore the symbolic links: - ``` - > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links - ``` +- After modifications, restore the `symbolic links`: + ``` + > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links + ``` ### Example Directory Structure diff --git a/helpers/create_links.py b/helpers/create_links.py index 7cf93a503..481b0f152 100644 --- a/helpers/create_links.py +++ b/helpers/create_links.py @@ -1,7 +1,4 @@ -#!/usr/bin/env python - """ - Usage Example: - Using absolute links @@ -23,6 +20,10 @@ > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links --use_relative_paths - Other steps remain same. + +Import as: + +import helpers.create_links as hcrelink """ import argparse @@ -34,61 +35,123 @@ from typing import List, Tuple import helpers.hdbg as hdbg +import helpers.hio as hio import helpers.hparser as hparser _LOG = logging.getLogger(__name__) - # ############################################################################# +def _main(parser: argparse.ArgumentParser) -> None: + """ + Entry point for the script to manage symbolic links between directories. + + Depending on the command-line arguments, this script either: + + - Replaces matching files in `dst_dir` with symbolic links to `src_dir`. + - Stages all symbolic links in `dst_dir` for modification by replacing them + with writable file copies. 
+ + Usage: + - `--replace_links`: Replace files with symbolic links + - `--stage_links`: Replace symbolic links with writable file copies + :return: None + """ + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + if args.replace_links: + common_files = _find_common_files(args.src_dir, args.dst_dir) + _replace_with_links( + common_files, use_relative_paths=args.use_relative_paths + ) + _LOG.info("Replaced %d files with symbolic links.", len(common_files)) + elif args.stage_links: + symlinks = _find_symlinks(args.dst_dir) + if not symlinks: + _LOG.info("No symbolic links found to stage.") + _stage_links(symlinks) + _LOG.info("Staged %d symbolic links for modification.", len(symlinks)) + else: + _LOG.error("You must specify either --replace_links or --stage_links.") + + +def _parse() -> argparse.ArgumentParser: + """ + Parse command-line arguments. + + :return: Argument parser object. + """ + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--src_dir", required=True, help="Source directory.") + parser.add_argument("--dst_dir", required=True, help="Destination directory.") + parser.add_argument( + "--replace_links", + action="store_true", + help="Replace files with symbolic links.", + ) + parser.add_argument( + "--stage_links", + action="store_true", + help="Replace symbolic links with writable copies.", + ) + parser.add_argument( + "--use_relative_paths", + action="store_true", + help="Use relative paths for symbolic links instead of absolute paths.", + ) + hparser.add_verbosity_arg(parser) + return parser + + def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: """ Find common files in dst_dir and change to links. - If a destination dir is not found, the functions makes a dest dir and copies - all files from source to destination after users approval. 
All matching - files are identified based on their name and content. The matches are - returned as the file paths from both directories. + If a destination dir is not found, the functions makes a dest dir and copies all files from + source to destination after users approval. All matching files are identified based on their + name and content. The matches are returned as the file paths from both directories. :param src_dir: The source directory containing the original files :param dst_dir: The destination directory to compare files against :return: paths of matching files from `src_dir` and `dst_dir` """ - # # Ensure the destination directory exists; create it if it doesn't. - # if not os.path.exists(dst_dir): - # user_input = input( - # "Destination directory %s does not exist. Would you like to create copy all files from source? (y/n): " - # ) - # if user_input.lower() == "y": - # hio.create_dir( - # dir_name=dst_dir, - # incremental=True, - # abort_if_exists=True, - # ask_to_delete=False, - # backup_dir_if_exists=False, - # ) - # _LOG.info("Created destination directory: %s", dst_dir) - # for root, _, files in os.walk(src_dir): - # for file in files: - # src_file = os.path.join(root, file) - # dst_file = os.path.join( - # dst_dir, os.path.relpath(src_file, src_dir) - # ) - # dst_file_dir = os.path.dirname(dst_file) - # # Ensure the destination file directory exists. - # if not os.path.exists(dst_file_dir): - # os.makedirs(dst_file_dir) - # _LOG.info("Created subdirectory: %s", dst_file_dir) - # # Copy the file from source to destination. - # shutil.copy2(src_file, dst_file) - # _LOG.info("Copied file: %s -> %s", src_file, dst_file) - # else: - # _LOG.error( - # "Destination directory %s not created. Exiting function.", - # dst_dir, - # ) - # return [] + # Ensure the destination directory exists; create it if it doesn't. + if not os.path.exists(dst_dir): + user_input = input( + "Destination directory %s does not exist. 
Would you like to create copy all files from source? (y/n): " + ) + if user_input.lower() == "y": + hio.create_dir( + dir_name=dst_dir, + incremental=True, + abort_if_exists=True, + ask_to_delete=False, + backup_dir_if_exists=False, + ) + _LOG.info("Created destination directory: %s", dst_dir) + for root, _, files in os.walk(src_dir): + for file in files: + src_file = os.path.join(root, file) + dst_file = os.path.join( + dst_dir, os.path.relpath(src_file, src_dir) + ) + dst_file_dir = os.path.dirname(dst_file) + # Ensure the destination file directory exists. + if not os.path.exists(dst_file_dir): + os.makedirs(dst_file_dir) + _LOG.info("Created subdirectory: %s", dst_file_dir) + # Copy the file from source to destination. + shutil.copy2(src_file, dst_file) + _LOG.info("Copied file: %s -> %s", src_file, dst_file) + else: + _LOG.error( + "Destination directory %s not created. Exiting function.", + dst_dir, + ) + return [] # After copying files, continue with comparing files. common_files = [] for root, _, files in os.walk(src_dir): @@ -96,107 +159,77 @@ def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: src_file = os.path.join(root, file) dst_file = os.path.join(dst_dir, os.path.relpath(src_file, src_dir)) # Check if the file exists in the destination folder. + # Certain files do not need to be copied, so we skip them. if not os.path.exists(dst_file): - _LOG.debug( - "File %s is missing in the destination directory", + _LOG.warning( + "Warning: %s is missing in the destination directory.", dst_file, ) continue - # Check if the file is a symbolic link. - if os.path.islink(dst_file): - _LOG.debug( - "File %s is a symbolic link", - dst_file, - ) - continue - # Compare file contents. + # Compare file contents after copying. 
if filecmp.cmp(src_file, dst_file, shallow=False): - _LOG.debug( - "Files src_file=%s, dst_file=%s are the same", + _LOG.info( + "Files are the same and will be replaced: %s -> %s", src_file, dst_file, ) common_files.append((src_file, dst_file)) else: - _LOG.debug( - "Files src_file=%s, dst_file=%s are not the same", - src_file, + _LOG.warning( + "Warning: %s and %s have different content.", dst_file, + src_file, ) return common_files -def _create_single_link( - src_file: str, - dst_file: str, - use_relative_paths: bool, - abort_on_first_error: bool, -) -> None: - """ - Create a single symbolic link from dst_file to src_file. - - :param src_file: Source file path - :param dst_file: Destination file path where symlink will be created - :param use_relative_paths: If True, create relative symlinks; if - False, use absolute paths - :param abort_on_first_error: If True, abort on the first error; if - False, continue processing - """ - hdbg.dassert_file_exists(src_file) - hdbg.dassert_file_exists(dst_file) - # Remove the destination file. - os.remove(dst_file) - try: - if use_relative_paths: - link_target = os.path.relpath(src_file, os.path.dirname(dst_file)) - else: - link_target = os.path.abspath(src_file) - os.symlink(link_target, dst_file) - # Remove write permissions from the file to prevent accidental - # modifications. 
- current_permissions = os.stat(dst_file).st_mode - new_permissions = ( - current_permissions & ~stat.S_IWUSR & ~stat.S_IWGRP & ~stat.S_IWOTH - ) - os.chmod(dst_file, new_permissions) - _LOG.debug("Created symlink: %s -> %s", dst_file, link_target) - except Exception as e: - msg = "Failed to create symlink %s -> %s with error %s" % ( - dst_file, - link_target, - str(e), - ) - if abort_on_first_error: - raise RuntimeError(msg) - else: - _LOG.warning(msg) - - def _replace_with_links( common_files: List[Tuple[str, str]], use_relative_paths: bool, *, abort_on_first_error: bool = False, - dry_run: bool = False, ) -> None: """ Replace matching files in the destination directory with symbolic links. :param common_files: Matching file paths from `src_dir` and `dst_dir` - :param use_relative_paths: If True, create relative symlinks; if False, use - absolute paths. - :param abort_on_first_error: If True, abort on the first error; if False, - continue processing - :param dry_run: If True, print what will be done without actually doing it. + :param use_relative_paths: If True, create relative symlinks; if False, use absolute paths. + :param abort_on_first_error: If True, abort on the first error; if False, continue processing """ for src_file, dst_file in common_files: - - _create_single_link( - src_file, dst_file, use_relative_paths, abort_on_first_error - ) - - -# ############################################################################# + try: + hdbg.dassert_file_exists(src_file) + except FileNotFoundError as e: + _LOG.error("Error: %s", str(e)) + if abort_on_first_error: + _LOG.error("Aborting: Source file %s doesn't exist.", src_file) + continue + if os.path.exists(dst_file): + os.remove(dst_file) + try: + if use_relative_paths: + link_target = os.path.relpath(src_file, os.path.dirname(dst_file)) + else: + link_target = os.path.abspath(src_file) + os.symlink(link_target, dst_file) + # Remove write permissions from the file to prevent accidental + # modifications. 
+ current_permissions = os.stat(dst_file).st_mode + new_permissions = ( + current_permissions + & ~stat.S_IWUSR + & ~stat.S_IWGRP + & ~stat.S_IWOTH + ) + os.chmod(dst_file, new_permissions) + _LOG.info("Created symlink: %s -> %s", dst_file, link_target) + except Exception as e: + _LOG.error("Error creating symlink for %s: %s", dst_file, e) + if abort_on_first_error: + _LOG.warning( + "Aborting: Failed to create symlink for %s.", dst_file + ) + continue def _find_symlinks(dst_dir: str) -> List[str]: @@ -204,10 +237,8 @@ def _find_symlinks(dst_dir: str) -> List[str]: Find all symbolic links in the destination directory. :param dst_dir: Directory to search for symbolic links - :return: List of absolute paths to symbolic links + :return: List of paths to symbolic links """ - dst_dir = os.path.abspath(dst_dir) - hdbg.dassert_dir_exists(dst_dir) symlinks = [] for root, _, files in os.walk(dst_dir): for file in files: @@ -217,128 +248,36 @@ def _find_symlinks(dst_dir: str) -> List[str]: return symlinks -def _stage_single_link( - link: str, target_file: str, abort_on_first_error: bool, dry_run: bool -) -> None: - """ - Replace a single symlink with a writable copy of the linked file. - - :param link: The symlink to replace. - :param target_file: The file to copy to the symlink location. - :param abort_on_first_error: If True, abort on the first error; if - False, continue processing - :param dry_run: If True, print what will be done without actually - doing it. - """ - # Resolve the original file the symlink points to. - target_file = os.readlink(link) - if not os.path.exists(target_file): - msg = "Target file does not exist for link %s -> %s" % (link, target_file) - if abort_on_first_error: - raise RuntimeError(msg) - else: - _LOG.warning(msg) - return - try: - os.remove(link) - # Copy file to the symlink location. - shutil.copy2(target_file, link) - # Make the file writable to allow for modifications. 
- current_permissions = os.stat(link).st_mode - new_permissions = ( - current_permissions | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH - ) - os.chmod(link, new_permissions) - _LOG.debug("Staged: %s -> %s", link, target_file) - except Exception as e: - msg = "Error staging link %s: %s" % (link, str(e)) - if abort_on_first_error: - raise RuntimeError(msg) - else: - _LOG.warning(msg) - - -def _stage_links( - symlinks: List[str], abort_on_first_error: bool, dry_run: bool -) -> None: +def _stage_links(symlinks: List[str]) -> None: """ Replace symbolic links with writable copies of the linked files. :param symlinks: List of symbolic links to replace. """ for link in symlinks: - _stage_single_link(link, abort_on_first_error, dry_run) - - -# ############################################################################# - - -def _parse() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument("--src_dir", required=True, help="Source directory.") - parser.add_argument("--dst_dir", required=True, help="Destination directory.") - parser.add_argument( - "--replace_links", - action="store_true", - help="Replace equal files with symbolic links.", - ) - parser.add_argument( - "--stage_links", - action="store_true", - help="Replace symbolic links with writable copies.", - ) - parser.add_argument( - "--compare_files", - action="store_true", - help="Compare files in the directories.", - ) - parser.add_argument( - "--use_relative_paths", - action="store_true", - help="Use relative paths for symbolic links instead of absolute paths.", - ) - parser.add_argument( - "--dry_run", - action="store_true", - help="Print what will be done without actually doing it.", - ) - hparser.add_verbosity_arg(parser) - return parser - - -def _main(parser: argparse.ArgumentParser) -> None: - args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) - 
hdbg.dassert_dir_exists(args.src_dir) - hdbg.dassert_dir_exists(args.dst_dir) - # - hdbg.dassert_eq( - sum([args.replace_links, args.stage_links, args.compare_files]), - 1, - "You must specify exactly one of --replace_links, --stage_links, or --compare_files.", - ) - if args.compare_files: - # Compare files. - common_files = _find_common_files(args.src_dir, args.dst_dir) - _LOG.info("Found %d common files.", len(common_files)) - elif args.replace_links: - # Replace with links. - common_files = _find_common_files(args.src_dir, args.dst_dir) - hdbg.dassert_ne(len(symlinks), 0, "No files found to replace.") - _replace_with_links( - common_files, use_relative_paths=args.use_relative_paths - ) - _LOG.info("Replaced %d files with symbolic links.", len(common_files)) - elif args.stage_links: - # Stage links for modification. - symlinks = _find_symlinks(args.dst_dir) - hdbg.dassert_ne(len(symlinks), 0, "No symbolic links found to stage.") - _stage_links(symlinks) - _LOG.info("Staged %d symbolic links for modification.", len(symlinks)) - else: - raise RuntimeError("Internal error") + # Resolve the original file the symlink points to. + target_file = os.readlink(link) + if not os.path.exists(target_file): + _LOG.warning( + "Warning: Target file does not exist for link %s -> %s", + link, + target_file, + ) + continue + # Replace the symlink with a writable copy of the target file. + try: + os.remove(link) + # Copy file to the symlink location. + shutil.copy2(target_file, link) + # Make the file writable to allow for modifications. 
+ current_permissions = os.stat(link).st_mode + new_permissions = ( + current_permissions | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + ) + os.chmod(link, new_permissions) + _LOG.info("Staged: %s -> %s", link, target_file) + except Exception as e: + _LOG.error("Error staging link %s: %s", link, e) if __name__ == "__main__": diff --git a/helpers/stage_linked_file.py b/helpers/stage_linked_file.py index 5f9945834..43d83e881 100644 --- a/helpers/stage_linked_file.py +++ b/helpers/stage_linked_file.py @@ -1,65 +1,68 @@ -#!/usr/bin/env python - """ -Usage - - python3 stage_linked_file.py --dst_dir /path/to/dst +Import as: + +import helpers.stage_linked_file as hstlifil """ import argparse import logging +import os +import shutil +from typing import List _LOG = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -# def find_symlinks(dst_dir: str) -> List[str]: -# """ -# Find all symbolic links in the destination directory. - -# :param dst_dir: Directory to search for symbolic links. -# :return: List of paths to symbolic links. -# """ -# symlinks = [] -# for root, _, files in os.walk(dst_dir): -# for file in files: -# file_path = os.path.join(root, file) -# if os.path.islink(file_path): -# symlinks.append(file_path) -# return symlinks - - -# def stage_links(symlinks: List[str]) -> None: -# """ -# Replace symbolic links with writable copies of the linked files. - -# :param symlinks: List of symbolic links to replace. -# """ -# for link in symlinks: -# # Resolve the original file the symlink points to. -# target_file = os.readlink(link) -# if not os.path.exists(target_file): -# _LOG.warning( -# f"Warning: Target file does not exist for link {link} -> {target_file}" -# ) -# continue -# # Replace the symlink with a writable copy of the target file. -# try: -# os.remove(link) -# # Copy file to the symlink location. -# shutil.copy2(target_file, link) -# # Make the file writable. 
-# os.chmod(link, 0o644) -# _LOG.info(f"Staged: {link} -> {target_file}") -# except Exception as e: -# _LOG.error(f"Error staging link {link}: {e}") - - -def main() -> None: +def find_symlinks(dst_dir: str) -> List[str]: + """ + Find all symbolic links in the destination directory. + + :param dst_dir: Directory to search for symbolic links. + :return: List of paths to symbolic links. + """ + symlinks = [] + for root, _, files in os.walk(dst_dir): + for file in files: + file_path = os.path.join(root, file) + if os.path.islink(file_path): + symlinks.append(file_path) + return symlinks + + +def stage_links(symlinks: List[str]) -> None: + """ + Replace symbolic links with writable copies of the linked files. + + :param symlinks: List of symbolic links to replace. + """ + for link in symlinks: + # Resolve the original file the symlink points to. + target_file = os.readlink(link) + if not os.path.exists(target_file): + _LOG.warning( + f"Warning: Target file does not exist for link {link} -> {target_file}" + ) + continue + # Replace the symlink with a writable copy of the target file. + try: + os.remove(link) + # Copy file to the symlink location. + shutil.copy2(target_file, link) + # Make the file writable. + os.chmod(link, 0o644) + _LOG.info(f"Staged: {link} -> {target_file}") + except Exception as e: + _LOG.error(f"Error staging link {link}: {e}") + + +def main(): parser = argparse.ArgumentParser( description="Stage symbolic links for modification." 
) parser.add_argument("--dst_dir", required=True, help="Destination directory.") args = parser.parse_args() + symlinks = find_symlinks(args.dst_dir) if not symlinks: _LOG.info("No symbolic links found to stage.") @@ -70,3 +73,10 @@ def main() -> None: if __name__ == "__main__": main() + +""" +Usage + + - python3 stage_linked_file.py --dst_dir /path/to/dst + +""" diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index faad555b8..d24513d64 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1502,7 +1502,7 @@ def test2(self) -> None: """ text = "**First** normal **Second** text" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = r"**\red{First}** normal **\purple{Second}** text" + expected = r"**\red{First}** normal **\teal{Second}** text" self.assert_equal(actual, expected) def test3(self) -> None: @@ -1529,7 +1529,7 @@ def test5(self) -> None: """ text = "**First** and __Second__ bold" actual = hmarkdo.colorize_bold_text(text, use_abbreviations=True) - expected = r"**\red{First}** and **\purple{Second}** bold" + expected = r"**\red{First}** and **\teal{Second}** bold" self.assert_equal(actual, expected) def test6(self) -> None: @@ -1560,7 +1560,7 @@ def test7(self) -> None: - First item - Second item - **\purple{List 2:}** + **\teal{List 2:}** - Another item - Final item """ From 2fb45a8545a5a8249b4a407d689a7197e9ee99d1 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 09:06:25 -0400 Subject: [PATCH 117/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/ai_review.py | 115 ++++++++++++++++ dev_scripts_helpers/llms/llm_prompts.py | 128 +++++++++++------- dev_scripts_helpers/system_tools/ffind.py | 22 +-- ....automated_review_guidelines2.reference.md | 68 ++++++++++ 4 files changed, 278 insertions(+), 55 deletions(-) create mode 
100755 dev_scripts_helpers/llms/ai_review.py create mode 100644 docs/code_guidelines/all.automated_review_guidelines2.reference.md diff --git a/dev_scripts_helpers/llms/ai_review.py b/dev_scripts_helpers/llms/ai_review.py new file mode 100755 index 000000000..c6a4c0f91 --- /dev/null +++ b/dev_scripts_helpers/llms/ai_review.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 + +""" +Read input from either stdin or a file, apply a specified transformation using +an LLM, and then write the output to either stdout or a file. It is +particularly useful for integrating with editors like Vim. + +The script `dockerized_llm_transform.py` is executed within a Docker container to ensure +all dependencies are met. The Docker container is built dynamically if +necessary. The script requires an OpenAI API key to be set in the environment. + +Examples +# Basic Usage +> llm_transform.py -i input.txt -o output.txt -p uppercase + +# List of transforms +> llm_transform.py -i input.txt -o output.txt -p list + +# Code review +> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_review + +# Propose refactoring +> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring +""" + +# TODO(gp): There are different modes to run the script +# - run the script to process input and write transformed output +# - run the script to process input and extract a cfile + + +import argparse +import logging +import os +import re +from typing import List, Optional + +import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hmarkdown as hmarkdo +import helpers.hparser as hparser +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import dev_scripts_helpers.llms.llm_transform as dshlllpt + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): -> 
_parser() or _get_parser() everywhere. +def _parse() -> argparse.ArgumentParser: + """ + Use the same argparse parser for `dockerized_llm_transform.py`. + """ + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + hparser.add_input_output_args( + parser, + in_default="-", + in_required=False, + ) + hparser.add_dockerized_script_arg(parser) + # Use CRITICAL to avoid logging anything. + hparser.add_verbosity_arg(parser, log_level="CRITICAL") + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hparser.init_logger_for_input_output_transform(args) + # + # Parse files. + in_file_name, out_file_name = hparser.parse_input_output_args(args) + tag = "ai_review" + tmp_in_file_name, tmp_out_file_name = ( + hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) + ) + # TODO(gp): We should just automatically pass-through the options. + prompt = "review" + cmd_line_opts = [f"-p {prompt}", f"-v {args.log_level}"] + # cmd_line_opts = [] + # for arg in vars(args): + # if arg not in ["input", "output"]: + # value = getattr(args, arg) + # if isinstance(value, bool): + # if value: + # cmd_line_opts.append(f"--{arg.replace('_', '-')}") + # else: + # cmd_line_opts.append(f"--{arg.replace('_', '-')} {value}") + # For stdin/stdout, suppress the output of the container. + suppress_output = in_file_name == "-" or out_file_name == "-" + dshlllpt._run_dockerized_llm_transform( + tmp_in_file_name, + cmd_line_opts, + tmp_out_file_name, + mode="system", + force_rebuild=args.dockerized_force_rebuild, + use_sudo=args.dockerized_use_sudo, + suppress_output=suppress_output, + ) + # # Read the output from the container and write it to the output file from + # # command line (e.g., `-` for stdout). 
+ # hparser.write_file(out_txt, out_file_name) + # if os.path.basename(out_file_name) == "cfile": + # print(out_txt) + out_txt = hio.from_file(tmp_out_file_name) + print(out_txt) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 3807fe91c..1249a9c31 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -504,53 +504,6 @@ def code_fix_code() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -# ############################################################################# -# Review. -# ############################################################################# - - -def code_review_correctness() -> _PROMPT_OUT: - """ - Review the code for correctness. - """ - system = _CODING_CONTEXT - system += r""" - You will review the code and make sure it is: - - correct - - clean and readable - - efficient - - robust - - maintainable - - Do not print any comment, besides for each point of improvement, you will - print the line number and the proposed improvement in the following style: - <line_number>: <short description of the proposed improvement> - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - post_container_transforms = ["convert_file_names"] - return system, pre_transforms, post_transforms, post_container_transforms - - -def code_review_refactoring() -> _PROMPT_OUT: - """ - Review the code for refactoring opportunities. - """ - system = _CODING_CONTEXT - system += r""" - You will review the code and look for opportunities to refactor the code, - by removing redundancy and copy-paste code. 
- - Do not print any comment, besides for each point of improvement, you will - print the line number and the proposed improvement in the following style: - <line_number>: <short description of the proposed improvement> - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - post_container_transforms = ["convert_file_names"] - return system, pre_transforms, post_transforms, post_container_transforms - - # Transform code. @@ -648,6 +601,87 @@ def code_write_1_unit_test() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +# ############################################################################# +# review. +# ############################################################################# + + +def review() -> _PROMPT_OUT: + """ + Review the code for refactoring opportunities. + """ + system = _CODING_CONTEXT + # Load the reference file. + file = "docs/code_guidelines/all.automated_review_guidelines.reference.md" + reference_txt = hio.from_file(file) + reference_txt = hmarkdo.add_line_numbers(reference_txt) + # TODO(gp): Remove table of contents between <!-- toc --> and <!-- tocstop -->. 
+    system += rf"""
+    You will review the code and make sure it follows the rules in the reference below:
+
+    {reference_txt}
+
+    - Each rule to follow is referred to by <rule_name> and represented as <header-line_number>
+      with the name of the header of the section in the reference file (e.g.,
+      'Naming') and the line number (e.g., "Naming-7")
+
+    - Only print the lines that you are sure are in violation of one of the
+      rules <rule_name> in the reference
+    - For each violation of a rule, you will print the line number of the code
+      and the proposed improvement in the following style:
+      <line_number>: <rule_name>: <short description of the proposed improvement>
+    - Do not print any other comment, besides the violation of the rules
+    """
+    pre_transforms = {"add_line_numbers"}
+    post_transforms = {"convert_to_vim_cfile"}
+    post_container_transforms = ["convert_file_names"]
+    return system, pre_transforms, post_transforms, post_container_transforms
+
+
+def review_correctness() -> _PROMPT_OUT:
+    """
+    Review the code for correctness.
+    """
+    system = _CODING_CONTEXT
+    system += r"""
+    You will review the code and make sure it is:
+    - correct
+    - clean and readable
+    - efficient
+    - robust
+    - maintainable
+
+    Do not print any comment, besides for each point of improvement, you will
+    print the line number and the proposed improvement in the following style:
+    <line_number>: <short description of the proposed improvement>
+    """
+    pre_transforms = {"add_line_numbers"}
+    post_transforms = {"convert_to_vim_cfile"}
+    post_container_transforms = ["convert_file_names"]
+    return system, pre_transforms, post_transforms, post_container_transforms
+
+
+def review_refactoring() -> _PROMPT_OUT:
+    """
+    Review the code for refactoring opportunities.
+    """
+    system = _CODING_CONTEXT
+    system += r"""
+    You will review the code and look for opportunities to refactor the code,
+    by removing redundancy and copy-paste code.
+ + Do not print any comment, besides for each point of improvement, you will + print the line number and the proposed improvement in the following style: + <line_number>: <short description of the proposed improvement> + """ + pre_transforms = {"add_line_numbers"} + post_transforms = {"convert_to_vim_cfile"} + post_container_transforms = ["convert_file_names"] + return system, pre_transforms, post_transforms, post_container_transforms + + + + # ############################################################################# # Markdown. # ############################################################################# diff --git a/dev_scripts_helpers/system_tools/ffind.py b/dev_scripts_helpers/system_tools/ffind.py index 8dcc77b4a..c275daf9b 100755 --- a/dev_scripts_helpers/system_tools/ffind.py +++ b/dev_scripts_helpers/system_tools/ffind.py @@ -42,14 +42,14 @@ def _parse() -> argparse.ArgumentParser: help="First param is regex, optional second param is dirname", ) parser.add_argument("--only_files", action="store_true", help="Only files") - parser.add_argument("--log", action="store_true", help="Only files") + parser.add_argument("--log", action="store_true", help="Report logging") hparser.add_verbosity_arg(parser) return parser def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - if args.log: + if (args.log_level == "DEBUG") or args.log: hdbg.init_logger(verbosity=args.log_level) positional = args.positional # Error check. @@ -67,13 +67,19 @@ def _main(parser: argparse.ArgumentParser) -> None: hdbg.dassert_path_exists(dir_name) name = "*" + positional[0].rstrip("").lstrip("") + "*" # - cmd = "find %s" % dir_name + cmd = [] + cmd.append(f"find {dir_name}") + # Skip certain dirs. 
+ cmd.append(r"\( -path './.git' -o -path './.ipynb_checkpoints' -o -path ./.mypy_cache \) -prune -o") if args.only_files: - cmd += " -type f" - cmd += ' -iname "%s"' % name - cmd += " | sort" - cmd += " | grep -v .ipynb_checkpoints" - if args.log: + cmd.append("-type f") + cmd.append('-iname "%s"' % name) + # Guarantee that only non-pruned files are printed. + cmd.append('-print') + cmd.append('| grep -v __pycache__') + cmd.append("| sort") + cmd = " ".join(cmd) + if (args.log_level == "DEBUG") or args.log: print(cmd) print() os.system(cmd) diff --git a/docs/code_guidelines/all.automated_review_guidelines2.reference.md b/docs/code_guidelines/all.automated_review_guidelines2.reference.md new file mode 100644 index 000000000..5135e9951 --- /dev/null +++ b/docs/code_guidelines/all.automated_review_guidelines2.reference.md @@ -0,0 +1,68 @@ +# Guidelines for automated PR reviews + +## Python code + +### Naming + +- Name executable files and library functions using verbs + - E.g., `download.py` and not `downloader.py` + - E.g., `download_data()` and not `data_downloader()` +- Name classes and non-executable files using nouns + - E.g., `Downloader()`, `downloader.py` +- Name decorators with an adjective or a past tense verb (e.g., `timed`) +- Variable and function names should not reference implementation details, + things that can change or details that are not important + - E.g., the name of a variable should not include its type, e.g. 
use + `embeddings` instead of `embeddings_list` +- Abbreviations in the names should be avoided, except for the following: `df` + (dataframe), `srs` (series), `idx` (index), `id` (identifier), `val` (value), + `var` (variable), `args` (arguments), `kwargs` (keyword arguments), `col` + (column) +- Do not repeat in a function name what is already included in the library name + (avoid "code stutter") +- Use `dir` and not `directory` or `folder` +- Use `timestamp` and not `ts` or `datetime` +- To refer to the name of a column, use `..._col` and not `..._col_name` or + `..._column` + +### Docstrings + +- All functions and methods must have a docstring +- Docstrings should be wrapped in triple quotation marks (`"""`) + - The opening and closing triple quotation marks should be located on their + own separate lines +- Every docstring should start with a capital letter +- Every docstring should start with a verb in the imperative form +- Every docstring should begin with a one-line description of what the function + does + - It must fit into a single line and end with a period +- The first docstring line is followed by a blank line and then, optionally, by + a longer description (possibly on multiple lines) with a more detailed + explanation of what the function does + - It should not describe parameters / what is being returned + - It should not describe implementation details that can be changed + - It should describe the goal of the function, the interface and what the user + needs to know to use the function +- The more detailed description is followed by a blank line and then the param / + return description section + - Use lowercase after `:param XYZ: ...` / `:return:` unless the description + starts with a proper noun + - Do not add a period at the end of the param / return descriptions + - Do not mention the type of the parameters / returned structures + - Do not mention default values of parameters in parameter descriptions + - Follow this example for indentation 
of parameter descriptions: + ```python + :param param1: a very very long param description that + continues into a second line + :param param2: a param with two possible values + - first value description + - second value description that is very long and + continues into a second line + ``` +- Adding examples (e.g., of input and output) to the docstring is encouraged +- References to variables, file paths, functions, classes, etc. should be + wrapped in backticks +- Multiline representations of data structures (e.g., an output example) should + be wrapped in triple backticks + +### Docstrings / linter From 4884d89eb246d038c143073b7e6e1b0ea95f2358 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 09:29:52 -0400 Subject: [PATCH 118/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 30 ++- ....automated_review_guidelines2.reference.md | 68 ------- ...inter_style_review_guidelines.reference.md | 90 +++++++++ ....llm_style_review_guidelines.reference.md} | 180 +++++------------- 4 files changed, 162 insertions(+), 206 deletions(-) delete mode 100644 docs/code_guidelines/all.automated_review_guidelines2.reference.md create mode 100644 docs/code_guidelines/all.linter_style_review_guidelines.reference.md rename docs/code_guidelines/{all.automated_review_guidelines.reference.md => all.llm_style_review_guidelines.reference.md} (67%) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 1249a9c31..5219766ee 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -606,13 +606,12 @@ def code_write_1_unit_test() -> _PROMPT_OUT: # ############################################################################# -def review() -> _PROMPT_OUT: +def _review_from_file(file: str) -> _PROMPT_OUT: """ Review the code for 
refactoring opportunities.
     """
     system = _CODING_CONTEXT
     # Load the reference file.
-    file = "docs/code_guidelines/all.automated_review_guidelines.reference.md"
     reference_txt = hio.from_file(file)
     reference_txt = hmarkdo.add_line_numbers(reference_txt)
     # TODO(gp): Remove table of contents between <!-- toc --> and <!-- tocstop -->.
@@ -638,6 +637,33 @@ def review() -> _PROMPT_OUT:
     return system, pre_transforms, post_transforms, post_container_transforms
 
 
+def review_linter_style() -> _PROMPT_OUT:
+    """
+    Review the code for linter style.
+    """
+    # Load the reference file.
+    file = "docs/code_guidelines/all.linter_style_review_guidelines.reference.md"
+    return _review_from_file(file)
+
+
+def review_llm_style() -> _PROMPT_OUT:
+    """
+    Review the code for LLM style.
+    """
+    # Load the reference file.
+    file = "docs/code_guidelines/all.llm_style_review_guidelines.reference.md"
+    return _review_from_file(file)
+
+
+def review_llm_style() -> _PROMPT_OUT:
+    """
+    Review the code for LLM style.
+    """
+    # Load the reference file.
+    file = "docs/code_guidelines/all.llm_style_review_guidelines.reference.md"
+    return _review_from_file(file)
+
+
 def review_correctness() -> _PROMPT_OUT:
     """
     Review the code for correctness.
diff --git a/docs/code_guidelines/all.automated_review_guidelines2.reference.md b/docs/code_guidelines/all.automated_review_guidelines2.reference.md deleted file mode 100644 index 5135e9951..000000000 --- a/docs/code_guidelines/all.automated_review_guidelines2.reference.md +++ /dev/null @@ -1,68 +0,0 @@ -# Guidelines for automated PR reviews - -## Python code - -### Naming - -- Name executable files and library functions using verbs - - E.g., `download.py` and not `downloader.py` - - E.g., `download_data()` and not `data_downloader()` -- Name classes and non-executable files using nouns - - E.g., `Downloader()`, `downloader.py` -- Name decorators with an adjective or a past tense verb (e.g., `timed`) -- Variable and function names should not reference implementation details, - things that can change or details that are not important - - E.g., the name of a variable should not include its type, e.g. use - `embeddings` instead of `embeddings_list` -- Abbreviations in the names should be avoided, except for the following: `df` - (dataframe), `srs` (series), `idx` (index), `id` (identifier), `val` (value), - `var` (variable), `args` (arguments), `kwargs` (keyword arguments), `col` - (column) -- Do not repeat in a function name what is already included in the library name - (avoid "code stutter") -- Use `dir` and not `directory` or `folder` -- Use `timestamp` and not `ts` or `datetime` -- To refer to the name of a column, use `..._col` and not `..._col_name` or - `..._column` - -### Docstrings - -- All functions and methods must have a docstring -- Docstrings should be wrapped in triple quotation marks (`"""`) - - The opening and closing triple quotation marks should be located on their - own separate lines -- Every docstring should start with a capital letter -- Every docstring should start with a verb in the imperative form -- Every docstring should begin with a one-line description of what the function - does - - It must fit into a single line and end with a period 
-- The first docstring line is followed by a blank line and then, optionally, by - a longer description (possibly on multiple lines) with a more detailed - explanation of what the function does - - It should not describe parameters / what is being returned - - It should not describe implementation details that can be changed - - It should describe the goal of the function, the interface and what the user - needs to know to use the function -- The more detailed description is followed by a blank line and then the param / - return description section - - Use lowercase after `:param XYZ: ...` / `:return:` unless the description - starts with a proper noun - - Do not add a period at the end of the param / return descriptions - - Do not mention the type of the parameters / returned structures - - Do not mention default values of parameters in parameter descriptions - - Follow this example for indentation of parameter descriptions: - ```python - :param param1: a very very long param description that - continues into a second line - :param param2: a param with two possible values - - first value description - - second value description that is very long and - continues into a second line - ``` -- Adding examples (e.g., of input and output) to the docstring is encouraged -- References to variables, file paths, functions, classes, etc. 
should be - wrapped in backticks -- Multiline representations of data structures (e.g., an output example) should - be wrapped in triple backticks - -### Docstrings / linter diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md new file mode 100644 index 000000000..583649a93 --- /dev/null +++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md @@ -0,0 +1,90 @@ +# Guidelines for automated PR reviews + +## Python code + +### Naming + +### Docstrings + +- All functions and methods must have a docstring +- Docstrings should be wrapped in triple quotation marks (`"""`) + - The opening and closing triple quotation marks should be located on their + own separate lines +- Every docstring should start with a capital letter +- Every docstring should start with a verb in the imperative form +- Every docstring should begin with a one-line description of what the function + does, fit into a single line and end with a period +- Adding examples (e.g., of input and output) to the docstring is encouraged +- References to variables, file paths, functions, classes, etc. should be + wrapped in backticks + +### Comments + +- Avoid empty comments and line inside the code when possible +- Every comment should start with a capital letter +- Every comment should start with a verb in the imperative form +- Every comment should end with a period +- Comments with TODOs should have the format of `# TODO(username): ...` + +### Code design + +- Order functions / classes in a topological order so that the ones at the top + of the files are the "innermost" and the ones at the end of the files are the + "outermost" +- Use banners to separate large sections of code, e.g.: + ```python + # ############################################################################# + # Read data. 
+ # ############################################################################# + ``` + - The text inside the banner should start with a capital letter and end with a + period + +### Imports + +- All imports should be located at the top of the file +- Do not use `import *` +- Do not use `from ... import ...`, unless it is the `typing` package, e.g., + `from typing import Iterable, List` +- Always import with a full path from the root of the repo / submodule +- Each module that can be imported should have a docstring at the very beginning + describing how it should be imported + - Linter adds it automatically +- No import cycles should be introduced by the changes in the PR + +### Type annotations +- All functions and methods, including constructors, must have type annotations + for all the parameters and returned structures + - Use `-> None` if a function doesn't return anything + - The only exception are invoke tasks, i.e. functions with the `@task` + decorator, they shouldn't have type annotations +- Type annotation `Any` should be avoided, if possible + +### Functions +- Make a function private (e.g., `_foo_bar()`) when it is a helper of another + private or public function + +### Scripts + +- Use Python and not bash for scripting +- All Python scripts that are meant to be executed directly should: + - Be marked as executable files with `> chmod +x foo_bar.py` + - Have the standard Unix shebang notation at the top: `#!/usr/bin/env python` + - Use the following idiom at the bottom: + ```python + if __name__ == "__main__": + ... 
+ ``` + - Use `argparse` for argument parsing + +### Unit tests +- Unit tests should be placed in a `test_*.py` file in the `test` directory, + close to the library / code it tests + - Test file `test_file_name.py` testing the library `file_name.py` +- Every test class should inherit from `hunitest.TestCase` +- We use `pytest` as test harness so do not add the following idiom in the + testing file + ```python + if __name__ == "__main__": + unittest.main() + ``` diff --git a/docs/code_guidelines/all.automated_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md similarity index 67% rename from docs/code_guidelines/all.automated_review_guidelines.reference.md rename to docs/code_guidelines/all.llm_style_review_guidelines.reference.md index 251b4ad6e..6585393de 100644 --- a/docs/code_guidelines/all.automated_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -1,32 +1,5 @@ # Guidelines for automated PR reviews -<!-- toc --> - -- [Python code](#python-code) - * [Naming](#naming) - * [Docstrings](#docstrings) - * [Comments](#comments) - * [Code design](#code-design) - * [Imports](#imports) - * [Type annotations](#type-annotations) - * [Functions](#functions) - * [Scripts](#scripts) - * [Logging](#logging) - * [Unit tests](#unit-tests) - * [Misc](#misc) -- [Notebooks](#notebooks) - * [General](#general) - * [Jupytext](#jupytext) - * [Plotting](#plotting) -- [Markdowns](#markdowns) -- [File system structure](#file-system-structure) -- [Spelling](#spelling) - -<!-- tocstop --> - -This document outlines the rules that all checked-in code must follow. It can -serve as a guideline for automated PR reviews. - ## Python code ### Naming @@ -53,15 +26,6 @@ serve as a guideline for automated PR reviews. 
### Docstrings -- All functions and methods must have a docstring -- Docstrings should be wrapped in triple quotation marks (`"""`) - - The opening and closing triple quotation marks should be located on their - own separate lines -- Every docstring should start with a capital letter -- Every docstring should start with a verb in the imperative form -- Every docstring should begin with a one-line description of what the function - does - - It must fit into a single line and end with a period - The first docstring line is followed by a blank line and then, optionally, by a longer description (possibly on multiple lines) with a more detailed explanation of what the function does @@ -95,25 +59,20 @@ serve as a guideline for automated PR reviews. - Add a comment for every logically distinct chunk of code - Use comments to separate chunks of code instead of blank lines -- Avoid empty comments when possible -- Every comment should start with a capital letter -- Every comment should start with a verb in the imperative form -- Every comment should end with a period - We do not use inline comments; every comment should be on its own separate - line -- Comments should be placed above the lines that they are referring to -- In `if-elif-else` statements, the comments are placed underneath each - statement in order to explain the code that belongs to each statement in - particular + line, before the line it refers to + - In `if-elif-else` statements, the comments are placed underneath each + statement in order to explain the code that belongs to each statement in + particular - Avoid mentioning concrete names of variables, functions, classes, files, etc. 
in the comments - If it is unavoidable, wrap their names in backticks - Avoid referring to the type of a variable in the comments -- Do not include implementation details in comments (describe "what" and not - "how") +- Do not include implementation details in comments + - Describe "what" and "why" the code does something and not "how" the code + does it - If some code is commented out in a PR, a comment should be added to explain the reason why -- Comments with TODOs should have the format of `# TODO(username): ...` ### Code design @@ -121,59 +80,28 @@ serve as a guideline for automated PR reviews. - Factor out common code in a separate function / method - Do not copy-and-paste parameter descriptions, instead write them in only one function and put a reference to it in the other functions where the same - parameters are used, e.g., "See `func_name()` for the param description" -- Order functions / classes in a topological order so that the ones at the top - of the files are the "innermost" and the ones at the end of the files are the - "outermost" + parameters are used + - E.g., "See `func_name()` for the param description" - Keep public functions in an order representing the typical flow of use, e.g., - Common functions, used by all other functions - Read data - Process data - Save data -- Use banners to separate large sections of code, e.g.: - ```python - # ############################################################################# - # Read data. - # ############################################################################# - ``` - - The text inside the banner should start with a capital letter and end with a - period - -### Imports - -- All imports should be located at the top of the file -- Do not use `import *` -- Do not use `from ... 
import ...` - - The only exception is the `typing` package, e.g., - `from typing import Iterable, List` -- Always import with a full path from the root of the repo / submodule -- Each module that can be imported should have a docstring at the very beginning - describing how it should be imported - - Linter adds it automatically -- No import cycles should be introduced by the changes in the PR ### Type annotations -- All functions and methods, including constructors, must have type annotations - for all the parameters and returned structures - - We use `-> None` if a function doesn't return anything - - The only exception are invoke tasks, i.e. functions with the `@task` - decorator — they shouldn't have type annotations -- We use `List[<type of list elements>]` instead of `list`, +- For type hints use `List[<type of list elements>]` instead of `list`, `Dict[<type of keys>, <type of values>]` instead of `dict`, `Tuple[<type of tuple elements>]` instead of `tuple`, etc. -- Type annotation `Any` should be avoided, if possible ### Functions -- Avoid modifying the function input - - For example, if a function `f` accepts a dataframe `df` as its (sole) - argument, then, ideally, `f(df)` will not modify `df` -- Use pure functions, i.e. if the function arguments do not change, then the - returned value should not change (in contrast to, e.g., functions that rely +- Avoid pure functions without side effects, i.e. 
for the same input arguments + the returned value should not change (in contrast to, e.g., functions that rely upon global state) -- Make a function private (e.g., `_foo_bar()`) when it is a helper of another - private or public function +- Functions should not modify the function inputs + - E.g., if a function `f()` accepts a dataframe `df` as its argument, then + `f()` will not modify `df` but make a copy and work on it - The preferred order of function parameters is: - Input parameters - Output parameters @@ -183,19 +111,21 @@ serve as a guideline for automated PR reviews. of the time are constant - All the default parameters should be keyword-only - They should be separated from the other parameters by `*` -- Do not use lists, maps, objects, etc. as the default value — instead pass - `None` and then initialize the default parameter inside the function +- Do not use mutable objects (such as lists, maps, objects) as default value for + functions, instead pass `None` and then initialize the default parameter inside + the function - Use a default value of `None` when a function needs to be wrapped and the default parameter needs to be propagated - Do not use use a boolean parameter as a switch controlling some function - behavior — instead use a string parameter `mode`, which is allowed to take a + behavior, instead use a string parameter `mode`, which is allowed to take a small well-defined set of values - For functions dealing with dataframes, avoid hard-wired column name - dependencies — instead allow the caller to pass the column name to the - function as a parameter -- Do not put computations of the output in the `return` line — instead compute - the output first, assign it to a variable and then return this variable -- A function should ideally have a single exit point (one line with `return`) + dependencies, instead allow the caller to pass the column name to the function + as a parameter +- Do not put computations of the output in the `return` line + - 
Instead compute the output first, assign it to a variable and then return + this variable +- A function should have a single exit point, i.e., one single line with `return` - A function should ideally return objects of only one type (or `None`) - When calling a function, assign all the input parameter values to variables on separate lines and then pass these variables to the function @@ -204,22 +134,11 @@ serve as a guideline for automated PR reviews. - E.g., call `func()` like `func(param1, param2, param3=param3)` if `param3` is the only parameter with a default value -### Scripts - -- Use Python and not bash for scripting -- All Python scripts that are meant to be executed directly should: - - Be marked as executable files with `> chmod +x foo_bar.py` - - Have the standard Unix shebang notation at the top: `#!/usr/bin/env python` - - Use the following idiom at the bottom: - ```python - if __name__ == "__main__": - ... - ``` - - Use `argparse` for argument parsing - ### Logging -- Use extensive logging (and not `print()`) for monitoring execution +- Use logging `_LOG.debug()` and not `print()` for tracing execution +- Use positional args in logging and not inline formatting + - E.g., `_LOG.debug("cmd=%s", cmd1)` instead `_LOG.debug(f"cmd={cmd1}")` - Use the following idiom to configure logging: ```python @@ -230,34 +149,22 @@ serve as a guideline for automated PR reviews. 
hdbg.init_logger(verbosity=logging.DEBUG) ``` -- Use positional args in logging (e.g., - `_LOG.debug("cmd=%s %s %s", cmd1, cmd2, cmd3)`) - ### Unit tests -- Unit tests should be placed in a `test_*.py` file in the `test` directory, - close to the library / code it tests -- A test class should test only one function / class -- A test method should only test a single case (e.g., "for these inputs the - function responds with this output") -- Every test class should inherit from `hunitest.TestCase` +- A test class should test only one function or class +- A test method should only test a single case + - E.g., "for these inputs the function responds with this output" - Adhere to the following conventions for naming: - - Test file `test_file_name.py` testing the library `file_name.py` - - Test class `TestFooBar` for the class `FooBar`, and test methods - `TestFooBar.test_method_a`, `TestFooBar.test_method_b` for the methods + - Class `TestFooBar` tests the class `FooBar` and its methods + `TestFooBar.test_method_a`, `TestFooBar.test_method_b` test the methods `FooBar.method_a` and `FooBar.method_b` - - Test class `Test_foo_bar` for the function `foo_bar()`, and test methods - `Test_foo_bar.test1`, `Test_foo_bar.test2` for different cases / inputs -- - Do not add the following idiom in the testing file - ```python - if __name__ == "__main__": - unittest.main() - ``` -- A unit test should be independent of all other unit tests -- If there is a lot of common code in individual test methods, it should be + - Class `Test_foo_bar` tests the function `foo_bar()` + - `Test_foo_bar.test1`, `Test_foo_bar.test2` for different cases / inputs +- A unit test should be independent of all the other unit tests +- If there is a lot of common code across individual test methods, it should be factored out in a helper method within the test class - If some code needs to be repeated at the beginning / end of each test method, - it can be moved to `set_up_test()` / `tear_down_test()` methods 
and the + it should be moved to `set_up_test()` / `tear_down_test()` methods and the following idiom should be added to the test class: ```python @pytest.fixture(autouse=True) @@ -268,14 +175,15 @@ serve as a guideline for automated PR reviews. # Run after each test. self.tear_down_test() ``` -- Test methods should have a docstring describing briefly what case is being +- Each test method should have a docstring describing briefly what case is being tested -- Test methods should have param / return type annotations -- Do not create temporary files for tests with `tempfile` — use - `self.get_scratch_space()` instead +- Test methods should have type hint annotations +- Do not create temporary files for tests (e.g., with `tempfile`) but use + `hunittest.TestCase.get_scratch_space()` instead - If the input to the test is a large piece of code / text, it should be moved to a separate file in the `input` dir corresponding to the test - (`outcomes/<TestClassName.test_method_name>/input`) + - E.g., `outcomes/<TestClassName.test_method_name>/input` and read through the + `self.get_input - Do not use pickle files for test inputs - In every test method, separate logically distinct code chunks with comments `# Prepare inputs.`, `# Run.` and `# Check.` From 631678c5b58feda6c2346300c42b976046147a33 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 09:38:20 -0400 Subject: [PATCH 119/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- ...inter_style_review_guidelines.reference.md | 12 ++++ ...l.llm_style_review_guidelines.reference.md | 69 +++++++++++-------- 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md index 583649a93..2add7e86d 100644 --- 
a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md @@ -4,6 +4,11 @@ ### Naming +- Name executable Python scripts using verbs and actions + - E.g., `download.py` and not `downloader.py` +- Name non-executable files using nouns + - E.g., `downloader.py` + ### Docstrings - All functions and methods must have a docstring @@ -88,3 +93,10 @@ if __name__ == "__main__": unittest.main() ``` +- If a unit test is renamed or removed in a PR, the corresponding files in the + `outcomes` dir should also be renamed or removed + +### Misc +- If a PR includes renaming a file, variable, parameter, function, class, etc., + then all the instances and references to it throughout the codebase should be + updated diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index 6585393de..0c8bff58b 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -4,22 +4,30 @@ ### Naming -- Name executable files (scripts) and library functions using verbs (e.g., - `download.py`, `download_data()`) -- Name classes and non-executable files using nouns (e.g., `Downloader()`, - `downloader.py`) -- Name decorators with an adjective or a past tense verb (e.g., `timed`) -- Variable and function names should not reference implementation details, +- Name functions using verbs and verbs/actions + - E.g., `download_data()` +- Name classes using nouns + - E.g., `Downloader()` +- Name decorators with an adjective or a past tense verb + - E.g., `timed` +- Variable and function names should not reference implementation details, and things that can change or details that are not important - - E.g., the name of a variable should not include its type, e.g. 
use - `embeddings` instead of `embeddings_list` -- Abbreviations in the names should be avoided, except for the following: `df` - (dataframe), `srs` (series), `idx` (index), `id` (identifier), `val` (value), - `var` (variable), `args` (arguments), `kwargs` (keyword arguments), `col` - (column) + - E.g., the name of a variable should not include its type + - E.g. use `embeddings` instead of `embeddings_list` +- Abbreviations in the names should be avoided, except for the following + - `df` for dataframe + - `srs` for series + - `idx` for index + - `id` for identifier + - `val` for value + - `var` for variable + - `args` for arguments and `kwargs` for keyword arguments + - `col` for columns and `row` for rows - Do not repeat in a function name what is already included in the library name (avoid "code stutter") - Use `dir` and not `directory` or `folder` +- Use `file_name` and not `filename` +- Use `dir_name` and not `dirname` - Use `timestamp` and not `ts` or `datetime` - To refer to the name of a column, use `..._col` and not `..._col_name` or `..._column` @@ -183,33 +191,34 @@ - If the input to the test is a large piece of code / text, it should be moved to a separate file in the `input` dir corresponding to the test - E.g., `outcomes/<TestClassName.test_method_name>/input` and read through the - `self.get_input + function `self.get_input_dir()` of `TestCase` - Do not use pickle files for test inputs - In every test method, separate logically distinct code chunks with comments - `# Prepare inputs.`, `# Run.` and `# Check.` -- Specify all the input parameter values on separate lines before passing them - to the function that is being tested -- Do not use `hdbg.dassert` in testing -- Use `self.assert_equal()` instead of `self.assertEqual()` + - E.g., + ``` + # Prepare inputs. + ... + # Run test. + ... + # Check outputs. 
+ ``` +- Do not use `hdbg.dassert` in testing but use `self.assert*()` methods +- Prefer `self.assert_equal()` instead of `self.assertEqual()` - Use strings to compare actual and expected outputs instead of data structures - (e.g., a string representation of a list instead of a list) + - E.g., use a string representation of a list instead of a list - Use `self.check_string()` to compare the actual output to a golden output in - the `outcomes` dir + the `outcomes` dir, when the output is large or needs to be modified easily - When testing for an assertion, check that you are getting the exact exception that is expected -- If a unit test is renamed or removed in a PR, the corresponding files in the - `outcomes` dir should also be renamed or removed ### Misc -- If a PR includes renaming a file / variable / parameter / function / class / - etc., then all the instances and references to it throughout the codebase - should be updated -- Encode the assumptions made in the code using assertions, e.g., - `hdbg.dassert_lt(start_date, end_date)` - - Report as much information as possible in an assertion -- Use f-strings in exceptions (e.g., - `raise ValueError(f"Invalid server_name='{server_name}'")`) +- Encode the assumptions made in the code using assertions and report as much + information as possible in an assertion to make it easy to debug the output + - E.g., `hdbg.dassert_lt(start_date, end_date)` +- Do not use f-strings in `hdbg.dassert()` +- Use f-strings in exceptions + - E.g., `raise ValueError(f"Invalid server_name='{server_name}'")`) - Use complete `if-elif-else` statements instead of a sequence of `if` statements - Compile a regex expression only if it's called more than once From e5b3bc7da6693e22770e6be7e07636dbcc80bc76 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 09:47:45 -0400 Subject: [PATCH 120/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All 
checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 5219766ee..aea135d16 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -655,15 +655,6 @@ def review_llm_style() -> _PROMPT_OUT: return _review_from_file(file) -def review_llm_style() -> _PROMPT_OUT: - """ - Review the code for linter style. - """ - # Load the reference file. - file = "docs/code_guidelines/all.llm_style_review_guidelines.reference.md" - return _review_from_file(file) - - def review_correctness() -> _PROMPT_OUT: """ Review the code for correctness. @@ -694,7 +685,7 @@ def review_refactoring() -> _PROMPT_OUT: system = _CODING_CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, - by removing redundancy and copy-paste code. + by removing redundancy and copy-pasted code. Do not print any comment, besides for each point of improvement, you will print the line number and the proposed improvement in the following style: @@ -706,8 +697,6 @@ def review_refactoring() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms - - # ############################################################################# # Markdown. 
# ############################################################################# From 31a36c6fd423008399d3fb951c3965b28fd78e63 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 09:55:23 -0400 Subject: [PATCH 121/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 24 +++++++++++ ...l.llm_style_review_guidelines.reference.md | 43 ++++++++++++++----- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index aea135d16..6f46c7dd5 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -732,6 +732,30 @@ def md_summarize_short() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def md_expand() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + + You will: + - Maintain the structure of the text and keep the content of the existing + text + - Add bullet points to the text that are important or missing + - Add examples to clarify the text and help intuition + - Do not use bold or italicize the text + - Use `E.g.,` instead of `Example` + + Print only the markdown without any explanation. 
+ """ + pre_transforms: Set[str] = set() + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + def md_clean_up_how_to_guide() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index 0c8bff58b..4d2a410c8 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -5,15 +5,16 @@ ### Naming - Name functions using verbs and verbs/actions - - E.g., `download_data()` + - E.g., `download_data()`, `process_input()`, `calculate_sum()` - Name classes using nouns - - E.g., `Downloader()` + - E.g., `Downloader()`, `DataProcessor()`, `User()` - Name decorators with an adjective or a past tense verb - - E.g., `timed` + - E.g., `timed`, `cached`, `logged` - Variable and function names should not reference implementation details, and things that can change or details that are not important - E.g., the name of a variable should not include its type - - E.g. 
use `embeddings` instead of `embeddings_list` + - E.g., use `embeddings` instead of `embeddings_list` + - E.g., use `data` instead of `data_dict` - Abbreviations in the names should be avoided, except for the following - `df` for dataframe - `srs` for series @@ -25,28 +26,37 @@ - `col` for columns and `row` for rows - Do not repeat in a function name what is already included in the library name (avoid "code stutter") + - E.g., if using a library named `math`, avoid naming a function + `math_calculate()` - Use `dir` and not `directory` or `folder` + - E.g., `dir_path` - Use `file_name` and not `filename` + - E.g., `file_name` for storing the name of a file - Use `dir_name` and not `dirname` + - E.g., `dir_name` for storing the name of a directory - Use `timestamp` and not `ts` or `datetime` + - E.g., `event_timestamp` - To refer to the name of a column, use `..._col` and not `..._col_name` or `..._column` + - E.g., `age_col` for a column storing age values ### Docstrings - The first docstring line is followed by a blank line and then, optionally, by a longer description (possibly on multiple lines) with a more detailed explanation of what the function does - - It should not describe parameters / what is being returned - - It should not describe implementation details that can be changed - - It should describe the goal of the function, the interface and what the user + - The text should describe the goal of the function, the interface and what the user needs to know to use the function -- The more detailed description is followed by a blank line and then the param / - return description section + - E.g., "This function calculates the sum of two numbers and returns the + result." 
+ - The text should not describe parameters / what is being returned + - The text should not describe implementation details that can be changed +- The more detailed description is followed by a blank line and then the param + and return description section in REST style - Use lowercase after `:param XYZ: ...` / `:return:` unless the description starts with a proper noun - - Do not add a period at the end of the param / return descriptions - - Do not mention the type of the parameters / returned structures + - Do not add a period at the end of the param and return descriptions + - Do not mention the type of the parameters and return structures - Do not mention default values of parameters in parameter descriptions - Follow this example for indentation of parameter descriptions: ```python @@ -58,10 +68,21 @@ continues into a second line ``` - Adding examples (e.g., of input and output) to the docstring is encouraged + - E.g., + ``` + # Example usage: + result = add_numbers(3, 5) + # result is 8 + ``` - References to variables, file paths, functions, classes, etc. should be wrapped in backticks + - E.g., "The `add_numbers` function takes two arguments." 
- Multiline representations of data structures (e.g., an output example) should be wrapped in triple backticks + - E.g., + ``` + { "name": "John", "age": 30, "city": "New York" } + ``` ### Comments From 2b36e20aff790fc762cc05a7ea7da06abcf7882c Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 10:18:21 -0400 Subject: [PATCH 122/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- ...inter_style_review_guidelines.reference.md | 76 ++++ ...l.llm_style_review_guidelines.reference.md | 343 ++++++++++-------- 2 files changed, 275 insertions(+), 144 deletions(-) diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md index 2add7e86d..829b8c1ed 100644 --- a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md @@ -100,3 +100,79 @@ - If a PR includes renaming a file, variable, parameter, function, class, etc., then all the instances and references to it throughout the codebase should be updated + +## Notebooks + +### General + +- The name of a notebook should generally be the same as the branch name, unless + it's a Master notebook +- All notebooks should have a table of contents + - Linter automatically adds and updates the table of contents +- At the top of the notebook there should be a Markdown cell `# Description`, + followed by a Markdown cell with an explanation of the notebook's goal, what + it does, etc. 
+- Immediately below the description, there should be a Markdown cell + `# Imports`, followed by a code cell importing all the needed libraries + - It should include autoreload modules to keep the local code updated in real + time: + ```python + %load_ext autoreload + %autoreload 2 + ``` + - All the imports should be located in a single cell +- Below the cell with the imports, there should be a code cell that configures + the logging and notebook style, and reports execution info: + ```python + # Configure logger. + hdbg.init_logger(verbosity=logging.INFO) + _LOG = logging.getLogger(__name__) + # Print system signature. + _LOG.info("%s", henv.get_system_signature()[0]) + # Configure the notebook style. + hprint.config_notebook() + ``` +- The rest of the notebook should be clearly organized using Markdown cells with + headings of different levels +- There should be no errors in the executed notebook +- Ideally, there should be no warnings in the executed notebook + +### Jupytext + +- Every notebook should be accompanied by a Python file paired with the notebook + by `jupytext`, containing a synchronized copy of the notebook code +- The name of the notebook and the name of its paired Python file should be the + same, except the extension +- The code in the notebook and in its paired Python file should always be in + sync +- If the notebook is updated or deleted, then its paired Python file should also + by updated or deleted, and vice versa +- Linter should be used on both the notebook and its paired Python file + +## Markdowns + +- Names of documentation files should follow the format + `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` + - E.g., `docs/documentation_meta/all.diataxis.explanation.md` +- All Markdown files should have a table of contents + - Linter automatically adds and updates the table of contents +- There should be one and only one level 1 heading (with one `#`) in a Markdown +- The level 1 heading should be located above the table of 
contents +- Headings should not be boldfaced +- Headings should not be overcapitalized + - E.g., `Data schema` instead of `Data Schema` +- Text should be reflowed to the maximum of 80 columns per line +- Fenced code blocks should always be accompanied by language markers + - E.g. `bash`, `python` +- Indent fenced code blocks at the same level as the previous line + +## Spelling + +- Capitalize the first letter of `Python` +- Spell `Linter` with the first letter in upper case and do not use an article + (`Linter` instead of `the Linter`) +- Capitalize `JSON`, `CSV`, `DB` and other abbreviations +- Spell commands in lower case and programs with the first letter in upper case + (e.g., `git` as a command, `Git` as a program) +- Represent intervals with `[a, b), (a, b], (a, b), [a, b]`, not `[a, b[` +- Write `hyperparameter` without a hyphen diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index 4d2a410c8..7066bcf4f 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -77,7 +77,7 @@ - References to variables, file paths, functions, classes, etc. should be wrapped in backticks - E.g., "The `add_numbers` function takes two arguments." 
-- Multiline representations of data structures (e.g., an output example) should +- Multi-line representations of data structures (e.g., an output example) should be wrapped in triple backticks - E.g., ``` @@ -88,77 +88,175 @@ - Add a comment for every logically distinct chunk of code - Use comments to separate chunks of code instead of blank lines -- We do not use inline comments; every comment should be on its own separate - line, before the line it refers to +- Do not use inline comments; every comment should be on its own separate line, + before the line it refers to - In `if-elif-else` statements, the comments are placed underneath each statement in order to explain the code that belongs to each statement in particular + ```python + if ...: + # Do this. + else: + # Do that. + ``` - Avoid mentioning concrete names of variables, functions, classes, files, etc. in the comments - If it is unavoidable, wrap their names in backticks - Avoid referring to the type of a variable in the comments + - Keeps comments focused on functionality rather than implementation specifics - Do not include implementation details in comments - Describe "what" and "why" the code does something and not "how" the code does it + - Ensures comments remain relevant even if the implementation changes - If some code is commented out in a PR, a comment should be added to explain the reason why + - Provides context for future reference and helps other developers understand + the decision + - E.g., "This section is commented out due to a known bug that needs fixing" + or "Temporarily disabled for performance testing" + +### Code implementation + +- Encode the assumptions made in the code using assertions and report as much + information as possible in an assertion to make it easy to debug the output + - E.g., `hdbg.dassert_lt(start_date, end_date)` + - Ensure that assertions provide detailed information for debugging + - Use assertions to validate input parameters and preconditions +- Do not use 
f-strings in `hdbg.dassert()`, but use traditional string formatting + methods in assertions + - E.g., + `hdbg.dassert_eq(len(list1), len(list2), "Lists must be of equal length: %d vs %d" % (len(list1), len(list2)))` +- Use f-strings in exceptions + - E.g., `raise ValueError(f"Invalid server_name='{server_name}'")` + - Provide clear and informative error messages using f-strings + - E.g., `raise TypeError(f"Expected type int, but got {type(var).__name__}")` +- Use complete `if-elif-else` statements instead of a sequence of `if` + statements + - Ensure logical flow and clarity in conditional statements + - E.g., + ```python + if condition1: + # Execute block for condition1. + elif condition2: + # Execute block for condition2. + else: + # Execute block if none of the above conditions are met or raise an + # exception. + ``` +- Compile a regex expression only if it's called more than once + - Optimize performance by compiling regex expressions that are reused + - E.g., + ``` + import re + pattern = re.compile(r'\d+') + if pattern.match(string): + # Do something. 
+ ``` +- Use `if var is None` to check if `var` is `None` instead of `if not var` +- Use `isinstance()` instead of `type()` to check the type of an object ### Code design - Follow DRY principle (Don't Repeat Yourself): - - Factor out common code in a separate function / method - - Do not copy-and-paste parameter descriptions, instead write them in only one - function and put a reference to it in the other functions where the same + - Factor out common code in a separate function/method + - Do not copy-and-paste parameter descriptions; instead, write them in only + one function and put a reference to it in the other functions where the same parameters are used - - E.g., "See `func_name()` for the param description" -- Keep public functions in an order representing the typical flow of use, e.g., + - E.g., "See `func_name()` for the param description" + - Avoid redundancy in code logic and comments +- Keep public functions in an order representing the typical flow of use: - Common functions, used by all other functions + - E.g., utility functions like `log_message()`, `validate_input()` - Read data + - E.g., `read_csv()`, `load_json()` - Process data + - E.g., `clean_data()`, `transform_data()` - Save data + - E.g., `write_csv()`, `export_json()` +- Ensure that function names are descriptive and convey their purpose +- Use comments to explain complex logic or calculations +- Implement error handling to manage exceptions and edge cases +- Use inheritance or composition to reuse code in object-oriented programming ### Type annotations -- For type hints use `List[<type of list elements>]` instead of `list`, - `Dict[<type of keys>, <type of values>]` instead of `dict`, - `Tuple[<type of tuple elements>]` instead of `tuple`, etc. 
+- For type hints use `List`, `Dict`, and `Tuple` to provide more explicit type information
+  and help with static type checking
+  - E.g., `List[int]` instead of `list`
+  - E.g., `List[str]` instead of `list`
+  - Use `Dict` instead of `dict`
+    - E.g., `Dict[str, int]` instead of `dict`
+    - E.g., `Dict[int, List[str]]` instead of `dict`
+  - Use `Tuple` instead of `tuple`
+    - E.g., `Tuple[int, str]` instead of `tuple`
+    - E.g., `Tuple[str, List[int]]` instead of `tuple`
 
 ### Functions
 
-- Avoid pure functions without side effects, i.e. for the same input arguments
-  the returned value should not change (in contrast to, e.g., functions that rely
-  upon global state)
+- Prefer pure functions without side effects, i.e., for the same input arguments,
+  the returned value should not change (in contrast to functions that
+  rely upon external state)
 - Functions should not modify the function inputs
   - E.g., if a function `f()` accepts a dataframe `df` as its argument, then
     `f()` will not modify `df` but make a copy and work on it
-- The preferred order of function parameters is:
+  - This ensures that the original data remains unchanged and can be reused
+- The preferred order of parameters in a function declaration is:
   - Input parameters
   - Output parameters
   - In-out parameters
   - Default parameters
+  - This order helps in maintaining clarity and consistency in function
+    definitions
 - Default parameters should be used sparingly and only for parameters that 99%
   of the time are constant
 - All the default parameters should be keyword-only
   - They should be separated from the other parameters by `*`
+  - This ensures that default parameters are always explicitly specified by
+    name, improving readability
 - Do not use mutable objects (such as lists, maps, objects) as default value for
-  functions, instead pass `None` and then initialize the default parameter inside
-  the function
+  functions; instead, pass `None` and then initialize the default parameter
+  inside the function
+  - 
E.g., instead of using a list as a default parameter, use `None` and
+    initialize the list inside the function:
+    ```
+    def add_item(item: str, *, items: Optional[List[str]] = None) -> List[str]:
+        if items is None:
+            items = []
+        items.append(item)
+        return items
+    ```
+
 - Use a default value of `None` when a function needs to be wrapped and the
   default parameter needs to be propagated
-- Do not use use a boolean parameter as a switch controlling some function
-  behavior, instead use a string parameter `mode`, which is allowed to take a
-  small well-defined set of values
+- Do not use a boolean parameter as a switch controlling some function behavior;
+  instead, use a string parameter `mode`, which is allowed to take a small
+  well-defined set of values
+  - E.g., `def process_data(mode='fast'):` where `mode` can be `'fast'`,
+    `'slow'`, etc
 - For functions dealing with dataframes, avoid hard-wired column name
-  dependencies, instead allow the caller to pass the column name to the function
-  as a parameter
+  dependencies; instead, allow the caller to pass the column name to the
+  function as a parameter
+  - E.g., `def calculate_average(df: pd.DataFrame, column_name: str):`
 - Do not put computations of the output in the `return` line
-  - Instead compute the output first, assign it to a variable and then return
+  - Instead, compute the output first, assign it to a variable, and then return
     this variable
-- A function should have a single exit point, i.e., one single line with `return`
+  - E.g.,
+    ```
+    result = compute_value()
+    return result
+    ```
+- A function should have a single exit point, i.e., one single line with
+  `return`
 - A function should ideally return objects of only one type (or `None`)
 - When calling a function, assign all the input parameter values to variables on
   separate lines and then pass these variables to the function
-- Explicitly bind default parameters, i.e. 
specify the parameter name when
+  - E.g.,
+    ```
+    param1 = value1
+    param2 = value2
+    result = my_function(param1, param2)
+    ```
+- Explicitly bind default parameters, i.e., specify the parameter name when
+  calling a function, and do not bind non-default parameters
   - E.g., call `func()` like `func(param1, param2, param3=param3)` if `param3`
     is the only parameter with a default value
@@ -180,18 +278,25 @@
 
 ### Unit tests
 
-- A test class should test only one function or class
-- A test method should only test a single case
+- A test class should test only one function or class to help understand test
+  failures
+- A test method should only test a single case to ensure clarity and precision
+  in testing
   - E.g., "for these inputs the function responds with this output"
 - Adhere to the following conventions for naming:
   - Class `TestFooBar` tests the class `FooBar` and its methods
-    - `TestFooBar.test_method_a`, `TestFooBar.test_method_b` test the methods
-      `FooBar.method_a` and `FooBar.method_b`
+    - `TestFooBar.test_method_a`, `TestFooBar.test_method_b` test the methods
+      `FooBar.method_a` and `FooBar.method_b`
   - Class `Test_foo_bar` tests the function `foo_bar()`
+    - E.g., `Test_foo_bar.test_valid_input`, `Test_foo_bar.test_invalid_input`
+      for different cases / inputs
     - `Test_foo_bar.test1`, `Test_foo_bar.test2` for different cases / inputs
 - A unit test should be independent of all the other unit tests
+  - Ensures that tests do not affect each other and can be run in isolation
 - If there is a lot of common code across individual test methods, it should be
   factored out in a helper method within the test class
+  - Reduces redundancy and improves maintainability of the test code
+  - E.g., a `setUp` method to initialize common test data or configurations
 - If some code needs to be repeated at the beginning / end of each test method,
   it should be moved to `set_up_test()` / `tear_down_test()` methods and the
   following idiom should be added to the test class:
@@ -206,162 
+311,112 @@ ``` - Each test method should have a docstring describing briefly what case is being tested + - E.g., "Tests the addition of two positive integers." + - E.g., "Verifies that an exception is raised when dividing by zero." - Test methods should have type hint annotations + - E.g., `def test_addition(self) -> None:` - Do not create temporary files for tests (e.g., with `tempfile`) but use `hunittest.TestCase.get_scratch_space()` instead -- If the input to the test is a large piece of code / text, it should be moved - to a separate file in the `input` dir corresponding to the test +- If the input to the test is a large piece of code/text, it should be moved to + a separate file in the `input` dir corresponding to the test - E.g., `outcomes/<TestClassName.test_method_name>/input` and read through the function `self.get_input_dir()` of `TestCase` + - This approach allows for easy updates and modifications to test inputs + without altering the test code itself - Do not use pickle files for test inputs + - Use JSON, YAML, CSV files for test inputs as they are more secure and + human-readable - In every test method, separate logically distinct code chunks with comments - E.g., ``` - # Prepare inputs. - ... - # Run test. - ... - # Check outputs. 
+ # Prepare inputs + input_data = [1, 2, 3] + # Run test + result = my_function(input_data) + # Check outputs + self.assert_equal(result, expected_output) ``` - Do not use `hdbg.dassert` in testing but use `self.assert*()` methods - Prefer `self.assert_equal()` instead of `self.assertEqual()` + - Always use actual and then expected value + - E.g., `self.assert_equal(actual, expected)` - Use strings to compare actual and expected outputs instead of data structures - - E.g., use a string representation of a list instead of a list + - E.g., use `self.assert_equal(str(actual_list), str(expected_list))` - Use `self.check_string()` to compare the actual output to a golden output in the `outcomes` dir, when the output is large or needs to be modified easily + - E.g., `self.check_string(actual_output)` - When testing for an assertion, check that you are getting the exact exception that is expected - -### Misc - -- Encode the assumptions made in the code using assertions and report as much - information as possible in an assertion to make it easy to debug the output - - E.g., `hdbg.dassert_lt(start_date, end_date)` -- Do not use f-strings in `hdbg.dassert()` -- Use f-strings in exceptions - - E.g., `raise ValueError(f"Invalid server_name='{server_name}'")`) -- Use complete `if-elif-else` statements instead of a sequence of `if` - statements -- Compile a regex expression only if it's called more than once -- Use `if var is (not) None` to check if `var` is (not) `None` (instead of - `if (not) var`) -- Use `isinstance()` instead of `type()` to check the type of an object + ``` + # Make sure function raises an error. 
+ with self.assertRaises(AssertionError) as cm: + config_list.configs = configs + act = str(cm.exception) + self.check_string(act, fuzzy_match=True) + ``` ## Notebooks ### General -- The name of a notebook should generally be the same as the branch name, unless - it's a Master notebook -- All notebooks should have a table of contents - - Linter automatically adds and updates the table of contents -- At the top of the notebook there should be a Markdown cell `# Description`, - followed by a Markdown cell with an explanation of the notebook's goal, what - it does, etc. -- Immediately below the description, there should be a Markdown cell - `# Imports`, followed by a code cell importing all the needed libraries - - It should include autoreload modules to keep the local code updated in real - time: - ```python - %load_ext autoreload - %autoreload 2 - ``` - - All the imports should be located in a single cell -- Below the cell with the imports, there should be a code cell that configures - the logging and notebook style, and reports execution info: - ```python - # Configure logger. - hdbg.init_logger(verbosity=logging.INFO) - _LOG = logging.getLogger(__name__) - # Print system signature. - _LOG.info("%s", henv.get_system_signature()[0]) - # Configure the notebook style. - hprint.config_notebook() - ``` -- The rest of the notebook should be clearly organized using Markdown cells with - headings of different levels - The code in the notebook should adhere to the same style and formatting guidelines as the code in libraries and scripts - Common or general-purpose code should be factored out in functions and moved from the notebook to a Python library, which would then be imported in the notebook -- There should be no errors in the executed notebook -- Ideally, there should be no warnings in the executed notebook -- Notebook cells should be idempotent, i.e. 
able of being executed multiple - times without changing their output value -- If the data is transformed, display a few lines to show the outcome (e.g., - `df.head(3)`) -- If any data is discarded / filtered, display the percentage of the rows - dropped + - E.g., create a `utils.py` file for helper functions +- Notebook cells should be idempotent, i.e., able to be executed multiple times + without changing their output value + - Avoid side effects such as modifying global variables or external states + - Ensure that cell execution order does not affect the results +- If the data is transformed, display a few lines to show the outcome + - E.g., `df.head(3)` to preview the first three rows of a DataFrame +- If any data is discarded/filtered, display the percentage of the rows dropped + - E.g., + `print(f"Percentage of rows dropped: {dropped_rows / total_rows * 100:.2f}%")` + - Provides insight into data cleaning and filtering processes - Progress bars should be added where applicable - -### Jupytext - -- Every notebook should be accompanied by a Python file paired with the notebook - by `jupytext`, containing a synchronized copy of the notebook code -- The name of the notebook and the name of its paired Python file should be the - same, except the extension -- The code in the notebook and in its paired Python file should always be in - sync -- If the notebook is updated or deleted, then its paired Python file should also - by updated or deleted, and vice versa -- Linter should be used on both the notebook and its paired Python file + - Use libraries like `tqdm` to show progress in loops or data processing tasks ### Plotting -- Each plot should have a descriptive title +- Each plot should have a descriptive title to understand the context of the plot + at a glance + - E.g., "Monthly Sales Data for 2023" instead of just "Sales Data" - Each plot should have axes labels -- If there are several lines on the plot, it should have a legend + - E.g., label the x-axis as 
"Months" and the y-axis as "Revenue in USD" +- If there are several multiple data series on the same plot, it should have a legend - In a plotting function, `plt.show()` should not be added at the end -- In a plotting function, the `ax` parameter should be exposed + - This allows for further customization or saving of the plot before + displaying + - E.g., users might want to save the plot using `plt.savefig('plot.png')` + before showing it +- In a plotting function, the `ax` parameter should be exposed to allow users to + customize the plot further + - E.g., users can modify the axes limits or add additional annotations - If a function plots multiple plots, they should be generally plotted in a single figure + - E.g., use `plt.subplots()` to create a grid of plots within a single figure ## Markdowns -- Names of documentation files should follow the format - `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` - - E.g., `docs/documentation_meta/all.diataxis.explanation.md` -- All Markdown files should have a table of contents - - Linter automatically adds and updates the table of contents -- There should be one and only one level 1 heading (with one `#`) in a Markdown -- The level 1 heading should be located above the table of contents -- Headings should not be boldfaced -- Headings should not be overcapitalized - - E.g., `Data schema` instead of `Data Schema` -- Text should be reflowed to the maximum of 80 columns per line -- Boldface should be used sparingly +- Boldface and italics should be used sparingly - The use of bullet point lists is encouraged - For the items, `-` should be used instead of `*` or circles -- Items in bullet point lists should not end with a period -- Wrap file paths, names of variables, functions and classes in backticks -- Use `>` to indicate a command line (e.g., `> git push` or `docker> pytest`) -- Fenced code blocks should always be accompanied by language markers (e.g. 
- `bash`, `python`) -- Indent fenced code blocks one level more than the previous line -- Avoid to use screenshots whenever possible and instead copy-and-paste text - with the right highlighting + - Items in bullet point lists should not end with a period +- Wrap file paths, names of variables, functions, and classes in backticks + - E.g., `file_path`, `variable_name`, `function_name()`, `ClassName` +- Use `>` to indicate a command line + - E.g., `> git push` or `docker> pytest` +- Avoid using screenshots whenever possible and instead copy-and-paste text with + the right highlighting + - E.g., instead of a screenshot of a terminal command, provide the command + text: `> ls -la` - Use active voice most of the time and use passive voice sparingly + - E.g., "The user updates the file" instead of "The file is updated by the + user" - Be efficient - Do not explain things in a repetitive way - Rewrite long-winded AI-generated texts in a concise way - -## File system structure - -- If a new directory with code is added, it should contain an empty - `__init__.py` file -- Notebooks should generally be located under the `notebooks` dir -- Unit tests should be located under the `test` dir - - Golden outcomes for tests should be located under the `test/outcomes` dir -- Documentation files should generally be located under the `docs` dir - -## Spelling - -- Capitalize the first letter of `Python` -- Spell `Linter` with the first letter in upper case and do not use an article - (`Linter` instead of `the Linter`) -- Capitalize `JSON`, `CSV`, `DB` and other abbreviations -- Spell commands in lower case and programs with the first letter in upper case - (e.g., `git` as a command, `Git` as a program) -- Represent intervals with `[a, b), (a, b], (a, b), [a, b]`, not `[a, b[` -- Write `hyperparameter` without a hyphen + - E.g., instead of "The process of updating the software can be done by + following these steps," use "Update the software by following these steps" From 
1412c0040d3bfb4123fed1a3083e48077be02a11 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 10:37:15 -0400 Subject: [PATCH 123/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/ai_review.py | 37 +++++++++++++---- dev_scripts_helpers/llms/llm_prompts.py | 23 +++++------ dev_scripts_helpers/llms/llm_transform.py | 1 + ...inter_style_review_guidelines.reference.md | 41 +++++++++++-------- ...l.llm_style_review_guidelines.reference.md | 3 +- 5 files changed, 69 insertions(+), 36 deletions(-) diff --git a/dev_scripts_helpers/llms/ai_review.py b/dev_scripts_helpers/llms/ai_review.py index c6a4c0f91..d073d5fa8 100755 --- a/dev_scripts_helpers/llms/ai_review.py +++ b/dev_scripts_helpers/llms/ai_review.py @@ -63,6 +63,12 @@ def _parse() -> argparse.ArgumentParser: in_default="-", in_required=False, ) + parser.add_argument( + "-s", + "--skip-post-transforms", + action="store_true", + help="Skip the post-transforms", + ) hparser.add_dockerized_script_arg(parser) # Use CRITICAL to avoid logging anything. hparser.add_verbosity_arg(parser, log_level="CRITICAL") @@ -80,7 +86,7 @@ def _main(parser: argparse.ArgumentParser) -> None: hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) ) # TODO(gp): We should just automatically pass-through the options. - prompt = "review" + prompt = "review_llm" cmd_line_opts = [f"-p {prompt}", f"-v {args.log_level}"] # cmd_line_opts = [] # for arg in vars(args): @@ -102,13 +108,30 @@ def _main(parser: argparse.ArgumentParser) -> None: use_sudo=args.dockerized_use_sudo, suppress_output=suppress_output, ) - # # Read the output from the container and write it to the output file from - # # command line (e.g., `-` for stdout). 
- # hparser.write_file(out_txt, out_file_name) - # if os.path.basename(out_file_name) == "cfile": - # print(out_txt) + # Run post-transforms outside the container. + if not args.skip_post_transforms: + post_container_transforms = dshlllpr.get_post_container_transforms( + prompt + ) + # + if dshlllpr.to_run("convert_file_names", post_container_transforms): + dshlllpt._convert_file_names(in_file_name, tmp_out_file_name) + # + # Check that all post-transforms were run. + hdbg.dassert_eq( + len(post_container_transforms), + 0, + "Not all post_transforms were run: %s", + post_container_transforms, + ) + else: + _LOG.info("Skipping post-transforms") out_txt = hio.from_file(tmp_out_file_name) - print(out_txt) + # Read the output from the container and write it to the output file from + # command line (e.g., `-` for stdout). + hparser.write_file(out_txt, out_file_name) + if os.path.basename(out_file_name) == "cfile": + print(out_txt) if __name__ == "__main__": diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 6f46c7dd5..f8b4af5c2 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -620,12 +620,11 @@ def _review_from_file(file: str) -> _PROMPT_OUT: {reference_txt} - - Each rule to follow is referred by <rule_name> and represented as <header-line_number> - with the name of the header of the section in the reference file (e..g, - 'Naming') and the line number (e.g., "Naming-7") - - - Only print the lines that you are sure are in violation of one of the - rules <rule_name> in the reference + - Each rule to follow is referred by <rule_name> and represented as + <header-line_number> with the name of the header of the section in the + reference file (e..g, 'Naming') and the line number (e.g., "Naming-7") + - Only print the violation of the rules when you are absolutely sure that + it is a violation - For each violation of a rule, you will print the line number of the code and the 
proposed improvement in the following style: <line_number>: <rule_name>: <short description of the proposed improvement> @@ -637,21 +636,21 @@ def _review_from_file(file: str) -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def review_linter_style() -> _PROMPT_OUT: +def review_llm() -> _PROMPT_OUT: """ - Review the code for linter style. + Review the code using LLMs. """ # Load the reference file. - file = "docs/code_guidelines/all.linter_style_review_guidelines.reference.md" + file = "docs/code_guidelines/all.llm_style_review_guidelines.reference.md" return _review_from_file(file) -def review_llm_style() -> _PROMPT_OUT: +def review_linter() -> _PROMPT_OUT: """ - Review the code for linter style. + Review the code for linter style (still using LLMs). """ # Load the reference file. - file = "docs/code_guidelines/all.llm_style_review_guidelines.reference.md" + file = "docs/code_guidelines/all.linter_style_review_guidelines.reference.md" return _review_from_file(file) diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 9201d7f91..7ab34a725 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -70,6 +70,7 @@ def _parse() -> argparse.ArgumentParser: action="store_true", help="Print the original and transformed", ) + # TODO(gp): Remove this. 
parser.add_argument( "-b", "--bold_first_level_bullets", diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md index 829b8c1ed..9073e8538 100644 --- a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md @@ -139,40 +139,49 @@ ### Jupytext -- Every notebook should be accompanied by a Python file paired with the notebook - by `jupytext`, containing a synchronized copy of the notebook code -- The name of the notebook and the name of its paired Python file should be the - same, except the extension -- The code in the notebook and in its paired Python file should always be in - sync -- If the notebook is updated or deleted, then its paired Python file should also - by updated or deleted, and vice versa -- Linter should be used on both the notebook and its paired Python file +- Each notebook must have an accompanying Python file, linked via `jupytext`, + which contains a synchronized copy of the notebook's code +- The notebook and its paired Python file should share the same name, differing + only in their file extensions +- Ensure that the code in the notebook and its paired Python file remains + synchronized at all times +- If you update or delete the notebook, you must also update or delete its + paired Python file, and vice versa ## Markdowns -- Names of documentation files should follow the format +- Names of documentation files should follow the format `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` + to help in organizing and categorizing documentation files effectively - E.g., `docs/documentation_meta/all.diataxis.explanation.md` + - The `{component}` part specifies the part of the project the documentation + is related to + - The `{audience}` part indicates who the documentation is intended for + - The `{topic}` part describes the subject matter of the documentation + 
- The `{diataxis_tag}` part categorizes the documentation according to the + Diátaxis framework (e.g., explanation, tutorial) - All Markdown files should have a table of contents - Linter automatically adds and updates the table of contents - There should be one and only one level 1 heading (with one `#`) in a Markdown -- The level 1 heading should be located above the table of contents + - The level 1 heading serves as the main title of the document + - It should clearly convey the primary topic or purpose of the document + - The level 1 heading should be located above the table of contents - Headings should not be boldfaced - Headings should not be overcapitalized - E.g., `Data schema` instead of `Data Schema` - Text should be reflowed to the maximum of 80 columns per line - Fenced code blocks should always be accompanied by language markers - - E.g. `bash`, `python` + - E.g., `bash`, `python` - Indent fenced code blocks at the same level as the previous line ## Spelling -- Capitalize the first letter of `Python` +- Spell commands in lower case and programs with the first letter in upper case + - E.g., `git` as a command, `Git` as a program + - E.g., capitalize the first letter of `Python` - Spell `Linter` with the first letter in upper case and do not use an article - (`Linter` instead of `the Linter`) + - E.g., `Linter` instead of `the Linter` - Capitalize `JSON`, `CSV`, `DB` and other abbreviations -- Spell commands in lower case and programs with the first letter in upper case - (e.g., `git` as a command, `Git` as a program) - Represent intervals with `[a, b), (a, b], (a, b), [a, b]`, not `[a, b[` - Write `hyperparameter` without a hyphen +- Use `Python` for scripting and automation tasks diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index 7066bcf4f..7285ba10d 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ 
b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -15,7 +15,8 @@ - E.g., the name of a variable should not include its type - E.g., use `embeddings` instead of `embeddings_list` - E.g., use `data` instead of `data_dict` -- Abbreviations in the names should be avoided, except for the following +- Abbreviations in the names of variables and functions should be avoided, except + for the following - `df` for dataframe - `srs` for series - `idx` for index From d854e501ecd9d0ccbea417f235cd0fb2e671452d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 10:41:58 -0400 Subject: [PATCH 124/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 4 ++-- helpers/haws.py | 25 ++++++++++++++----------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index f8b4af5c2..3946d2744 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -194,9 +194,9 @@ def code_fix_docstrings() -> _PROMPT_OUT: - The first comment should be in imperative mode and fit in a single line of less than 80 characters - Describe the parameters using the REST style, which requires each - parameter to be prepended with :param + parameter to be prepended with `:param` - Describe the return value using the REST style, which requires the return - value to be prepended with :return + value to be prepended with `:return` An example of a correct docstring is: ``` diff --git a/helpers/haws.py b/helpers/haws.py index b409eca9a..e25f11308 100644 --- a/helpers/haws.py +++ b/helpers/haws.py @@ -27,7 +27,9 @@ def get_session( """ Return connected Boto3 session. - :param region: aws region, if None get region from aws credentials. + :param aws_profile: AWS profile name to use for the session. 
+ :param region: AWS region, if None get region from AWS credentials. + :return: Boto3 session object. """ hdbg.dassert_isinstance(aws_profile, str) # When deploying jobs via ECS the container obtains credentials based on @@ -100,8 +102,9 @@ def get_task_definition_image_url( """ Get ECS task definition by name and return only image URL. - :param task_definition_name: the name of the ECS task definition, - e.g., cmamp-test + :param task_definition_name: The name of the ECS task definition, + e.g., `cmamp-test`. + :param region: AWS region, if None get region from AWS credentials. :param region: look at `get_session()` """ aws_profile = "ck" @@ -126,11 +129,11 @@ def update_task_definition( If region is different then the default one, it is assumed that ECR replication is enabled from the default region to the target region. - :param task_definition_name: the name of the ECS task definition for - which an update to container image URL is made, e.g., cmamp-test - :param new_image_url: New image url for task definition. e.g., - `***.dkr.ecr.***/cmamp:prod` - :param region: look at `get_session()` + :param task_definition_name: The name of the ECS task definition for + which an update to container image URL is made, e.g., `cmamp-test`. + :param new_image_url: New image URL for task definition. e.g., + `***.dkr.ecr.***/cmamp:prod`. + :param region: AWS region, if None get region from AWS credentials. """ client = get_ecs_client("ck", region=region) # Get the last revision of the task definition. @@ -180,9 +183,9 @@ def list_all_objects( List all objects in the specified S3 bucket under the given prefix, handling pagination. - :param s3_client: instance of boto3 S3 client - :param bucket_name: the name of the S3 bucket e.g., `cryptokaizen-data-test` - :param prefix: prefix to filter the S3 objects e.g., `binance/historical_bid_ask/` + :param s3_client: Instance of boto3 S3 client. + :param bucket_name: The name of the S3 bucket e.g., `cryptokaizen-data-test`. 
+ :param prefix: Prefix to filter the S3 objects e.g., `binance/historical_bid_ask/`. :return: A list of dictionaries containing metadata about each object. E.g., ``` [ From 8b314a3b496c12ef49083b5e262605c1edea08ce Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 11:41:56 -0400 Subject: [PATCH 125/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/ai_review.py | 11 +- dev_scripts_helpers/llms/inject_todos.py | 141 ++++++++++++++++++ dev_scripts_helpers/llms/sync_ai_review.sh | 2 + ...l.llm_style_review_guidelines.reference.md | 2 + 4 files changed, 152 insertions(+), 4 deletions(-) create mode 100755 dev_scripts_helpers/llms/inject_todos.py create mode 100755 dev_scripts_helpers/llms/sync_ai_review.sh diff --git a/dev_scripts_helpers/llms/ai_review.py b/dev_scripts_helpers/llms/ai_review.py index d073d5fa8..db71f0f89 100755 --- a/dev_scripts_helpers/llms/ai_review.py +++ b/dev_scripts_helpers/llms/ai_review.py @@ -69,6 +69,7 @@ def _parse() -> argparse.ArgumentParser: action="store_true", help="Skip the post-transforms", ) + hparser.add_llm_prompt_arg(parser) hparser.add_dockerized_script_arg(parser) # Use CRITICAL to avoid logging anything. hparser.add_verbosity_arg(parser, log_level="CRITICAL") @@ -78,16 +79,18 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() hparser.init_logger_for_input_output_transform(args) - # # Parse files. 
in_file_name, out_file_name = hparser.parse_input_output_args(args) + hdbg.dassert_in(args.prompt, ["review_llm", "review_linter", "review_correctness", "review_refactoring"]) + if out_file_name != "cfile": + _LOG.warning("The output file name is %s, so it will be converted to `cfile`", out_file_name) + out_file_name = "cfile" tag = "ai_review" tmp_in_file_name, tmp_out_file_name = ( hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) ) # TODO(gp): We should just automatically pass-through the options. - prompt = "review_llm" - cmd_line_opts = [f"-p {prompt}", f"-v {args.log_level}"] + cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] # cmd_line_opts = [] # for arg in vars(args): # if arg not in ["input", "output"]: @@ -111,7 +114,7 @@ def _main(parser: argparse.ArgumentParser) -> None: # Run post-transforms outside the container. if not args.skip_post_transforms: post_container_transforms = dshlllpr.get_post_container_transforms( - prompt + args.prompt ) # if dshlllpr.to_run("convert_file_names", post_container_transforms): diff --git a/dev_scripts_helpers/llms/inject_todos.py b/dev_scripts_helpers/llms/inject_todos.py new file mode 100755 index 000000000..db71f0f89 --- /dev/null +++ b/dev_scripts_helpers/llms/inject_todos.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +""" +Read input from either stdin or a file, apply a specified transformation using +an LLM, and then write the output to either stdout or a file. It is +particularly useful for integrating with editors like Vim. + +The script `dockerized_llm_transform.py` is executed within a Docker container to ensure +all dependencies are met. The Docker container is built dynamically if +necessary. The script requires an OpenAI API key to be set in the environment. 
+ +Examples +# Basic Usage +> llm_transform.py -i input.txt -o output.txt -p uppercase + +# List of transforms +> llm_transform.py -i input.txt -o output.txt -p list + +# Code review +> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_review + +# Propose refactoring +> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring +""" + +# TODO(gp): There are different modes to run the script +# - run the script to process input and write transformed output +# - run the script to process input and extract a cfile + + +import argparse +import logging +import os +import re +from typing import List, Optional + +import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hmarkdown as hmarkdo +import helpers.hparser as hparser +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import dev_scripts_helpers.llms.llm_transform as dshlllpt + +_LOG = logging.getLogger(__name__) + + +# TODO(gp): -> _parser() or _get_parser() everywhere. +def _parse() -> argparse.ArgumentParser: + """ + Use the same argparse parser for `dockerized_llm_transform.py`. + """ + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + hparser.add_input_output_args( + parser, + in_default="-", + in_required=False, + ) + parser.add_argument( + "-s", + "--skip-post-transforms", + action="store_true", + help="Skip the post-transforms", + ) + hparser.add_llm_prompt_arg(parser) + hparser.add_dockerized_script_arg(parser) + # Use CRITICAL to avoid logging anything. 
+ hparser.add_verbosity_arg(parser, log_level="CRITICAL") + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hparser.init_logger_for_input_output_transform(args) + # Parse files. + in_file_name, out_file_name = hparser.parse_input_output_args(args) + hdbg.dassert_in(args.prompt, ["review_llm", "review_linter", "review_correctness", "review_refactoring"]) + if out_file_name != "cfile": + _LOG.warning("The output file name is %s, so it will be converted to `cfile`", out_file_name) + out_file_name = "cfile" + tag = "ai_review" + tmp_in_file_name, tmp_out_file_name = ( + hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) + ) + # TODO(gp): We should just automatically pass-through the options. + cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] + # cmd_line_opts = [] + # for arg in vars(args): + # if arg not in ["input", "output"]: + # value = getattr(args, arg) + # if isinstance(value, bool): + # if value: + # cmd_line_opts.append(f"--{arg.replace('_', '-')}") + # else: + # cmd_line_opts.append(f"--{arg.replace('_', '-')} {value}") + # For stdin/stdout, suppress the output of the container. + suppress_output = in_file_name == "-" or out_file_name == "-" + dshlllpt._run_dockerized_llm_transform( + tmp_in_file_name, + cmd_line_opts, + tmp_out_file_name, + mode="system", + force_rebuild=args.dockerized_force_rebuild, + use_sudo=args.dockerized_use_sudo, + suppress_output=suppress_output, + ) + # Run post-transforms outside the container. + if not args.skip_post_transforms: + post_container_transforms = dshlllpr.get_post_container_transforms( + args.prompt + ) + # + if dshlllpr.to_run("convert_file_names", post_container_transforms): + dshlllpt._convert_file_names(in_file_name, tmp_out_file_name) + # + # Check that all post-transforms were run. 
+ hdbg.dassert_eq( + len(post_container_transforms), + 0, + "Not all post_transforms were run: %s", + post_container_transforms, + ) + else: + _LOG.info("Skipping post-transforms") + out_txt = hio.from_file(tmp_out_file_name) + # Read the output from the container and write it to the output file from + # command line (e.g., `-` for stdout). + hparser.write_file(out_txt, out_file_name) + if os.path.basename(out_file_name) == "cfile": + print(out_txt) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/dev_scripts_helpers/llms/sync_ai_review.sh b/dev_scripts_helpers/llms/sync_ai_review.sh new file mode 100755 index 000000000..54c69316e --- /dev/null +++ b/dev_scripts_helpers/llms/sync_ai_review.sh @@ -0,0 +1,2 @@ +\cp -rf /Users/saggese/src/helpers1/dev_scripts_helpers/llms/{ai_review.py,llm_prompts.py,llm_transform.py} dev_scripts_helpers/llms +\cp -rf /Users/saggese/src/helpers1/docs/code_guidelines/*guidelines* docs/code_guidelines diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index 7285ba10d..0e7923997 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -6,6 +6,8 @@ - Name functions using verbs and verbs/actions - E.g., `download_data()`, `process_input()`, `calculate_sum()` + - Python internal functions as `__repr__`, `__init__` are valid + - Functions names like `to_dict()`, `_parse()`, `_main()` are valid - Name classes using nouns - E.g., `Downloader()`, `DataProcessor()`, `User()` - Name decorators with an adjective or a past tense verb From ecddef3b106ccfae766d12781fce1cf2320b73a5 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 26 May 2025 18:53:44 -0400 Subject: [PATCH 126/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- 
dev_scripts_helpers/llms/inject_todos.py | 110 ++-------- helpers/hmarkdown.py | 79 ++++++++ helpers/hunit_test.py | 2 +- helpers/test/test_hmarkdown.py | 244 +++++++++++++++++++++++ 4 files changed, 340 insertions(+), 95 deletions(-) diff --git a/dev_scripts_helpers/llms/inject_todos.py b/dev_scripts_helpers/llms/inject_todos.py index db71f0f89..c03be0633 100755 --- a/dev_scripts_helpers/llms/inject_todos.py +++ b/dev_scripts_helpers/llms/inject_todos.py @@ -1,33 +1,9 @@ #!/usr/bin/env python3 """ -Read input from either stdin or a file, apply a specified transformation using -an LLM, and then write the output to either stdout or a file. It is -particularly useful for integrating with editors like Vim. - -The script `dockerized_llm_transform.py` is executed within a Docker container to ensure -all dependencies are met. The Docker container is built dynamically if -necessary. The script requires an OpenAI API key to be set in the environment. - -Examples -# Basic Usage -> llm_transform.py -i input.txt -o output.txt -p uppercase - -# List of transforms -> llm_transform.py -i input.txt -o output.txt -p list - -# Code review -> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_review - -# Propose refactoring -> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring +Read a cfile and inject its content as todos in the code. """ -# TODO(gp): There are different modes to run the script -# - run the script to process input and write transformed output -# - run the script to process input and extract a cfile - - import argparse import logging import os @@ -52,89 +28,35 @@ # TODO(gp): -> _parser() or _get_parser() everywhere. def _parse() -> argparse.ArgumentParser: """ - Use the same argparse parser for `dockerized_llm_transform.py`. 
""" parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) - hparser.add_input_output_args( - parser, - in_default="-", - in_required=False, + parser.add_argument( + "--cfile", + type=str, + required=True, + default="cfile", + help="File containing the TODOs to inject", ) parser.add_argument( - "-s", - "--skip-post-transforms", + "--todo_target", action="store_true", - help="Skip the post-transforms", + help="User name to use in the TODOs" ) - hparser.add_llm_prompt_arg(parser) - hparser.add_dockerized_script_arg(parser) - # Use CRITICAL to avoid logging anything. - hparser.add_verbosity_arg(parser, log_level="CRITICAL") + hparser.add_verbosity_arg(parser) return parser def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hparser.init_logger_for_input_output_transform(args) - # Parse files. - in_file_name, out_file_name = hparser.parse_input_output_args(args) - hdbg.dassert_in(args.prompt, ["review_llm", "review_linter", "review_correctness", "review_refactoring"]) - if out_file_name != "cfile": - _LOG.warning("The output file name is %s, so it will be converted to `cfile`", out_file_name) - out_file_name = "cfile" - tag = "ai_review" - tmp_in_file_name, tmp_out_file_name = ( - hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) - ) - # TODO(gp): We should just automatically pass-through the options. - cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] - # cmd_line_opts = [] - # for arg in vars(args): - # if arg not in ["input", "output"]: - # value = getattr(args, arg) - # if isinstance(value, bool): - # if value: - # cmd_line_opts.append(f"--{arg.replace('_', '-')}") - # else: - # cmd_line_opts.append(f"--{arg.replace('_', '-')} {value}") - # For stdin/stdout, suppress the output of the container. 
- suppress_output = in_file_name == "-" or out_file_name == "-" - dshlllpt._run_dockerized_llm_transform( - tmp_in_file_name, - cmd_line_opts, - tmp_out_file_name, - mode="system", - force_rebuild=args.dockerized_force_rebuild, - use_sudo=args.dockerized_use_sudo, - suppress_output=suppress_output, - ) - # Run post-transforms outside the container. - if not args.skip_post_transforms: - post_container_transforms = dshlllpr.get_post_container_transforms( - args.prompt - ) - # - if dshlllpr.to_run("convert_file_names", post_container_transforms): - dshlllpt._convert_file_names(in_file_name, tmp_out_file_name) - # - # Check that all post-transforms were run. - hdbg.dassert_eq( - len(post_container_transforms), - 0, - "Not all post_transforms were run: %s", - post_container_transforms, - ) - else: - _LOG.info("Skipping post-transforms") - out_txt = hio.from_file(tmp_out_file_name) - # Read the output from the container and write it to the output file from - # command line (e.g., `-` for stdout). - hparser.write_file(out_txt, out_file_name) - if os.path.basename(out_file_name) == "cfile": - print(out_txt) + # Read the cfile. + cfile_txt = hio.from_file(args.cfile) + # Inject the TODOs. + todo_txt = dshlllpr.inject_todos(cfile_txt, args.todo_target) + # Write the TODOs to the cfile. + hio.to_file(args.cfile, todo_txt) if __name__ == "__main__": diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 4145ea6a7..4bcd8f391 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -11,6 +11,7 @@ import dev_scripts_helpers.documentation.lint_notes as dshdlino import helpers.hdbg as hdbg +import helpers.hio as hio import helpers.hparser as hparser import helpers.hprint as hprint @@ -769,6 +770,84 @@ def selected_navigation_to_str( # ############################################################################# +def inject_todos_from_cfile(cfile_txt: str, todo_user: str, comment_prefix: str) -> None: + """ + Inject the TODOs from a cfile in the corresponding files. 
+ + Given a cfile with the following content: + ``` + dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py:101: The logic ... + ``` + the function will inject the TODO in the corresponding file and line + + :param cfile_txt: The content of the cfile. + :param todo_user: The user to use in the TODO. + :param comment_prefix: The prefix to use for the comment (e.g., "#") + """ + # For each file, store + # - the current file content + # - the offset (i.e., how many lines we inserted in the file so far, so + # we can inject the TODO at the correct line number) + # - the index of the last line modified to make sure the TODOs are for + # increasing line numbers. + file_content = {} + for todo_line in cfile_txt.split("\n"): + _LOG.debug("\n%s", hprint.frame("todo line='%s'" % todo_line)) + if todo_line.strip() == "": + continue + # dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py:101: The logic for extracting required status checks and pull request reviews is repeated. Consider creating a helper function to handle this extraction to reduce redundancy. + m = re.match(r"^\s*(\S+):(\d+):\s*(.*)$", todo_line) + if not m: + _LOG.warning("Can't parse line='%s': skipping", todo_line) + continue + file_name, todo_line_number, todo = m.groups() + todo_line_number = int(todo_line_number) + _LOG.debug(hprint.to_str("file_name todo_line_number todo")) + # Update the state if needed. + if file_name not in file_content: + _LOG.debug("Reading %s", file_name) + hdbg.dassert_path_exists(file_name) + txt = hio.from_file(file_name).split("\n") + offset = 0 + last_line_modified = 0 + file_content[file_name] = (txt, offset, last_line_modified) + # Extract the info for the file to process. 
+ txt, offset, last_line_modified = file_content[file_name] + _LOG.debug(hprint.to_str("offset last_line_modified")) + hdbg.dassert_lt(last_line_modified, todo_line_number, + "The TODOs don't look like they are increasing line numbers: " + "TODO at line %d is before the last line modified %d", todo_line_number, last_line_modified) + # We subtract 1 from the line number since TODOs count from 1, while + # Python arrays count from 0. + act_line_number = todo_line_number - 1 + offset + hdbg.dassert_lte(0, act_line_number) + hdbg.dassert_lt(act_line_number, len(txt)) + insert_line = txt[act_line_number] + _LOG.debug(hprint.to_str("act_line_number insert_line")) + # Extract how many spaces there are at place where the line to insert + # the TODO. + m = re.match(r"^(\s*)\S", insert_line) + hdbg.dassert(m, "Can't parse insert_line='%s'", insert_line) + spaces = len(m.group(1)) * " " + # Build the new line to insert. + new_line = spaces + f"{comment_prefix} TODO({todo_user}): {todo}" + _LOG.debug(hprint.to_str("new_line")) + # Insert the new line in txt at the correct position. + txt = txt[:act_line_number] + [new_line] + txt[act_line_number:] + # Update the state. + offset += 1 + file_content[file_name] = (txt, offset, todo_line_number) + # Write updated files back. + for file_name, (txt, offset, last_line_modified) in file_content.items(): + _ = last_line_modified + _LOG.info("Writing %d lines in %s", offset, file_name) + txt = "\n".join(txt) + hio.to_file(file_name, txt) + + +# ############################################################################# + + def capitalize_first_level_bullets(markdown_text: str) -> str: """ Make first-level bullets bold in markdown text. diff --git a/helpers/hunit_test.py b/helpers/hunit_test.py index c4fd69078..ccc1cf122 100644 --- a/helpers/hunit_test.py +++ b/helpers/hunit_test.py @@ -67,7 +67,7 @@ _LOG = logging.getLogger(__name__) # Mute this module unless we want to debug it. 
-# _LOG.setLevel(logging.INFO) +_LOG.setLevel(logging.INFO) # ############################################################################# diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index d24513d64..81fa4fa88 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1480,6 +1480,250 @@ def test3(self) -> None: self.assertTrue(True) +# ############################################################################# +# Test_inject_todos_from_cfile1 +# ############################################################################# + + +class Test_inject_todos_from_cfile1(hunitest.TestCase): + + def _create_test_file(self, filename: str, content: str) -> str: + """ + Create a test file with given content in the scratch directory. + + :param scratch_dir: Directory to create file in + :param filename: Name of file to create + :param content: Content to write to file + :return: Full path to created file + """ + scratch_dir = self.get_scratch_space() + file_path = os.path.join(scratch_dir, filename) + content = hprint.dedent(content) + hio.to_file(file_path, content) + return file_path + + def _create_cfile(self, cfile_content: List[str]) -> str: + """ + Create a cfile with TODOs in the scratch directory. + + :param scratch_dir: Directory to create file in + :param cfile_content: List of TODO lines to write + :return: Full path to created cfile + """ + content = "\n".join(cfile_content) + return self._create_test_file("cfile.txt", content) + + def _inject_todos(self, cfile_content: str) -> None: + """ + Helper to inject TODOs with standard parameters. + """ + todo_user ="user" + comment_prefix ="#" + hmarkdo.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) + + def test1(self) -> None: + """ + Test injecting TODOs from a cfile into a Python file. + """ + # Create a test file. 
+ test_file_content = """ + def hello(msg): + print(msg) + + def world(): + print("world") + """ + file_path = self._create_test_file("test.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [ + f"{file_path}:1: Add type hints.", + f"{file_path}:4: Add docstring.", + ] + self._create_cfile(cfile_content) + # Run the function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual = hio.from_file(file_path) + expected = """ + # TODO(user): Add type hints. + def hello(msg): + print(msg) + + # TODO(user): Add docstring. + def world(): + print("world") + """ + self.assert_equal(actual, expected, dedent=True) + + def test_one_line_file(self) -> None: + """ + Test injecting TODOs into an empty file. + """ + # Create an empty test file + test_file_content = """ + print("hello") + """ + file_path = self._create_test_file("empty.py", test_file_content) + # Create cfile with TODOs + cfile_content = [ + f"{file_path}:1: Add content to empty file." + ] + self._create_cfile(cfile_content) + # Run the function under test + self._inject_todos("\n".join(cfile_content)) + # Check output + actual = hio.from_file(file_path) + expected = """ + # TODO(user): Add content to empty file. + print("hello") + """ + self.assert_equal(actual, expected, dedent=True) + + def test_invalid_line_numbers(self) -> None: + """ + Test handling of TODOs with invalid line numbers. + """ + # Create a test file + test_file_content = """ + line1 + line2 + """ + file_path = self._create_test_file("test.py", test_file_content) + # Create cfile with invalid line numbers + cfile_content = [ + f"{file_path}:999: This line number doesn't exist.", + ] + self._create_cfile(cfile_content) + # This should raise an assertion error due to invalid line numbers + with self.assertRaises(AssertionError) as err: + self._inject_todos("\n".join(cfile_content)) + # Check output. 
+ expected = """ + ################################################################################ + * Failed assertion * + 998 < 2 + ################################################################################ + """ + self.assert_equal(str(err.exception), expected, dedent=True, fuzzy_match=True) + + def test2(self) -> None: + """ + Test injecting TODOs from a cfile into a Python file with a complex class. + """ + # Create a test file. + test_file_content = """ + import logging + from typing import List, Optional + + class DataProcessor: + def __init__(self): + self.logger = logging.getLogger(__name__) + self.data = [] + + def process_batch(self, items): + for item in items: + self.data.append(self._transform(item)) + + def _transform(self, item): + return item.upper() + + def get_results(self): + return self.data + + def clear(self): + self.data = [] + """ + file_path = self._create_test_file("test.py", test_file_content) + # Create cfile with TODOs. + cfile_content = [ + f"{file_path}:4: Add class docstring explaining purpose and usage", + f"{file_path}:5: Add type hints for instance variables", + f"{file_path}:9: Add type hints for items parameter", + f"{file_path}:10: Consider adding batch size validation", + f"{file_path}:13: Add error handling for non-string inputs", + f"{file_path}:16: Add return type hint and docstring", + f"{file_path}:19: Add docstring explaining clear behavior" + ] + self._create_cfile(cfile_content) + # Run function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. 
+ actual = hio.from_file(file_path) + expected = """ + import logging + from typing import List, Optional + + # TODO(user): Add class docstring explaining purpose and usage + class DataProcessor: + # TODO(user): Add type hints for instance variables + def __init__(self): + self.logger = logging.getLogger(__name__) + self.data = [] + + # TODO(user): Add type hints for items parameter + def process_batch(self, items): + # TODO(user): Consider adding batch size validation + for item in items: + self.data.append(self._transform(item)) + + # TODO(user): Add error handling for non-string inputs + def _transform(self, item): + return item.upper() + + # TODO(user): Add return type hint and docstring + def get_results(self): + return self.data + + # TODO(user): Add docstring explaining clear behavior + def clear(self): + self.data = [] + """ + self.assert_equal(actual, expected, dedent=True) + + def test3(self) -> None: + """ + Test injecting TODOs from a cfile into multiple Python files. + """ + # Create first test file. + test_file1_content = """ + def foo(): + pass + """ + file_path1 = self._create_test_file("test1.py", test_file1_content) + # Create second test file. + test_file2_content = """ + def bar(): + return None + """ + file_path2 = self._create_test_file("test2.py", test_file2_content) + # Create cfile. + cfile_content = [ + f"{file_path1}:1: Add docstring for foo.", + f"{file_path2}:1: Add docstring for bar.", + f"{file_path2}:2: Add type hint for return.", + ] + self._create_cfile(cfile_content) + # Run function under test. + self._inject_todos("\n".join(cfile_content)) + # Check output. + actual1 = hio.from_file(file_path1) + expected1 = """ + # TODO(user): Add docstring for foo. + def foo(): + pass + """ + self.assert_equal(actual1, expected1, dedent=True) + # + actual2 = hio.from_file(file_path2) + expected2 = """ + # TODO(user): Add docstring for bar. + def bar(): + # TODO(user): Add type hint for return. 
+ return None + """ + self.assert_equal(actual2, expected2, dedent=True) + + # ############################################################################# # Test_colorize_bold_text1 # ############################################################################# From 2a2a611e84c973957aebdb6076b73aaf49318a4b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 27 May 2025 16:24:33 -0400 Subject: [PATCH 127/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/inject_todos.py | 11 +---------- dev_scripts_helpers/llms/llm_prompts.py | 4 ++-- dev_scripts_helpers/llms/sync_ai_review.sh | 21 +++++++++++++++++++-- helpers/hgit.py | 1 + 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/dev_scripts_helpers/llms/inject_todos.py b/dev_scripts_helpers/llms/inject_todos.py index c03be0633..d6ef2c34f 100755 --- a/dev_scripts_helpers/llms/inject_todos.py +++ b/dev_scripts_helpers/llms/inject_todos.py @@ -6,20 +6,11 @@ import argparse import logging -import os -import re -from typing import List, Optional import dev_scripts_helpers.llms.llm_prompts as dshlllpr -import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hgit as hgit import helpers.hio as hio import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem import dev_scripts_helpers.llms.llm_transform as dshlllpt _LOG = logging.getLogger(__name__) @@ -54,7 +45,7 @@ def _main(parser: argparse.ArgumentParser) -> None: # Read the cfile. cfile_txt = hio.from_file(args.cfile) # Inject the TODOs. - todo_txt = dshlllpr.inject_todos(cfile_txt, args.todo_target) + todo_txt = hmarkdo.inject_todos_from_cfile(cfile_txt, args.todo_target, comment_prefix="#") # Write the TODOs to the cfile. 
hio.to_file(args.cfile, todo_txt) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 3946d2744..f72d23bb3 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -641,7 +641,7 @@ def review_llm() -> _PROMPT_OUT: Review the code using LLMs. """ # Load the reference file. - file = "docs/code_guidelines/all.llm_style_review_guidelines.reference.md" + file = hgit.find_file("all.llm_style_review_guidelines.reference.md") return _review_from_file(file) @@ -650,7 +650,7 @@ def review_linter() -> _PROMPT_OUT: Review the code for linter style (still using LLMs). """ # Load the reference file. - file = "docs/code_guidelines/all.linter_style_review_guidelines.reference.md" + file = hgit.find_file("all.linter_style_review_guidelines.reference.md") return _review_from_file(file) diff --git a/dev_scripts_helpers/llms/sync_ai_review.sh b/dev_scripts_helpers/llms/sync_ai_review.sh index 54c69316e..aa376d6e6 100755 --- a/dev_scripts_helpers/llms/sync_ai_review.sh +++ b/dev_scripts_helpers/llms/sync_ai_review.sh @@ -1,2 +1,19 @@ -\cp -rf /Users/saggese/src/helpers1/dev_scripts_helpers/llms/{ai_review.py,llm_prompts.py,llm_transform.py} dev_scripts_helpers/llms -\cp -rf /Users/saggese/src/helpers1/docs/code_guidelines/*guidelines* docs/code_guidelines +#!/bin/bash -xe +HELPERS_ROOT_DIR=$(find . -name "helpers_root" -type d | grep -v git) || true +if [[ -z $HELPERS_ROOT_DIR ]]; then + HELPERS_ROOT_DIR="." 
+fi; +echo HELPERS_ROOT_DIR=$HELPERS_ROOT_DIR + +ls $HELPERS_ROOT_DIR + +\cp -rf /Users/saggese/src/helpers1/helpers/hgit.py $HELPERS_ROOT_DIR/helpers + +ls $HELPERS_ROOT_DIR/dev_scripts_helpers/llms +\cp -rf /Users/saggese/src/helpers1/dev_scripts_helpers/llms/{ai_review.py,llm_prompts.py,llm_transform.py,inject_todos.py} $HELPERS_ROOT_DIR/dev_scripts_helpers/llms + +ls $HELPERS_ROOT_DIR/helpers +\cp -rf /Users/saggese/src/helpers1/helpers/hmarkdown.py $HELPERS_ROOT_DIR/helpers + +ls $HELPERS_ROOT_DIR/docs/code_guidelines +\cp -rf /Users/saggese/src/helpers1/docs/code_guidelines/*guidelines* $HELPERS_ROOT_DIR/docs/code_guidelines diff --git a/helpers/hgit.py b/helpers/hgit.py index 001de6698..4612bd330 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -256,6 +256,7 @@ def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: """ cmd = hprint.dedent(cmd, remove_lead_trail_empty_lines_=True) cmd = " ".join(cmd.split()) + print(cmd) _, res = hsystem.system_to_one_line(cmd) return res From 150a6e5a6dd4f08e78f98b2d6aa9ab4dfc4991d5 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 27 May 2025 16:36:28 -0400 Subject: [PATCH 128/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 13 +++++++++---- helpers/hgit.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index f72d23bb3..d2f77c417 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -641,8 +641,11 @@ def review_llm() -> _PROMPT_OUT: Review the code using LLMs. """ # Load the reference file. 
- file = hgit.find_file("all.llm_style_review_guidelines.reference.md") - return _review_from_file(file) + helper_root = hgit.find_helpers_root() + file_name = os.path.join(helper_root, "docs/code_guidelines/all.llm_style_review_guidelines.reference.md") + # TODO(gp): This doesn't work for unknown reasons. + #file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") + return _review_from_file(file_name) def review_linter() -> _PROMPT_OUT: @@ -650,8 +653,10 @@ def review_linter() -> _PROMPT_OUT: Review the code for linter style (still using LLMs). """ # Load the reference file. - file = hgit.find_file("all.linter_style_review_guidelines.reference.md") - return _review_from_file(file) + helper_root = hgit.find_helpers_root() + file_name = os.path.join(helper_root, "docs/code_guidelines/all.linter_style_review_guidelines.reference.md") + #file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") + return _review_from_file(file_name) def review_correctness() -> _PROMPT_OUT: diff --git a/helpers/hgit.py b/helpers/hgit.py index 4612bd330..4e2072856 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -251,6 +251,7 @@ def find_git_root(path: str = ".") -> str: def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: if dir_path is None: dir_path = find_git_root() + _LOG.debug(hprint.to_str("dir_path")) cmd = rf""" find {dir_path} \( -path '*/.git' -o -path '*/.mypy_cache' \) -prune -o -name "{file_name}" -print """ From 1cef5c3b00912f99a1c3b2c84df8742f3111141d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Wed, 28 May 2025 05:17:55 -0400 Subject: [PATCH 129/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/inject_todos.py | 6 +- dev_scripts_helpers/llms/llm_prompts.py | 13 +++-- dev_scripts_helpers/llms/llm_transform.py | 4 +- ...inter_style_review_guidelines.reference.md | 22 
++++++- ...l.llm_style_review_guidelines.reference.md | 57 +++++++------------ .../Test_show_imports.test1/output/output.txt | 8 +-- 6 files changed, 56 insertions(+), 54 deletions(-) diff --git a/dev_scripts_helpers/llms/inject_todos.py b/dev_scripts_helpers/llms/inject_todos.py index d6ef2c34f..b19e9d963 100755 --- a/dev_scripts_helpers/llms/inject_todos.py +++ b/dev_scripts_helpers/llms/inject_todos.py @@ -7,11 +7,10 @@ import argparse import logging -import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import helpers.hdbg as hdbg import helpers.hio as hio import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser -import dev_scripts_helpers.llms.llm_transform as dshlllpt _LOG = logging.getLogger(__name__) @@ -42,12 +41,11 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) # Read the cfile. cfile_txt = hio.from_file(args.cfile) # Inject the TODOs. todo_txt = hmarkdo.inject_todos_from_cfile(cfile_txt, args.todo_target, comment_prefix="#") - # Write the TODOs to the cfile. - hio.to_file(args.cfile, todo_txt) if __name__ == "__main__": diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index d2f77c417..783a504fa 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -11,6 +11,7 @@ import helpers.hio as hio import helpers.hmarkdown as hmarkdo import helpers.hprint as hprint +import helpers.hsystem as hsystem _LOG = logging.getLogger(__name__) @@ -641,10 +642,10 @@ def review_llm() -> _PROMPT_OUT: Review the code using LLMs. """ # Load the reference file. 
- helper_root = hgit.find_helpers_root() - file_name = os.path.join(helper_root, "docs/code_guidelines/all.llm_style_review_guidelines.reference.md") + #helper_root = hgit.find_helpers_root() + #file_name = os.path.join(helper_root, "docs/code_guidelines/all.llm_style_review_guidelines.reference.md") # TODO(gp): This doesn't work for unknown reasons. - #file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") + file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") return _review_from_file(file_name) @@ -653,9 +654,9 @@ def review_linter() -> _PROMPT_OUT: Review the code for linter style (still using LLMs). """ # Load the reference file. - helper_root = hgit.find_helpers_root() - file_name = os.path.join(helper_root, "docs/code_guidelines/all.linter_style_review_guidelines.reference.md") - #file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") + #helper_root = hgit.find_helpers_root() + #file_name = os.path.join(helper_root, "docs/code_guidelines/all.linter_style_review_guidelines.reference.md") + file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") return _review_from_file(file_name) diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 7ab34a725..c5392357a 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -112,10 +112,10 @@ def _run_dockerized_llm_transform( FROM python:3.12-alpine # Install Bash. - #RUN apk add --no-cache bash + RUN apk add --no-cache bash git # Set Bash as the default shell. - #SHELL ["/bin/bash", "-c"] + SHELL ["/bin/bash", "-c"] # Install pip packages. 
     RUN pip install --upgrade pip
diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md
index 9073e8538..e02e6b9a3 100644
--- a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md
+++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md
@@ -8,10 +8,28 @@
   - E.g., `download.py` and not `downloader.py`
 - Name non-executable files using nouns
   - E.g., `downloader.py`
+- Use `dir` and not `directory` or `folder`
+  - E.g., `dir_path`
+- Use `file_name` and not `filename`
+  - E.g., `file_name` for storing the name of a file
+- Use `dir_name` and not `dirname`
+  - E.g., `dir_name` for storing the name of a directory
+- Use `timestamp` and not `ts` or `datetime`
+  - E.g., `event_timestamp`
+- To refer to the name of a column, use `..._col` and not `..._col_name` or
+  `..._column`
+  - E.g., `age_col` for a column storing age values
 
 ### Docstrings
 
-- All functions and methods must have a docstring
+- The first docstring line is followed by a blank line and then, optionally, by
+  a longer description (possibly on multiple lines) with a more detailed
+  explanation of what the function does
+- The more detailed description is followed by a blank line and then the param
+  and return description section in REST style
+  - Use lowercase after `:param XYZ: ...` / `:return:` unless the description
+    starts with a proper noun
+- Do not mention default values of parameters in parameter descriptions
 - Docstrings should be wrapped in triple quotation marks (`"""`)
 - The opening and closing triple quotation marks should be located on their own
   separate lines
diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md
index 0e7923997..ac6a80a1d 100644
--- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -31,44 +31,23 @@ (avoid "code stutter") - E.g., if using a library named `math`, avoid naming a function `math_calculate()` -- Use `dir` and not `directory` or `folder` - - E.g., `dir_path` -- Use `file_name` and not `filename` - - E.g., `file_name` for storing the name of a file -- Use `dir_name` and not `dirname` - - E.g., `dir_name` for storing the name of a directory -- Use `timestamp` and not `ts` or `datetime` - - E.g., `event_timestamp` -- To refer to the name of a column, use `..._col` and not `..._col_name` or - `..._column` - - E.g., `age_col` for a column storing age values ### Docstrings -- The first docstring line is followed by a blank line and then, optionally, by - a longer description (possibly on multiple lines) with a more detailed - explanation of what the function does - - The text should describe the goal of the function, the interface and what the user - needs to know to use the function - - E.g., "This function calculates the sum of two numbers and returns the - result." 
- - The text should not describe parameters / what is being returned - - The text should not describe implementation details that can be changed -- The more detailed description is followed by a blank line and then the param - and return description section in REST style - - Use lowercase after `:param XYZ: ...` / `:return:` unless the description - starts with a proper noun - - Do not add a period at the end of the param and return descriptions - - Do not mention the type of the parameters and return structures - - Do not mention default values of parameters in parameter descriptions - - Follow this example for indentation of parameter descriptions: - ```python - :param param1: a very very long param description that +- All functions and methods must have a docstring +- The docstring should describe the goal of the function, the interface and what the user + needs to know to use the function + - E.g., "This function calculates the sum of two numbers and returns the + result." +- The text should not describe implementation details that can be changed +- Follow this example for indentation of parameter descriptions: + ```python + :param param1: a very very long param description that + continues into a second line + :param param2: a param with two possible values + - first value description + - second value description that is very long and continues into a second line - :param param2: a param with two possible values - - first value description - - second value description that is very long and - continues into a second line ``` - Adding examples (e.g., of input and output) to the docstring is encouraged - E.g., @@ -240,10 +219,14 @@ dependencies; instead, allow the caller to pass the column name to the function as a parameter - E.g., `def calculate_average(df: pd.DataFrame, column_name: str):` -- Do not put computations of the output in the `return` line +- Do not put computations of the output together in a `return` statement + - Bad + ``` + return 
compute_value() + ``` - Instead, compute the output first, assign it to a variable, and then return this variable - - E.g., + - Good ``` result = compute_value() return result @@ -268,7 +251,7 @@ - Use logging `_LOG.debug()` and not `print()` for tracing execution - Use positional args in logging and not inline formatting - - E.g., `_LOG.debug("cmd=%s", cmd1)` instead `_LOG.debug(f"cmd={cmd1}")` + - E.g., The code should do `_LOG.debug("cmd=%s", cmd1)` and not `_LOG.debug(f"cmd={cmd1}")` - Use the following idiom to configure logging: ```python diff --git a/import_check/test/outcomes/Test_show_imports.test1/output/output.txt b/import_check/test/outcomes/Test_show_imports.test1/output/output.txt index 34f420c6a..8c2bcfc54 100644 --- a/import_check/test/outcomes/Test_show_imports.test1/output/output.txt +++ b/import_check/test/outcomes/Test_show_imports.test1/output/output.txt @@ -19,7 +19,7 @@ "input.file2" ], "imports": null, - "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/__init__.py", + "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/__init__.py", "truncated": false, "is_external": false, "is_file": false @@ -31,7 +31,7 @@ "input.file2" ], "imports": null, - "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/file1.py", + "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/file1.py", "truncated": false, "is_external": false, "is_file": true @@ -45,9 +45,9 @@ "input", "input.file1" ], - "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/file2.py", + "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/file2.py", "truncated": false, "is_external": false, "is_file": true } -} +} \ No newline at end of file From bda8c4ff61ce3955a47c3891ebe58571399dd4e8 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Wed, 28 May 2025 05:20:59 -0400 Subject: [PATCH 130/193] Improve MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 783a504fa..764c2d85f 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1052,6 +1052,31 @@ def slide_smart_colorize() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +# ############################################################################# +# Text. +# ############################################################################# + + +#def text_expand() -> _PROMPT_OUT: +# """ +# """ +# system = hio.from_file("text_expand2.txt") +# pre_transforms: Set[str] = set() +# post_transforms: Set[str] = set() +# post_container_transforms = ["format_markdown"] +# return system, pre_transforms, post_transforms, post_container_transforms + + +def text_rephrase() -> _PROMPT_OUT: + """ + """ + system = hio.from_file("text_rephrase.txt") + pre_transforms: Set[str] = set() + post_transforms: Set[str] = set() + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + # ############################################################################# From bc94524fa091c71c22566533408056bf5d506b0d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Wed, 28 May 2025 18:04:21 -0400 Subject: [PATCH 131/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hgit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/helpers/hgit.py b/helpers/hgit.py index 4e2072856..dbf206154 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -257,7 +257,6 @@ def find_file(file_name: str, *, dir_path: Optional[str] = 
None) -> str: """ cmd = hprint.dedent(cmd, remove_lead_trail_empty_lines_=True) cmd = " ".join(cmd.split()) - print(cmd) _, res = hsystem.system_to_one_line(cmd) return res From e1feeb28dd02ffc38816aedac646aaff17c5f378 Mon Sep 17 00:00:00 2001 From: Danya Tikhomirov <d.tikhomirov@kaizen-tech.io> Date: Wed, 28 May 2025 10:51:43 +0300 Subject: [PATCH 132/193] CMTask12257: Remove git root assertion (#768) Co-authored-by: Daniil Tikhomirov <d.tikhomirov@crypto-kaizen.com> --- dev_scripts_helpers/thin_client/thin_client_utils.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index 01a9fa7d2..e760b090d 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -236,7 +236,9 @@ set_path() { # export PATH=$(pwd):$PATH dtrace "GIT_ROOT=$GIT_ROOT" - dassert_var_defined "GIT_ROOT" + # + # TODO(gp): Enable this as part of HelpersTask12257. 
+ # dassert_var_defined "GIT_ROOT" # export PATH=$GIT_ROOT_DIR:$PATH # Avoid ./.mypy_cache/3.12/app/dev_scripts_helpers From ce969a23364b0a883a3e5a44ff6ee6a4eb581459 Mon Sep 17 00:00:00 2001 From: Heanh Sok <heanhsok@gmail.com> Date: Wed, 28 May 2025 03:56:27 -0400 Subject: [PATCH 133/193] CMTask12257_remove_git_root_assertion_2 (#769) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CMTask12257: Remove git root assertion * Fix Pre-commit checks: All checks passed ✅ * Remove comment Pre-commit checks: All checks passed ✅ * Lint Pre-commit checks: All checks passed ✅ --------- Co-authored-by: Daniil Tikhomirov <d.tikhomirov@crypto-kaizen.com> Co-authored-by: Sonya Nikiforova <son.nik@mail.ru> --- dev_scripts_helpers/thin_client/thin_client_utils.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index e760b090d..6597c6be2 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -236,13 +236,12 @@ set_path() { # export PATH=$(pwd):$PATH dtrace "GIT_ROOT=$GIT_ROOT" - # # TODO(gp): Enable this as part of HelpersTask12257. # dassert_var_defined "GIT_ROOT" # export PATH=$GIT_ROOT_DIR:$PATH # Avoid ./.mypy_cache/3.12/app/dev_scripts_helpers - DEV_SCRIPT_HELPER_DIR=$(find . -name dev_scripts_helpers -type d -not -path "*.mypy_cache*") + DEV_SCRIPT_HELPER_DIR=$(find ${GIT_ROOT_DIR} -name dev_scripts_helpers -type d -not -path "*.mypy_cache*") dassert_dir_exists $DEV_SCRIPT_HELPER_DIR dtrace "DEV_SCRIPT_HELPER_DIR=$DEV_SCRIPT_HELPER_DIR" # Add to the PATH all the first level directory under `dev_scripts`. 
From 29059f8d5e1db970356b890428ba3cc2ac208d7a Mon Sep 17 00:00:00 2001 From: Sonya Nikiforova <son.nik@mail.ru> Date: Wed, 28 May 2025 16:04:34 +0200 Subject: [PATCH 134/193] Helpers task717 create ai reviewer script (#766) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * HelpersTask717: Factor out file selection Pre-commit checks: All checks passed ✅ * HelpersTask717: Update guidelines Pre-commit checks: All checks passed ✅ * HelpersTask717: Create reviewer script Pre-commit checks: All checks passed ✅ * HelpersTask717: Make scripts executable; install packages Pre-commit checks: All checks passed ✅ * HelpersTask717: Update Pre-commit checks: All checks passed ✅ --- .../llms/dockerized_llm_review.py | 262 ++++++++++++++++++ dev_scripts_helpers/llms/llm_review.py | 234 ++++++++++++++++ linters/base.py | 95 +------ linters/utils.py | 92 ++++++ 4 files changed, 596 insertions(+), 87 deletions(-) create mode 100755 dev_scripts_helpers/llms/dockerized_llm_review.py create mode 100755 dev_scripts_helpers/llms/llm_review.py diff --git a/dev_scripts_helpers/llms/dockerized_llm_review.py b/dev_scripts_helpers/llms/dockerized_llm_review.py new file mode 100755 index 000000000..32de63404 --- /dev/null +++ b/dev_scripts_helpers/llms/dockerized_llm_review.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 + +""" +Review code using LLMs. It requires certain dependencies to be present (e.g., +`openai`) and thus it is executed within a Docker container. + +To use this script, you need to indicate the file to be reviewed and the +path to the document with review guidelines. 
+""" + +import argparse +import logging +import re +from typing import Dict, List + +import helpers.hdbg as hdbg +import helpers.hio as hio +import helpers.hlist as hlist +import helpers.hmarkdown as hmarkdo +import helpers.hparser as hparser +import helpers.hprint as hprint + +_LOG = logging.getLogger(__name__) + + +def _extract_bullet_points(text: str) -> List[str]: + """ + Extract bullet point list items from text. + + Sub-lists nested under first-level items are extracted together with + the first-level items. + + :param text: text to process + :return: extracted bullet points, e.g., + ``` + [ + "- Item 1", + ''' + - Item 2 + - Item 3 + ''' + ] + ``` + """ + lines = text.split("\n") + bullet_points = [] + current_item = "" + for line in lines: + if re.match(r"^- ", line): + # Match first-level bullet point item. + if current_item: + # Store the previous item, if any. + current_item = re.sub(r"\s{2,}", " ", current_item.strip()) + bullet_points.append(current_item) + # Start a new first-level bullet point item. + current_item = line + elif re.match(r"^\s+- ", line): + # Match a sub-item (non first-level bullet point item). + # Append a sub-item to the current item. + current_item += "\n" + line + elif len(line.strip()) != 0 and current_item: + # Append a line to the current item. + current_item += " " + line + else: + # Store the finished item. + current_item = re.sub(r"\s{2,}", " ", current_item.strip()) + bullet_points.append(current_item) + if current_item: + current_item = re.sub(r"\s{2,}", " ", current_item.strip()) + bullet_points.append(current_item) + # Drop empty items. + bullet_points: List[str] = hprint.remove_empty_lines_from_string_list( + bullet_points + ) + return bullet_points + + +def _load_review_guidelines(guidelines_doc_filename: str) -> Dict[str, List[str]]: + """ + Load automated review guidelines. 
+ + :param guidelines_doc_filename: name of the file with the review guidelines + :return: review guidelines organized by topic/target file types, e.g., + + ``` + "Python code": ["All functions and methods must have a docstring", ...], + "Notebooks": ["All notebooks should have a table of contents", ...], + "Markdowns": ["Headings should not be boldfaced", ...], + "Spelling": ["Capitalize the first letter of `Python`", ...], + "File system structure": ["Unit tests should be located under the `test` dir", ...] + ``` + """ + guidelines_doc = hio.from_file(guidelines_doc_filename) + # Extract headers from the guidelines file. + headers = [ + header.description + for header in hmarkdo.extract_headers_from_markdown( + guidelines_doc, max_level=2 + ) + ] + # Define headers of the categories of guidelines. + guidelines_categories = [ + "Python code", + "Notebooks", + "Markdowns", + "Spelling", + "File system structure", + ] + guidelines: Dict[str, List[str]] = {} + for category in guidelines_categories: + hdbg.dassert_in(category, headers) + # Extract the section under the header. + section = hmarkdo.extract_section_from_markdown(guidelines_doc, category) + # Extract individual guidelines from bullet points. + individual_guidelines = _extract_bullet_points(section) + guidelines[category] = individual_guidelines + return guidelines + + +def _review( + file_path: str, + guidelines_doc_filename: str, +) -> List[str]: + """ + Get an LLM to find violations of the guidelines in an input file. + + :param file_path: path to the file to review + :param guidelines_doc_filename: name of the file with the review + guidelines + :return: automatically generated review comments for the input file + """ + # Load the file. + code = hio.from_file(file_path) + code_with_line_numbers = "\n".join( + [f"{num + 1} {line}" for num, line in enumerate(code.split("\n"))] + ) + # Load the review guidelines. 
+ guidelines = _load_review_guidelines(guidelines_doc_filename) + # Select relevant guidelines for the file. + guidelines_for_file: List[str] = [] + if file_path.endswith(".py"): + # Use guidelines for Python code. + # Python files paired to notebooks by jupytext should also follow these guidelines. + guidelines_for_file = guidelines["Python code"] + elif file_path.endswith(".ipynb"): + # Use guidelines for notebooks. + guidelines_for_file = guidelines["Notebooks"] + elif file_path.endswith(".md"): + # Use guidelines for Markdowns. + guidelines_for_file = guidelines["Markdowns"] + # Add general guidelines. + guidelines_for_file.extend(guidelines["Spelling"]) + guidelines_for_file.extend(guidelines["File system structure"]) + # + comments: List[str] = [] + system_prompt = hprint.dedent( + """ + You are a proficient reviewer of Python code and technical documentation. + You pay a lot of attention to detail. + I will pass you the code and a guideline that the code must follow. + """ + ) + _LOG.debug(hprint.to_str("system_prompt")) + # We need to import this here since we have this package only when + # running inside a Dockerized executable. We don't want an import to + # this file assert since openai is not available in the local dev + # environment. + import helpers.hopenai as hopenai + + for guideline in guidelines_for_file: + # Check if the file follows the specific guideline. + guideline_prompt = hprint.dedent( + f""" + Check if the following code violates the following guideline: + \n<GUIDELINE> + {guideline} + </GUIDELINE> + \n<CODE> + {code_with_line_numbers} + </CODE> + + If no violations are found, do not output anything. + For every line that violates the guideline, output the following: + + '<VIOLATION>{file_path}: LINE_NUM: GUIDELINE: QUOTE</VIOLATION>' + + where GUIDELINE is the violated guideline, LINE_NUM is the number + of the line in the code that violates the guideline, QUOTE is the + quote from the code showcasing the violation. 
+ - Line numbers are provided at the beginning of each line already + - Remove these line numbers when you quote the code with the + violation + - If a whole chunk of code violates the guideline, use the number of + the first line in the chunk and quote the first line in the chunk + - If the violation cannot be associated with a particular line, use + line number = 0 and put <UNABLE TO QUOTE> as the quote + """ + ) + response = hopenai.get_completion( + guideline_prompt, system_prompt=system_prompt, print_cost=True + ) + txt_out = hopenai.response_to_txt(response) + hdbg.dassert_isinstance(txt_out, str) + # Extract review comments from the response. + cur_comments = re.findall(r"<VIOLATION>(.*?)</VIOLATION>", txt_out) + comments.extend(cur_comments) + return comments + + +def _process_comments(comments: List[str], log_filepath: str) -> None: + """ + Post-process and save generated review comments. + + :param comments: automatically generated review comments + :param log_filepath: path to the file to save the comments to + """ + # Clean up. + hdbg.dassert_list_of_strings(comments) + comments = sorted(comments) + comments = hprint.remove_empty_lines_from_string_list(comments) + comments = hlist.remove_duplicates(comments) + # Write into a file. + hio.to_file(log_filepath, "\n".join(comments)) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + # File selection. + parser.add_argument("-i", "--in_file_path", type=str, help="File to review") + # Reviewer guidelines file. + parser.add_argument( + "--guidelines_doc_filename", + action="store", + help="Name of the document with the guidelines for automated reviewing", + default="all.automated_review_guidelines.reference.md", + ) + # Run parameters. 
+ parser.add_argument( + "--reviewer_log", + default="./reviewer_warnings.txt", + help="File for storing the warnings", + ) + hparser.add_verbosity_arg(parser, log_level="CRITICAL") + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + # Run. + comments = _review(args.in_file_path, args.guidelines_doc_filename) + _process_comments(comments, args.reviewer_log) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/dev_scripts_helpers/llms/llm_review.py b/dev_scripts_helpers/llms/llm_review.py new file mode 100755 index 000000000..34835a8c3 --- /dev/null +++ b/dev_scripts_helpers/llms/llm_review.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 + +""" +Review files automatically using LLMs. + +The script `dockerized_llm_review.py` is executed within a Docker container to ensure +all dependencies are met. The Docker container is built dynamically if +necessary. The script requires an OpenAI API key to be set in the environment. + +Usage example: + +# Review specified files. +> llm_review.py --files="dir1/file1.py dir2/file2.md dir3/file3.ipynb dir3/file3.py" +""" + +import argparse +import logging +import os +from typing import List + +import helpers.hdbg as hdbg +import helpers.hdocker as hdocker +import helpers.hgit as hgit +import helpers.hio as hio +import helpers.hparser as hparser +import helpers.hprint as hprint +import helpers.hserver as hserver +import helpers.hsystem as hsystem +import linters.utils as liutils + +_LOG = logging.getLogger(__name__) + + +def _run_dockerized_llm_review( + file_paths: List[str], + guidelines_doc_filename: str, + reviewer_log: str, + force_rebuild: bool, + use_sudo: bool, + log_level: str, +) -> None: + """ + Run `dockerized_llm_review.py` in a Docker container. + + The Docker container has all the necessary dependencies. 
+ + :param file_paths: paths to files to review + :param guidelines_doc_filename: name of the file with the review + guidelines + :param reviewer_log: path to the file to save the review to + :param force_rebuild: whether to rebuild the container image + :param use_sudo: whether to run the container with sudo + :param log_level: level of logging, e.g., "DEBUG", "CRITICAL" + """ + _LOG.debug(hprint.func_signature_to_str()) + # + hdbg.dassert_in("OPENAI_API_KEY", os.environ) + # Build the container. + container_image = "tmp.llm_review" + dockerfile = r""" + FROM python:3.12-alpine + + # Install Bash. + RUN apk add --no-cache bash + + # Set Bash as the default shell. + SHELL ["/bin/bash", "-c"] + + # Install pip packages. + RUN pip install --upgrade pip + RUN pip install --no-cache-dir PyYAML pandas requests openai + """ + container_image = hdocker.build_container_image( + container_image, + dockerfile, + force_rebuild, + use_sudo, + ) + # Convert files to Docker paths. + is_caller_host = not hserver.is_inside_docker() + use_sibling_container_for_callee = True + caller_mount_path, callee_mount_path, mount = hdocker.get_docker_mount_info( + is_caller_host, use_sibling_container_for_callee + ) + helpers_root = hgit.find_helpers_root() + helpers_root_docker = hdocker.convert_caller_to_callee_docker_path( + helpers_root, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=False, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + # Get the path to the script. + script = hsystem.find_file_in_repo( + "dockerized_llm_review.py", root_dir=hgit.find_git_root() + ) + script_docker = hdocker.convert_caller_to_callee_docker_path( + script, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + # Get the path to the review guidelines doc. 
+ review_guidelines_doc = hsystem.find_file_in_repo( + guidelines_doc_filename, root_dir=hgit.find_git_root() + ) + review_guidelines_doc_docker = hdocker.convert_caller_to_callee_docker_path( + review_guidelines_doc, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + for file_path in file_paths: + # Get the path to the file to review. + in_file_path_docker = hdocker.convert_caller_to_callee_docker_path( + file_path, + caller_mount_path, + callee_mount_path, + check_if_exists=True, + is_input=True, + is_caller_host=is_caller_host, + use_sibling_container_for_callee=use_sibling_container_for_callee, + ) + # Build the command line. + cmd = f" {script_docker} -i {in_file_path_docker}" + cmd += f" --guidelines_doc_filename {review_guidelines_doc_docker}" + cmd += f" --reviewer_log {reviewer_log}" + cmd += f" -v {log_level}" + docker_cmd = hdocker.get_docker_base_cmd(use_sudo) + docker_cmd.extend( + [ + f"-e PYTHONPATH={helpers_root_docker}", + f"--workdir {callee_mount_path}", + f"--mount {mount}", + container_image, + cmd, + ] + ) + docker_cmd = " ".join(docker_cmd) + # Run. + hsystem.system(docker_cmd) + # Output the generated comments to the user. + output_from_file = hio.from_file(reviewer_log) + print(hprint.frame(reviewer_log, char1="/").rstrip("\n")) + print(output_from_file + "\n") + print(hprint.line(char="/").rstrip("\n")) + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + # File selection. + parser.add_argument( + "-f", "--files", nargs="+", type=str, help="Files to process" + ) + parser.add_argument( + "-d", + "--dir_name", + action="store", + help="Select all the files in a dir. 
'GIT_ROOT' to select git root", + ) + parser.add_argument( + "--modified", + action="store_true", + help="Select files modified in the current git client", + ) + parser.add_argument( + "--last_commit", + action="store_true", + help="Select files modified in the previous commit", + ) + parser.add_argument( + "--branch", + action="store_true", + help="Select files modified in the current branch with respect to master", + ) + parser.add_argument("--skip_files", nargs="+", type=str, help="Files to skip") + # Reviewer guidelines file. + parser.add_argument( + "--guidelines_doc_filename", + action="store", + help="Name of the document with the guidelines for automated reviewing", + default="all.automated_review_guidelines.reference.md", + ) + # Run parameters. + parser.add_argument( + "--reviewer_log", + default="./reviewer_warnings.txt", + help="File for storing the warnings", + ) + hparser.add_dockerized_script_arg(parser) + hparser.add_verbosity_arg(parser, log_level="CRITICAL") + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger( + verbosity=args.log_level, use_exec_path=True, force_white=False + ) + # Get the files to be reviewed. 
+ file_paths = liutils.get_files_to_check( + args.files, + args.skip_files, + args.dir_name, + args.modified, + args.last_commit, + args.branch, + ) + _LOG.debug( + "Reviewing %s files; file_paths=%s", len(file_paths), " ".join(file_paths) + ) + _run_dockerized_llm_review( + file_paths, + args.guidelines_doc_filename, + args.reviewer_log, + args.dockerized_force_rebuild, + args.dockerized_use_sudo, + args.log_level, + ) + + +if __name__ == "__main__": + _main(_parse()) diff --git a/linters/base.py b/linters/base.py index 997bf6560..f24cfeb28 100755 --- a/linters/base.py +++ b/linters/base.py @@ -16,7 +16,6 @@ import argparse import itertools import logging -import os from typing import List, Tuple, Type import joblib @@ -60,90 +59,6 @@ _LOG = logging.getLogger(__name__) -# ############################################################################# -# Files -# ############################################################################# - - -def _filter_files( - file_paths: List[str], file_paths_to_skip: List[str] -) -> List[str]: - """ - Filter the list of files by removing invalid or excluded ones. - - The following files are skipped: - - Files that do not exist - - Non-files (directories) - - Ipynb checkpoints - - Input and output files in unit tests - - Files explicitly excluded by the user - - :param file_paths: all the original files to validate and filter - :param file_paths_to_skip: files to exclude from processing - :return: files that passed the filters - """ - file_paths_to_keep: List[str] = [] - for file_path in file_paths: - # Skip files that do not exist. - is_valid = os.path.exists(file_path) - # Skip non-files. - is_valid &= os.path.isfile(file_path) - # Skip checkpoints. - is_valid &= ".ipynb_checkpoints/" not in file_path - # Skip input and output files used in unit tests. - is_valid &= not liutils.is_test_input_output_file(file_path) - # Skip files explicitly excluded by user. 
- is_valid &= file_path not in file_paths_to_skip - if is_valid: - file_paths_to_keep.append(file_path) - else: - _LOG.warning("Skipping %s", file_path) - return file_paths_to_keep - - -def _get_files_to_lint(args: argparse.Namespace) -> List[str]: - """ - Get the files to be processed by Linter. - - :param args: command line arguments - :return: paths of the files to lint - """ - file_paths: List[str] = [] - if args.files: - # Get the files that were explicitly specified. - file_paths = args.files - elif args.modified: - # Get all the modified files in the git client. - file_paths = hgit.get_modified_files() - elif args.last_commit: - # Get all the files modified in the previous commit. - file_paths = hgit.get_previous_committed_files() - elif args.branch: - # Get all the files modified in the branch. - file_paths = hgit.get_modified_files_in_branch(dst_branch="master") - elif args.dir_name: - # Get the files in a specified dir. - if args.dir_name == "$GIT_ROOT": - dir_name = hgit.get_client_root(super_module=True) - else: - dir_name = args.dir_name - dir_name = os.path.abspath(dir_name) - _LOG.info("Looking for all files in '%s'", dir_name) - hdbg.dassert_path_exists(dir_name) - cmd = f"find {dir_name} -name '*' -type f" - _, output = hsystem.system_to_string(cmd) - file_paths = output.split("\n") - file_paths_to_skip: List[str] = [] - if args.skip_files: - # Get the files to skip during linting. - file_paths_to_skip = args.skip_files - # Remove files that should not be linted. - file_paths = _filter_files(file_paths, file_paths_to_skip) - if len(file_paths) < 1: - _LOG.warning("No files that can be linted were found") - return file_paths - - # ############################################################################# # Actions # ############################################################################# @@ -416,7 +331,6 @@ def _run_linter( # Lint the files in parallel. 
num_threads = int(num_threads) _LOG.info("Using %s threads", num_threads if num_threads > 0 else "all") - lints_tmp = joblib.Parallel(n_jobs=num_threads, verbose=50)( joblib.delayed(_lint)( file_path, action_names, action_classes, args.pedantic @@ -503,7 +417,14 @@ def _parse() -> argparse.ArgumentParser: def _main(args: argparse.Namespace) -> None: hdbg.init_logger(args.log_level) # Get the files to be linted. - file_paths = _get_files_to_lint(args) + file_paths = liutils.get_files_to_check( + args.files, + args.skip_files, + args.dir_name, + args.modified, + args.last_commit, + args.branch, + ) _LOG.debug( "Linting %s files; file_paths=%s", len(file_paths), " ".join(file_paths) ) diff --git a/linters/utils.py b/linters/utils.py index 30117c961..6aa787c41 100644 --- a/linters/utils.py +++ b/linters/utils.py @@ -32,6 +32,98 @@ ] +def _filter_files( + file_paths: List[str], file_paths_to_skip: List[str] +) -> List[str]: + """ + Filter the list of files by removing invalid or excluded ones. + + The following files are skipped: + - Files that do not exist + - Non-files (directories) + - Ipynb checkpoints + - Input and output files in unit tests + - Files explicitly excluded by the user + + :param file_paths: all the original files to validate and filter + :param file_paths_to_skip: files to exclude from processing + :return: files that passed the filters + """ + file_paths_to_keep: List[str] = [] + for file_path in file_paths: + # Skip files that do not exist. + is_valid = os.path.exists(file_path) + # Skip non-files. + is_valid &= os.path.isfile(file_path) + # Skip checkpoints. + is_valid &= ".ipynb_checkpoints/" not in file_path + # Skip input and output files used in unit tests. + is_valid &= not is_test_input_output_file(file_path) + # Skip files explicitly excluded by user. 
+ is_valid &= file_path not in file_paths_to_skip + if is_valid: + file_paths_to_keep.append(file_path) + else: + _LOG.warning("Skipping %s", file_path) + return file_paths_to_keep + + +def get_files_to_check( + files: Optional[List[str]], + skip_files: Optional[List[str]], + dir_name: Optional[str], + modified: bool, + last_commit: bool, + branch: bool, +) -> List[str]: + """ + Get the files to be processed by Linter/Reviewer. + + :param files: specific files to process + :param skip_files: specific files to skip and not process + :param dir_name: name of the dir where all files should be processed + :param modified: process the files modified in the current git + client + :param last_commit: process the files modified in the previous + commit + :param branch: process the files modified in the current branch + w.r.t. master + :return: paths of the files to process + """ + file_paths: List[str] = [] + if files: + # Get the files that were explicitly specified. + file_paths = files + elif modified: + # Get all the modified files in the git client. + file_paths = hgit.get_modified_files() + elif last_commit: + # Get all the files modified in the previous commit. + file_paths = hgit.get_previous_committed_files() + elif branch: + # Get all the files modified in the branch. + file_paths = hgit.get_modified_files_in_branch(dst_branch="master") + elif dir_name: + # Get the files in a specified dir. + if dir_name == "$GIT_ROOT": + dir_name = hgit.get_client_root(super_module=True) + dir_name = os.path.abspath(dir_name) + _LOG.info("Looking for all files in '%s'", dir_name) + hdbg.dassert_path_exists(dir_name) + cmd = f"find {dir_name} -name '*' -type f" + _, output = hsystem.system_to_string(cmd) + file_paths = output.split("\n") + file_paths_to_skip: List[str] = [] + if skip_files: + # Get the files to skip. + file_paths_to_skip = skip_files + # Remove files that should not be processed. 
+ file_paths = _filter_files(file_paths, file_paths_to_skip)
+ if len(file_paths) < 1:
+ _LOG.warning("No files that can be processed were found")
+ return file_paths
+
+
 def get_python_files_to_lint(dir_name: str) -> List[str]:
 """
 Get Python files for linter excluding jupytext and test Python files.

From af22723e08d76f40bfcc848fb0446d4d580ce69a Mon Sep 17 00:00:00 2001
From: GP Saggese <saggese@gmail.com>
Date: Wed, 28 May 2025 20:18:26 -0400
Subject: [PATCH 135/193] Improve
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks: All checks passed ✅
---
 dev_scripts_helpers/llms/llm_prompts.py | 61 +-
 .../all.notes_toolchain.explanation.md | 115 +++
 .../all.notes_toolchain.how_to_guide.md | 749 ++++++++++++++++++
 3 files changed, 924 insertions(+), 1 deletion(-)
 create mode 100644 docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md
 create mode 100644 docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md

diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py
index 764c2d85f..9971966e7 100644
--- a/dev_scripts_helpers/llms/llm_prompts.py
+++ b/dev_scripts_helpers/llms/llm_prompts.py
@@ -761,6 +761,7 @@ def md_expand() -> _PROMPT_OUT:
 post_container_transforms = ["format_markdown"]
 return system, pre_transforms, post_transforms, post_container_transforms

+
 def md_clean_up_how_to_guide() -> _PROMPT_OUT:
 system = _MD_CONTEXT
 system += r"""
@@ -785,6 +786,61 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT:
 return system, pre_transforms, post_transforms, post_container_transforms


+def md_convert_text_to_bullet_points() -> _PROMPT_OUT:
+ system = _MD_CONTEXT
+ system += r"""
+ - Convert the text passed to bullet points using multiple levels of bullets.
+ - Remove formatting (bold, italic, etc.) that is not needed.
+
+ Make sure not to lose any information.
+ """
+ pre_transforms: Set[str] = set()
+ post_transforms = {"remove_code_delimiters"}
+ post_container_transforms = ["format_markdown"]
+ return system, pre_transforms, post_transforms, post_container_transforms
+
+
+def md_convert_table_to_bullet_points() -> _PROMPT_OUT:
+ system = _MD_CONTEXT
+ system += r"""
+ - Convert the table passed to bullet points using multiple levels of bullets.
+ - Remove the formatting (e.g., bold, italic)
+
+ Make sure not to lose any information.
+ """
+ pre_transforms: Set[str] = set()
+ post_transforms = {"remove_code_delimiters"}
+ post_container_transforms = ["format_markdown"]
+ return system, pre_transforms, post_transforms, post_container_transforms
+
+
+def md_format() -> _PROMPT_OUT:
+ system = _MD_CONTEXT
+ system += r"""
+ - Replace `*` with `-` for bullet points
+ - Do not use tables unless necessary
+ """
+ pre_transforms: Set[str] = set()
+ post_transforms = {"remove_code_delimiters"}
+ post_container_transforms = ["format_markdown"]
+ return system, pre_transforms, post_transforms, post_container_transforms
+
+
+def md_remove_formatting() -> _PROMPT_OUT:
+ system = _MD_CONTEXT
+ system += r"""
+ You will:
+ - Maintain the structure of the text and keep the content of the existing
+ text
+ - Remove the formatting (e.g., bold, italic)
+
+ Print only the markdown without any explanation.
+ """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + # ############################################################################# # Latex # ############################################################################# @@ -1070,7 +1126,10 @@ def slide_smart_colorize() -> _PROMPT_OUT: def text_rephrase() -> _PROMPT_OUT: """ """ - system = hio.from_file("text_rephrase.txt") + if os.path.exists("text_rephrase.txt"): + system = hio.from_file("text_rephrase.txt") + else: + system = "" pre_transforms: Set[str] = set() post_transforms: Set[str] = set() post_container_transforms = ["format_markdown"] diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md b/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md new file mode 100644 index 000000000..b4e62867c --- /dev/null +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md @@ -0,0 +1,115 @@ +<!-- toc --> + +- [`notes_to_pdf.py` - Flow Explanation](#notes_to_pdfpy---flow-explanation) + * [Goal](#goal) + * [Architecture diagram](#architecture-diagram) + * [Steps](#steps) + * [Dependencies](#dependencies) + + [`preprocess_notes.py`](#preprocess_notespy) + + [`render_images.py`](#render_imagespy) + + [`latex_abbrevs.sty`](#latex_abbrevssty) + +<!-- tocstop --> + +# Flow Explanation + +- This document walks through the architecture of the `notes_to_pdf.py` script + +## Goal + +- Convert a lightweight, annotated plain-text notes file (usually `*.txt`) into + a share-ready document + - Formats include PDF, HTML, or Beamer slide deck +- Ensure the following: + - Honor rich Markdown features and custom shorthand + - Inline auto-generated diagrams: + - PlantUML + - Mermaid + - TikZ + - Graphviz + - LaTeX tables + +## Architecture diagram + +```mermaid +%%{init: {'theme':'default'}}%% +C4Context + 
+System_Ext(user, "User", "Author preparing course notes") +System(doc, "Documentation Toolchain", "Python & LaTeX") + +System_Boundary(doc_boundary, "") { + Container(notes2pdf, "notes_to_pdf.py", "Python CLI", "Orchestrates conversion: clean -> images -> Pandoc -> LaTeX") + Container(render, "render_images.py", "Python module", "Renders diagram blocks to images. Caches results.") + Container(preproc, "preprocess_notes.py", "Python module", "Cleans notes & expands macros (pre-Pandoc)") + Container(style, "latex_abbrevs.sty", "LaTeX style", "Provides LaTeX math & formatting shortcuts") +} + +Rel(user, notes2pdf, "Invokes via CLI") +Rel(notes2pdf, preproc, "Uses for Note Pre-processing") +Rel(notes2pdf, render, "Calls to Render Images") +Rel(notes2pdf, style, "Injects LaTeX Style (.sty)") +``` + +## Steps + +1. **Clean-up & augmentation** + - Performed by: `preprocess_notes.py` + - Key ideas: + - Normalizes headers + - Expands arrow shorthand (`->` to `\rightarrow`) + - Deals with comments + - Inserts Pandoc YAML front-matter + - Inserts optional navigation slides + +2. **Diagram extraction** + - Performed by: `render_images.py` + - Key ideas: + - Scans code blocks (e.g., ` plantuml) + - Renders diagrams via Docker containers + - Replaces the code with `![](figs/...)` include + - Comments out the original block + - Uses a SHA-256 cache to skip unchanged diagrams + +3. **Orchestration** + - Performed by: `notes_to_pdf.py` + - Key ideas: + - Calls Stage 1 and Stage 2, then Pandoc, then (for PDF) LaTeX + - Flags control each sub-action to allow skipping, debugging, or re-running + steps individually + +4. 
**Document synthesis** + - Performed by: Pandoc + LaTeX + - Key ideas: + - Pandoc converts Markdown to LaTeX (or HTML / Beamer) + - `latex_abbrevs.sty` is copied next to the generated `.tex` file + - Ensures vector/matrix macros (`\vv{}`, `\mat{}`), deep lists, and color + helpers compile correctly + +## Dependencies + +### `preprocess_notes.py` + +- **Input: ** raw notes. +- **Output: ** Pandoc‑ready Markdown. +- Handles + - formatting banner frames + - question formatting + - colour commands (`\red{}` -> `\textcolor{red}{...}`) + - TOC injection + +### `render_images.py` + +- Docker‑wrapper around PlantUML, Mermaid CLI, TikZ, Graphviz to convert image + description in a file, replacing the text with the picture + `figs/<basename>.<index>.png` + +### `latex_abbrevs.sty` + +- Custom style for Latex documents, including: + - Bold-underlined vectors (`\vv{x}`) + - Matrices + - Colour presets + - 9-level `enumitem` lists + - Symbol shorthands +- Copied automatically; you rarely touch this unless you need new macros. diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md new file mode 100644 index 000000000..1249d6535 --- /dev/null +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md @@ -0,0 +1,749 @@ +<!-- toc --> + +- [Notes Documentation Toolchain](#notes-documentation-toolchain) + * [1. Generate Slides and PDFs — `notes_to_pdf.py`](#1-generate-slides-and-pdfs--notes_to_pdfpy) + + [What it does](#what-it-does) + + [Most used flags](#most-used-flags) + + [Quickstart recipes](#quickstart-recipes) + + [CLI flags cheatsheet](#cli-flags-cheatsheet) + + [Worked examples](#worked-examples) + - [Slides with navigation breadcrumbs](#slides-with-navigation-breadcrumbs) + - [Focus on a subsection](#focus-on-a-subsection) + - [Plain PDF article](#plain-pdf-article) + * [2. 
Auto render figures — `render_images.py`](#2-auto-render-figures--render_imagespy) + + [Supported File types and Code blocks](#supported-file-types-and-code-blocks) + + [Quick Start Recipes](#quick-start-recipes) + - [Render to a new file](#render-to-a-new-file) + - [Render in‑place (Markdown or LaTeX)](#render-in%E2%80%91place-markdown-or-latex) + - [HTML preview of already‑rendered images](#html-preview-of-already%E2%80%91rendered-images) + - [Dry‑run (test parsing / comments only)](#dry%E2%80%91run-test-parsing--comments-only) + + [Flags](#flags) + * [3. Lint and Prettify — `lint_notes.py`](#3-lint-and-prettify--lint_notespy) + + [Quickstart recipes](#quickstart-recipes-1) + - [Prettify with Dockerised Prettier and TOC rebuild](#prettify-with-dockerised-prettier-and-toc-rebuild) + - [Custom print width and selective actions](#custom-print-width-and-selective-actions) + + [Flags](#flags-1) + * [4. Notebook Image Scraping — `extract_notebook_images.py`](#4-notebook-image-scraping--extract_notebook_imagespy) + + [Flag Options](#flag-options) + * [5. LLM Powered Transforms — `llm_transform.py`](#5-llm-powered-transforms--llm_transformpy) + + [Minimum viable command](#minimum-viable-command) + + [Finding available prompts](#finding-available-prompts) + + [Flags](#flags-2) + + [Example recipes](#example-recipes) + * [6. Pandoc Wrapper — `run_pandoc.py`](#6-pandoc-wrapper--run_pandocpy) + + [What the script does](#what-the-script-does) + + [Quickstart commands](#quickstart-commands) + + [Flags](#flags-3) + * [7. Automate notes transformations — `transform_notes.py`](#7-automate-notes-transformations--transform_notespy) + + [What it does](#what-it-does-1) + + [Supported actions](#supported-actions) + + [Examples](#examples) + + [Flags](#flags-4) + * [8. Scrape headers from a markdown — `extract_headers_from_markdown.py`](#8-scrape-headers-from-a-markdown--extract_headers_from_markdownpy) + + [Goal](#goal) + + [Examples](#examples-1) + + [Flags](#flags-5) + * [9. 
TikZ to Bitmap — `dockerized_tikz_to_bitmap.py`](#9-tikz-to-bitmap--dockerized_tikz_to_bitmappy) + + [Examples](#examples-2) + * [10. Graphviz Renderer — `dockerized_graphviz.py`](#10-graphviz-renderer--dockerized_graphvizpy) + + [What it does](#what-it-does-2) + + [Most used flags](#most-used-flags-1) + + [Quickstart recipes](#quickstart-recipes-2) + + [CLI flags cheatsheet](#cli-flags-cheatsheet-1) + * [11. LaTeX Renderer — `dockerized_latex.py`](#11-latex-renderer--dockerized_latexpy) + + [What it does](#what-it-does-3) + + [Most used flags](#most-used-flags-2) + + [Quickstart recipes](#quickstart-recipes-3) + + [CLI flags cheatsheet](#cli-flags-cheatsheet-2) + * [12. Mermaid Renderer — `dockerized_mermaid.py`](#12-mermaid-renderer--dockerized_mermaidpy) + + [What it does](#what-it-does-4) + + [Most used flags](#most-used-flags-3) + + [Quickstart recipes](#quickstart-recipes-4) + + [CLI flags cheatsheet](#cli-flags-cheatsheet-3) + * [13. Pandoc Renderer — `dockerized_pandoc.py`](#13-pandoc-renderer--dockerized_pandocpy) + + [What it does](#what-it-does-5) + + [Most used flags](#most-used-flags-4) + + [Quickstart recipes](#quickstart-recipes-5) + + [CLI flags cheat‑sheet](#cli-flags-cheat%E2%80%91sheet) + * [14. Prettier Formatter — `dockerized_prettier.py`](#14-prettier-formatter--dockerized_prettierpy) + + [What it does](#what-it-does-6) + + [Most used flags](#most-used-flags-5) + + [Quickstart recipes](#quickstart-recipes-6) + + [CLI flags cheatsheet](#cli-flags-cheatsheet-4) + * [15. MacOS screenshot helper — `save_screenshot.py`](#15-macos-screenshot-helper--save_screenshotpy) + + [What it does](#what-it-does-7) + + [Flags](#flags-6) + +<!-- tocstop --> + +# Notes Documentation Toolchain + +- This is a high‑level guide to the helper scripts that turn raw `.txt` notes + into polished PDFs, slide decks, and more. + +// TODO(*): Is it worth to report the flags? 
It's difficult to maintain + +## notes_to_pdf.py + +### What it does + +- Convert plain‑text notes into polished **PDF, HTML, or Beamer slides** with a + single command: + ```bash + > notes_to_pdf.py --input <infile.txt> --output <outfile.[pdf|html]> --type [pdf|html|slides] + ``` + +- The most used flags are + - `--type {pdf|html|slides}` + - `--toc_type {none|pandoc_native|navigation}` + - `--debug_on_error`, `--skip_action ...`, `--filter_by_lines A:B` + +### Quickstart recipes + +- Compile to **Beamer slides** + ``` + > notes_to_pdf.py -i lesson.txt -o lesson.pdf --type slides + ``` +- Produce a **stand‑alone HTML** page + ``` + > notes_to_pdf.py -i cheatsheet.txt -o cheatsheet.html --type html + ``` +- Build a **PDF article** (LaTeX) + ``` + > notes_to_pdf.py -i paper.txt -o paper.pdf --type pdf + ``` +- Skip the final viewer **open** step + ``` + > ... --skip_action open` + ``` + +- **Tip**: Run with `--preview_actions` to print the exact steps without + executing them. + +### CLI flags cheatsheet + +- Flag: `--type {pdf,html,slides}` + - Purpose: Specifies the output format + - Notes: The "slides" option uses Beamer +- Flag: `--toc_type {none,pandoc_native,navigation}` + - Purpose: Determines the Table of Contents (TOC) style + - Notes: The `navigation` option inserts slide-friendly breadcrumb frames +- Flag: `--filter_by_header "# Intro"` + - Purpose: Builds an artefact from a section subset + - Notes: This is useful for testing +- Flag: `--filter_by_lines 120:250` + - Purpose: Compiles only a specified range of lines + - Notes: Accepts `None` as a sentinel value +- Flag: `--debug_on_error` + - Purpose: On Pandoc failure, generates a _.tex_ file and provides a helpful + log + - Notes: No additional notes +- Flag: `--script myrun.sh` + - Purpose: Saves every shell command executed + - Notes: Useful for reproducing build pipelines +- Docker knobs: + - Options: + - `--dockerized_force_rebuild` + - `--dockerized_use_sudo` + - `--use_host_tools` + - Purpose: 
Controls the use of container vs host for pandoc/latex + +- Run `notes_to_pdf.py -h` for the exhaustive list. + +### Worked examples + +- Slides with navigation breadcrumbs, keeping intermediate files for inspection +TODO(indro): `--toc_type navigation` fails because of the preprocess step. + + ```bash + > notes_to_pdf.py \ + --input MSML610/Lesson5-Theory_Statistical_learning.txt \ + --output Lesson5.pdf \ + --type slides \ + --toc_type navigation \ + --debug_on_error \ + --skip_action cleanup_after + ``` + +- Focus on a subsection, compiling only from line 362 to EOF for a fast iteration + when debugging slides + ```bash + > notes_to_pdf.py \ + --input Lesson8-Reasoning_over_time.txt \ + --output Focus.pdf \ + --type slides \ + --filter_by_lines 362:None \ + --skip_action cleanup_after + ``` + +- Plain PDF article + ```bash + > notes_to_pdf.py -i book_notes.txt -o book_notes.pdf --type pdf + ``` + +## render_images.py + +- This script auto renders figures by + - detecting fenced code blocks (PlantUML, Mermaid, TikZ, Graphviz, ...) 
+ - rendering them into images calling the appropriate tool + - commenting them out the block + - inlining a `![](img)` markup + +- Render the images in a text file + ```bash + > render_images.py -i notes/MSML610/Lesson9-Causal_inference.txt \ + -o lesson9.images.txt --run_dockerized + ``` + +### Supported File types and Code blocks + +- File extension: `.md`, `.txt` + - Rendering syntax allowed: + - `plantuml` + - `mermaid` + - `graphviz` + - `tikz` + - `latex` + - Output embeds as: `<img src="figs/xxx.png">` +- File extension: `.tex` + - Rendering syntax allowed: + - same tags (TikZ & LaTeX especially) + - Output embeds as: `\includegraphics{...}` + +### Quick Start Recipes + +- Render to a new file + ```bash + > render_images.py -i lesson.md -o lesson.rendered.md --action render --run_dockerized + ``` + +- Render in‑place (Markdown or LaTeX) + ```bash + > render_images.py -i lesson.md --action render --run_dockerized + ``` + +- HTML preview of already‑rendered images + ```bash + > render_images.py -i lesson.md --action open --run_dockerized + ``` + +- Dry‑run (test parsing / comments only) + ```bash + > render_images.py -i lesson.md -o /tmp/out.md --dry_run + ``` + +### Flags + +- `-i/--in_file_name` + - Default: required + - Purpose: Input `.md`, `.tex`, or `.txt` +- `-o/--out_file_name` + - Default: `<input>` + - Purpose: Output path (must share extension) +- `--action` + - Default: `render` + - Purpose: `render` ↔ `open` +- `--dry_run` + - Default: False + - Purpose: Skip actual rendering, still rewrites markup +- `--run_dockerized / --dockerized_*` + - Default: False + - Purpose: Use pre-built container images for PlantUML, Mermaid, etc +- `--verbosity/-v` + - Default: `INFO` + - Purpose: Logging verbosity + +## `lint_notes.py` + +- Tidy up Markdown/LaTeX/txt notes by: + - normalising G‑Doc artifacts + - running Prettier + - fixing bullet/heading quirks + - refreshing the Table of Contents + +### Quickstart recipes + +- Prettify with Dockerised Prettier and 
TOC rebuild + ```bash + > lint_notes.py -i Lesson10.md \ + --use_dockerized_prettier \ + --use_dockerized_markdown_toc + ``` + +- Custom print width and selective actions + ```bash + > lint_notes.py -i draft.txt -o tidy.txt -w 100 \ + --action preprocess,prettier,postprocess + ``` + +### Flags + +- `-i/--infile` + - Default: stdin + - Purpose: Input `.txt` or `.md` (also via pipe) +- `-o/--outfile` + - Default: stdout + - Purpose: Destination file (omit for pipe) +- `-w/--print-width` + - Default: None $\rightarrow$ Prettier default + - Purpose: Line wrap width +- `--use_dockerized_prettier` + - Default: False + - Purpose: Run Prettier inside helper container +- `--use_dockerized_markdown_toc` + - Default: False + - Purpose: Refresh TOC via containerised `markdown-toc` +- `--action` + - Default: all five stages + - Purpose: Comma-separated subset of: `preprocess`, `prettier`, `postprocess`, + `frame_chapters`, `refresh_toc` +- `-v/--verbosity` + - Default: INFO + - Purpose: Logging level + +## `extract_notebook_images.py` + +- Spins up a docker container and dumps every `png/svg` output cell into a folder. +- You can then publish or reuse the static plots/diagrams already rendered in a + Jupyter notebook. + +- Minimal call: + ```bash + > extract_notebook_images.py \ + --in_notebook_filename notebooks/Lesson8.ipynb \ + --out_image_dir notebooks/screenshots + ``` + +### Flag Options + +- `-i / --in_notebook_filename PATH` + - Purpose: Notebook to scan + - Default: required +- `-o / --out_image_dir DIR` + - Purpose: Folder where images land + - Default: required +- `--dockerized_force_rebuild` + - Purpose: Re-build the Docker image (use if you changed extractor code) + - Default: false +- `--dockerized_use_sudo` + - Purpose: Prepend `sudo docker ...` + - Default: auto-detects +- `-v INFO/DEBUG` + - Purpose: Log verbosity + - Default: `INFO` + +--- + +## 5. 
LLM Powered Transforms — `llm_transform.py` + +Apply a GPT‑style transformation (rewrite, summarise, critique code, convert to +slides, etc.) to any text file _without_ leaving the terminal / editor. + +> _Note: You need to have an `OPENAI_API_KEY` and an internet connection._ + +### Minimum viable command + +```bash +llm_transform.py -i draft.txt -o polished.txt -p rewrite_clearer +``` + +### Finding available prompts + +```bash +llm_transform.py -p list -i - -o - +``` + +### Flags + +| Flag | Role | Notes | +| -------------------------------------------------------------------- | ------------------------------------------------------------- | ---------------------- | +| `-i / --input` | Source text (`-` = stdin) | — | +| `-o / --output` | Destination (`-` = stdout) | — | +| `-p / --prompt` | **Prompt tag** (`list`, `code_review`, `slide_colorize`, ...) | required | +| `-c / --compare` | Print _both_ original & transformed blocks to stdout | helpful for quick diff | +| `-b / --bold_first_level_bullets` | Post‑format tweak for slide prompts | | +| `-s / --skip-post-transforms` | Return raw LLM output, skip prettier/cleanup | | +| Docker flags (`--dockerized_force_rebuild`, `--dockerized_use_sudo`) | Control container lifecycle | + +### Example recipes + +- **Turn a code file into a review checklist** + + ```bash + llm_transform.py -i foo.py -o cfile -p code_review + vim cfile + ``` + +- **Color‑accent the bold bullets for slides** + + ```bash + llm_transform.py -i deck.md -o - -p slide_colorize | tee deck.color.md + ``` + +- **Inline use in Vim** – visual‑select a block, then: + + ```vim + :'<,'>!llm_transform.py -p summarize -i - -o - + ``` + +--- + +## 6. Pandoc Wrapper — `run_pandoc.py` + +### What the script does + +- Reads **Markdown** from _stdin_ or `--input` file. +- Dispatches to a named **action** (currently only `convert_md_to_latex`). +- Pushes the Pandoc output to _stdout_ or the `--output` file. 
+ +### Quickstart commands + +| Goal | Command | +| ------------------------------------- | -------------------------------------------- | +| Convert a Markdown file to LaTeX | `run_pandoc.py -i note.md -o note.tex` | +| Same, but stream from STDIN to STDOUT | `cat note.md \| run_pandoc.py -i - -o -` | +| Inside **Vim** (visual range) | `:'<,'>!run_pandoc.py -i - -o - -v CRITICAL` | + +> **Tip :** pass `-v CRITICAL` to silence helper logging when piping into +> editors. + +### Flags + +| Flag | Default | Meaning | +| ------------------ | --------------------- | --------------------------------------------------------- | +| `-i / --input` | `-` | Source file or `-` for STDIN. | +| `-o / --output` | `-` | Destination file or `-` for STDOUT. | +| `--action` | `convert_md_to_latex` | Transformation to apply. Future‑proofed for more actions. | +| `-v / --log_level` | `INFO` | Standard helper‑library verbosity. | + +--- + +## 7. Automate notes transformations — `transform_notes.py` + +### What it does + +- Accepts a **text/Markdown** stream (file or `-`). +- Applies a named **action** (`-a/--action`). +- Writes the result to the given output (in‑place, file, or `-`). + +### Supported actions + +| Run `-a list` to print. 
|

| Tag | Effect | Typical Vim one‑liner |
| ------------------------ | -------------------------------------------------------------- | -------------------------------------------------- |
| `toc` | Generate a bullet TOC (top‑level by default) | `:!transform_notes.py -a toc -i % -l 1` |
| `format_headers` | Re‑flow / indent headers (≤ `--max_lev`) | `:%!transform_notes.py -a format -i - --max_lev 3` |
| `increase_headers_level` | Bump all headers down one level | `:%!transform_notes.py -a increase -i -` |
| `md_list_to_latex` | Convert a Markdown list to LaTeX `\begin{itemize}` | `:%!transform_notes.py -a md_list_to_latex -i -` |
| `md_*` family | Formatting clean‑ups (bold bullets, colourise bold text, etc.) | see `-a list` |

### Examples

```bash
# Re‑flow & clean a file in place
transform_notes.py -a md_format -i notes/lecture.txt --in_place

# Generate a 2‑level TOC to STDOUT
transform_notes.py -a toc -i notes/lecture.md -o - -l 2

# Tidy ChatGPT‑generated Markdown (visual mode in Vim)
:'<,'>!transform_notes.py -i - -o - -a md_fix_chatgpt_output
```

### Flags

| Flag | Default | Purpose |
| ---------------- | ------------ | -------------------------------------------------- |
| `-a / --action` | _(required)_ | Choose the transformation. |
| `-l / --max_lev` | `5` | Header depth for `format_headers`. |
| `-i / --input` | `-` | File path or `-` (STDIN). |
| `-o / --output` | `-` | File path or `-` (STDOUT). |
| `--in_place` | _False_ | Overwrite input file instead of writing elsewhere. |

---

## 8.
Scrape headers from a markdown — `extract_headers_from_markdown.py` + +### Goal + +Turn a Markdown document into either: + +- a **plain list** of headers, +- a **nested header map**, or +- a \*_Vim_ quick‑fix\*\* (`cfile`) that lets you jump between sections with + `:cnext`. + +### Examples + +```bash +# Human‑readable map (levels 1‑3) to STDOUT +extract_headers_from_markdown.py -i README.md -o - --mode list --max-level 3 + +# Build a quick‑fix file and open Vim on it +extract_headers_from_markdown.py -i README.md -o headers.cfile --mode cfile +vim -c "cfile headers.cfile" +``` + +### Flags + +| Flag | Default | Meaning | +| ------------- | ------- | ------------------------------ | +| `--mode` | `list` | `list`, `headers`, or `cfile`. | +| `--max-level` | `3` | Maximum `#` depth to parse. | + +--- + +## 9. TikZ to Bitmap — `dockerized_tikz_to_bitmap.py` + +### Examples + +```bash +# Plain 300 DPI conversion +./dockerized_tikz_to_bitmap.py -i figure.tikz -o figure.png + +# Custom ImageMagick options (e.g. 600 DPI) +./dockerized_tikz_to_bitmap.py -i fig.tikz -o fig.png -- -density 600 -quality 90 +``` + +_Any extra tokens after `--` are passed verbatim to `convert`._ + +--- + +## 10. Graphviz Renderer — `dockerized_graphviz.py` + +### What it does + +Converts a Graphviz `.dot` file into a `.png` image using a Dockerized +container. + +> ```bash +> graphviz_wrapper.py --input input.dot --output output.png +> ``` + +This script serves as a thin wrapper around Dockerized Graphviz for consistent +rendering across systems. 
+ +### Most used flags + +- `--input`: path to the `.dot` file +- `--output`: destination `.png` image file +- `--dockerized_force_rebuild`: rebuild the container from scratch +- `--dockerized_use_sudo`: use `sudo` for Docker commands + +### Quickstart recipes + +| Goal | Command | +| --------------------- | ------------------------------------------------------------------------------ | +| Convert DOT to PNG | `graphviz_wrapper.py -i diagram.dot -o diagram.png` | +| Rebuild Docker image | `graphviz_wrapper.py -i diagram.dot -o diagram.png --dockerized_force_rebuild` | +| Use `sudo` for Docker | `graphviz_wrapper.py -i diagram.dot -o diagram.png --dockerized_use_sudo` | + +### CLI flags cheatsheet + +| Flag | Purpose | Notes | +| ---------------------------- | ---------------------------- | ------------- | +| `-i / --input` | Path to input `.dot` file | **required** | +| `-o / --output` | Output path for `.png` image | **required** | +| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | +| `--dockerized_use_sudo` | Run Docker with `sudo` | Optional | +| `-v / --verbosity` | Logging verbosity | Default: INFO | + +--- + +## 11. LaTeX Renderer — `dockerized_latex.py` + +### What it does + +Compiles a LaTeX `.tex` file into a PDF using `pdflatex` inside a Docker +container. +Automatically rebuilds the Docker image if needed. + +> ```bash +> latex_wrapper.py --input doc.tex --output doc.pdf +> ``` + +Supports optional rerun of LaTeX for proper references or table of contents +generation. 
+ +### Most used flags + +- `--input`: LaTeX source file to compile +- `--output`: Output PDF path +- `--run_latex_again`: Compile the LaTeX file twice +- `--dockerized_force_rebuild`: Force container rebuild +- `--dockerized_use_sudo`: Run Docker with `sudo` + +### Quickstart recipes + +| Goal | Command | +| ------------------------ | ------------------------------------------------------------------------- | +| Compile `.tex` to `.pdf` | `latex_wrapper.py -i report.tex -o report.pdf` | +| Rebuild Docker image | `latex_wrapper.py -i report.tex -o report.pdf --dockerized_force_rebuild` | +| Use `sudo` for Docker | `latex_wrapper.py -i report.tex -o report.pdf --dockerized_use_sudo` | +| Run LaTeX twice | `latex_wrapper.py -i paper.tex -o paper.pdf --run_latex_again` | + +### CLI flags cheatsheet + +| Flag | Purpose | Notes | +| ---------------------------- | -------------------------- | ----------------------------- | +| `-i / --input` | Path to input `.tex` file | **required** | +| `-o / --output` | Output PDF file path | **required** | +| `--run_latex_again` | Run LaTeX a second time | Optional, useful for TOC/refs | +| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | +| `--dockerized_use_sudo` | Run Docker with `sudo` | Optional | +| `-v / --verbosity` | Logging verbosity | Default: INFO | + +--- + +## 12. Mermaid Renderer — `dockerized_mermaid.py` + +### What it does + +Renders Mermaid `.mmd` or `.md` diagrams into image files using a Dockerized +container. + +> ```bash +> mermaid_wrapper.py --input flowchart.mmd --output flowchart.png +> ``` + +Automatically sets output to match input name if `--output` is omitted. 
+ +### Most used flags + +- `--input`: Source Mermaid file +- `--output`: Destination image file (optional) +- `--dockerized_force_rebuild`: Rebuild Docker image +- `--dockerized_use_sudo`: Use `sudo` for Docker + +### Quickstart recipes + +| Goal | Command | +| ----------------------------- | ----------------------------------------------------------------------------- | +| Render Mermaid diagram | `mermaid_wrapper.py -i diagram.mmd -o diagram.png` | +| Use input as output (default) | `mermaid_wrapper.py -i diagram.mmd` | +| Rebuild container | `mermaid_wrapper.py -i diagram.mmd -o diagram.png --dockerized_force_rebuild` | +| Use `sudo` for Docker | `mermaid_wrapper.py -i diagram.mmd -o diagram.png --dockerized_use_sudo` | + +### CLI flags cheatsheet + +| Flag | Purpose | Notes | +| ---------------------------- | ---------------------------------- | -------------------------- | +| `-i / --input` | Path to input `.mmd` or `.md` file | **required** | +| `-o / --output` | Output image file | Defaults to input filename | +| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | +| `--dockerized_use_sudo` | Run Docker with `sudo` | Optional | +| `-v / --verbosity` | Logging verbosity | Default: INFO | + +--- + +## 13. Pandoc Renderer — `dockerized_pandoc.py` + +### What it does + +Converts documents using `pandoc` inside a Docker container. +Supports output to Beamer slides, PDFs, and more with custom CLI flags. + +> ```bash +> pandoc_wrapper.py --input notes.md --output slides.pdf -- docker_args... +> ``` + +Internally builds a Docker container and passes the full `pandoc` command +string. 
+ +### Most used flags + +- `--input`: source file (e.g., `.md`, `.txt`) +- `--output`: output file (e.g., `.pdf`, `.html`) +- `--container_type`: use `pandoc_only`, `pandoc_latex`, or `pandoc_texlive` +- `--dockerized_force_rebuild`: rebuild image from scratch +- `--dockerized_use_sudo`: run Docker with `sudo` + +### Quickstart recipes + +| Goal | Command | +| ------------------------ | ---------------------------------------------------------------------------------------------------- | +| Convert Markdown to PDF | `pandoc_wrapper.py --input notes.md --output notes.pdf --container_type pandoc_latex` | +| Convert to Beamer slides | `pandoc_wrapper.py --input slides.md --output slides.pdf --container_type pandoc_latex -- -t beamer` | +| Rebuild Docker image | `pandoc_wrapper.py --input notes.md --output notes.pdf --dockerized_force_rebuild` | +| Run with sudo | `pandoc_wrapper.py --input notes.md --output notes.pdf --dockerized_use_sudo` | + +### CLI flags cheat‑sheet + +| Flag | Purpose | Notes | +| ---------------------------- | ------------------------------------------------------ | ---------------------- | +| `--input` | Input source file for Pandoc | **required** | +| `--output` | Output file path | Defaults to input name | +| `--data_dir` | Additional resource/data path | Optional | +| `--container_type` | Docker image type: `pandoc_only`, `pandoc_latex`, etc. | Default: `pandoc_only` | +| `--dockerized_force_rebuild` | Force rebuild of Docker image | Optional | +| `--dockerized_use_sudo` | Use `sudo` for Docker execution | Optional | +| `-v / --verbosity` | Logging level | Default: INFO | + +--- + +## 14. Prettier Formatter — `dockerized_prettier.py` + +### What it does + +Formats text files (`.md`, `.txt`, `.tex`, etc.) using Prettier within a Docker +container. +Avoids environment-specific issues and ensures consistent formatting. 
+ +> ```bash +> dockerized_prettier.py --parser markdown --write test.md +> ``` + +Supports full Prettier CLI flexibility via passthrough of additional options. + +### Most used flags + +- `--parser`: Prettier parser (e.g. `markdown`) +- `--write`: Apply formatting in-place +- `--tab-width`: Number of spaces per indentation level +- `--dockerized_force_rebuild`: Force rebuild of Docker container +- `--dockerized_use_sudo`: Use `sudo` for Docker commands + +### Quickstart recipes + +| Goal | Command | +| --------------------------------- | -------------------------------------------------------------------------------------------- | +| Format a Markdown file | `dockerized_prettier.py --parser markdown --write test.md` | +| Use `sudo` for Docker execution | `dockerized_prettier.py --use_sudo --parser markdown --write test.md` | +| Rebuild the Docker image | `dockerized_prettier.py --dockerized_force_rebuild --parser markdown --write test.md` | +| Change indentation and wrap style | `dockerized_prettier.py --parser markdown --tab-width 4 --prose-wrap always --write test.md` | + +### CLI flags cheatsheet + +| Flag | Purpose | Notes | +| ---------------------------- | ----------------------------------------------------- | ------------------------------------- | +| `-i / --input` | Input file path | Required | +| `-o / --output` | Output file path | Optional (defaults to input) | +| `--parser` | Prettier parser type (e.g. `markdown`, `babel`, etc.) | Required via passthrough | +| `--write` | Format and overwrite input file | Common usage flag | +| `--tab-width` | Number of spaces per tab | Optional, defaults to Prettier config | +| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | +| `--dockerized_use_sudo` | Use `sudo` for Docker commands | Optional | +| `-v / --verbosity` | Logging level | Default: INFO | + +--- + +## 15. MacOS screenshot helper — `save_screenshot.py` + +### What it does + +1. 
Prompts you to select a screen region (`⌘ + Ctrl + 4`). +2. Saves it as `screenshot.YYYY‑MM‑DD_HH‑MM‑SS.png` (or your chosen name). +3. Prints and copies the Markdown embed `<img src="path/to/file.png">`. + +### Flags + +| Flag | Purpose | +| --------------------- | ---------------------------------------- | +| `--dst_dir DIR` | Target directory (e.g. `notes/figures`). | +| `--filename NAME.png` | Override default timestamped name. | +| `--override` | Allow clobbering an existing file. | + +--- From c5b239e29136d44b0b7e9d5896052a573e6bf5d8 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 07:08:14 -0400 Subject: [PATCH 136/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../extract_headers_from_markdown.py | 4 +- dev_scripts_helpers/llms/llm_prompts.py | 69 ++++++++++++------- dev_scripts_helpers/llms/llm_transform.py | 2 +- helpers/hdocker.py | 1 + helpers/hmarkdown.py | 6 ++ helpers/hparser.py | 25 +++++-- 6 files changed, 76 insertions(+), 31 deletions(-) diff --git a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py index 1a29fba9c..6d975a10b 100755 --- a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py +++ b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py @@ -65,7 +65,7 @@ def _parse() -> argparse.ArgumentParser: description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) - hparser.add_input_output_args(parser) + hparser.add_input_output_args(parser, out_default="-") parser.add_argument( "--mode", type=str, @@ -85,7 +85,7 @@ def _parse() -> argparse.ArgumentParser: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hparser.init_logger_for_input_output_transform(args) + hparser.init_logger_for_input_output_transform(args, verbose=False) in_file_name, out_file_name 
= hparser.parse_input_output_args(args) # _extract_headers_from_markdown( diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 9971966e7..520e4b99c 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -948,29 +948,6 @@ def slide_to_bullet_points() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def slide_add_example_picture() -> _PROMPT_OUT: - """ """ - system = _MD_CONTEXT - system += r""" - I will give you markdown text - - You will - - Select the most important concepts in the text - - Print a TODO comment of less than 30 words suggesting what example picture - to add to give an intuition of the text - - The TODO is in the format `// TODO: <suggestion>` - - Suggest what tool to use e.g., (mermaid, tikz, graphviz dot) - """ - pre_transforms: Set[str] = set() - post_transforms = { - "remove_code_delimiters", - "remove_end_of_line_periods", - "remove_empty_lines", - } - post_container_transforms = ["append_text"] - return system, pre_transforms, post_transforms, post_container_transforms - - def slide_expand() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" @@ -1108,6 +1085,52 @@ def slide_smart_colorize() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def slide_add_figure() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + I will give you markdown text + + You will create a figure that illustrates the text using Graphviz dot. 
+
+    - If you are sure about the meaning of the variables use
+      - Circles for variables
+      - Rounded boxes for states
+    - If you are not sure, use rounded boxes for every variable
+
+    - If you need to use subscripts, use the LaTeX format, such as var_0
+
+    - Use pastel colors like
+      - Red: `#F4A6A6`, Orange: `#FFD1A6`, Green: `#B2E2B2`, Teal: `#A0D6D1`,
+      - Cyan: `#A6E7F4`, Blue: `#A6C8F4`, Violet: `#C6A6F4`, Brown: `#D2B48C`
+
+    - Use a template like:
+    ```graphviz
+    digraph BayesianFlow {
+        // rankdir=LR;
+        splines=true;
+        nodesep=1.0;
+        ranksep=0.75;
+        node [shape=box, style="rounded,filled", fontname="Helvetica", fontsize=12, penwidth=1.7];
+
+        // Node styles.
+
+        // Force ranks.
+
+        // Edges.
+    }
+    ```
+
+    Do not print anything other than the Graphviz code in a markdown format
+    """
+    pre_transforms: Set[str] = set()
+    post_transforms = {
+        "remove_code_delimiters",
+        "remove_end_of_line_periods",
+        "remove_empty_lines",
+    }
+    post_container_transforms = ["append_to_text"]
+    return system, pre_transforms, post_transforms, post_container_transforms
+
 # #############################################################################
 # Text.
 # #############################################################################
diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py
index c5392357a..28ee5d2e4 100755
--- a/dev_scripts_helpers/llms/llm_transform.py
+++ b/dev_scripts_helpers/llms/llm_transform.py
@@ -294,7 +294,7 @@ def _main(parser: argparse.ArgumentParser) -> None:
         out_txt = hmarkdo.md_clean_up(out_txt)
         out_txt = hmarkdo.format_markdown_slide(out_txt)
     #
-    if dshlllpr.to_run("append_text", post_container_transforms):
+    if dshlllpr.to_run("append_to_text", post_container_transforms):
         out_txt_tmp = []
         # Append the original text.
txt = hio.from_file(tmp_in_file_name) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 8c761d5cf..f005686f5 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -1672,6 +1672,7 @@ def run_dockerized_mermaid( # Get the container image. _ = force_rebuild container_image = "minlag/mermaid-cli" + dockerfile = "" # Convert files to Docker paths. is_caller_host = not hserver.is_inside_docker() use_sibling_container_for_callee = True diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 4bcd8f391..8ca55ae29 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -704,6 +704,12 @@ def header_tree_to_str( Only expand (i.e. recursively include children) for a node if it is part of the ancestry of the selected node. + :param tree: The tree to convert to a string. + :param ancestry: The ancestry of the selected node. + :param open_modifier: The modifier to use for the open of the selected node. + :param close_modifier: The modifier to use for the close of the selected node. + :param indent: The indent of the tree. + - Nodes not in the ancestry are included on one line (even if they have children). - The selected node (last in the ancestry) is included highlighted. diff --git a/helpers/hparser.py b/helpers/hparser.py index c10231feb..2b4ad9182 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -371,18 +371,33 @@ def parse_input_output_args( return in_file_name, out_file_name -def init_logger_for_input_output_transform(args: argparse.Namespace) -> None: +def init_logger_for_input_output_transform(args: argparse.Namespace, *, verbose: bool = True) -> None: """ Initialize the logger when input/output transformation is used. + + :param verbose: if `False`, set the log level to `CRITICAL` so that no + output is printed + ``` + 09:34:24 - INFO hdbg.py init_logger:1013 Saving log to file '/User... + 09:34:24 - INFO hdbg.py init_logger:1018 > cmd='/Users/saggese/src... 
+ 09:34:24 - INFO hparser.py parse_input_output_args:368 in_file_name='MSML610/Les... + 09:34:24 - INFO hparser.py parse_input_output_args:369 out_file_name='-' + ``` """ verbosity = args.log_level - # If the input is stdin, we don't want to print the command line or any - # other log messages, unless the user specified a more verbose log level. - if args.in_file_name == "-": + if not verbose: + # Unless user has specified DEBUG level, set the log level to `CRITICAL` + # so that no output is printed. if args.log_level == "INFO": verbosity = "CRITICAL" else: - print("cmd line: %s" % hdbg.get_command_line()) + # If the input is stdin, we don't want to print the command line or any + # other log messages, unless the user specified a more verbose log level. + if args.in_file_name == "-": + if args.log_level == "INFO": + verbosity = "CRITICAL" + else: + print("cmd line: %s" % hdbg.get_command_line()) hdbg.init_logger(verbosity=verbosity, use_exec_path=True, force_white=False) From 79c52ba93225554d0a717dcaa6b1c567826fc445 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 07:49:17 -0400 Subject: [PATCH 137/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../all.notes_toolchain.how_to_guide.md | 172 +++++++++++------- 1 file changed, 106 insertions(+), 66 deletions(-) diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md index 1249d6535..003acc195 100644 --- a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md @@ -122,23 +122,23 @@ ### CLI flags cheatsheet -- Flag: `--type {pdf,html,slides}` +- `--type {pdf,html,slides}` - Purpose: Specifies the output format - Notes: The "slides" option uses Beamer -- Flag: `--toc_type 
{none,pandoc_native,navigation}` +- `--toc_type {none,pandoc_native,navigation}` - Purpose: Determines the Table of Contents (TOC) style - Notes: The `navigation` option inserts slide-friendly breadcrumb frames -- Flag: `--filter_by_header "# Intro"` +- `--filter_by_header "# Intro"` - Purpose: Builds an artefact from a section subset - Notes: This is useful for testing -- Flag: `--filter_by_lines 120:250` +- `--filter_by_lines 120:250` - Purpose: Compiles only a specified range of lines - Notes: Accepts `None` as a sentinel value -- Flag: `--debug_on_error` +- `--debug_on_error` - Purpose: On Pandoc failure, generates a _.tex_ file and provides a helpful log - Notes: No additional notes -- Flag: `--script myrun.sh` +- `--script myrun.sh` - Purpose: Saves every shell command executed - Notes: Useful for reproducing build pipelines - Docker knobs: @@ -153,7 +153,8 @@ ### Worked examples - Slides with navigation breadcrumbs, keeping intermediate files for inspection -TODO(indro): `--toc_type navigation` fails because of the preprocess step. + +// TODO(indro): `--toc_type navigation` fails because of the preprocess step. ```bash > notes_to_pdf.py \ @@ -355,29 +356,42 @@ llm_transform.py -p list -i - -o - ### Flags -| Flag | Role | Notes | -| -------------------------------------------------------------------- | ------------------------------------------------------------- | ---------------------- | -| `-i / --input` | Source text (`-` = stdin) | — | -| `-o / --output` | Destination (`-` = stdout) | — | -| `-p / --prompt` | **Prompt tag** (`list`, `code_review`, `slide_colorize`, ...) 
| required | -| `-c / --compare` | Print _both_ original & transformed blocks to stdout | helpful for quick diff | -| `-b / --bold_first_level_bullets` | Post‑format tweak for slide prompts | | -| `-s / --skip-post-transforms` | Return raw LLM output, skip prettier/cleanup | | -| Docker flags (`--dockerized_force_rebuild`, `--dockerized_use_sudo`) | Control container lifecycle | +- `-i / --input` + - Role: Source text (`-` = stdin) + - Notes: None +- `-o / --output` + - Role: Destination (`-` = stdout) + - Notes: None +- `-p / --prompt` + - Role: Prompt tag (`list`, `code_review`, `slide_colorize`, ...) + - Notes: Required +- `-c / --compare` + - Role: Print both original & transformed blocks to stdout + - Notes: Helpful for quick diff +- `-b / --bold_first_level_bullets` + - Role: Post-format tweak for slide prompts + - Notes: None +- `-s / --skip-post-transforms` + - Role: Return raw LLM output, skip prettier/cleanup + - Notes: None +- Docker flags + - Flags: `--dockerized_force_rebuild`, `--dockerized_use_sudo` + - Role: Control container lifecycle + - Notes: None ### Example recipes -- **Turn a code file into a review checklist** +- Turn a code file into a review checklist ```bash - llm_transform.py -i foo.py -o cfile -p code_review + > llm_transform.py -i foo.py -o cfile -p code_review vim cfile ``` - **Color‑accent the bold bullets for slides** ```bash - llm_transform.py -i deck.md -o - -p slide_colorize | tee deck.color.md + > llm_transform.py -i deck.md -o - -p slide_colorize | tee deck.color.md ``` - **Inline use in Vim** – visual‑select a block, then: @@ -386,9 +400,7 @@ llm_transform.py -p list -i - -o - :'<,'>!llm_transform.py -p summarize -i - -o - ``` ---- - -## 6. 
Pandoc Wrapper — `run_pandoc.py` +## `run_pandoc.py` ### What the script does @@ -398,27 +410,37 @@ llm_transform.py -p list -i - -o - ### Quickstart commands -| Goal | Command | -| ------------------------------------- | -------------------------------------------- | -| Convert a Markdown file to LaTeX | `run_pandoc.py -i note.md -o note.tex` | -| Same, but stream from STDIN to STDOUT | `cat note.md \| run_pandoc.py -i - -o -` | -| Inside **Vim** (visual range) | `:'<,'>!run_pandoc.py -i - -o - -v CRITICAL` | +- Convert a Markdown file to LaTeX + ``` + > run_pandoc.py -i note.md -o note.tex + ``` +- Same, but stream from `stdin` to `stdout` + ``` + > cat note.md | run_pandoc.py -i - -o - + ``` +- Inside Vim (visual range) + ``` + :<,'>!run_pandoc.py -i - -o - -v CRITICAL + ``` -> **Tip :** pass `-v CRITICAL` to silence helper logging when piping into -> editors. +**Tip :** pass `-v CRITICAL` to silence helper logging when piping into editors. ### Flags -| Flag | Default | Meaning | -| ------------------ | --------------------- | --------------------------------------------------------- | -| `-i / --input` | `-` | Source file or `-` for STDIN. | -| `-o / --output` | `-` | Destination file or `-` for STDOUT. | -| `--action` | `convert_md_to_latex` | Transformation to apply. Future‑proofed for more actions. | -| `-v / --log_level` | `INFO` | Standard helper‑library verbosity. | - ---- +- `-i / --input` + - Default: `-` + - Meaning: Source file or `-` for STDIN +- `-o / --output` + - Default: `-` + - Meaning: Destination file or `-` for STDOUT +- `--action` + - Default: `convert_md_to_latex` + - Meaning: Transformation to apply. Future-proofed for more actions +- `-v / --log_level` + - Default: `INFO` + - Meaning: Standard helper-library verbosity -## 7. Automate notes transformations — `transform_notes.py` +## `transform_notes.py` ### What it does @@ -426,42 +448,60 @@ llm_transform.py -p list -i - -o - - Applies a named **action** (`-a/--action`). 
- Writes the result to the given output (in‑place, file, or `-`). -### Supported actions - -| Run `-a list` to print. | Tag | Effect | Typical Vim one‑liner | | -| -------------------------------------------------------------- | -------------------------------------------------- | ------------------------ | --------------------- | --- | -| -------------------------------------------------------------- | -| -------------------------------------------------- | | `toc` | Generate a bullet | -| TOC (top‑level by default) | `:!transform_notes.py -a toc -i % -l 1` | | -| `format_headers` | Re‑flow / indent headers (≤ `--max_lev`) | -| `:%!transform_notes.py -a format -i - --max_lev 3` | | `increase_headers_level` | -| Bump all headers down one level | `:%!transform_notes.py -a increase -i -` | | -| `md_list_to_latex` | Convert a Markdown list to LaTeX `\begin{itemize}` | -| `:%!transform_notes.py -a md_list_to_latex -i -` | | `md_*` family | Formatting | -| clean‑ups (bold bullets, colourise bold text, etc.) | see `-a list` | +### Example of Supported Actions + +- Run `-a list` to print a list of the valid + +- `toc` + - Generate a bullet TOC (top-level by default) + - Typical Vim one-liner: `:!transform_notes.py -a toc -i % -l 1` +- `format_headers` + - Re-flow / indent headers (up to `--max_lev`) + - Typical Vim one-liner: `:%!transform_notes.py -a format -i - --max_lev 3` +- `increase_headers_level` + - Bump all headers down one level + - Typical Vim one-liner: `:%!transform_notes.py -a increase -i -` +- `md_list_to_latex` + - Convert a Markdown list to LaTeX `\begin{itemize}` + - Typical Vim one-liner: `:%!transform_notes.py -a md_list_to_latex -i -` +- `md_*` family + - Formatting clean-ups (bold bullets, colorize bold text, etc.) 
+ - Additional Information: See `-a list` for more details ### Examples -```bash -# Re‑flow & clean a file in place -transform_notes.py -a md_format -i notes/lecture.txt --in_place +- Re‑flow & clean a file in place + ```bash + > transform_notes.py -a md_format -i notes/lecture.txt --in_place + ``` -# Generate a 2‑level TOC to STDOUT -transform_notes.py -a toc -i notes/lecture.md -o - -l 2 +- Generate a 2‑level TOC to STDOUT + ```bash + > transform_notes.py -a toc -i notes/lecture.md -o - -l 2 + ``` -# Tidy ChatGPT‑generated Markdown (visual mode in Vim) -:'<,'>!transform_notes.py -i - -o - -a md_fix_chatgpt_output -``` +- Tidy ChatGPT‑generated Markdown (visual mode in Vim) + ``` + :'<,'>!transform_notes.py -i - -o - -a md_fix_chatgpt_output + ``` ### Flags -| Flag | Default | Purpose | -| ---------------- | ------------ | -------------------------------------------------- | -| `-a / --action` | _(required)_ | Choose the transformation. | -| `-l / --max_lev` | `5` | Header depth for `format_headers`. | -| `-i / --input` | `-` | File path or `-` (STDIN). | -| `-o / --output` | `-` | File path or `-` (STDOUT). | -| `--in_place` | _False_ | Overwrite input file instead of writing elsewhere. 
| +- `-a / --action` + - Default: Required + - Purpose: Choose the transformation +- `-l / --max_lev` + - Default: 5 + - Purpose: Header depth for `format_headers` +- `-i / --input` + - Default: `-` + - Purpose: File path or `-` (STDIN) +- `-o / --output` + - Default: `-` + - Purpose: File path or `-` (STDOUT) +- `--in_place` + - Default: False + - Purpose: Overwrite input file instead of writing elsewhere --- From eef566de232c1d0be631b9d82310dd21ada53522 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 08:21:42 -0400 Subject: [PATCH 138/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 326 +++++++++--------- .../all.notes_toolchain.how_to_guide.md | 57 ++- .../git/all.ai_review.how_to_guide.md | 123 +++++++ 3 files changed, 301 insertions(+), 205 deletions(-) create mode 100644 docs/work_tools/git/all.ai_review.how_to_guide.md diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 520e4b99c..ee90155ef 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -601,105 +601,30 @@ def code_write_1_unit_test() -> _PROMPT_OUT: post_container_transforms: List[str] = [] return system, pre_transforms, post_transforms, post_container_transforms - # ############################################################################# -# review. +# Latex # ############################################################################# -def _review_from_file(file: str) -> _PROMPT_OUT: - """ - Review the code for refactoring opportunities. - """ - system = _CODING_CONTEXT - # Load the reference file. - reference_txt = hio.from_file(file) - reference_txt = hmarkdo.add_line_numbers(reference_txt) - # TODO(gp): Remove table of contents between <!-- toc --> and <!-- tocstop -->. 
- system += rf""" - You will review the code and make sure it follows the rules in the reference below: - - {reference_txt} - - - Each rule to follow is referred by <rule_name> and represented as - <header-line_number> with the name of the header of the section in the - reference file (e..g, 'Naming') and the line number (e.g., "Naming-7") - - Only print the violation of the rules when you are absolutely sure that - it is a violation - - For each violation of a rule, you will print the line number of the code - and the proposed improvement in the following style: - <line_number>: <rule_name>: <short description of the proposed improvement> - - Do not print any other comment, besides the violation of the rules - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - post_container_transforms = ["convert_file_names"] - return system, pre_transforms, post_transforms, post_container_transforms - - -def review_llm() -> _PROMPT_OUT: - """ - Review the code using LLMs. - """ - # Load the reference file. - #helper_root = hgit.find_helpers_root() - #file_name = os.path.join(helper_root, "docs/code_guidelines/all.llm_style_review_guidelines.reference.md") - # TODO(gp): This doesn't work for unknown reasons. - file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") - return _review_from_file(file_name) - - -def review_linter() -> _PROMPT_OUT: - """ - Review the code for linter style (still using LLMs). +_LATEX_CONTEXT = r""" + You are a proficient technical writer. + I will pass you a chunk of Latex code. """ - # Load the reference file. - #helper_root = hgit.find_helpers_root() - #file_name = os.path.join(helper_root, "docs/code_guidelines/all.linter_style_review_guidelines.reference.md") - file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") - return _review_from_file(file_name) -def review_correctness() -> _PROMPT_OUT: - """ - Review the code for correctness. 
- """ - system = _CODING_CONTEXT +def latex_rewrite() -> _PROMPT_OUT: + system = _LATEX_CONTEXT system += r""" - You will review the code and make sure it is: - - correct - - clean and readable - - efficient - - robust - - maintainable - - Do not print any comment, besides for each point of improvement, you will - print the line number and the proposed improvement in the following style: - <line_number>: <short description of the proposed improvement> + - Rewrite the text passed to increase clarity and readability. + - Maintain the structure of the text as much as possible, in terms of bullet + points and their indentation """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - post_container_transforms = ["convert_file_names"] + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = [] return system, pre_transforms, post_transforms, post_container_transforms -def review_refactoring() -> _PROMPT_OUT: - """ - Review the code for refactoring opportunities. - """ - system = _CODING_CONTEXT - system += r""" - You will review the code and look for opportunities to refactor the code, - by removing redundancy and copy-pasted code. - - Do not print any comment, besides for each point of improvement, you will - print the line number and the proposed improvement in the following style: - <line_number>: <short description of the proposed improvement> - """ - pre_transforms = {"add_line_numbers"} - post_transforms = {"convert_to_vim_cfile"} - post_container_transforms = ["convert_file_names"] - return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -718,7 +643,7 @@ def md_rewrite() -> _PROMPT_OUT: system += r""" - Rewrite the text passed to increase clarity and readability. 
- Maintain the structure of the text as much as possible, in terms of bullet - points and their indentation + points and their indentation. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -740,8 +665,6 @@ def md_summarize_short() -> _PROMPT_OUT: def md_expand() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Maintain the structure of the text and keep the content of the existing text @@ -762,6 +685,7 @@ def md_expand() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +# TODO(gp): Move to template. def md_clean_up_how_to_guide() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" @@ -804,7 +728,7 @@ def md_convert_table_to_bullet_points() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - Convert the table passed to bullet points using multiple levels of bullets. - - Remove the formatting (e.g., bold, italic) + - Remove the formatting (e.g., bold, italic). Make sure to lose any information. """ @@ -841,40 +765,10 @@ def md_remove_formatting() -> _PROMPT_OUT: post_container_transforms = ["format_markdown"] return system, pre_transforms, post_transforms, post_container_transforms -# ############################################################################# -# Latex -# ############################################################################# - -_LATEX_CONTEXT = r""" - You are a proficient technical writer. - I will pass you a chunk of Latex code. - """ - - -def latex_rewrite() -> _PROMPT_OUT: - system = _LATEX_CONTEXT - system += r""" - - Rewrite the text passed to increase clarity and readability. 
- - Maintain the structure of the text as much as possible, in terms of bullet - points and their indentation - """ - pre_transforms: Set[str] = set() - post_transforms = {"remove_code_delimiters"} - post_container_transforms = [] - return system, pre_transforms, post_transforms, post_container_transforms - - -# ############################################################################# -# Doc. -# ############################################################################# - - -def doc_create_bullets() -> _PROMPT_OUT: +def md_create_bullets() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Convert the following markdown text into bullet points - Use multiple levels of bullets, if needed @@ -890,11 +784,9 @@ def doc_create_bullets() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def doc_summarize_short() -> _PROMPT_OUT: +def md_summarize_short() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Write 3 bullet points that summarize the text - Each bullet point should be at most 30 words @@ -909,14 +801,136 @@ def doc_summarize_short() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def doc_rewrite() -> _PROMPT_OUT: - system = _MD_CONTEXT +# ############################################################################# +# Misc +# ############################################################################# + +# One-off transforms. 
+ +def misc_categorize_topics() -> _PROMPT_OUT: + system = r""" + For each of the following title of article, find the best topic among the + following ones: + + LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python + Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, + Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, + Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, + Marketing and Sales, Probabilistic Programming, Code Refactoring, Open + Source + + Only print + - the first 3 words of the title + - a separator | + - the topic + and don't print any explanation. + + if you don't know the topic, print "unknown" + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + +# ############################################################################# +# review. +# ############################################################################# + + +def _review_from_file(file: str) -> _PROMPT_OUT: + """ + Review the code for refactoring opportunities. + """ + system = _CODING_CONTEXT + # Load the reference file. + reference_txt = hio.from_file(file) + reference_txt = hmarkdo.add_line_numbers(reference_txt) + # TODO(gp): Remove table of contents between <!-- toc --> and <!-- tocstop -->. 
+ system += rf""" + You will review the code and make sure it follows the rules in the reference below: + + {reference_txt} + + - Each rule to follow is referred by <rule_name> and represented as + <header-line_number> with the name of the header of the section in the + reference file (e..g, 'Naming') and the line number (e.g., "Naming-7") + - Only print the violation of the rules when you are absolutely sure that + it is a violation + - For each violation of a rule, you will print the line number of the code + and the proposed improvement in the following style: + <line_number>: <rule_name>: <short description of the proposed improvement> + - Do not print any other comment, besides the violation of the rules + """ + pre_transforms = {"add_line_numbers"} + post_transforms = {"convert_to_vim_cfile"} + post_container_transforms = ["convert_file_names"] + return system, pre_transforms, post_transforms, post_container_transforms + + +def review_llm() -> _PROMPT_OUT: + """ + Review the code using LLMs. + """ + # Load the reference file. + #helper_root = hgit.find_helpers_root() + #file_name = os.path.join(helper_root, "docs/code_guidelines/all.llm_style_review_guidelines.reference.md") + # TODO(gp): This doesn't work for unknown reasons. + file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") + return _review_from_file(file_name) + + +def review_linter() -> _PROMPT_OUT: + """ + Review the code for linter style (still using LLMs). + """ + # Load the reference file. + #helper_root = hgit.find_helpers_root() + #file_name = os.path.join(helper_root, "docs/code_guidelines/all.linter_style_review_guidelines.reference.md") + file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") + return _review_from_file(file_name) + + +def review_correctness() -> _PROMPT_OUT: + """ + Review the code for correctness. + """ + system = _CODING_CONTEXT system += r""" - - Rewrite the text passed to increase clarity and readability. 
- - Maintain the structure of the text as much as possible, in terms of bullet - points and their indentation + You will review the code and make sure it is: + - correct + - clean and readable + - efficient + - robust + - maintainable + + Do not print any comment, besides for each point of improvement, you will + print the line number and the proposed improvement in the following style: + <line_number>: <short description of the proposed improvement> """ - return md_rewrite() + pre_transforms = {"add_line_numbers"} + post_transforms = {"convert_to_vim_cfile"} + post_container_transforms = ["convert_file_names"] + return system, pre_transforms, post_transforms, post_container_transforms + + +def review_refactoring() -> _PROMPT_OUT: + """ + Review the code for refactoring opportunities. + """ + system = _CODING_CONTEXT + system += r""" + You will review the code and look for opportunities to refactor the code, + by removing redundancy and copy-pasted code. + + Do not print any comment, besides for each point of improvement, you will + print the line number and the proposed improvement in the following style: + <line_number>: <short description of the proposed improvement> + """ + pre_transforms = {"add_line_numbers"} + post_transforms = {"convert_to_vim_cfile"} + post_container_transforms = ["convert_file_names"] + return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -930,8 +944,6 @@ def slide_to_bullet_points() -> _PROMPT_OUT: """ system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Convert the following markdown text into bullet points - Make sure that the text is clean and readable @@ -951,8 +963,6 @@ def slide_to_bullet_points() -> _PROMPT_OUT: def slide_expand() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Maintain the structure of the text and keep the content of the existing 
text @@ -976,8 +986,6 @@ def slide_expand() -> _PROMPT_OUT: def slide_reduce() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Maintain the structure of the text - Keep all the figures @@ -1001,8 +1009,6 @@ def slide_reduce() -> _PROMPT_OUT: def slide_reduce_bullets() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Maintain the structure of the text - Keep all the figures @@ -1024,8 +1030,6 @@ def slide_reduce_bullets() -> _PROMPT_OUT: def slide_bold() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - Not change the text or the structure of the text - Highlight in bold only the most important phrases in the text—those that @@ -1045,10 +1049,7 @@ def slide_bold() -> _PROMPT_OUT: def slide_smart_colorize() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will: - - Not change the text or the structure of the text - Use the \red{...}, \green{...}, \blue{...}, \violet{} to highlight common chunks of the expression and text @@ -1088,8 +1089,6 @@ def slide_smart_colorize() -> _PROMPT_OUT: def slide_add_figure() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - I will give you markdown text - You will create a figure that illustrates the text using Graphviz dot. - If you are sure about the meaning of the variables use @@ -1131,10 +1130,12 @@ def slide_add_figure() -> _PROMPT_OUT: post_container_transforms = ["append_to_text"] return system, pre_transforms, post_transforms, post_container_transforms + # ############################################################################# # Text. # ############################################################################# +# Operate on pure text, not markdown. 
#def text_expand() -> _PROMPT_OUT: # """ @@ -1148,6 +1149,7 @@ def slide_add_figure() -> _PROMPT_OUT: def text_rephrase() -> _PROMPT_OUT: """ + Apply complex transformations to the text. """ if os.path.exists("text_rephrase.txt"): system = hio.from_file("text_rephrase.txt") @@ -1159,33 +1161,13 @@ def text_rephrase() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -# ############################################################################# - - -def scratch_categorize_topics() -> _PROMPT_OUT: - system = r""" - For each of the following title of article, find the best topic among the - following ones: - - LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python - Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, - Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, - Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, - Marketing and Sales, Probabilistic Programming, Code Refactoring, Open - Source - - Only print - - the first 3 words of the title - - a separator | - - the topic - and don't print any explanation. - - if you don't know the topic, print "unknown" +def text_rewrite() -> _PROMPT_OUT: + system += r""" + - Rewrite the text passed to increase clarity and readability. 
+ - Maintain the structure of the text as much as possible, in terms of bullet + points and their indentation """ - pre_transforms: Set[str] = set() - post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["format_markdown"] - return system, pre_transforms, post_transforms, post_container_transforms + return md_rewrite() # ############################################################################# diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md index 003acc195..4eb79fa6c 100644 --- a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md @@ -503,56 +503,47 @@ llm_transform.py -p list -i - -o - - Default: False - Purpose: Overwrite input file instead of writing elsewhere ---- - -## 8. Scrape headers from a markdown — `extract_headers_from_markdown.py` +## `extract_headers_from_markdown.py` ### Goal Turn a Markdown document into either: - -- a **plain list** of headers, -- a **nested header map**, or +- a **plain list** of headers +- a **nested header map** - a \*_Vim_ quick‑fix\*\* (`cfile`) that lets you jump between sections with `:cnext`. ### Examples -```bash -# Human‑readable map (levels 1‑3) to STDOUT -extract_headers_from_markdown.py -i README.md -o - --mode list --max-level 3 - -# Build a quick‑fix file and open Vim on it -extract_headers_from_markdown.py -i README.md -o headers.cfile --mode cfile -vim -c "cfile headers.cfile" -``` - -### Flags +- Human‑readable map (levels 1‑3) to `stdout` + ```bash + > extract_headers_from_markdown.py -i README.md -o - --mode list --max-level 3 + ``` -| Flag | Default | Meaning | -| ------------- | ------- | ------------------------------ | -| `--mode` | `list` | `list`, `headers`, or `cfile`. | -| `--max-level` | `3` | Maximum `#` depth to parse. 
| +- Build a quick‑fix file and open Vim on it + ```bash + > extract_headers_from_markdown.py -i README.md -o headers.cfile --mode cfile + > vim -c "cfile headers.cfile" + ``` ---- +## `dockerized_tikz_to_bitmap.py` -## 9. TikZ to Bitmap — `dockerized_tikz_to_bitmap.py` +- Converts ### Examples -```bash -# Plain 300 DPI conversion -./dockerized_tikz_to_bitmap.py -i figure.tikz -o figure.png - -# Custom ImageMagick options (e.g. 600 DPI) -./dockerized_tikz_to_bitmap.py -i fig.tikz -o fig.png -- -density 600 -quality 90 -``` - -_Any extra tokens after `--` are passed verbatim to `convert`._ +- Plain 300 DPI conversion + ```bash + > dockerized_tikz_to_bitmap.py -i figure.tikz -o figure.png + ``` ---- +- Custom ImageMagick options (e.g. 600 DPI) + ```bash + > dockerized_tikz_to_bitmap.py -i fig.tikz -o fig.png -- -density 600 -quality 90 + ``` + - Any extra tokens after `--` are passed verbatim to `convert` -## 10. Graphviz Renderer — `dockerized_graphviz.py` +## `dockerized_graphviz.py` ### What it does diff --git a/docs/work_tools/git/all.ai_review.how_to_guide.md b/docs/work_tools/git/all.ai_review.how_to_guide.md new file mode 100644 index 000000000..dd43a2a9e --- /dev/null +++ b/docs/work_tools/git/all.ai_review.how_to_guide.md @@ -0,0 +1,123 @@ + +# Use templates +- We use templates for code and documentation to show and describe how a document + or code should look like, e.g., + - `template_code.py` shows our coding style + - `template_unit_test.py` shows how our unit tests look like + - `template_doc.how_to_guide.md` shows how an Diataxis how to guide should look + like + +- The same template can have multiple use cases: + - Humans to understand how to write documentation and code + - Humans as boilerplate (e.g., copy the template and improve it) + - LLMs as reference style to apply transforms + - LLMs to report violations of coding styles + - LLMs as boilerplate (e.g., explain this code using this template) + +# Tools + +## llm_transform.py +- There are 
several classes of transforms + - `code_*`: transform Python code + - `code_fix_*`: fix a specific chunk of code according to a prompt + - `code_transform_*`: apply a series of transformations + - `code_write_*`: write from scratch + - `doc_*`: process free form (not markdown) text + - TODO(gp): Is it worth it, or should be merged with `md_*` targets + - `latex_*`: process Latex code + - `md_*`: process markdown `md` text and `txt` notes + - TODO(gp): + - `review_*`: process Python code to extract reviews + - `scratch_*`: misc + - `slide_*`: process markdown slides in `txt` format + +- You can list the available transformations with: + ``` + > llm_transform.py -p list + # Available prompt tags: + code_fix_by_using_f_strings + code_fix_by_using_perc_strings + code_fix_code + code_fix_comments + code_fix_complex_assignments + code_fix_docstrings + code_fix_from_imports + code_fix_function_type_hints + code_fix_log_string + code_fix_logging_statements + code_fix_star_before_optional_parameters + code_fix_unit_test + code_transform_apply_csfy_style + code_transform_apply_linter_instructions + code_transform_remove_redundancy + code_write_1_unit_test + code_write_unit_test + doc_create_bullets + doc_rewrite + doc_summarize_short + latex_rewrite + md_clean_up_how_to_guide + md_convert_table_to_bullet_points + md_convert_text_to_bullet_points + md_expand + md_format + md_remove_formatting + md_rewrite + md_summarize_short + review_correctness + review_linter + review_llm + review_refactoring + scratch_categorize_topics + slide_add_figure + slide_bold + slide_expand + slide_reduce + slide_reduce_bullets + slide_smart_colorize + slide_to_bullet_points + test + text_rephrase + ``` + +## `transform_notes.py` + +- These transformations don't need LLMs and are implemented as code + +- You can see the available transforms + ``` + > transform_notes.py -a list + test: compute the hash of a string to test the flow + format_headers: format the headers + increase_headers_level: 
increase the level of the headers + md_list_to_latex: convert a markdown list to a latex list + md_remove_formatting: remove the formatting + md_clean_up: clean up removing all weird characters + md_only_format: reflow the markdown + md_colorize_bold_text: colorize the bold text + md_format: reflow the markdown and colorize the bold text + ``` + +# Causify flow + +## A reviewer workflow + +- The + +## Editing workflows + +- Use `llm_transform.py` to + - edit files manually applying specific transformations to chunks of code + - apply transforms to an entire file + +- +There are 3 types of transforms +llm: executed by an LLM since they are difficult to implement otherwise +linter_llm: exected by an LLM but they should be moved to the linter (mainly formatting) +linter: linter + +ai_review.py to generate TODOs +inject_todos.py to add TODOs +apply_todos.py to execute TODOs + +There is detecting the problems and fixing the problems From 9d421bafc0ed576cd393fb57f1282bd0c3325ef8 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 08:41:10 -0400 Subject: [PATCH 139/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../git/all.ai_review.how_to_guide.md | 62 ++++++++++++++----- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/docs/work_tools/git/all.ai_review.how_to_guide.md b/docs/work_tools/git/all.ai_review.how_to_guide.md index dd43a2a9e..4ef1499df 100644 --- a/docs/work_tools/git/all.ai_review.how_to_guide.md +++ b/docs/work_tools/git/all.ai_review.how_to_guide.md @@ -1,18 +1,38 @@ +# Operations + +- There are several operations we want to perform + - Apply a precise transformation to a chunk of text + - E.g., create a unit test + - Extract comments and lints in the form of a `cfile` + - E.g., lint or AI review based on certain criteria + - Apply a set of transformations (e.g., styling / formatting code) to an entire + file + - 
Apply modifications from a `cfile` (e.g., from linter and AI review) to a + file + - Add TODOs from a `cfile` to Python or markdown files + - Rewrite an entire markdown to fix English mistakes without changing its + structure + - Reformat an entire markdown or Python using LLMs or code # Use templates - We use templates for code and documentation to show and describe how a document or code should look like, e.g., - `template_code.py` shows our coding style - `template_unit_test.py` shows how our unit tests look like - - `template_doc.how_to_guide.md` shows how an Diataxis how to guide should look - like - -- The same template can have multiple use cases: - - Humans to understand how to write documentation and code - - Humans as boilerplate (e.g., copy the template and improve it) - - LLMs as reference style to apply transforms - - LLMs to report violations of coding styles - - LLMs as boilerplate (e.g., explain this code using this template) + - `template_doc.how_to_guide.md` shows how a Diataxis how to guide should be + structured and look like + - `template_doc.reference.md` shows how an Diataxis how to guide should be + structured and look like + +- The same template can have multiple applications for: + - Humans: + - Understand how to write documentation and code + - As boilerplate (e.g., "copy the template and customize it to achieve a + certain goal") + - LLMs: + - As reference style to apply transforms + - To report violations of coding styles + - As boilerplate (e.g., "explain this piece of code using this template") # Tools @@ -100,18 +120,14 @@ # Causify flow -## A reviewer workflow - -- The - ## Editing workflows - Use `llm_transform.py` to - - edit files manually applying specific transformations to chunks of code - - apply transforms to an entire file + - Edit files manually applying specific transformations to chunks of code + - Apply transforms to an entire file - -There are 3 types of transforms +There are 3 types of transforms / review llm: 
executed by an LLM since they are difficult to implement otherwise linter_llm: exected by an LLM but they should be moved to the linter (mainly formatting) linter: linter @@ -121,3 +137,17 @@ inject_todos.py to add TODOs apply_todos.py to execute TODOs There is detecting the problems and fixing the problems + +## A reviewer workflow + +- This can be used by the author of the code or by a reviewer + +- Go to the Git branch with the code +- Check which files are modified + ``` + > i git_branch_diff_with -t base --only-print-files + ``` +- Run `ai_review.py` on each file to generate a list of comments on the code + - This is equivalent to running a `review` target with `llm_transform.py` + (e.g., `llm_transform.py -p review_*`) but it is a separated flow for clarify +- This generates a `cfile` with a list of comments comments From 9403adefdbf1f9ab93d3993e23901d2460000d92 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 10:17:11 -0400 Subject: [PATCH 140/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 29 +- .../all.notes_toolchain.how_to_guide.md | 296 +++++++++--------- .../git/all.ai_review.how_to_guide.md | 101 ++++-- 3 files changed, 231 insertions(+), 195 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index ee90155ef..d90d22360 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -784,21 +784,21 @@ def md_create_bullets() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms -def md_summarize_short() -> _PROMPT_OUT: - system = _MD_CONTEXT - system += r""" - You will: - - Write 3 bullet points that summarize the text - - Each bullet point should be at most 30 words +# def md_summarize_short() -> _PROMPT_OUT: +# system = _MD_CONTEXT +# system += r""" 
+# You will: +# - Write 3 bullet points that summarize the text +# - Each bullet point should be at most 30 words - Print only the markdown without any explanation. - """ - pre_transforms: Set[str] = set() - post_transforms = { - "remove_end_of_line_periods", - } - post_container_transforms = ["format_markdown"] - return system, pre_transforms, post_transforms, post_container_transforms +# Print only the markdown without any explanation. +# """ +# pre_transforms: Set[str] = set() +# post_transforms = { +# "remove_end_of_line_periods", +# } +# post_container_transforms = ["format_markdown"] +# return system, pre_transforms, post_transforms, post_container_transforms # ############################################################################# @@ -1162,6 +1162,7 @@ def text_rephrase() -> _PROMPT_OUT: def text_rewrite() -> _PROMPT_OUT: + system = "" system += r""" - Rewrite the text passed to increase clarity and readability. - Maintain the structure of the text as much as possible, in terms of bullet diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md index 4eb79fa6c..957888400 100644 --- a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md @@ -566,156 +566,116 @@ rendering across systems. 
### Quickstart recipes -| Goal | Command | -| --------------------- | ------------------------------------------------------------------------------ | -| Convert DOT to PNG | `graphviz_wrapper.py -i diagram.dot -o diagram.png` | -| Rebuild Docker image | `graphviz_wrapper.py -i diagram.dot -o diagram.png --dockerized_force_rebuild` | -| Use `sudo` for Docker | `graphviz_wrapper.py -i diagram.dot -o diagram.png --dockerized_use_sudo` | - -### CLI flags cheatsheet - -| Flag | Purpose | Notes | -| ---------------------------- | ---------------------------- | ------------- | -| `-i / --input` | Path to input `.dot` file | **required** | -| `-o / --output` | Output path for `.png` image | **required** | -| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | -| `--dockerized_use_sudo` | Run Docker with `sudo` | Optional | -| `-v / --verbosity` | Logging verbosity | Default: INFO | - ---- +- Convert DOT to PNG + ``` + > graphviz_wrapper.py -i diagram.dot -o diagram.png + ``` +- Rebuild Docker image + ``` + > graphviz_wrapper.py -i diagram.dot -o diagram.png --dockerized_force_rebuild + ``` +- Use `sudo` for Docker + ```bash + > graphviz_wrapper.py -i diagram.dot -o diagram.png --dockerized_use_sudo + ``` -## 11. LaTeX Renderer — `dockerized_latex.py` +## dockerized_latex.py ### What it does -Compiles a LaTeX `.tex` file into a PDF using `pdflatex` inside a Docker -container. -Automatically rebuilds the Docker image if needed. - -> ```bash -> latex_wrapper.py --input doc.tex --output doc.pdf -> ``` - -Supports optional rerun of LaTeX for proper references or table of contents -generation. - -### Most used flags - -- `--input`: LaTeX source file to compile -- `--output`: Output PDF path -- `--run_latex_again`: Compile the LaTeX file twice -- `--dockerized_force_rebuild`: Force container rebuild -- `--dockerized_use_sudo`: Run Docker with `sudo` +- Compiles a LaTeX `.tex` file into a PDF using `pdflatex` inside a Docker + container. 
+- Automatically rebuilds the Docker image if needed. +- Supports optional rerun of LaTeX for proper references or table of contents + generation + ```bash + > latex_wrapper.py --input doc.tex --output doc.pdf + ``` ### Quickstart recipes -| Goal | Command | -| ------------------------ | ------------------------------------------------------------------------- | -| Compile `.tex` to `.pdf` | `latex_wrapper.py -i report.tex -o report.pdf` | -| Rebuild Docker image | `latex_wrapper.py -i report.tex -o report.pdf --dockerized_force_rebuild` | -| Use `sudo` for Docker | `latex_wrapper.py -i report.tex -o report.pdf --dockerized_use_sudo` | -| Run LaTeX twice | `latex_wrapper.py -i paper.tex -o paper.pdf --run_latex_again` | - -### CLI flags cheatsheet - -| Flag | Purpose | Notes | -| ---------------------------- | -------------------------- | ----------------------------- | -| `-i / --input` | Path to input `.tex` file | **required** | -| `-o / --output` | Output PDF file path | **required** | -| `--run_latex_again` | Run LaTeX a second time | Optional, useful for TOC/refs | -| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | -| `--dockerized_use_sudo` | Run Docker with `sudo` | Optional | -| `-v / --verbosity` | Logging verbosity | Default: INFO | - ---- +- Compile `.tex` to `.pdf` + ``` + > latex_wrapper.py -i report.tex -o report.pdf + ``` +- Rebuild Docker image + ``` + > latex_wrapper.py -i report.tex -o report.pdf --dockerized_force_rebuild + ``` +- Use `sudo` for Docker + ``` + > latex_wrapper.py -i report.tex -o report.pdf --dockerized_use_sudo + ``` +- Run LaTeX twice + ``` + > latex_wrapper.py -i paper.tex -o paper.pdf --run_latex_again + ``` -## 12. Mermaid Renderer — `dockerized_mermaid.py` +## dockerized_mermaid.py ### What it does -Renders Mermaid `.mmd` or `.md` diagrams into image files using a Dockerized -container. 
- -> ```bash -> mermaid_wrapper.py --input flowchart.mmd --output flowchart.png -> ``` - -Automatically sets output to match input name if `--output` is omitted. - -### Most used flags - -- `--input`: Source Mermaid file -- `--output`: Destination image file (optional) -- `--dockerized_force_rebuild`: Rebuild Docker image -- `--dockerized_use_sudo`: Use `sudo` for Docker - -### Quickstart recipes +- Renders Mermaid `.mmd` or `.md` diagrams into image files using a Dockerized + container. -| Goal | Command | -| ----------------------------- | ----------------------------------------------------------------------------- | -| Render Mermaid diagram | `mermaid_wrapper.py -i diagram.mmd -o diagram.png` | -| Use input as output (default) | `mermaid_wrapper.py -i diagram.mmd` | -| Rebuild container | `mermaid_wrapper.py -i diagram.mmd -o diagram.png --dockerized_force_rebuild` | -| Use `sudo` for Docker | `mermaid_wrapper.py -i diagram.mmd -o diagram.png --dockerized_use_sudo` | + ```bash + > mermaid_wrapper.py --input flowchart.mmd --output flowchart.png + ``` -### CLI flags cheatsheet +- Automatically sets output to match input name if `--output` is omitted -| Flag | Purpose | Notes | -| ---------------------------- | ---------------------------------- | -------------------------- | -| `-i / --input` | Path to input `.mmd` or `.md` file | **required** | -| `-o / --output` | Output image file | Defaults to input filename | -| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | -| `--dockerized_use_sudo` | Run Docker with `sudo` | Optional | -| `-v / --verbosity` | Logging verbosity | Default: INFO | ---- +- Mermaid diagram + ``` + > mermaid_wrapper.py -i diagram.mmd -o diagram.png + ``` +- Use input as output (default) + ``` + > mermaid_wrapper.py -i diagram.mmd + ``` +- Rebuild container + ``` + > mermaid_wrapper.py -i diagram.mmd -o diagram.png --dockerized_force_rebuild + ``` +- Use `sudo` for Docker + ``` + > mermaid_wrapper.py -i diagram.mmd -o 
diagram.png --dockerized_use_sudo + ``` -## 13. Pandoc Renderer — `dockerized_pandoc.py` +## dockerized_pandoc.py ### What it does Converts documents using `pandoc` inside a Docker container. Supports output to Beamer slides, PDFs, and more with custom CLI flags. -> ```bash +```bash > pandoc_wrapper.py --input notes.md --output slides.pdf -- docker_args... -> ``` +``` Internally builds a Docker container and passes the full `pandoc` command string. -### Most used flags - -- `--input`: source file (e.g., `.md`, `.txt`) -- `--output`: output file (e.g., `.pdf`, `.html`) -- `--container_type`: use `pandoc_only`, `pandoc_latex`, or `pandoc_texlive` -- `--dockerized_force_rebuild`: rebuild image from scratch -- `--dockerized_use_sudo`: run Docker with `sudo` - ### Quickstart recipes -| Goal | Command | -| ------------------------ | ---------------------------------------------------------------------------------------------------- | -| Convert Markdown to PDF | `pandoc_wrapper.py --input notes.md --output notes.pdf --container_type pandoc_latex` | -| Convert to Beamer slides | `pandoc_wrapper.py --input slides.md --output slides.pdf --container_type pandoc_latex -- -t beamer` | -| Rebuild Docker image | `pandoc_wrapper.py --input notes.md --output notes.pdf --dockerized_force_rebuild` | -| Run with sudo | `pandoc_wrapper.py --input notes.md --output notes.pdf --dockerized_use_sudo` | - -### CLI flags cheat‑sheet - -| Flag | Purpose | Notes | -| ---------------------------- | ------------------------------------------------------ | ---------------------- | -| `--input` | Input source file for Pandoc | **required** | -| `--output` | Output file path | Defaults to input name | -| `--data_dir` | Additional resource/data path | Optional | -| `--container_type` | Docker image type: `pandoc_only`, `pandoc_latex`, etc. 
| Default: `pandoc_only` | -| `--dockerized_force_rebuild` | Force rebuild of Docker image | Optional | -| `--dockerized_use_sudo` | Use `sudo` for Docker execution | Optional | -| `-v / --verbosity` | Logging level | Default: INFO | - ---- +- Convert Markdown to PDF + ``` + > pandoc_wrapper.py --input notes.md --output notes.pdf --container_type pandoc_latex + ``` +- Convert to Beamer slides + ``` + > pandoc_wrapper.py --input slides.md --output slides.pdf --container_type pandoc_latex -- -t beamer + ``` +- Rebuild Docker image + ``` + > pandoc_wrapper.py --input notes.md --output notes.pdf --dockerized_force_rebuild + ``` +- Run with sudo + ``` + > pandoc_wrapper.py --input notes.md --output notes.pdf --dockerized_use_sudo + ``` -## 14. Prettier Formatter — `dockerized_prettier.py` +## `dockerized_prettier.py` ### What it does @@ -729,52 +689,76 @@ Avoids environment-specific issues and ensures consistent formatting. Supports full Prettier CLI flexibility via passthrough of additional options. -### Most used flags +### Quickstart recipes + +- Format a Markdown file + ``` + > dockerized_prettier.py --parser markdown --write test.md + ``` +- Use `sudo` for Docker execution + ``` + > dockerized_prettier.py --use_sudo --parser markdown --write test.md + ``` +- Rebuild the Docker image + ``` + > dockerized_prettier.py --dockerized_force_rebuild --parser markdown --write test.md + ``` +- Change indentation and wrap style + ``` + dockerized_prettier.py --parser markdown --tab-width 4 --prose-wrap always --write test.md + ``` -- `--parser`: Prettier parser (e.g. 
`markdown`) -- `--write`: Apply formatting in-place -- `--tab-width`: Number of spaces per indentation level -- `--dockerized_force_rebuild`: Force rebuild of Docker container -- `--dockerized_use_sudo`: Use `sudo` for Docker commands +### Interface -### Quickstart recipes +- Interface + ``` + > dockerized_prettier.py -h + usage: dockerized_prettier.py [-h] -i IN_FILE_NAME [-o OUT_FILE_NAME] + [--dockerized_force_rebuild] + [--dockerized_use_sudo] + [-v {TRACE,DEBUG,INFO,WARNING,ERROR,CRITICAL}] -| Goal | Command | -| --------------------------------- | -------------------------------------------------------------------------------------------- | -| Format a Markdown file | `dockerized_prettier.py --parser markdown --write test.md` | -| Use `sudo` for Docker execution | `dockerized_prettier.py --use_sudo --parser markdown --write test.md` | -| Rebuild the Docker image | `dockerized_prettier.py --dockerized_force_rebuild --parser markdown --write test.md` | -| Change indentation and wrap style | `dockerized_prettier.py --parser markdown --tab-width 4 --prose-wrap always --write test.md` | + Run `prettier` inside a Docker container to ensure consistent formatting across + different environments. -### CLI flags cheatsheet + This script builds the container dynamically if necessary and formats the + specified file using the provided `prettier` options. -| Flag | Purpose | Notes | -| ---------------------------- | ----------------------------------------------------- | ------------------------------------- | -| `-i / --input` | Input file path | Required | -| `-o / --output` | Output file path | Optional (defaults to input) | -| `--parser` | Prettier parser type (e.g. `markdown`, `babel`, etc.) 
| Required via passthrough | -| `--write` | Format and overwrite input file | Common usage flag | -| `--tab-width` | Number of spaces per tab | Optional, defaults to Prettier config | -| `--dockerized_force_rebuild` | Force Docker image rebuild | Optional | -| `--dockerized_use_sudo` | Use `sudo` for Docker commands | Optional | -| `-v / --verbosity` | Logging level | Default: INFO | + Examples + # Basic usage: + > dockerized_prettier.py --parser markdown --prose-wrap always --write --tab-width 2 test.md ---- + # Use sudo for Docker commands: + > dockerized_prettier.py --use_sudo --parser markdown --prose-wrap always --write --tab-width 2 test.md + + # Set logging verbosity: + > dockerized_prettier.py -v DEBUG --parser markdown --prose-wrap always --write --tab-width 2 test.md </pre> + + # Process a file: + > cat test.md + - a + - b + - c + > dockerized_prettier.py --parser markdown --prose-wrap always --write --tab-width 2 test.md + + options: + -h, --help show this help message and exit + -i IN_FILE_NAME, --in_file_name IN_FILE_NAME + Input file or `-` for stdin + -o OUT_FILE_NAME, --out_file_name OUT_FILE_NAME + Output file or `-` for stdout + --dockerized_force_rebuild + Force to rebuild the Docker container + --dockerized_use_sudo + Use sudo inside the container + -v {TRACE,DEBUG,INFO,WARNING,ERROR,CRITICAL} + Set the logging level + ``` -## 15. MacOS screenshot helper — `save_screenshot.py` +## `save_screenshot.py` ### What it does 1. Prompts you to select a screen region (`⌘ + Ctrl + 4`). 2. Saves it as `screenshot.YYYY‑MM‑DD_HH‑MM‑SS.png` (or your chosen name). 3. Prints and copies the Markdown embed `<img src="path/to/file.png">`. - -### Flags - -| Flag | Purpose | -| --------------------- | ---------------------------------------- | -| `--dst_dir DIR` | Target directory (e.g. `notes/figures`). | -| `--filename NAME.png` | Override default timestamped name. | -| `--override` | Allow clobbering an existing file. 
| - ---- diff --git a/docs/work_tools/git/all.ai_review.how_to_guide.md b/docs/work_tools/git/all.ai_review.how_to_guide.md index 4ef1499df..298e7da53 100644 --- a/docs/work_tools/git/all.ai_review.how_to_guide.md +++ b/docs/work_tools/git/all.ai_review.how_to_guide.md @@ -21,7 +21,7 @@ - `template_unit_test.py` shows how our unit tests look like - `template_doc.how_to_guide.md` shows how a Diataxis how to guide should be structured and look like - - `template_doc.reference.md` shows how an Diataxis how to guide should be + - `template_doc.reference.md` shows how a Diataxis how to guide should be structured and look like - The same template can have multiple applications for: @@ -37,22 +37,21 @@ # Tools ## llm_transform.py + - There are several classes of transforms - `code_*`: transform Python code - `code_fix_*`: fix a specific chunk of code according to a prompt - `code_transform_*`: apply a series of transformations - `code_write_*`: write from scratch - - `doc_*`: process free form (not markdown) text - - TODO(gp): Is it worth it, or should be merged with `md_*` targets - `latex_*`: process Latex code - `md_*`: process markdown `md` text and `txt` notes - - TODO(gp): - `review_*`: process Python code to extract reviews - - `scratch_*`: misc + - `scratch_*`: misc and one-off transforms - `slide_*`: process markdown slides in `txt` format + - `text_*`: process free form (not markdown) text - You can list the available transformations with: - ``` + ```bash > llm_transform.py -p list # Available prompt tags: code_fix_by_using_f_strings @@ -104,8 +103,8 @@ - These transformations don't need LLMs and are implemented as code -- You can see the available transforms - ``` +- You can see the available transforms with: + ```bash > transform_notes.py -a list test: compute the hash of a string to test the flow format_headers: format the headers @@ -118,36 +117,88 @@ md_format: reflow the markdown and colorize the bold text ``` -# Causify flow +# Some typical workflows -## 
Editing workflows +## An editing workflow -- Use `llm_transform.py` to +- Use `llm_transform.py` to: - Edit files manually applying specific transformations to chunks of code - Apply transforms to an entire file + - Read and apply a list of transforms and apply them + - Format the style of a template to a file -- -There are 3 types of transforms / review -llm: executed by an LLM since they are difficult to implement otherwise -linter_llm: exected by an LLM but they should be moved to the linter (mainly formatting) -linter: linter - -ai_review.py to generate TODOs -inject_todos.py to add TODOs -apply_todos.py to execute TODOs - -There is detecting the problems and fixing the problems +- There are 3 types of transforms and review tasks + - `llm`: executed by an LLM since they are difficult to implement otherwise + - E.g., "apply this style to a certain file" + - `linter_llm`: executed by an LLM for now to get something in place, even + if they should be moved to code / linter + `- E.g., mainly formatting tasks + - `linter`: executed by the Linter using code and regex ## A reviewer workflow -- This can be used by the author of the code or by a reviewer +- This workflow can be used by the author of the code or by a reviewer + - The goal is to make these tools robust enough so that they can be used + directly by the author and potentially integrated in the `linter` flow itself + - Initially, reviewers use these tools as part of initial dog-fooding of the + flows - Go to the Git branch with the code - Check which files are modified - ``` - > i git_branch_diff_with -t base --only-print-files + ```bash + > invoke git_branch_diff_with -t base --only-print-files ``` - Run `ai_review.py` on each file to generate a list of comments on the code - This is equivalent to running a `review` target with `llm_transform.py` (e.g., `llm_transform.py -p review_*`) but it is a separated flow for clarify - This generates a `cfile` with a list of comments comments + +- Review the TODOs 
using cfile jumping around files + ```bash + > vim -c "cfile cfile" + ``` + - You can fix the code according to the TODOs + - Discard a TODO as a false positive or not important + +- Run `inject_todos.py` to add TODOs to the files for someone else to fix it + later + - E.g., in a code review you want to ask the author to perform that task + +- Run `apply_todos.py` to automatically apply the TODOs using an LLM + - This can be a risky move + +- You should always commit your code and apply the automatic transforms that + modify a file in a separate commit, so that it's easy to review + +## How to change the logic in place while reviewing + +- A common problem is that we might want to adjust one of our tools (e.g., + `linter.py`, `ai_review.py`) while reviewing somebody's else code + +- The approach is to copy files from a different Git client in the one with + the code being tested using one of the scripts + + ``` + > ai_review.py -i template_code.py + ``` + + ``` + > llm_transform.py -i template_code.py -p code_fix_code + ``` + + ```bash + > PROMPT=review_llm + > PROMPT=review_correctness + > PROMPT=review_linter + > PROMPT=review_architecture + > + > FILE=dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py + + > \cp -f /Users/saggese/src/helpers1/dev_scripts_helpers/llms/sync_ai_review.sh $HELPERS_ROOT_DIR/dev_scripts_helpers/llms && sync_ai_review.sh && ai_review.py -i $FILE -p $PROMPT + + > vi -c "cfile cfile" + + > inject_todos.py --cfile cfile + + > llm_transform.py -i dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py -p code_fix_code + ``` From ffe3d7c305f6d8a9cbaa9e5d87ded0ccee03132e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 10:37:50 -0400 Subject: [PATCH 141/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../extract_headers_from_markdown.py | 1 - dev_scripts_helpers/llms/ai_review.py | 29 +- 
dev_scripts_helpers/llms/inject_todos.py | 11 +- dev_scripts_helpers/llms/llm_prompts.py | 22 +- dev_scripts_helpers/llms/llm_transform.py | 4 +- dev_scripts_helpers/system_tools/ffind.py | 8 +- ...inter_style_review_guidelines.reference.md | 53 +- ...l.llm_style_review_guidelines.reference.md | 65 ++- .../all.notes_toolchain.explanation.md | 62 +- .../all.notes_toolchain.how_to_guide.md | 543 ++++++++---------- .../git/all.ai_review.how_to_guide.md | 47 +- helpers/haws.py | 3 +- helpers/hmarkdown.py | 15 +- helpers/hparser.py | 4 +- helpers/test/test_hmarkdown.py | 102 ++-- linters/amp_lint_md.py | 2 +- 16 files changed, 518 insertions(+), 453 deletions(-) diff --git a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py index 6d975a10b..cff6242d8 100755 --- a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py +++ b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py @@ -27,7 +27,6 @@ import argparse import logging -import helpers.hdbg as hdbg import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser diff --git a/dev_scripts_helpers/llms/ai_review.py b/dev_scripts_helpers/llms/ai_review.py index db71f0f89..66f8671c4 100755 --- a/dev_scripts_helpers/llms/ai_review.py +++ b/dev_scripts_helpers/llms/ai_review.py @@ -31,20 +31,12 @@ import argparse import logging import os -import re -from typing import List, Optional import dev_scripts_helpers.llms.llm_prompts as dshlllpr +import dev_scripts_helpers.llms.llm_transform as dshllltr import helpers.hdbg as hdbg -import helpers.hdocker as hdocker -import helpers.hgit as hgit import helpers.hio as hio -import helpers.hmarkdown as hmarkdo import helpers.hparser as hparser -import helpers.hprint as hprint -import helpers.hserver as hserver -import helpers.hsystem as hsystem -import dev_scripts_helpers.llms.llm_transform as dshlllpt _LOG = logging.getLogger(__name__) @@ -81,9 +73,20 @@ def 
_main(parser: argparse.ArgumentParser) -> None: hparser.init_logger_for_input_output_transform(args) # Parse files. in_file_name, out_file_name = hparser.parse_input_output_args(args) - hdbg.dassert_in(args.prompt, ["review_llm", "review_linter", "review_correctness", "review_refactoring"]) + hdbg.dassert_in( + args.prompt, + [ + "review_llm", + "review_linter", + "review_correctness", + "review_refactoring", + ], + ) if out_file_name != "cfile": - _LOG.warning("The output file name is %s, so it will be converted to `cfile`", out_file_name) + _LOG.warning( + "The output file name is %s, so it will be converted to `cfile`", + out_file_name, + ) out_file_name = "cfile" tag = "ai_review" tmp_in_file_name, tmp_out_file_name = ( @@ -102,7 +105,7 @@ def _main(parser: argparse.ArgumentParser) -> None: # cmd_line_opts.append(f"--{arg.replace('_', '-')} {value}") # For stdin/stdout, suppress the output of the container. suppress_output = in_file_name == "-" or out_file_name == "-" - dshlllpt._run_dockerized_llm_transform( + dshllltr._run_dockerized_llm_transform( tmp_in_file_name, cmd_line_opts, tmp_out_file_name, @@ -118,7 +121,7 @@ def _main(parser: argparse.ArgumentParser) -> None: ) # if dshlllpr.to_run("convert_file_names", post_container_transforms): - dshlllpt._convert_file_names(in_file_name, tmp_out_file_name) + dshllltr._convert_file_names(in_file_name, tmp_out_file_name) # # Check that all post-transforms were run. hdbg.dassert_eq( diff --git a/dev_scripts_helpers/llms/inject_todos.py b/dev_scripts_helpers/llms/inject_todos.py index b19e9d963..4b8e0ec4c 100755 --- a/dev_scripts_helpers/llms/inject_todos.py +++ b/dev_scripts_helpers/llms/inject_todos.py @@ -17,8 +17,7 @@ # TODO(gp): -> _parser() or _get_parser() everywhere. 
def _parse() -> argparse.ArgumentParser: - """ - """ + """ """ parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -31,9 +30,7 @@ def _parse() -> argparse.ArgumentParser: help="File containing the TODOs to inject", ) parser.add_argument( - "--todo_target", - action="store_true", - help="User name to use in the TODOs" + "--todo_target", action="store_true", help="User name to use in the TODOs" ) hparser.add_verbosity_arg(parser) return parser @@ -45,7 +42,9 @@ def _main(parser: argparse.ArgumentParser) -> None: # Read the cfile. cfile_txt = hio.from_file(args.cfile) # Inject the TODOs. - todo_txt = hmarkdo.inject_todos_from_cfile(cfile_txt, args.todo_target, comment_prefix="#") + todo_txt = hmarkdo.inject_todos_from_cfile( + cfile_txt, args.todo_target, comment_prefix="#" + ) if __name__ == "__main__": diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index d90d22360..9fd1d6042 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -11,7 +11,6 @@ import helpers.hio as hio import helpers.hmarkdown as hmarkdo import helpers.hprint as hprint -import helpers.hsystem as hsystem _LOG = logging.getLogger(__name__) @@ -439,7 +438,9 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. + + You are %d years old." % (name, age)`. 
""" system = _CODING_CONTEXT system += r""" @@ -601,6 +602,7 @@ def code_write_1_unit_test() -> _PROMPT_OUT: post_container_transforms: List[str] = [] return system, pre_transforms, post_transforms, post_container_transforms + # ############################################################################# # Latex # ############################################################################# @@ -625,8 +627,6 @@ def latex_rewrite() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms - - # ############################################################################# # Markdown. # ############################################################################# @@ -695,7 +695,7 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT: a goal. Rewrite the markdown passed to make it a how-to guide and contain the - the following sections: + following sections: - Goal / Use Case - Assumptions / Requirements - Step-by-Step Instructions @@ -729,7 +729,7 @@ def md_convert_table_to_bullet_points() -> _PROMPT_OUT: system += r""" - Convert the table passed to bullet points using multiple levels of bullets. - Remove the formatting (e.g., bold, italic). - + Make sure to lose any information. """ pre_transforms: Set[str] = set() @@ -741,7 +741,7 @@ def md_convert_table_to_bullet_points() -> _PROMPT_OUT: def md_format() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" - - Replace `*` with `-` for bullet points + - Replace `*` with `-` for bullet points - Do not use tables unless necessary """ pre_transforms: Set[str] = set() @@ -807,6 +807,7 @@ def md_create_bullets() -> _PROMPT_OUT: # One-off transforms. + def misc_categorize_topics() -> _PROMPT_OUT: system = r""" For each of the following title of article, find the best topic among the @@ -873,9 +874,6 @@ def review_llm() -> _PROMPT_OUT: Review the code using LLMs. """ # Load the reference file. 
- #helper_root = hgit.find_helpers_root() - #file_name = os.path.join(helper_root, "docs/code_guidelines/all.llm_style_review_guidelines.reference.md") - # TODO(gp): This doesn't work for unknown reasons. file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") return _review_from_file(file_name) @@ -885,8 +883,6 @@ def review_linter() -> _PROMPT_OUT: Review the code for linter style (still using LLMs). """ # Load the reference file. - #helper_root = hgit.find_helpers_root() - #file_name = os.path.join(helper_root, "docs/code_guidelines/all.linter_style_review_guidelines.reference.md") file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") return _review_from_file(file_name) @@ -1137,7 +1133,7 @@ def slide_add_figure() -> _PROMPT_OUT: # Operate on pure text, not markdown. -#def text_expand() -> _PROMPT_OUT: +# def text_expand() -> _PROMPT_OUT: # """ # """ # system = hio.from_file("text_expand2.txt") diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 28ee5d2e4..05ec07f08 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -186,7 +186,9 @@ def _run_dockerized_llm_transform( ] ) docker_cmd = " ".join(docker_cmd) - ret = hdocker.process_docker_cmd(docker_cmd, container_image, dockerfile, mode) + ret = hdocker.process_docker_cmd( + docker_cmd, container_image, dockerfile, mode + ) return ret diff --git a/dev_scripts_helpers/system_tools/ffind.py b/dev_scripts_helpers/system_tools/ffind.py index c275daf9b..03819ad6a 100755 --- a/dev_scripts_helpers/system_tools/ffind.py +++ b/dev_scripts_helpers/system_tools/ffind.py @@ -70,13 +70,15 @@ def _main(parser: argparse.ArgumentParser) -> None: cmd = [] cmd.append(f"find {dir_name}") # Skip certain dirs. 
- cmd.append(r"\( -path './.git' -o -path './.ipynb_checkpoints' -o -path ./.mypy_cache \) -prune -o") + cmd.append( + r"\( -path './.git' -o -path './.ipynb_checkpoints' -o -path ./.mypy_cache \) -prune -o" + ) if args.only_files: cmd.append("-type f") cmd.append('-iname "%s"' % name) # Guarantee that only non-pruned files are printed. - cmd.append('-print') - cmd.append('| grep -v __pycache__') + cmd.append("-print") + cmd.append("| grep -v __pycache__") cmd.append("| sort") cmd = " ".join(cmd) if (args.log_level == "DEBUG") or args.log: diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md index e02e6b9a3..bfda1782b 100644 --- a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md @@ -1,3 +1,25 @@ +<!-- toc --> + +- [Guidelines for automated PR reviews](#guidelines-for-automated-pr-reviews) + * [Python code](#python-code) + + [Naming](#naming) + + [Docstrings](#docstrings) + + [Comments](#comments) + + [Code design](#code-design) + + [Imports](#imports) + + [Type annotations](#type-annotations) + + [Functions](#functions) + + [Scripts](#scripts) + + [Unit tests](#unit-tests) + + [Misc](#misc) + * [Notebooks](#notebooks) + + [General](#general) + + [Jupytext](#jupytext) + * [Markdowns](#markdowns) + * [Spelling](#spelling) + +<!-- tocstop --> + # Guidelines for automated PR reviews ## Python code @@ -6,7 +28,7 @@ - Name executable Python scripts using verbs and actions - E.g., `download.py` and not `downloader.py` -- Name non-executable files using nouns +- Name non-executable files using nouns - E.g., `downloader.py` - Use `dir` and not `directory` or `folder` - E.g., `dir_path` @@ -78,6 +100,7 @@ - No import cycles should be introduced by the changes in the PR ### Type annotations + - All functions and methods, including constructors, must have type annotations for all the 
parameters and returned structures - Use `-> None` if a function doesn't return anything @@ -86,6 +109,7 @@ - Type annotation `Any` should be avoided, if possible ### Functions + - Make a function private (e.g., `_foo_bar()`) when it is a helper of another private or public function @@ -103,6 +127,7 @@ - Use `argparse` for argument parsing ### Unit tests + - Unit tests should be placed in a `test_*.py` file in the `test` directory, close to the library / code it tests - Test file `test_file_name.py` testing the library `file_name.py` @@ -117,6 +142,7 @@ `outcomes` dir should also be renamed or removed ### Misc + - If a PR includes renaming a file, variable, parameter, function, class, etc., then all the instances and references to it throughout the codebase should be updated @@ -170,9 +196,9 @@ ## Markdowns -- Names of documentation files should follow the format - `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` - to help in organizing and categorizing documentation files effectively +- Names of documentation files should follow the format + `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` to help in organizing + and categorizing documentation files effectively - E.g., `docs/documentation_meta/all.diataxis.explanation.md` - The `{component}` part specifies the part of the project the documentation is related to @@ -182,17 +208,34 @@ Diátaxis framework (e.g., explanation, tutorial) - All Markdown files should have a table of contents - Linter automatically adds and updates the table of contents +- Items in bullet point lists should not end with a period - There should be one and only one level 1 heading (with one `#`) in a Markdown - The level 1 heading serves as the main title of the document - It should clearly convey the primary topic or purpose of the document - The level 1 heading should be located above the table of contents +- Wrap file paths, names of variables, functions, and classes in backticks + - E.g., `file_path`, `variable_name`, 
`function_name()`, `ClassName` +- Use `>` to indicate a command line + - E.g., `> git push` or `docker> pytest` - Headings should not be boldfaced - Headings should not be overcapitalized - E.g., `Data schema` instead of `Data Schema` - Text should be reflowed to the maximum of 80 columns per line - Fenced code blocks should always be accompanied by language markers - E.g., `bash`, `python` -- Indent fenced code blocks at the same level as the previous line + - Fenced code blocks should be indented at the same level as the previous line +- Commands should be prepended by `>` or `docker>` if they need to + - Example + ``` + > notes_to_pdf.py \ + --input MSML610/Lesson5-Theory_Statistical_learning.txt \ + --output Lesson5.pdf \ + --type slides \ + --toc_type navigation \ + --debug_on_error \ + --skip_action cleanup_after + ``` +- Commands should be prepended by `docker>` if they need to be run inside Docker ## Spelling diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index ac6a80a1d..a6f85195d 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -1,3 +1,23 @@ +<!-- toc --> + +- [Guidelines for automated PR reviews](#guidelines-for-automated-pr-reviews) + * [Python code](#python-code) + + [Naming](#naming) + + [Docstrings](#docstrings) + + [Comments](#comments) + + [Code implementation](#code-implementation) + + [Code design](#code-design) + + [Type annotations](#type-annotations) + + [Functions](#functions) + + [Logging](#logging) + + [Unit tests](#unit-tests) + * [Notebooks](#notebooks) + + [General](#general) + + [Plotting](#plotting) + * [Markdowns](#markdowns) + +<!-- tocstop --> + # Guidelines for automated PR reviews ## Python code @@ -7,7 +27,7 @@ - Name functions using verbs and verbs/actions - E.g., `download_data()`, `process_input()`, `calculate_sum()` - Python 
internal functions as `__repr__`, `__init__` are valid - - Functions names like `to_dict()`, `_parse()`, `_main()` are valid + - Functions names like `to_dict()`, `_parse()`, `_main()` are valid - Name classes using nouns - E.g., `Downloader()`, `DataProcessor()`, `User()` - Name decorators with an adjective or a past tense verb @@ -17,8 +37,8 @@ - E.g., the name of a variable should not include its type - E.g., use `embeddings` instead of `embeddings_list` - E.g., use `data` instead of `data_dict` -- Abbreviations in the names of variables and functions should be avoided, except - for the following +- Abbreviations in the names of variables and functions should be avoided, + except for the following - `df` for dataframe - `srs` for series - `idx` for index @@ -35,8 +55,8 @@ ### Docstrings - All functions and methods must have a docstring -- The docstring should describe the goal of the function, the interface and what the user - needs to know to use the function +- The docstring should describe the goal of the function, the interface and what + the user needs to know to use the function - E.g., "This function calculates the sum of two numbers and returns the result." 
- The text should not describe implementation details that can be changed @@ -48,7 +68,7 @@ - first value description - second value description that is very long and continues into a second line - ``` + ``` - Adding examples (e.g., of input and output) to the docstring is encouraged - E.g., ``` @@ -104,8 +124,8 @@ - E.g., `hdbg.dassert_lt(start_date, end_date)` - Ensure that assertions provide detailed information for debugging - Use assertions to validate input parameters and preconditions -- Do not use f-strings in `hdbg.dassert()`, but use traditional string formatting - methods in assertions +- Do not use f-strings in `hdbg.dassert()`, but use traditional string + formatting methods in assertions - E.g., `hdbg.dassert_eq(len(list1), len(list2), "Lists must be of equal length: %d vs %d" % (len(list1), len(list2)))` - Use f-strings in exceptions @@ -162,8 +182,8 @@ ### Type annotations -- For type hints use use `List`, `Dict`, and `Tuple` to provide more explicit type information - and help with static type checking +- For type hints use use `List`, `Dict`, and `Tuple` to provide more explicit + type information and help with static type checking - E.g., `List[int]` instead of `list` - E.g., `List[str]` instead of `list` - Use `Dict` instead of `dict` @@ -176,8 +196,8 @@ ### Functions - Avoid pure functions without side effects, i.e., for the same input arguments, - the returned value should not change (in contrast to functions that - rely upon external state) + the returned value should not change (in contrast to functions that rely upon + external state) - Functions should not modify the function inputs - E.g., if a function `f()` accepts a dataframe `df` as its argument, then `f()` will not modify `df` but make a copy and work on it @@ -251,7 +271,8 @@ - Use logging `_LOG.debug()` and not `print()` for tracing execution - Use positional args in logging and not inline formatting - - E.g., The code should do `_LOG.debug("cmd=%s", cmd1)` and not 
`_LOG.debug(f"cmd={cmd1}")` + - E.g., The code should do `_LOG.debug("cmd=%s", cmd1)` and not + `_LOG.debug(f"cmd={cmd1}")` - Use the following idiom to configure logging: ```python @@ -366,12 +387,13 @@ ### Plotting -- Each plot should have a descriptive title to understand the context of the plot - at a glance +- Each plot should have a descriptive title to understand the context of the + plot at a glance - E.g., "Monthly Sales Data for 2023" instead of just "Sales Data" - Each plot should have axes labels - E.g., label the x-axis as "Months" and the y-axis as "Revenue in USD" -- If there are several multiple data series on the same plot, it should have a legend +- If there are several multiple data series on the same plot, it should have a + legend - In a plotting function, `plt.show()` should not be added at the end - This allows for further customization or saving of the plot before displaying @@ -389,11 +411,6 @@ - Boldface and italics should be used sparingly - The use of bullet point lists is encouraged - For the items, `-` should be used instead of `*` or circles - - Items in bullet point lists should not end with a period -- Wrap file paths, names of variables, functions, and classes in backticks - - E.g., `file_path`, `variable_name`, `function_name()`, `ClassName` -- Use `>` to indicate a command line - - E.g., `> git push` or `docker> pytest` - Avoid using screenshots whenever possible and instead copy-and-paste text with the right highlighting - E.g., instead of a screenshot of a terminal command, provide the command @@ -406,3 +423,9 @@ - Rewrite long-winded AI-generated texts in a concise way - E.g., instead of "The process of updating the software can be done by following these steps," use "Update the software by following these steps" + +- When describing a tool the format should be the following + - A description of what the tool does + - A list of examples of invocations of a tool, with a comment on the command + line, the command line, and its 
output if possible + - A copy-paste version of the tool interface running `-h` diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md b/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md index b4e62867c..446f0b5bc 100644 --- a/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.explanation.md @@ -1,6 +1,6 @@ <!-- toc --> -- [`notes_to_pdf.py` - Flow Explanation](#notes_to_pdfpy---flow-explanation) +- [Flow Explanation](#flow-explanation) * [Goal](#goal) * [Architecture diagram](#architecture-diagram) * [Steps](#steps) @@ -54,37 +54,41 @@ Rel(notes2pdf, style, "Injects LaTeX Style (.sty)") ## Steps 1. **Clean-up & augmentation** - - Performed by: `preprocess_notes.py` - - Key ideas: - - Normalizes headers - - Expands arrow shorthand (`->` to `\rightarrow`) - - Deals with comments - - Inserts Pandoc YAML front-matter - - Inserts optional navigation slides + +- Performed by: `preprocess_notes.py` +- Key ideas: + - Normalizes headers + - Expands arrow shorthand (`->` to `\rightarrow`) + - Deals with comments + - Inserts Pandoc YAML front-matter + - Inserts optional navigation slides 2. **Diagram extraction** - - Performed by: `render_images.py` - - Key ideas: - - Scans code blocks (e.g., ` plantuml) - - Renders diagrams via Docker containers - - Replaces the code with `![](figs/...)` include - - Comments out the original block - - Uses a SHA-256 cache to skip unchanged diagrams + +- Performed by: `render_images.py` +- Key ideas: + - Scans code blocks (e.g., ` plantuml) + - Renders diagrams via Docker containers + - Replaces the code with `![](figs/...)` include + - Comments out the original block + - Uses a SHA-256 cache to skip unchanged diagrams 3. 
**Orchestration** - - Performed by: `notes_to_pdf.py` - - Key ideas: - - Calls Stage 1 and Stage 2, then Pandoc, then (for PDF) LaTeX - - Flags control each sub-action to allow skipping, debugging, or re-running - steps individually + +- Performed by: `notes_to_pdf.py` +- Key ideas: + - Calls Stage 1 and Stage 2, then Pandoc, then (for PDF) LaTeX + - Flags control each sub-action to allow skipping, debugging, or re-running + steps individually 4. **Document synthesis** - - Performed by: Pandoc + LaTeX - - Key ideas: - - Pandoc converts Markdown to LaTeX (or HTML / Beamer) - - `latex_abbrevs.sty` is copied next to the generated `.tex` file - - Ensures vector/matrix macros (`\vv{}`, `\mat{}`), deep lists, and color - helpers compile correctly + +- Performed by: Pandoc + LaTeX +- Key ideas: + - Pandoc converts Markdown to LaTeX (or HTML / Beamer) + - `latex_abbrevs.sty` is copied next to the generated `.tex` file + - Ensures vector/matrix macros (`\vv{}`, `\mat{}`), deep lists, and color + helpers compile correctly ## Dependencies @@ -93,9 +97,9 @@ Rel(notes2pdf, style, "Injects LaTeX Style (.sty)") - **Input: ** raw notes. - **Output: ** Pandoc‑ready Markdown. - Handles - - formatting banner frames - - question formatting - - colour commands (`\red{}` -> `\textcolor{red}{...}`) + - Formatting banner frames + - Question formatting + - Colour commands (`\red{}` -> `\textcolor{red}{...}`) - TOC injection ### `render_images.py` diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md index 957888400..56155346c 100644 --- a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md @@ -1,78 +1,57 @@ <!-- toc --> - [Notes Documentation Toolchain](#notes-documentation-toolchain) - * [1. 
Generate Slides and PDFs — `notes_to_pdf.py`](#1-generate-slides-and-pdfs--notes_to_pdfpy) + * [notes_to_pdf.py](#notes_to_pdfpy) + [What it does](#what-it-does) - + [Most used flags](#most-used-flags) + [Quickstart recipes](#quickstart-recipes) + [CLI flags cheatsheet](#cli-flags-cheatsheet) + [Worked examples](#worked-examples) - - [Slides with navigation breadcrumbs](#slides-with-navigation-breadcrumbs) - - [Focus on a subsection](#focus-on-a-subsection) - - [Plain PDF article](#plain-pdf-article) - * [2. Auto render figures — `render_images.py`](#2-auto-render-figures--render_imagespy) + * [render_images.py](#render_imagespy) + [Supported File types and Code blocks](#supported-file-types-and-code-blocks) + [Quick Start Recipes](#quick-start-recipes) - - [Render to a new file](#render-to-a-new-file) - - [Render in‑place (Markdown or LaTeX)](#render-in%E2%80%91place-markdown-or-latex) - - [HTML preview of already‑rendered images](#html-preview-of-already%E2%80%91rendered-images) - - [Dry‑run (test parsing / comments only)](#dry%E2%80%91run-test-parsing--comments-only) + [Flags](#flags) - * [3. Lint and Prettify — `lint_notes.py`](#3-lint-and-prettify--lint_notespy) + * [`lint_notes.py`](#lint_notespy) + [Quickstart recipes](#quickstart-recipes-1) - - [Prettify with Dockerised Prettier and TOC rebuild](#prettify-with-dockerised-prettier-and-toc-rebuild) - - [Custom print width and selective actions](#custom-print-width-and-selective-actions) + [Flags](#flags-1) - * [4. Notebook Image Scraping — `extract_notebook_images.py`](#4-notebook-image-scraping--extract_notebook_imagespy) + * [`extract_notebook_images.py`](#extract_notebook_imagespy) + [Flag Options](#flag-options) * [5. LLM Powered Transforms — `llm_transform.py`](#5-llm-powered-transforms--llm_transformpy) + [Minimum viable command](#minimum-viable-command) + [Finding available prompts](#finding-available-prompts) + [Flags](#flags-2) + [Example recipes](#example-recipes) - * [6. 
Pandoc Wrapper — `run_pandoc.py`](#6-pandoc-wrapper--run_pandocpy) + * [`run_pandoc.py`](#run_pandocpy) + [What the script does](#what-the-script-does) + [Quickstart commands](#quickstart-commands) + [Flags](#flags-3) - * [7. Automate notes transformations — `transform_notes.py`](#7-automate-notes-transformations--transform_notespy) + * [`transform_notes.py`](#transform_notespy) + [What it does](#what-it-does-1) - + [Supported actions](#supported-actions) + + [Example of Supported Actions](#example-of-supported-actions) + [Examples](#examples) + [Flags](#flags-4) - * [8. Scrape headers from a markdown — `extract_headers_from_markdown.py`](#8-scrape-headers-from-a-markdown--extract_headers_from_markdownpy) + * [`extract_headers_from_markdown.py`](#extract_headers_from_markdownpy) + [Goal](#goal) + [Examples](#examples-1) - + [Flags](#flags-5) - * [9. TikZ to Bitmap — `dockerized_tikz_to_bitmap.py`](#9-tikz-to-bitmap--dockerized_tikz_to_bitmappy) + * [`dockerized_tikz_to_bitmap.py`](#dockerized_tikz_to_bitmappy) + [Examples](#examples-2) - * [10. Graphviz Renderer — `dockerized_graphviz.py`](#10-graphviz-renderer--dockerized_graphvizpy) + * [`dockerized_graphviz.py`](#dockerized_graphvizpy) + [What it does](#what-it-does-2) - + [Most used flags](#most-used-flags-1) + + [Most used flags](#most-used-flags) + [Quickstart recipes](#quickstart-recipes-2) - + [CLI flags cheatsheet](#cli-flags-cheatsheet-1) - * [11. LaTeX Renderer — `dockerized_latex.py`](#11-latex-renderer--dockerized_latexpy) + * [dockerized_latex.py](#dockerized_latexpy) + [What it does](#what-it-does-3) - + [Most used flags](#most-used-flags-2) + [Quickstart recipes](#quickstart-recipes-3) - + [CLI flags cheatsheet](#cli-flags-cheatsheet-2) - * [12. 
Mermaid Renderer — `dockerized_mermaid.py`](#12-mermaid-renderer--dockerized_mermaidpy) + * [dockerized_mermaid.py](#dockerized_mermaidpy) + [What it does](#what-it-does-4) - + [Most used flags](#most-used-flags-3) - + [Quickstart recipes](#quickstart-recipes-4) - + [CLI flags cheatsheet](#cli-flags-cheatsheet-3) - * [13. Pandoc Renderer — `dockerized_pandoc.py`](#13-pandoc-renderer--dockerized_pandocpy) + * [dockerized_pandoc.py](#dockerized_pandocpy) + [What it does](#what-it-does-5) - + [Most used flags](#most-used-flags-4) - + [Quickstart recipes](#quickstart-recipes-5) - + [CLI flags cheat‑sheet](#cli-flags-cheat%E2%80%91sheet) - * [14. Prettier Formatter — `dockerized_prettier.py`](#14-prettier-formatter--dockerized_prettierpy) + + [Quickstart recipes](#quickstart-recipes-4) + * [`dockerized_prettier.py`](#dockerized_prettierpy) + [What it does](#what-it-does-6) - + [Most used flags](#most-used-flags-5) - + [Quickstart recipes](#quickstart-recipes-6) - + [CLI flags cheatsheet](#cli-flags-cheatsheet-4) - * [15. MacOS screenshot helper — `save_screenshot.py`](#15-macos-screenshot-helper--save_screenshotpy) + + [Quickstart recipes](#quickstart-recipes-5) + + [Interface](#interface) + * [`save_screenshot.py`](#save_screenshotpy) + [What it does](#what-it-does-7) - + [Flags](#flags-6) <!-- tocstop --> @@ -81,7 +60,7 @@ - This is a high‑level guide to the helper scripts that turn raw `.txt` notes into polished PDFs, slide decks, and more. -// TODO(*): Is it worth to report the flags? It's difficult to maintain +// TODO(\*): Is it worth to report the flags? 
It's difficult to maintain ## notes_to_pdf.py @@ -93,12 +72,70 @@ > notes_to_pdf.py --input <infile.txt> --output <outfile.[pdf|html]> --type [pdf|html|slides] ``` -- The most used flags are - - `--type {pdf|html|slides}` - - `--toc_type {none|pandoc_native|navigation}` - - `--debug_on_error`, `--skip_action ...`, `--filter_by_lines A:B` +- The interface is: + ``` + > notes_to_pdf.py -h + usage: notes_to_pdf.py [-h] -i INPUT -o OUTPUT --type {pdf,html,slides} + [--filter_by_header FILTER_BY_HEADER] + [--filter_by_lines FILTER_BY_LINES] [--script SCRIPT] + [--preview_actions] + [--toc_type {none,pandoc_native,navigation}] + [--no_run_latex_again] [--debug_on_error] + [--gdrive_dir GDRIVE_DIR] [--use_host_tools] + [--action {cleanup_before,preprocess_notes,render_images,run_pandoc,copy_to_gdrive,open,cleanup_after} | --skip_action {cleanup_before,preprocess_notes,render_images,run_pandoc,copy_to_gdrive,open,cleanup_after}] + [--all] [--dockerized_force_rebuild] + [--dockerized_use_sudo] + [-v {TRACE,DEBUG,INFO,WARNING,ERROR,CRITICAL}] + + Convert a txt file into a PDF / HTML / slides using `pandoc`. -### Quickstart recipes + # From scratch with TOC: + > notes_to_pdf.py -a pdf --input ... + + # For interactive mode: + > notes_to_pdf.py -a pdf --no_cleanup_before --no_cleanup --input ... + + # Check that can be compiled: + > notes_to_pdf.py -a pdf --no_toc --no_open_pdf --input ... 
+ + > notes_to_pdf.py --input notes/IN_PROGRESS/math.The_hundred_page_ML_book.Burkov.2019.txt -t pdf --no_cleanup --no_cleanup_before --no_run_latex_again --no_open + + options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + -o OUTPUT, --output OUTPUT + Output file + --type {pdf,html,slides} + Type of output to generate + --filter_by_header FILTER_BY_HEADER + Filter by header + --filter_by_lines FILTER_BY_LINES + Filter by lines (e.g., `0:10`, `1:None`, `None:10`) + --script SCRIPT Bash script to generate with all the executed sub- + commands + --preview_actions Print the actions and exit + --toc_type {none,pandoc_native,navigation} + --no_run_latex_again + --debug_on_error + --gdrive_dir GDRIVE_DIR + Directory where to save the output to share on Google + Drive + --use_host_tools Use the host tools instead of the dockerized ones + --action {cleanup_before,preprocess_notes,render_images,run_pandoc,copy_to_gdrive,open,cleanup_after} + Actions to execute + --skip_action {cleanup_before,preprocess_notes,render_images,run_pandoc,copy_to_gdrive,open,cleanup_after} + Actions to skip + --all Run all the actions (cleanup_before preprocess_notes + render_images run_pandoc open cleanup_after) + --dockerized_force_rebuild + Force to rebuild the Docker container + --dockerized_use_sudo + Use sudo inside the container + -v {TRACE,DEBUG,INFO,WARNING,ERROR,CRITICAL} + Set the logging level + ``` + +### Examples - Compile to **Beamer slides** ``` @@ -120,42 +157,8 @@ - **Tip**: Run with `--preview_actions` to print the exact steps without executing them. 
-### CLI flags cheatsheet - -- `--type {pdf,html,slides}` - - Purpose: Specifies the output format - - Notes: The "slides" option uses Beamer -- `--toc_type {none,pandoc_native,navigation}` - - Purpose: Determines the Table of Contents (TOC) style - - Notes: The `navigation` option inserts slide-friendly breadcrumb frames -- `--filter_by_header "# Intro"` - - Purpose: Builds an artefact from a section subset - - Notes: This is useful for testing -- `--filter_by_lines 120:250` - - Purpose: Compiles only a specified range of lines - - Notes: Accepts `None` as a sentinel value -- `--debug_on_error` - - Purpose: On Pandoc failure, generates a _.tex_ file and provides a helpful - log - - Notes: No additional notes -- `--script myrun.sh` - - Purpose: Saves every shell command executed - - Notes: Useful for reproducing build pipelines -- Docker knobs: - - Options: - - `--dockerized_force_rebuild` - - `--dockerized_use_sudo` - - `--use_host_tools` - - Purpose: Controls the use of container vs host for pandoc/latex - -- Run `notes_to_pdf.py -h` for the exhaustive list. - -### Worked examples - - Slides with navigation breadcrumbs, keeping intermediate files for inspection - -// TODO(indro): `--toc_type navigation` fails because of the preprocess step. - + // TODO(indro): `--toc_type navigation` fails because of the preprocess step. ```bash > notes_to_pdf.py \ --input MSML610/Lesson5-Theory_Statistical_learning.txt \ @@ -166,8 +169,8 @@ --skip_action cleanup_after ``` -- Focus on a subsection, compiling only from line 362 to EOF for a fast iteration - when debugging slides +- Focus on a subsection, compiling only from line 362 to EOF for a fast + iteration when debugging slides ```bash > notes_to_pdf.py \ --input Lesson8-Reasoning_over_time.txt \ @@ -184,11 +187,13 @@ ## render_images.py +### What it does + - This script auto renders figures by - - detecting fenced code blocks (PlantUML, Mermaid, TikZ, Graphviz, ...) 
- - rendering them into images calling the appropriate tool - - commenting them out the block - - inlining a `![](img)` markup + - Detecting fenced code blocks (PlantUML, Mermaid, TikZ, Graphviz, ...) + - Rendering them into images calling the appropriate tool + - Commenting them out the block + - Inlining a `![](img)` markup - Render the images in a text file ```bash @@ -196,34 +201,36 @@ -o lesson9.images.txt --run_dockerized ``` -### Supported File types and Code blocks - -- File extension: `.md`, `.txt` - - Rendering syntax allowed: - - `plantuml` - - `mermaid` - - `graphviz` - - `tikz` - - `latex` - - Output embeds as: `<img src="figs/xxx.png">` -- File extension: `.tex` - - Rendering syntax allowed: - - same tags (TikZ & LaTeX especially) - - Output embeds as: `\includegraphics{...}` +The supported File types and code blocks are: + - File extension: `.md`, `.txt` + - Rendering syntax allowed: + - `plantuml` + - `mermaid` + - `graphviz` + - `tikz` + - `latex` + - Output embeds as: `<img src="figs/xxx.png">` + - File extension: `.tex` + - Rendering syntax allowed: + - Same tags (TikZ & LaTeX especially) + - Output embeds as: `\includegraphics{...}` -### Quick Start Recipes +### Examples - Render to a new file + ```bash > render_images.py -i lesson.md -o lesson.rendered.md --action render --run_dockerized ``` - Render in‑place (Markdown or LaTeX) + ```bash > render_images.py -i lesson.md --action render --run_dockerized ``` - HTML preview of already‑rendered images + ```bash > render_images.py -i lesson.md --action open --run_dockerized ``` @@ -233,38 +240,71 @@ > render_images.py -i lesson.md -o /tmp/out.md --dry_run ``` -### Flags +### Interface -- `-i/--in_file_name` - - Default: required - - Purpose: Input `.md`, `.tex`, or `.txt` -- `-o/--out_file_name` - - Default: `<input>` - - Purpose: Output path (must share extension) -- `--action` - - Default: `render` - - Purpose: `render` ↔ `open` -- `--dry_run` - - Default: False - - Purpose: Skip actual rendering, 
still rewrites markup -- `--run_dockerized / --dockerized_*` - - Default: False - - Purpose: Use pre-built container images for PlantUML, Mermaid, etc -- `--verbosity/-v` - - Default: `INFO` - - Purpose: Logging verbosity +- The interface + + ```bash + > render_images.py -h + usage: render_images.py [-h] -i IN_FILE_NAME [-o OUT_FILE_NAME] + [--action {open,render} | --skip_action {open,render}] + [--all] [--dry_run] [--dockerized_force_rebuild] + [--dockerized_use_sudo] + [-v {TRACE,DEBUG,INFO,WARNING,ERROR,CRITICAL}] + + Replace sections of image code with rendered images, commenting out the + original code, if needed. + + See `docs/work_tools/documentation_toolchain/all.render_images.explanation.md`. + + Usage: + + # Create a new Markdown file with rendered images: + > render_images.py -i ABC.md -o XYZ.md --action render --run_dockerized + + # Render images in place in the original Markdown file: + > render_images.py -i ABC.md --action render --run_dockerized + + # Render images in place in the original LaTeX file: + > render_images.py -i ABC.tex --action render --run_dockerized + + # Open rendered images from a Markdown file in HTML to preview: + > render_images.py -i ABC.md --action open --run_dockerized + + options: + -h, --help show this help message and exit + -i IN_FILE_NAME, --in_file_name IN_FILE_NAME + Path to the input file + -o OUT_FILE_NAME, --out_file_name OUT_FILE_NAME + Path to the output file + --action {open,render} + Actions to execute + --skip_action {open,render} + Actions to skip + --all Run all the actions () + --dry_run Update the file but do not render images + --dockerized_force_rebuild + Force to rebuild the Docker container + --dockerized_use_sudo + Use sudo inside the container + -v {TRACE,DEBUG,INFO,WARNING,ERROR,CRITICAL} + Set the logging level + ``` ## `lint_notes.py` +### What it does + - Tidy up Markdown/LaTeX/txt notes by: - - normalising G‑Doc artifacts - - running Prettier - - fixing bullet/heading quirks - - refreshing the 
Table of Contents + - Normalising G‑Doc artifacts + - Running Prettier + - Fixing bullet/heading quirks + - Refreshing the Table of Contents -### Quickstart recipes +### Examples - Prettify with Dockerised Prettier and TOC rebuild + ```bash > lint_notes.py -i Lesson10.md \ --use_dockerized_prettier \ @@ -277,37 +317,21 @@ --action preprocess,prettier,postprocess ``` -### Flags - -- `-i/--infile` - - Default: stdin - - Purpose: Input `.txt` or `.md` (also via pipe) -- `-o/--outfile` - - Default: stdout - - Purpose: Destination file (omit for pipe) -- `-w/--print-width` - - Default: None $\rightarrow$ Prettier default - - Purpose: Line wrap width -- `--use_dockerized_prettier` - - Default: False - - Purpose: Run Prettier inside helper container -- `--use_dockerized_markdown_toc` - - Default: False - - Purpose: Refresh TOC via containerised `markdown-toc` -- `--action` - - Default: all five stages - - Purpose: Comma-separated subset of: `preprocess`, `prettier`, `postprocess`, - `frame_chapters`, `refresh_toc` -- `-v/--verbosity` - - Default: INFO - - Purpose: Logging level +### Interface + +// TODO ## `extract_notebook_images.py` -- Spins up a docker container and dumps every `png/svg` output cell into a folder. +### What it does + +- Spins up a docker container and dumps every `png/svg` output cell into a + folder. - You can then publish or reuse the static plots/diagrams already rendered in a Jupyter notebook. 
+### Example + - Minimal call: ```bash > extract_notebook_images.py \ @@ -315,100 +339,62 @@ --out_image_dir notebooks/screenshots ``` -### Flag Options - -- `-i / --in_notebook_filename PATH` - - Purpose: Notebook to scan - - Default: required -- `-o / --out_image_dir DIR` - - Purpose: Folder where images land - - Default: required -- `--dockerized_force_rebuild` - - Purpose: Re-build the Docker image (use if you changed extractor code) - - Default: false -- `--dockerized_use_sudo` - - Purpose: Prepend `sudo docker ...` - - Default: auto-detects -- `-v INFO/DEBUG` - - Purpose: Log verbosity - - Default: `INFO` - ---- +### Interface -## 5. LLM Powered Transforms — `llm_transform.py` +// TODO -Apply a GPT‑style transformation (rewrite, summarise, critique code, convert to -slides, etc.) to any text file _without_ leaving the terminal / editor. +## `llm_transform.py` -> _Note: You need to have an `OPENAI_API_KEY` and an internet connection._ +### What it does -### Minimum viable command +- Apply a GPT‑style transformation (rewrite, summarise, critique code, convert to + slides, etc.) to any text file _without_ leaving the terminal / editor. -```bash -llm_transform.py -i draft.txt -o polished.txt -p rewrite_clearer -``` +- **Note**: You need to have an `OPENAI_API_KEY` and an internet connection. -### Finding available prompts - -```bash -llm_transform.py -p list -i - -o - -``` +### Examples -### Flags +- TODO + ```bash + llm_transform.py -i draft.txt -o polished.txt -p rewrite_clearer + ``` -- `-i / --input` - - Role: Source text (`-` = stdin) - - Notes: None -- `-o / --output` - - Role: Destination (`-` = stdout) - - Notes: None -- `-p / --prompt` - - Role: Prompt tag (`list`, `code_review`, `slide_colorize`, ...) 
- - Notes: Required -- `-c / --compare` - - Role: Print both original & transformed blocks to stdout - - Notes: Helpful for quick diff -- `-b / --bold_first_level_bullets` - - Role: Post-format tweak for slide prompts - - Notes: None -- `-s / --skip-post-transforms` - - Role: Return raw LLM output, skip prettier/cleanup - - Notes: None -- Docker flags - - Flags: `--dockerized_force_rebuild`, `--dockerized_use_sudo` - - Role: Control container lifecycle - - Notes: None - -### Example recipes +- Finding available prompts + ```bash + llm_transform.py -p list -i - -o - + ``` - Turn a code file into a review checklist - ```bash > llm_transform.py -i foo.py -o cfile -p code_review vim cfile ``` -- **Color‑accent the bold bullets for slides** +- Color‑accent the bold bullets for slides ```bash > llm_transform.py -i deck.md -o - -p slide_colorize | tee deck.color.md ``` -- **Inline use in Vim** – visual‑select a block, then: +- Inline use in Vim, visual‑select a block, then: ```vim :'<,'>!llm_transform.py -p summarize -i - -o - ``` +### Interface + +// TODO + ## `run_pandoc.py` -### What the script does +### What it does - Reads **Markdown** from _stdin_ or `--input` file. - Dispatches to a named **action** (currently only `convert_md_to_latex`). - Pushes the Pandoc output to _stdout_ or the `--output` file. -### Quickstart commands +### Example - Convert a Markdown file to LaTeX ``` @@ -423,9 +409,9 @@ llm_transform.py -p list -i - -o - :<,'>!run_pandoc.py -i - -o - -v CRITICAL ``` -**Tip :** pass `-v CRITICAL` to silence helper logging when piping into editors. +- **Tip:** pass `-v CRITICAL` to silence helper logging when piping into editors. -### Flags +### Interface - `-i / --input` - Default: `-` @@ -448,34 +434,33 @@ llm_transform.py -p list -i - -o - - Applies a named **action** (`-a/--action`). - Writes the result to the given output (in‑place, file, or `-`). 
-### Example of Supported Actions +### Examples - Run `-a list` to print a list of the valid - -- `toc` - - Generate a bullet TOC (top-level by default) - - Typical Vim one-liner: `:!transform_notes.py -a toc -i % -l 1` -- `format_headers` - - Re-flow / indent headers (up to `--max_lev`) - - Typical Vim one-liner: `:%!transform_notes.py -a format -i - --max_lev 3` -- `increase_headers_level` - - Bump all headers down one level - - Typical Vim one-liner: `:%!transform_notes.py -a increase -i -` -- `md_list_to_latex` - - Convert a Markdown list to LaTeX `\begin{itemize}` - - Typical Vim one-liner: `:%!transform_notes.py -a md_list_to_latex -i -` -- `md_*` family - - Formatting clean-ups (bold bullets, colorize bold text, etc.) - - Additional Information: See `-a list` for more details - -### Examples + - `toc` + - Generate a bullet TOC (top-level by default) + - Typical Vim one-liner: `:!transform_notes.py -a toc -i % -l 1` + - `format_headers` + - Re-flow / indent headers (up to `--max_lev`) + - Typical Vim one-liner: `:%!transform_notes.py -a format -i - --max_lev 3` + - `increase_headers_level` + - Bump all headers down one level + - Typical Vim one-liner: `:%!transform_notes.py -a increase -i -` + - `md_list_to_latex` + - Convert a Markdown list to LaTeX `\begin{itemize}` + - Typical Vim one-liner: `:%!transform_notes.py -a md_list_to_latex -i -` + - `md_*` family + - Formatting clean-ups (bold bullets, colorize bold text, etc.) 
+ - Additional Information: See `-a list` for more details - Re‑flow & clean a file in place + ```bash > transform_notes.py -a md_format -i notes/lecture.txt --in_place ``` - Generate a 2‑level TOC to STDOUT + ```bash > transform_notes.py -a toc -i notes/lecture.md -o - -l 2 ``` @@ -485,37 +470,22 @@ llm_transform.py -p list -i - -o - :'<,'>!transform_notes.py -i - -o - -a md_fix_chatgpt_output ``` -### Flags - -- `-a / --action` - - Default: Required - - Purpose: Choose the transformation -- `-l / --max_lev` - - Default: 5 - - Purpose: Header depth for `format_headers` -- `-i / --input` - - Default: `-` - - Purpose: File path or `-` (STDIN) -- `-o / --output` - - Default: `-` - - Purpose: File path or `-` (STDOUT) -- `--in_place` - - Default: False - - Purpose: Overwrite input file instead of writing elsewhere +### Interface ## `extract_headers_from_markdown.py` -### Goal +### What it does -Turn a Markdown document into either: -- a **plain list** of headers -- a **nested header map** -- a \*_Vim_ quick‑fix\*\* (`cfile`) that lets you jump between sections with - `:cnext`. +- Turn a Markdown document into either: + - A **plain list** of headers + - A **nested header map** + - A \*_Vim_ quick‑fix\*\* (`cfile`) that lets you jump between sections with + `:cnext`. ### Examples - Human‑readable map (levels 1‑3) to `stdout` + ```bash > extract_headers_from_markdown.py -i README.md -o - --mode list --max-level 3 ``` @@ -528,11 +498,12 @@ Turn a Markdown document into either: ## `dockerized_tikz_to_bitmap.py` -- Converts +- Converts ### Examples - Plain 300 DPI conversion + ```bash > dockerized_tikz_to_bitmap.py -i figure.tikz -o figure.png ``` @@ -547,24 +518,18 @@ Turn a Markdown document into either: ### What it does -Converts a Graphviz `.dot` file into a `.png` image using a Dockerized -container. 
- -> ```bash -> graphviz_wrapper.py --input input.dot --output output.png -> ``` - -This script serves as a thin wrapper around Dockerized Graphviz for consistent -rendering across systems. +- Converts a Graphviz `.dot` file into a `.png` image using a Dockerized + container. + > ```bash + > graphviz_wrapper.py --input input.dot --output output.png + > ``` -### Most used flags +- This script serves as a thin wrapper around Dockerized Graphviz for consistent + rendering across systems. -- `--input`: path to the `.dot` file -- `--output`: destination `.png` image file -- `--dockerized_force_rebuild`: rebuild the container from scratch -- `--dockerized_use_sudo`: use `sudo` for Docker commands +### Interface -### Quickstart recipes +### Examples - Convert DOT to PNG ``` @@ -592,7 +557,7 @@ rendering across systems. > latex_wrapper.py --input doc.tex --output doc.pdf ``` -### Quickstart recipes +### Examples - Compile `.tex` to `.pdf` ``` @@ -618,13 +583,15 @@ rendering across systems. - Renders Mermaid `.mmd` or `.md` diagrams into image files using a Dockerized container. +### Examples + +- TODO ```bash > mermaid_wrapper.py --input flowchart.mmd --output flowchart.png ``` - Automatically sets output to match input name if `--output` is omitted - - Mermaid diagram ``` > mermaid_wrapper.py -i diagram.mmd -o diagram.png @@ -646,17 +613,17 @@ rendering across systems. ### What it does -Converts documents using `pandoc` inside a Docker container. -Supports output to Beamer slides, PDFs, and more with custom CLI flags. +- Converts documents using `pandoc` inside a Docker container +- Supports output to Beamer slides, PDFs, and more with custom CLI flags. ```bash > pandoc_wrapper.py --input notes.md --output slides.pdf -- docker_args... ``` -Internally builds a Docker container and passes the full `pandoc` command -string. +- Internally builds a Docker container and passes the full `pandoc` command + string. 
-### Quickstart recipes +### Example - Convert Markdown to PDF ``` @@ -679,17 +646,17 @@ string. ### What it does -Formats text files (`.md`, `.txt`, `.tex`, etc.) using Prettier within a Docker -container. -Avoids environment-specific issues and ensures consistent formatting. +- Formats text files (`.md`, `.txt`, `.tex`, etc.) using Prettier within a Docker + container +- Avoids environment-specific issues and ensures consistent formatting. +- Supports full Prettier CLI flexibility via passthrough of additional options. -> ```bash -> dockerized_prettier.py --parser markdown --write test.md -> ``` + > ```bash + > dockerized_prettier.py --parser markdown --write test.md + > ``` -Supports full Prettier CLI flexibility via passthrough of additional options. -### Quickstart recipes +### Examples - Format a Markdown file ``` diff --git a/docs/work_tools/git/all.ai_review.how_to_guide.md b/docs/work_tools/git/all.ai_review.how_to_guide.md index 298e7da53..cff7bdca3 100644 --- a/docs/work_tools/git/all.ai_review.how_to_guide.md +++ b/docs/work_tools/git/all.ai_review.how_to_guide.md @@ -1,3 +1,17 @@ +<!-- toc --> + +- [Operations](#operations) +- [Use templates](#use-templates) +- [Tools](#tools) + * [llm_transform.py](#llm_transformpy) + * [`transform_notes.py`](#transform_notespy) +- [Some typical workflows](#some-typical-workflows) + * [An editing workflow](#an-editing-workflow) + * [A reviewer workflow](#a-reviewer-workflow) + * [How to change the logic in place while reviewing](#how-to-change-the-logic-in-place-while-reviewing) + +<!-- tocstop --> + # Operations - There are several operations we want to perform @@ -5,18 +19,19 @@ - E.g., create a unit test - Extract comments and lints in the form of a `cfile` - E.g., lint or AI review based on certain criteria - - Apply a set of transformations (e.g., styling / formatting code) to an entire - file + - Apply a set of transformations (e.g., styling / formatting code) to an + entire file - Apply modifications from a 
`cfile` (e.g., from linter and AI review) to a file - - Add TODOs from a `cfile` to Python or markdown files + - Add TODOs from a `cfile` to Python or markdown files - Rewrite an entire markdown to fix English mistakes without changing its structure - Reformat an entire markdown or Python using LLMs or code # Use templates -- We use templates for code and documentation to show and describe how a document - or code should look like, e.g., + +- We use templates for code and documentation to show and describe how a + document or code should look like, e.g., - `template_code.py` shows our coding style - `template_unit_test.py` shows how our unit tests look like - `template_doc.how_to_guide.md` shows how a Diataxis how to guide should be @@ -130,16 +145,16 @@ - There are 3 types of transforms and review tasks - `llm`: executed by an LLM since they are difficult to implement otherwise - E.g., "apply this style to a certain file" - - `linter_llm`: executed by an LLM for now to get something in place, even - if they should be moved to code / linter - `- E.g., mainly formatting tasks + - `linter_llm`: executed by an LLM for now to get something in place, even if + they should be moved to code / linter `- E.g., mainly formatting tasks - `linter`: executed by the Linter using code and regex ## A reviewer workflow - This workflow can be used by the author of the code or by a reviewer - The goal is to make these tools robust enough so that they can be used - directly by the author and potentially integrated in the `linter` flow itself + directly by the author and potentially integrated in the `linter` flow + itself - Initially, reviewers use these tools as part of initial dog-fooding of the flows @@ -149,11 +164,13 @@ > invoke git_branch_diff_with -t base --only-print-files ``` - Run `ai_review.py` on each file to generate a list of comments on the code - - This is equivalent to running a `review` target with `llm_transform.py` - (e.g., `llm_transform.py -p review_*`) but it is 
a separated flow for clarify + - This is equivalent to running a `review` target with `llm_transform.py` + (e.g., `llm_transform.py -p review_*`) but it is a separated flow for + clarify - This generates a `cfile` with a list of comments comments - Review the TODOs using cfile jumping around files + ```bash > vim -c "cfile cfile" ``` @@ -175,13 +192,11 @@ - A common problem is that we might want to adjust one of our tools (e.g., `linter.py`, `ai_review.py`) while reviewing somebody's else code -- The approach is to copy files from a different Git client in the one with - the code being tested using one of the scripts - +- The approach is to copy files from a different Git client in the one with the + code being tested using one of the scripts ``` > ai_review.py -i template_code.py ``` - ``` > llm_transform.py -i template_code.py -p code_fix_code ``` @@ -191,7 +206,7 @@ > PROMPT=review_correctness > PROMPT=review_linter > PROMPT=review_architecture - > + > > FILE=dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py > \cp -f /Users/saggese/src/helpers1/dev_scripts_helpers/llms/sync_ai_review.sh $HELPERS_ROOT_DIR/dev_scripts_helpers/llms && sync_ai_review.sh && ai_review.py -i $FILE -p $PROMPT diff --git a/helpers/haws.py b/helpers/haws.py index e25f11308..b72a55866 100644 --- a/helpers/haws.py +++ b/helpers/haws.py @@ -130,7 +130,8 @@ def update_task_definition( replication is enabled from the default region to the target region. :param task_definition_name: The name of the ECS task definition for - which an update to container image URL is made, e.g., `cmamp-test`. + which an update to container image URL is made, e.g., `cmamp- + test`. :param new_image_url: New image URL for task definition. e.g., `***.dkr.ecr.***/cmamp:prod`. :param region: AWS region, if None get region from AWS credentials. 
diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 8ca55ae29..fd01527bc 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -776,7 +776,9 @@ def selected_navigation_to_str( # ############################################################################# -def inject_todos_from_cfile(cfile_txt: str, todo_user: str, comment_prefix: str) -> None: +def inject_todos_from_cfile( + cfile_txt: str, todo_user: str, comment_prefix: str +) -> None: """ Inject the TODOs from a cfile in the corresponding files. @@ -820,9 +822,14 @@ def inject_todos_from_cfile(cfile_txt: str, todo_user: str, comment_prefix: str) # Extract the info for the file to process. txt, offset, last_line_modified = file_content[file_name] _LOG.debug(hprint.to_str("offset last_line_modified")) - hdbg.dassert_lt(last_line_modified, todo_line_number, - "The TODOs don't look like they are increasing line numbers: " - "TODO at line %d is before the last line modified %d", todo_line_number, last_line_modified) + hdbg.dassert_lt( + last_line_modified, + todo_line_number, + "The TODOs don't look like they are increasing line numbers: " + "TODO at line %d is before the last line modified %d", + todo_line_number, + last_line_modified, + ) # We subtract 1 from the line number since TODOs count from 1, while # Python arrays count from 0. act_line_number = todo_line_number - 1 + offset diff --git a/helpers/hparser.py b/helpers/hparser.py index 2b4ad9182..4e75c8ae7 100644 --- a/helpers/hparser.py +++ b/helpers/hparser.py @@ -371,7 +371,9 @@ def parse_input_output_args( return in_file_name, out_file_name -def init_logger_for_input_output_transform(args: argparse.Namespace, *, verbose: bool = True) -> None: +def init_logger_for_input_output_transform( + args: argparse.Namespace, *, verbose: bool = True +) -> None: """ Initialize the logger when input/output transformation is used. 
diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 81fa4fa88..2a46774e5 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1487,40 +1487,6 @@ def test3(self) -> None: class Test_inject_todos_from_cfile1(hunitest.TestCase): - def _create_test_file(self, filename: str, content: str) -> str: - """ - Create a test file with given content in the scratch directory. - - :param scratch_dir: Directory to create file in - :param filename: Name of file to create - :param content: Content to write to file - :return: Full path to created file - """ - scratch_dir = self.get_scratch_space() - file_path = os.path.join(scratch_dir, filename) - content = hprint.dedent(content) - hio.to_file(file_path, content) - return file_path - - def _create_cfile(self, cfile_content: List[str]) -> str: - """ - Create a cfile with TODOs in the scratch directory. - - :param scratch_dir: Directory to create file in - :param cfile_content: List of TODO lines to write - :return: Full path to created cfile - """ - content = "\n".join(cfile_content) - return self._create_test_file("cfile.txt", content) - - def _inject_todos(self, cfile_content: str) -> None: - """ - Helper to inject TODOs with standard parameters. - """ - todo_user ="user" - comment_prefix ="#" - hmarkdo.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) - def test1(self) -> None: """ Test injecting TODOs from a cfile into a Python file. @@ -1529,7 +1495,7 @@ def test1(self) -> None: test_file_content = """ def hello(msg): print(msg) - + def world(): print("world") """ @@ -1548,7 +1514,8 @@ def world(): # TODO(user): Add type hints. def hello(msg): print(msg) - + + # TODO(user): Add docstring. def world(): print("world") @@ -1565,9 +1532,7 @@ def test_one_line_file(self) -> None: """ file_path = self._create_test_file("empty.py", test_file_content) # Create cfile with TODOs - cfile_content = [ - f"{file_path}:1: Add content to empty file." 
- ] + cfile_content = [f"{file_path}:1: Add content to empty file."] self._create_cfile(cfile_content) # Run the function under test self._inject_todos("\n".join(cfile_content)) @@ -1604,11 +1569,14 @@ def test_invalid_line_numbers(self) -> None: 998 < 2 ################################################################################ """ - self.assert_equal(str(err.exception), expected, dedent=True, fuzzy_match=True) + self.assert_equal( + str(err.exception), expected, dedent=True, fuzzy_match=True + ) def test2(self) -> None: """ - Test injecting TODOs from a cfile into a Python file with a complex class. + Test injecting TODOs from a cfile into a Python file with a complex + class. """ # Create a test file. test_file_content = """ @@ -1619,17 +1587,17 @@ class DataProcessor: def __init__(self): self.logger = logging.getLogger(__name__) self.data = [] - + def process_batch(self, items): for item in items: self.data.append(self._transform(item)) - + def _transform(self, item): return item.upper() - + def get_results(self): return self.data - + def clear(self): self.data = [] """ @@ -1642,7 +1610,7 @@ def clear(self): f"{file_path}:10: Consider adding batch size validation", f"{file_path}:13: Add error handling for non-string inputs", f"{file_path}:16: Add return type hint and docstring", - f"{file_path}:19: Add docstring explaining clear behavior" + f"{file_path}:19: Add docstring explaining clear behavior", ] self._create_cfile(cfile_content) # Run function under test. 
@@ -1659,21 +1627,21 @@ class DataProcessor: def __init__(self): self.logger = logging.getLogger(__name__) self.data = [] - + # TODO(user): Add type hints for items parameter def process_batch(self, items): # TODO(user): Consider adding batch size validation for item in items: self.data.append(self._transform(item)) - + # TODO(user): Add error handling for non-string inputs def _transform(self, item): return item.upper() - + # TODO(user): Add return type hint and docstring def get_results(self): return self.data - + # TODO(user): Add docstring explaining clear behavior def clear(self): self.data = [] @@ -1723,6 +1691,40 @@ def bar(): """ self.assert_equal(actual2, expected2, dedent=True) + def _create_test_file(self, filename: str, content: str) -> str: + """ + Create a test file with given content in the scratch directory. + + :param scratch_dir: Directory to create file in + :param filename: Name of file to create + :param content: Content to write to file + :return: Full path to created file + """ + scratch_dir = self.get_scratch_space() + file_path = os.path.join(scratch_dir, filename) + content = hprint.dedent(content) + hio.to_file(file_path, content) + return file_path + + def _create_cfile(self, cfile_content: List[str]) -> str: + """ + Create a cfile with TODOs in the scratch directory. + + :param scratch_dir: Directory to create file in + :param cfile_content: List of TODO lines to write + :return: Full path to created cfile + """ + content = "\n".join(cfile_content) + return self._create_test_file("cfile.txt", content) + + def _inject_todos(self, cfile_content: str) -> None: + """ + Helper to inject TODOs with standard parameters. 
+ """ + todo_user = "user" + comment_prefix = "#" + hmarkdo.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) + # ############################################################################# # Test_colorize_bold_text1 diff --git a/linters/amp_lint_md.py b/linters/amp_lint_md.py index 43d3b642f..45594ba66 100644 --- a/linters/amp_lint_md.py +++ b/linters/amp_lint_md.py @@ -63,7 +63,7 @@ def _execute(self, file_name: str, pedantic: int) -> List[str]: cmd = [] cmd.append(self._executable) cmd.append(f"-i {file_name}") - cmd.append("--in_place") + cmd.append(f"-o {file_name}") cmd_as_str = " ".join(cmd) _, output = liutils.tee(cmd_as_str, self._executable, abort_on_error=True) # Check file name. From 3d7a7722b719110926982a17c896b704ae83f59d Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 29 May 2025 16:17:27 -0400 Subject: [PATCH 142/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 33 ++ ...inter_style_review_guidelines.reference.md | 10 +- ...l.llm_style_review_guidelines.reference.md | 310 +++++++++++++----- 3 files changed, 270 insertions(+), 83 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 9fd1d6042..19073e63d 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -257,6 +257,7 @@ def code_fix_complex_assignments() -> _PROMPT_OUT: is_first_or_last = True else: is_first_or_last = False + ``` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -638,6 +639,38 @@ def latex_rewrite() -> _PROMPT_OUT: """ +def md_add_good_bad_examples() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + You will: + - Maintain the structure of the text and keep the content of the existing + text + - Add bullet points with good examples according to the text 
prepended with + `Good:` + - Add bullet points with good examples according to the text prepended with + `Bad:` + + - For instance for the input: + ``` + - The docstring must use imperative form, whenever possible + ``` + the output is: + ``` + - The docstring must use imperative form, whenever possible + - Good: "Calculate the sum of two numbers and return the result." + - Bad: "Calculates the sum of two numbers and returns the result." + ``` + + Print only the markdown without any explanation. + """ + pre_transforms: Set[str] = set() + post_transforms = { + "remove_empty_lines", + } + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + def md_rewrite() -> _PROMPT_OUT: system = _MD_CONTEXT system += r""" diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md index bfda1782b..7503d5b64 100644 --- a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md @@ -20,7 +20,7 @@ <!-- tocstop --> -# Guidelines for automated PR reviews +# Guidelines for PR reviews ## Python code @@ -90,10 +90,6 @@ ### Imports - All imports should be located at the top of the file -- Do not use `import *` -- Do not use `from ... 
import ...`, unless it is the `typing` package, e.g., - `from typing import Iterable, List` -- Always import with a full path from the root of the repo / submodule - Each module that can be imported should have a docstring at the very beginning describing how it should be imported - Linter adds it automatically @@ -236,6 +232,10 @@ --skip_action cleanup_after ``` - Commands should be prepended by `docker>` if they need to be run inside Docker +- Avoid using screenshots whenever possible and instead copy-and-paste text with + the right highlighting + - E.g., instead of a screenshot of a terminal command, provide the command + text: `> ls -la` ## Spelling diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md index a6f85195d..a3280f8f9 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.llm_style_review_guidelines.reference.md @@ -18,94 +18,152 @@ <!-- tocstop --> -# Guidelines for automated PR reviews +# Guidelines for PR reviews ## Python code ### Naming - Name functions using verbs and verbs/actions - - E.g., `download_data()`, `process_input()`, `calculate_sum()` - - Python internal functions as `__repr__`, `__init__` are valid - - Functions names like `to_dict()`, `_parse()`, `_main()` are valid + - Good: `download_data()`, `process_input()`, `calculate_sum()` + - Good: Python internal functions as `__repr__`, `__init__` are valid + - Good: Functions names like `to_dict()`, `_parse()`, `_main()` are valid - Name classes using nouns - - E.g., `Downloader()`, `DataProcessor()`, `User()` + - Good: `Downloader()`, `DataProcessor()`, `User()` + - Bad: `DownloadStuff()`, `ProcessData()`, `UserActions()` - Name decorators with an adjective or a past tense verb - - E.g., `timed`, `cached`, `logged` + - Good: `timed`, `cached`, `logged` + - Bad: `time`, `cache`, `log` - Variable and function names should not reference 
implementation details, and things that can change or details that are not important - E.g., the name of a variable should not include its type - - E.g., use `embeddings` instead of `embeddings_list` - - E.g., use `data` instead of `data_dict` -- Abbreviations in the names of variables and functions should be avoided, - except for the following - - `df` for dataframe - - `srs` for series - - `idx` for index - - `id` for identifier - - `val` for value - - `var` for variable - - `args` for arguments and `kwargs` for keyword arguments - - `col` for columns and `row` for rows + - Good: `embeddings` + - Bad: `embeddings_list` + - Good: `data` + - Bad: `data_dict` +- Abbreviations in the names of variables and functions should be avoided + - Exceptions are the following + - `df` for dataframe + - `srs` for series + - `idx` for index + - `id` for identifier + - `val` for value + - `var` for variable + - `args` for arguments and `kwargs` for keyword arguments + - `col` for columns and `row` for rows - Do not repeat in a function name what is already included in the library name - (avoid "code stutter") + avoiding "code stutter" - E.g., if using a library named `math`, avoid naming a function `math_calculate()` + - Good: `calculate()` + - Bad: `math_calculate()` ### Docstrings - All functions and methods must have a docstring -- The docstring should describe the goal of the function, the interface and what + - Good: + ``` + def add(a, b): + """ + Add two numbers and return the result + """ + return a + b + ``` + - Bad: + ``` + def add(a, b): + return a + b + ``` +- The docstring must describe the goal of the function, the interface and what the user needs to know to use the function - - E.g., "This function calculates the sum of two numbers and returns the - result." -- The text should not describe implementation details that can be changed + - Good: "Calculate the sum of two numbers and return the result." 
+ - Good + ``` + def get_repository_settings( + repo: github.Repository.Repository, + ) -> Dict[str, Any]: + """ + Get the current settings of the repository. + + :param repo: GitHub repository object + :return: dictionary containing repository settings + """ + ``` +- The docstring must use imperative form, whenever possible + - Good: "Calculate the sum of two numbers and return the result." + - Bad: "Calculates the sum of two numbers and returns the result." +- The docstring should not describe implementation details that can be changed + - Good: "Sort the list of integers in ascending order." + - Bad: "Use the quicksort algorithm to sort the list of integers in ascending + order." - Follow this example for indentation of parameter descriptions: - ```python - :param param1: a very very long param description that - continues into a second line - :param param2: a param with two possible values - - first value description - - second value description that is very long and + - Good + ```python + :param param1: a very very long param description that continues into a second line - ``` + :param param2: a param with two possible values + - first value description + - second value description that is very long and + continues into a second line + ``` - Adding examples (e.g., of input and output) to the docstring is encouraged - - E.g., + - Good ``` # Example usage: result = add_numbers(3, 5) - # result is 8 + # The result is 8. ``` - References to variables, file paths, functions, classes, etc. should be wrapped in backticks - - E.g., "The `add_numbers` function takes two arguments." + - Good: "The `add_numbers()` function takes two arguments `a` and `b`." + - Bad: "The add_numbers() function takes two arguments a and b." 
- Multi-line representations of data structures (e.g., an output example) should be wrapped in triple backticks - - E.g., + - Good ``` { "name": "John", "age": 30, "city": "New York" } ``` ### Comments -- Add a comment for every logically distinct chunk of code +- Add a comment for every logically distinct chunk of code, spanning 4-5 lines - Use comments to separate chunks of code instead of blank lines + - Good: + ``` + function1() + # Then do something else. + function2() + - Bad: + ``` + function1() + + function2() + ``` - Do not use inline comments; every comment should be on its own separate line, before the line it refers to + - Good: + ``` + # Grant access to admin panel access_admin_panel(). + if user.is_admin(): + ``` + - Bad: + ``` + if user.is_admin(): # Check if the user is an admin access_admin_panel(). + ``` - In `if-elif-else` statements, the comments are placed underneath each statement in order to explain the code that belongs to each statement in particular - ```python + Good: + ``` if ...: - # Do this. + # Do this else: - # Do that. + # Do that ``` -- Avoid mentioning concrete names of variables, functions, classes, files, etc. - in the comments - - If it is unavoidable, wrap their names in backticks - Avoid referring to the type of a variable in the comments - Keeps comments focused on functionality rather than implementation specifics + - Good: "Store the user's age for validation." + - Bad: "Store the user's age as an integer for validation." 
- Do not include implementation details in comments - Describe "what" and "why" the code does something and not "how" the code does it @@ -121,26 +179,92 @@ - Encode the assumptions made in the code using assertions and report as much information as possible in an assertion to make it easy to debug the output - - E.g., `hdbg.dassert_lt(start_date, end_date)` + - Good: + ``` + hdbg.dassert_lt(start_date, end_date, + msg="start_date needs to be before end_date") + ``` - Ensure that assertions provide detailed information for debugging - Use assertions to validate input parameters and preconditions - Do not use f-strings in `hdbg.dassert()`, but use traditional string formatting methods in assertions - - E.g., + - Good: `hdbg.dassert_eq(len(list1), len(list2), "Lists must be of equal length: %d vs %d" % (len(list1), len(list2)))` -- Use f-strings in exceptions - - E.g., `raise ValueError(f"Invalid server_name='{server_name}'")` - - Provide clear and informative error messages using f-strings - - E.g., `raise TypeError(f"Expected type int, but got {type(var).__name__}")` + +- Add type hints only to the function definitions, if they are missing. + - Good: + ``` + def process_data(data, threshold=0.5): + results = [] + for item in data: + if item > threshold: + results.append(item) + return results + ``` + - Bad: + ``` + def process_data(data: List[float], threshold: float = 0.5) -> List[float]: + results: List[float] = [] + for item in data: + if item > threshold: + results.append(item) + return results + ``` + +- Avoid complex assignments into if-then-else statements. 
+ - Good: + ``` + capitalized_parts = [] + for w in parts: + if is_first_or_last or w.lower() not in small_words: + w_out = w.capitalize() + else: + w_out = w.lower() + capitalized_parts.append(w_out) + ``` + - Bad: + ``` + capitalized_parts = [ + w.capitalize() if is_first_or_last or w.lower() not in small_words else w.lower() + for w in parts + ] + ``` + to: + + - Good: + ``` + if i == 0: + is_first_or_last = True + elif i == len(tokens) - 1: + is_first_or_last = True + elif i > 0 and not re.search(r'\w', tokens[i - 1]): + is_first_or_last = True + elif i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]): + is_first_or_last = True + else: + is_first_or_last = False + ``` + - Bad: + ``` + is_first_or_last = (i == 0 or i == len(tokens) - 1 or + (i > 0 and not re.search(r'\w', tokens[i - 1])) or + (i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]))) + ``` + +- Provide clear and informative error messages in exceptions using f-strings + - Good: `raise ValueError(f"Invalid server_name='{server_name}'")` + - Good: `raise TypeError(f"Expected type int, but got {type(var).__name__}")` - Use complete `if-elif-else` statements instead of a sequence of `if` statements - Ensure logical flow and clarity in conditional statements - - E.g., + - Good: ```python if condition1: # Execute block for condition1. + ... elif condition2: # Execute block for condition2. + ... else: # Execute block if none of the above conditions are met or raise an # exception. @@ -155,7 +279,21 @@ # Do something. ``` - Use `if var is None` to check if `var` is `None` instead of `if not var` + - Good: `if my_variable is None:` + - Bad: `if not my_variable:` - Use `isinstance()` instead of `type()` to check the type of an object + - Good: `if isinstance(obj, str):` + - Bad: `if type(obj) == str:` +- Do not use `import *` + - Good: `from math import sqrt, pi` + - Bad: `from math import *` +- Do not use `from ... 
import ...`, unless it is the `typing` package, e.g., + `from typing import Iterable, List` + - Good: `from typing import Dict, Tuple` + - Bad: `from os import path` +- Always import with a full path from the root of the repo / submodule + - Good: `import myproject.module.submodule` + - Bad: `from submodule import my_function` ### Code design @@ -170,11 +308,11 @@ - Common functions, used by all other functions - E.g., utility functions like `log_message()`, `validate_input()` - Read data - - E.g., `read_csv()`, `load_json()` + - Good: `read_csv()`, `load_json()` - Process data - - E.g., `clean_data()`, `transform_data()` + - Good: `clean_data()`, `transform_data()` - Save data - - E.g., `write_csv()`, `export_json()` + - Good: `write_csv()`, `export_json()` - Ensure that function names are descriptive and convey their purpose - Use comments to explain complex logic or calculations - Implement error handling to manage exceptions and edge cases @@ -202,17 +340,18 @@ - E.g., if a function `f()` accepts a dataframe `df` as its argument, then `f()` will not modify `df` but make a copy and work on it - This ensures that the original data remains unchanged and can be reused -- The preferred order of parameters in a function declaration is: +- To maintain clarity and consistency in function definitions, use the following + order of parameters in a function declaration: - Input parameters - Output parameters - In-out parameters - Default parameters - - This order helps in maintaining clarity and consistency in function - definitions - Default parameters should be used sparingly and only for parameters that 99% of the time are constant - All the default parameters should be keyword-only - They should be separated from the other parameters by `*` + - Good: `def example_function(param1: str, *, default_param1: int = 10)` + - Bad: `def example_function(param1: str, default_param1 : int =10)` - This ensures that default parameters are always explicitly specified by name, 
improving readability - Do not use mutable objects (such as lists, maps, objects) as default value for @@ -220,6 +359,7 @@ inside the function - E.g., instead of using a list as a default parameter, use `None` and initialize the list inside the function: + - Good: ``` def add_item(item: str, *, items: Optional[List[str]]) -> List[str]: if items is None: @@ -228,57 +368,75 @@ return items ``` -- Use a default value of `None` when a function needs to be wrapped and the - default parameter needs to be propagated - Do not use a boolean parameter as a switch controlling some function behavior; instead, use a string parameter `mode`, which is allowed to take a small well-defined set of values - - E.g., `def process_data(mode='fast'):` where `mode` can be `'fast'`, + - Good: `def process_data(mode: str = 'fast'):` where `mode` can be `'fast'`, `'slow'`, etc - For functions dealing with dataframes, avoid hard-wired column name dependencies; instead, allow the caller to pass the column name to the function as a parameter - E.g., `def calculate_average(df: pd.DataFrame, column_name: str):` -- Do not put computations of the output together in a `return` statement - - Bad - ``` - return compute_value() - ``` - - Instead, compute the output first, assign it to a variable, and then return - this variable +- Do not put computations of the output together in a `return` statement, + instead, compute the output first, assign it to a variable, and then return + this variable - Good ``` result = compute_value() return result ``` + - Bad + ``` + return compute_value() + ``` - A function should have a single exit point, i.e., one single line with `return` + - Good: + ```python + def calculate_total(price, tax): + total = price + (price * tax) + return total + ``` + - Bad: + ```python + def calculate_total(price, tax): + if price > 0: + return price + (price * tax) + else: + return 0 + ``` - A function should ideally return objects of only one type (or `None`) - When calling a function, 
assign all the input parameter values to variables on separate lines and then pass these variables to the function - - E.g., + - Good: ``` - param1 = value1 - param2 = value2 + param1 = 10 + param2 = 11 result = my_function(param1, param2) ``` + - Bad: + ``` + result = my_function(10, 11) + ``` - Explicitly bind default parameters, i.e., specify the parameter name when calling a function, and do not bind non-default parameters - - E.g., call `func()` like `func(param1, param2, param3=param3)` if `param3` - is the only parameter with a default value + - Good: `func(10, 20, param3=30)` + - Bad: `func(10, 20, 30)` ### Logging - Use logging `_LOG.debug()` and not `print()` for tracing execution + - Good: `_LOG.debug("value=%s", value)` + - Bad: `print("value=%s", value)` - Use positional args in logging and not inline formatting - - E.g., The code should do `_LOG.debug("cmd=%s", cmd1)` and not - `_LOG.debug(f"cmd={cmd1}")` + - Good: `_LOG.debug("cmd=%s", cmd1)` + - Bad: `_LOG.debug(f"cmd={cmd1}")` - Use the following idiom to configure logging: - ```python import helpers.hdbg as hdbg _LOG = logging.getLogger(__name__) + ... hdbg.init_logger(verbosity=logging.DEBUG) ``` @@ -411,13 +569,9 @@ - Boldface and italics should be used sparingly - The use of bullet point lists is encouraged - For the items, `-` should be used instead of `*` or circles -- Avoid using screenshots whenever possible and instead copy-and-paste text with - the right highlighting - - E.g., instead of a screenshot of a terminal command, provide the command - text: `> ls -la` - Use active voice most of the time and use passive voice sparingly - - E.g., "The user updates the file" instead of "The file is updated by the - user" + - Good: "The user updates the file." + - Bad: "The file is updated by the user." 
- Be efficient - Do not explain things in a repetitive way - Rewrite long-winded AI-generated texts in a concise way From ff72872cfd7b5f08bdbf01f4fdc63ef7cb8be388 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 30 May 2025 14:10:13 -0400 Subject: [PATCH 143/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/inject_todos.py | 9 ++- dev_scripts_helpers/llms/llm_prompts.py | 87 +++++++++++++++++++++--- 2 files changed, 83 insertions(+), 13 deletions(-) diff --git a/dev_scripts_helpers/llms/inject_todos.py b/dev_scripts_helpers/llms/inject_todos.py index 4b8e0ec4c..fcfcd5af7 100755 --- a/dev_scripts_helpers/llms/inject_todos.py +++ b/dev_scripts_helpers/llms/inject_todos.py @@ -25,12 +25,15 @@ def _parse() -> argparse.ArgumentParser: parser.add_argument( "--cfile", type=str, - required=True, + required=False, default="cfile", help="File containing the TODOs to inject", ) parser.add_argument( - "--todo_target", action="store_true", help="User name to use in the TODOs" + "--todo_target", + action="store", + required=True, + help="User name to use in the TODOs", ) hparser.add_verbosity_arg(parser) return parser @@ -42,7 +45,7 @@ def _main(parser: argparse.ArgumentParser) -> None: # Read the cfile. cfile_txt = hio.from_file(args.cfile) # Inject the TODOs. 
- todo_txt = hmarkdo.inject_todos_from_cfile( + hmarkdo.inject_todos_from_cfile( cfile_txt, args.todo_target, comment_prefix="#" ) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 19073e63d..ff990c6ff 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -881,20 +881,87 @@ def _review_from_file(file: str) -> _PROMPT_OUT: reference_txt = hio.from_file(file) reference_txt = hmarkdo.add_line_numbers(reference_txt) # TODO(gp): Remove table of contents between <!-- toc --> and <!-- tocstop -->. + # system += rf""" + # You will review the code and make sure it follows the rules described in the + # markdown below: + + # - Each rule starts with a level 1 bullet point and is followed by more bullet + # points that describe the rule, together with examples of good and bad + # code. For instance, the rule: + # ``` + # 1: - Rule1 description + # 2: - Good: `x` + # 3: - Bad: `x_0` + # 4: - Rule4 description + # 5: - Good: `x` + # 6: - Bad: `x_0` + # ``` + # - The rules are described in the markdown below: + # {reference_txt} + + # - You will refer to the rule as <rule_name> and represented as + # <header-line_number> with the name of the header of the section in the + # reference file (e.g., 'Naming') and the line number (e.g., "Naming-7") + # - Only print the violation of the rules when you are sure that it is a + # violation. If you are not sure, do not print anything. + # - For each violation of a rule, you will print the line number of the code + # and the proposed improvement in the following style: + # <line_number>: <rule_name>: <short description of the proposed improvement> + # - Do not print any other comment, besides the violation of the rules + # """ + system += rf""" - You will review the code and make sure it follows the rules in the reference below: + You will **analyze the code** and report only violations of the coding rules described below. 
+ + #### Rule Format + The rules are written in markdown and follow this format: + + - Each top-level bullet point (`-`) is a **rule header** (e.g., a new requirement). + - Each rule contains **examples of good and bad code** using: + - `- Good:` followed by inline or code block examples + - `- Bad:` followed by inline or code block examples + + Example: + - All functions must have a docstring + - Good: + ```python + def foo(): + pass + ``` + - Bad: + ```python + def foo(): + pass + ``` + + #### List of rules {reference_txt} - - Each rule to follow is referred by <rule_name> and represented as - <header-line_number> with the name of the header of the section in the - reference file (e..g, 'Naming') and the line number (e.g., "Naming-7") - - Only print the violation of the rules when you are absolutely sure that - it is a violation - - For each violation of a rule, you will print the line number of the code - and the proposed improvement in the following style: - <line_number>: <rule_name>: <short description of the proposed improvement> - - Do not print any other comment, besides the violation of the rules + #### Rule References + - You will reference each rule as <section-name>-<line-number>, where: + - <section-name> is the header or category the rule belongs to + - <line-number> is the line number of the rule header in the markdown + + #### Your Task + - Review the input code and identify only clear violations of the rules. + - If uncertain whether something is a violation, do not report it. 
+ + #### Output Format + + For each clear violation, output a single line in this format: + + <code_line_number>: <section-name>-<rule_line_number>: <brief description of suggested fix> + + Examples: + + 14: Docstrings-3: Missing docstring for function `add` + 27: Docstrings-17: Docstring does not describe function interface or parameters + + #### Do Not + - Do not print explanations or summaries + - Do not mention rules that are followed correctly + - Do not modify the input code """ pre_transforms = {"add_line_numbers"} post_transforms = {"convert_to_vim_cfile"} From 782068b75f5eb2945b41a6e4f61f5f6643e0926c Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 30 May 2025 15:29:09 -0400 Subject: [PATCH 144/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/system_tools/path | 70 ++++++++++++++++++++++++--- helpers/lib_tasks_lint.py | 62 ++++++++++++++++++++++++ tasks.py | 1 + 3 files changed, 127 insertions(+), 6 deletions(-) diff --git a/dev_scripts_helpers/system_tools/path b/dev_scripts_helpers/system_tools/path index 2c644665d..32fd6cd53 100755 --- a/dev_scripts_helpers/system_tools/path +++ b/dev_scripts_helpers/system_tools/path @@ -1,12 +1,70 @@ #!/usr/bin/env python +""" +Find a path in a directory and report the absolute path. + +Usage: + path.py <path> [<dir_name>] + +If <dir_name> is provided, the path is searched in the directory. + +Example: +# Look for a file in the current directory. +> path.py /Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py + +# +> path.py system_tools/path.py /Users/saggese/src/helpers2/dev_scripts_helpers/system_tools +""" + +import glob import os import sys +from typing import Optional + +# Abort on error if the path doesn't exist, or try to find the file with the same +# basename. 
+abort_on_error = False + + +def find_file(filename: str, *, search_path: str = ".") -> Optional[str]: + # Recursive glob. + search_path = os.path.join(search_path, "**", filename) + files = glob.glob(search_path, recursive=True) + if len(files) == 1: + return files[0] + elif len(files) > 1: + msg = "Found multiple files with basename '%s' in directory '%s':\n%s" % ( + filename, search_path, "\n".join(files)) + raise RuntimeError(msg) + else: + return None -path = sys.argv[1] -path_out = os.path.abspath(path) -print(path_out, end="") -if not os.path.exists(path_out): - msg = "path '%s' converted to '%s' doesn't exist" % (path, path_out) - raise RuntimeError(msg) +if __name__ == "__main__": + assert len(sys.argv) in (2, 3), "Usage: path.py <path> [<dir_name>]" + path = sys.argv[1] + if len(sys.argv) == 3: + dir_name = sys.argv[2] + else: + dir_name = "." + # Make the path absolute. + path_out = os.path.abspath(path) + # If the path exists, print it and exit. + if os.path.exists(path_out): + print(path_out, end="") + sys.exit(0) + # If the path doesn't exist, abort. + if abort_on_error: + msg = "path '%s' doesn't exist in '%s'" % (path, dir_name) + raise RuntimeError(msg) + # Look for a file with the same basename in ``dir_name``. + dir_name = os.path.abspath(dir_name) + basename = os.path.basename(path) + path_out = find_file(basename, search_path=dir_name) + # If the file doesn't exist, abort. + if path_out is None: + msg = "path '%s' doesn't exist in '%s'" % (path, dir_name) + raise RuntimeError(msg) + # Print the path and exit. 
+ print(path_out, end="") + sys.exit(0) \ No newline at end of file diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index 153700b9e..583c3b69f 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -346,3 +346,65 @@ def _get_lint_docker_cmd( use_entrypoint=use_entrypoint, ) return cmd + + +@task +def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): # type: ignore + """ + Sync the code needed to run linter and ai_review.py from a client to the current one. + + :param git_client_name: the name of the git client to sync from. It can be + something like "helpers1" and it will be used from "$HOME/src" or can + be a full path. + :param revert_to_original: if `True`, revert the changes to the original + """ + _ = ctx + hlitauti.report_task() + # + files_to_copy = [ + #"hgit.py", + #"hmarkdown.py", + "llm_prompts.py", + "llm_transform.py", + "inject_todos.py", + "all.linter_style_review_guidelines.reference.md", + "all.llm_style_review_guidelines.reference.md", + ] + if revert_to_original: + _LOG.debug("Reverting to original code ...") + for file_name in files_to_copy: + _LOG.debug("Reverting %s to original code", file_name) + src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) + cmd = "git checkout -- %s" % src_file_path + hsystem.system(cmd) + _LOG.info("Done") + return + # + if not os.path.isabs(git_client_name): + src_git_dir = os.path.join(os.environ["HOME"], "src", git_client_name) + else: + src_git_dir = git_client_name + hdbg.dassert_dir_exists(git_client_name) + # Get the path to the helpers repo. + src_helpers_dir = hgit.find_file("helpers_root", dir_path=src_git_dir) + hdbg.dassert_dir_exists(src_helpers_dir) + dst_helpers_dir = hgit.find_helpers_root() + hdbg.dassert_dir_exists(dst_helpers_dir) + _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir")) + # + _LOG.info("Copying code from %s to %s ...", src_helpers_dir, dst_helpers_dir) + # Find the files to copy. 
+ for file_name in files_to_copy: + _LOG.debug(hprint.to_str("file_name")) + src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) + src_file_path = os.path.abspath(os.path.join(src_git_dir, src_file_path)) + hdbg.dassert_file_exists(src_file_path) + # + rel_path = os.path.relpath(src_file_path, src_helpers_dir) + dst_file_path = os.path.join(dst_helpers_dir, rel_path) + hdbg.dassert_file_exists(src_file_path) + # + _LOG.debug(hprint.to_str("src_file_path -> dst_file_path")) + cmd = f"cp -f {src_file_path} {dst_file_path}" + hsystem.system(cmd) + _LOG.info("Done") \ No newline at end of file diff --git a/tasks.py b/tasks.py index f2206f928..e74dfbab9 100644 --- a/tasks.py +++ b/tasks.py @@ -87,6 +87,7 @@ lint_check_python_files_in_docker, lint_create_branch, lint_detect_cycles, + lint_sync_code, print_env, print_setup, print_tasks, From dc0382ab1d535ddd1d9b51cbcee3aa8c9db9ce5e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 30 May 2025 20:01:31 -0400 Subject: [PATCH 145/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/system_tools/path | 50 +++++----- helpers/hdbg.py | 15 +++ helpers/hgit.py | 126 ++++++++++++++++++++++---- helpers/hsystem.py | 50 ++++++++++ helpers/lib_tasks_bash.py | 6 +- helpers/lib_tasks_git.py | 23 +++++ helpers/lib_tasks_lint.py | 32 ++++--- tasks.py | 1 + 8 files changed, 245 insertions(+), 58 deletions(-) diff --git a/dev_scripts_helpers/system_tools/path b/dev_scripts_helpers/system_tools/path index 32fd6cd53..a15869a5e 100755 --- a/dev_scripts_helpers/system_tools/path +++ b/dev_scripts_helpers/system_tools/path @@ -12,7 +12,7 @@ Example: # Look for a file in the current directory. > path.py /Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py -# +# Look for a file in a different directory. 
> path.py system_tools/path.py /Users/saggese/src/helpers2/dev_scripts_helpers/system_tools """ @@ -21,12 +21,12 @@ import os import sys from typing import Optional -# Abort on error if the path doesn't exist, or try to find the file with the same -# basename. -abort_on_error = False - -def find_file(filename: str, *, search_path: str = ".") -> Optional[str]: +# Copied from `hsystem.py`. +def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: + """ + Find a file in a directory and report the absolute path. + """ # Recursive glob. search_path = os.path.join(search_path, "**", filename) files = glob.glob(search_path, recursive=True) @@ -39,20 +39,16 @@ def find_file(filename: str, *, search_path: str = ".") -> Optional[str]: else: return None - -if __name__ == "__main__": - assert len(sys.argv) in (2, 3), "Usage: path.py <path> [<dir_name>]" - path = sys.argv[1] - if len(sys.argv) == 3: - dir_name = sys.argv[2] - else: - dir_name = "." + +def _find_path(path: str, *, dir_name: str = ".", abort_on_error: bool = False) -> str: + """ + Find a path in a directory and report its absolute path. + """ # Make the path absolute. path_out = os.path.abspath(path) - # If the path exists, print it and exit. + # If the path exists, return it. if os.path.exists(path_out): - print(path_out, end="") - sys.exit(0) + return path_out # If the path doesn't exist, abort. if abort_on_error: msg = "path '%s' doesn't exist in '%s'" % (path, dir_name) @@ -60,11 +56,23 @@ if __name__ == "__main__": # Look for a file with the same basename in ``dir_name``. dir_name = os.path.abspath(dir_name) basename = os.path.basename(path) - path_out = find_file(basename, search_path=dir_name) + path_out = _find_file(basename, search_path=dir_name) # If the file doesn't exist, abort. if path_out is None: msg = "path '%s' doesn't exist in '%s'" % (path, dir_name) raise RuntimeError(msg) - # Print the path and exit. 
- print(path_out, end="") - sys.exit(0) \ No newline at end of file + return path_out + + +if __name__ == "__main__": + assert len(sys.argv) in (2, 3), "Usage: path.py <path> [<dir_name>]" + # Abort on error if the path doesn't exist, or try to find the file with the + # same basename. + abort_on_error = False + path = sys.argv[1] + if len(sys.argv) == 3: + dir_name = sys.argv[2] + else: + dir_name = "." + path_out = _find_path(path, dir_name=dir_name, abort_on_error=abort_on_error) + print(path_out, end="") \ No newline at end of file diff --git a/helpers/hdbg.py b/helpers/hdbg.py index 15d10e2e4..734b63652 100644 --- a/helpers/hdbg.py +++ b/helpers/hdbg.py @@ -763,6 +763,7 @@ def dassert_path_not_exists( only_warning: bool = False, ) -> None: dassert_isinstance(path, str) + dassert_ne(path, "") path = os.path.abspath(path) if os.path.exists(path): txt = f"Path '{path}' already exist!" @@ -779,6 +780,7 @@ def dassert_file_exists( Assert unless `file_name` exists and it's a file and not a directory. """ dassert_isinstance(file_name, str) + dassert_ne(file_name, "") file_name = os.path.abspath(file_name) # `file_name` exists. exists = os.path.exists(file_name) @@ -802,6 +804,7 @@ def dassert_dir_exists( Assert unless `dir_name` exists and it's a directory. """ dassert_isinstance(dir_name, str) + dassert_ne(dir_name, "") dir_name = os.path.abspath(dir_name) # `dir_name` exists. exists = os.path.exists(dir_name) @@ -845,6 +848,18 @@ def dassert_file_extension( ) +def dassert_is_path_abs( + path: str, only_warning: bool = False +) -> None: + """ + Assert that `path` is an absolute path. 
+ """ + dassert_isinstance(path, str) + dassert_ne(path, "") + dassert_eq(os.path.isabs(path), True, "Path '%s' is not absolute", path, + only_warning=only_warning) + + def dassert_related_params( params: Dict[str, Any], mode: str, diff --git a/helpers/hgit.py b/helpers/hgit.py index dbf206154..c84dadf12 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -245,23 +245,33 @@ def find_git_root(path: str = ".") -> str: return git_root_dir +# ############################################################################# + + # TODO(gp): There are several functions doing the same work. # helpers_root/helpers/hgit.py:827:def find_file_in_git_tree( # helpers_root/helpers/hsystem.py:757:def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: + """ + Find the file under a directory. + + :param file_name: the name of the file to find + :param dir_path: the directory to start the search from + :return: the absolute path to the file + """ if dir_path is None: dir_path = find_git_root() _LOG.debug(hprint.to_str("dir_path")) - cmd = rf""" -find {dir_path} \( -path '*/.git' -o -path '*/.mypy_cache' \) -prune -o -name "{file_name}" -print - """ - cmd = hprint.dedent(cmd, remove_lead_trail_empty_lines_=True) - cmd = " ".join(cmd.split()) + cmd = (rf'find {dir_path} ' + + r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune " + + rf'-o -name "{file_name}" -print') + _LOG.debug(hprint.to_str("cmd")) _, res = hsystem.system_to_one_line(cmd) + hdbg.dassert_ne(res, "Can't find file '%s' in '%s'", file_name, dir_path) return res -def find_helpers_root() -> str: +def find_helpers_root(dir_path: str = ".") -> str: """ Find the root directory of the `helpers` repository. @@ -271,23 +281,96 @@ def find_helpers_root() -> str: :returns: The absolute path to the `helpers_root` directory. 
""" - git_root = find_git_root() - if is_helpers(): - # If we are in `//helpers`, then the helpers root is the root of the - # repo. - cmd = "git rev-parse --show-toplevel" - _, helpers_root = hsystem.system_to_one_line(cmd) - else: - # We need to search for the `helpers_root` dir starting from the root - # of the repo. - helpers_root = find_file("helpers_root", dir_path=git_root) - helpers_root = os.path.abspath(helpers_root) - # Make sure the dir and that `helpers` subdir exists. - hdbg.dassert_dir_exists(helpers_root) - hdbg.dassert_dir_exists(os.path.join(helpers_root), "helpers") + with hsystem.cd(dir_path): + git_root = find_git_root() + if is_helpers(): + # If we are in `//helpers`, then the helpers root is the root of the + # repo. + cmd = "git rev-parse --show-toplevel" + _, helpers_root = hsystem.system_to_one_line(cmd) + else: + # We need to search for the `helpers_root` dir starting from the root + # of the repo. + helpers_root = find_file("helpers_root", dir_path=git_root) + helpers_root = os.path.abspath(helpers_root) + # Make sure the dir and that `helpers` subdir exists. + hdbg.dassert_dir_exists(helpers_root) + hdbg.dassert_dir_exists(os.path.join(helpers_root), "helpers") return helpers_root +# ############################################################################# + + +def resolve_git_client_dir(git_client_name: str) -> str: + """ + Resolve the absolute path of the Git client directory. + + :param git_client_name: the name of the Git client (e.g., "helpers1" or + "/Users/saggese/src/helpers1") + :return: the absolute path of the Git client directory + """ + if not os.path.isabs(git_client_name): + # If the Git client name is not absolute, assume it's in the home + # directory (e.g., 'helpers1' -> '/Users/saggese/src/helpers1'). + git_client_dir = os.path.join(os.environ["HOME"], "src", git_client_name) + else: + # If the Git client name is absolute, use it as is. 
+ git_client_dir = git_client_name + _LOG.debug(hprint.to_str("git_client_dir")) + hdbg.dassert_dir_exists(git_client_dir) + return git_client_dir + + +def project_file_name_in_git_client(file_name: str, git_src_dir: str, git_dst_dir: str, + *, check_src_file_exists: bool = False, + check_dst_file_exists: bool = False) -> str: + """ + Find the file corresponding to `file_name` in `git_src_dir` for the client `git_dst_dir`. + + This is useful when we want to find the file in a destination Git client + directory corresponding to a file in a source Git client directory. + + E.g., for: + ``` + file_name = '/Users/saggese/src/helpers1/dev_scripts_helpers/system_tools/path.py' + git_src_dir = '/Users/saggese/src/helpers1' + git_dst_dir = '/Users/saggese/src/helpers2' + ``` + the output is + `/Users/saggese/src/helpers2/dev_scripts_helpers/system_tools/path.py` + + :param file_name: the name of the file to find (which is under `git_src_dir`) + :param git_src_dir: the directory of the Git client from which `file_name` is + :param git_dst_dir: the directory of the Git client to which find the + corresponding file + :param check_src_file_exists: if True, check that `file_name` exists in + `git_src_dir` + :param check_dst_file_exists: if True, check that the file in `git_dst_dir` + exists + :return: the absolute path of the file in `git_dst_dir` + """ + if not os.path.isabs(file_name): + file_name = os.path.abspath(file_name) + if check_src_file_exists: + hdbg.dassert_file_exists(file_name) + if not os.path.isabs(git_src_dir): + git_src_dir = os.path.abspath(git_src_dir) + if not os.path.isabs(git_dst_dir): + git_dst_dir = os.path.abspath(git_dst_dir) + # Compute the relative path of the file in the source git client. + hdbg.dassert_is_path_abs(file_name) + hdbg.dassert_is_path_abs(git_src_dir) + rel_path = os.path.relpath(file_name, git_src_dir) + # Compute the absolute path of the file in the destination git client. 
+ hdbg.dassert_is_path_abs(git_dst_dir) + dst_file_path = os.path.join(git_dst_dir, rel_path) + dst_file_path = os.path.abspath(dst_file_path) + if check_dst_file_exists: + hdbg.dassert_file_exists(dst_file_path) + return dst_file_path + + def get_project_dirname(only_index: bool = False) -> str: """ Return the name of the project name (e.g., `/Users/saggese/src/amp1` -> @@ -317,6 +400,9 @@ def get_project_dirname(only_index: bool = False) -> str: return ret +# ############################################################################# + + @functools.lru_cache() def is_inside_submodule(git_dir: str = ".") -> bool: """ diff --git a/helpers/hsystem.py b/helpers/hsystem.py index 80ed95326..e42779044 100644 --- a/helpers/hsystem.py +++ b/helpers/hsystem.py @@ -798,6 +798,56 @@ def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: return file_name_out +def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: + """ + Find a file in a directory and report its absolute path. + + :param filename: the name of the file to find (e.g., "helpers_root") + :param search_path: the directory to search in (e.g., "/Users/saggese/src/helpers1") + :return: the absolute path of the file + """ + # Recursive glob. + search_path = os.path.join(search_path, "**", filename) + files = glob.glob(search_path, recursive=True) + if len(files) == 1: + return files[0] + elif len(files) > 1: + msg = "Found multiple files with basename '%s' in directory '%s':\n%s" % ( + filename, search_path, "\n".join(files)) + raise RuntimeError(msg) + else: + return None + + +def find_path(path: str, *, dir_name: str = ".", abort_on_error: bool = False) -> str: + """ + Find a path in a directory and report its absolute path. 
+ + :param path: the path to find (e.g., "system_tools/path.py") + :param dir_name: the directory to search in (e.g., "/Users/saggese/src/helpers1") + :param abort_on_error: if True, raise an error if the path doesn't exist + :return: the absolute path of the path + """ + # Make the path absolute. + path_out = os.path.abspath(path) + # If the path exists, return it. + if os.path.exists(path_out): + return path_out + # If the path doesn't exist, abort. + if abort_on_error: + msg = "path '%s' doesn't exist in '%s'" % (path, dir_name) + raise RuntimeError(msg) + # Look for a file with the same basename in ``dir_name``. + dir_name = os.path.abspath(dir_name) + basename = os.path.basename(path) + path_out = _find_file(basename, search_path=dir_name) + # If the file doesn't exist, abort. + if path_out is None: + msg = "path '%s' doesn't exist in '%s'" % (path, dir_name) + raise RuntimeError(msg) + return path_out + + # TODO(Nikola): Use filesystem's `du` and move to `hio` instead? def du(path: str, human_format: bool = False) -> Union[int, str]: """ diff --git a/helpers/lib_tasks_bash.py b/helpers/lib_tasks_bash.py index 5f3c205d4..13d6d734a 100644 --- a/helpers/lib_tasks_bash.py +++ b/helpers/lib_tasks_bash.py @@ -4,8 +4,6 @@ import helpers.lib_tasks_find as hlitafin """ -import functools -import glob import logging import os import re @@ -13,6 +11,8 @@ from invoke import task +import helpers.hgit as hgit + # We want to minimize the dependencies from non-standard Python packages since # this code needs to run with minimal dependencies and without Docker. import helpers.hsystem as hsystem @@ -51,4 +51,4 @@ def bash_print_path(ctx): # type: ignore # Print the paths. 
_LOG.info("Valid paths:") for path in all_paths: - print(path) + print(path) \ No newline at end of file diff --git a/helpers/lib_tasks_git.py b/helpers/lib_tasks_git.py index 773adcbf7..effa02430 100644 --- a/helpers/lib_tasks_git.py +++ b/helpers/lib_tasks_git.py @@ -921,6 +921,29 @@ def git_branch_diff_with( # type: ignore ) +@task +def git_repo_copy(ctx, file_name, src_git_dir, dst_git_dir): # type: ignore + """ + Copy the code from the src Git client to the dst Git client. + + :param file_name: the name of the file to copy (which is under + `src_git_dir`) + :param src_git_dir: the directory of the source Git client (e.g., + "/Users/saggese/src/helpers1") + :param dst_git_dir: the directory of the destination Git client (e.g., + "/Users/saggese/src/helpers2") + """ + _ = ctx + src_git_dir = hgit.resolve_git_client_dir(src_git_dir) + dst_git_dir = hgit.resolve_git_client_dir(dst_git_dir) + dst_file_path = hgit.project_file_name_in_git_client(file_name, src_git_dir, dst_git_dir, + check_src_file_exists=True, + check_dst_file_exists=False) + _LOG.info("Copying code from '%s' to '%s' ...", file_name, dst_git_dir) + # Copy the file. + hsystem.system_to_string(f"cp {file_name} {dst_file_path}") + + # pylint: disable=line-too-long # TODO(gp): Add the following scripts: diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index 583c3b69f..b80e0e63b 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -370,6 +370,7 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): "all.linter_style_review_guidelines.reference.md", "all.llm_style_review_guidelines.reference.md", ] + # Revert the files in the current git client to the original code. 
if revert_to_original: _LOG.debug("Reverting to original code ...") for file_name in files_to_copy: @@ -379,32 +380,35 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): hsystem.system(cmd) _LOG.info("Done") return - # - if not os.path.isabs(git_client_name): - src_git_dir = os.path.join(os.environ["HOME"], "src", git_client_name) - else: - src_git_dir = git_client_name - hdbg.dassert_dir_exists(git_client_name) + # Copy the code from the src git client to the current one. + src_git_dir = hgit.resolve_git_client_dir(git_client_name) # Get the path to the helpers repo. - src_helpers_dir = hgit.find_file("helpers_root", dir_path=src_git_dir) + src_helpers_dir = hgit.find_helpers_root(src_git_dir) + hdbg.dassert_ne(src_helpers_dir, "") hdbg.dassert_dir_exists(src_helpers_dir) + # dst_helpers_dir = hgit.find_helpers_root() hdbg.dassert_dir_exists(dst_helpers_dir) _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir")) # - _LOG.info("Copying code from %s to %s ...", src_helpers_dir, dst_helpers_dir) + _LOG.info("Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir) # Find the files to copy. for file_name in files_to_copy: _LOG.debug(hprint.to_str("file_name")) + # Get the path to the file in the src Git client. src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) src_file_path = os.path.abspath(os.path.join(src_git_dir, src_file_path)) + _LOG.debug(hprint.to_str("src_file_path")) hdbg.dassert_file_exists(src_file_path) - # - rel_path = os.path.relpath(src_file_path, src_helpers_dir) - dst_file_path = os.path.join(dst_helpers_dir, rel_path) - hdbg.dassert_file_exists(src_file_path) - # - _LOG.debug(hprint.to_str("src_file_path -> dst_file_path")) + # Get the path to the file in the dst Git client. + dst_file_path = hgit.project_file_name_in_git_client(src_file_path, src_helpers_dir, dst_helpers_dir) + _LOG.debug(hprint.to_str("dst_file_path")) + # Copy the file. 
+ _LOG.debug(hprint.to_str("src_file_path dst_file_path")) + dir_name = os.path.dirname(dst_file_path) + hio.create_dir(dir_name, incremental=True) cmd = f"cp -f {src_file_path} {dst_file_path}" + _LOG.debug(hprint.to_str("cmd")) + _LOG.info("Copying file '%s' to '%s' ...", src_file_path, dst_file_path) hsystem.system(cmd) _LOG.info("Done") \ No newline at end of file diff --git a/tasks.py b/tasks.py index e74dfbab9..059bcabd0 100644 --- a/tasks.py +++ b/tasks.py @@ -74,6 +74,7 @@ git_merge_master, git_patch_create, git_pull, + git_repo_copy, git_roll_amp_forward, integrate_create_branch, integrate_diff_dirs, From 669159ff015138bdf2fb93d375c7633accb2b268 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 30 May 2025 20:58:26 -0400 Subject: [PATCH 146/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../extract_headers_from_markdown.py | 2 +- .../llms/dockerized_llm_review.py | 52 --- ... 
all.coding_style_guidelines.reference.md} | 339 ++++++++++++++++-- ...inter_style_review_guidelines.reference.md | 250 ------------- helpers/hio.py | 2 +- helpers/hmarkdown.py | 98 +++++ helpers/test/test_hmarkdown.py | 61 ++++ 7 files changed, 467 insertions(+), 337 deletions(-) rename docs/code_guidelines/{all.llm_style_review_guidelines.reference.md => all.coding_style_guidelines.reference.md} (67%) delete mode 100644 docs/code_guidelines/all.linter_style_review_guidelines.reference.md diff --git a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py index 1398da1b2..7f0c1240a 100755 --- a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py +++ b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py @@ -75,7 +75,7 @@ def _parse() -> argparse.ArgumentParser: help="Output mode", ) parser.add_argument( - "--max-level", + "--max_level", type=int, default=3, help="Maximum header levels to parse", diff --git a/dev_scripts_helpers/llms/dockerized_llm_review.py b/dev_scripts_helpers/llms/dockerized_llm_review.py index 32de63404..f6db1739d 100755 --- a/dev_scripts_helpers/llms/dockerized_llm_review.py +++ b/dev_scripts_helpers/llms/dockerized_llm_review.py @@ -23,58 +23,6 @@ _LOG = logging.getLogger(__name__) -def _extract_bullet_points(text: str) -> List[str]: - """ - Extract bullet point list items from text. - - Sub-lists nested under first-level items are extracted together with - the first-level items. - - :param text: text to process - :return: extracted bullet points, e.g., - ``` - [ - "- Item 1", - ''' - - Item 2 - - Item 3 - ''' - ] - ``` - """ - lines = text.split("\n") - bullet_points = [] - current_item = "" - for line in lines: - if re.match(r"^- ", line): - # Match first-level bullet point item. - if current_item: - # Store the previous item, if any. 
- current_item = re.sub(r"\s{2,}", " ", current_item.strip()) - bullet_points.append(current_item) - # Start a new first-level bullet point item. - current_item = line - elif re.match(r"^\s+- ", line): - # Match a sub-item (non first-level bullet point item). - # Append a sub-item to the current item. - current_item += "\n" + line - elif len(line.strip()) != 0 and current_item: - # Append a line to the current item. - current_item += " " + line - else: - # Store the finished item. - current_item = re.sub(r"\s{2,}", " ", current_item.strip()) - bullet_points.append(current_item) - if current_item: - current_item = re.sub(r"\s{2,}", " ", current_item.strip()) - bullet_points.append(current_item) - # Drop empty items. - bullet_points: List[str] = hprint.remove_empty_lines_from_string_list( - bullet_points - ) - return bullet_points - - def _load_review_guidelines(guidelines_doc_filename: str) -> Dict[str, List[str]]: """ Load automated review guidelines. diff --git a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md b/docs/code_guidelines/all.coding_style_guidelines.reference.md similarity index 67% rename from docs/code_guidelines/all.llm_style_review_guidelines.reference.md rename to docs/code_guidelines/all.coding_style_guidelines.reference.md index a3280f8f9..114991b20 100644 --- a/docs/code_guidelines/all.llm_style_review_guidelines.reference.md +++ b/docs/code_guidelines/all.coding_style_guidelines.reference.md @@ -1,28 +1,32 @@ <!-- toc --> -- [Guidelines for automated PR reviews](#guidelines-for-automated-pr-reviews) - * [Python code](#python-code) - + [Naming](#naming) - + [Docstrings](#docstrings) - + [Comments](#comments) - + [Code implementation](#code-implementation) - + [Code design](#code-design) - + [Type annotations](#type-annotations) - + [Functions](#functions) - + [Logging](#logging) - + [Unit tests](#unit-tests) - * [Notebooks](#notebooks) - + [General](#general) - + [Plotting](#plotting) - * [Markdowns](#markdowns) - <!-- tocstop 
--> -# Guidelines for PR reviews +# ############################################################################# +# General +# ############################################################################# + +## Spelling + +### LLM + +### Linter + +- Spell commands in lower case and programs with the first letter in upper case + - E.g., `git` as a command, `Git` as a program + - E.g., capitalize the first letter of `Python` +- Spell `Linter` with the first letter in upper case and do not use an article + - E.g., `Linter` instead of `the Linter` +- Capitalize `JSON`, `CSV`, `DB` and other abbreviations +- Represent intervals with `[a, b), (a, b], (a, b), [a, b]`, not `[a, b[` +- Write `hyperparameter` without a hyphen +- Use `Python` for scripting and automation tasks -## Python code +# Python -### Naming +## Naming + +### LLM - Name functions using verbs and verbs/actions - Good: `download_data()`, `process_input()`, `calculate_sum()` @@ -58,7 +62,27 @@ - Good: `calculate()` - Bad: `math_calculate()` -### Docstrings +### Linter + +- Name executable Python scripts using verbs and actions + - E.g., `download.py` and not `downloader.py` +- Name non-executable files using nouns + - E.g., `downloader.py` +- Use `dir` and not `directory` or `folder` + - E.g., `dir_path` +- Use `file_name` and not `filename` + - E.g., `file_name` for storing the name of a file +- Use `dir_name` and not `dirname` + - E.g., `dir_name` for storing the name of a directory +- Use `timestamp` and not `ts` or `datetime` + - E.g., `event_timestamp` +- To refer to the name of a column, use `..._col` and not `..._col_name` or + `..._column` + - E.g., `age_col` for a column storing age values + +## Docstrings + +### LLM - All functions and methods must have a docstring - Good: @@ -124,7 +148,32 @@ { "name": "John", "age": 30, "city": "New York" } ``` -### Comments +### Linter + +- The first docstring line is followed by a blank line and then, optionally, by + a longer description (possibly on 
multiple lines) with a more detailed + explanation of what the function does +- The more detailed description is followed by a blank line and then the param + and return description section in REST style +- The more detailed description is followed by a blank line and then the param + and return description section in REST style + - Use lowercase after `:param XYZ: ...` / `:return:` unless the description + starts with a proper noun +- Do not mention default values of parameters in parameter descriptions +- Docstrings should be wrapped in triple quotation marks (`"""`) + - The opening and closing triple quotation marks should be located on their + own separate lines +- Every docstring should start with a capital letter +- Every docstring should start with a verb in the imperative form +- Every docstring should begin with a one-line description of what the function + does, fit into a single line and end with a period +- Adding examples (e.g., of input and output) to the docstring is encouraged +- References to variables, file paths, functions, classes, etc. 
should be + wrapped in backticks + +## Comments + +### LLM - Add a comment for every logically distinct chunk of code, spanning 4-5 lines - Use comments to separate chunks of code instead of blank lines @@ -175,7 +224,17 @@ - E.g., "This section is commented out due to a known bug that needs fixing" or "Temporarily disabled for performance testing" -### Code implementation +### Linter + +- Avoid empty comments and line inside the code when possible +- Every comment should start with a capital letter +- Every comment should start with a verb in the imperative form +- Every comment should end with a period +- Comments with TODOs should have the format of `# TODO(username): ...` + +## Code implementation + +### LLM - Encode the assumptions made in the code using assertions and report as much information as possible in an assertion to make it easy to debug the output @@ -250,7 +309,6 @@ (i > 0 and not re.search(r'\w', tokens[i - 1])) or (i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]))) ``` - - Provide clear and informative error messages in exceptions using f-strings - Good: `raise ValueError(f"Invalid server_name='{server_name}'")` - Good: `raise TypeError(f"Expected type int, but got {type(var).__name__}")` @@ -295,7 +353,11 @@ - Good: `import myproject.module.submodule` - Bad: `from submodule import my_function` -### Code design +### Linter + +## Code design + +### LLM - Follow DRY principle (Don't Repeat Yourself): - Factor out common code in a separate function/method @@ -318,7 +380,35 @@ - Implement error handling to manage exceptions and edge cases - Use inheritance or composition to reuse code in object-oriented programming -### Type annotations +### Linter + +- Order functions / classes in a topological order so that the ones at the top + of the files are the "innermost" and the ones at the end of the files are the + "outermost" +- Use banners to separate large sections of code, e.g.: + ```python + # 
############################################################################# + # Read data. + # ############################################################################# + ``` + - The text inside the banner should start with a capital letter and end with a + period + +## Imports + +### LLM + +### Linter + +- All imports should be located at the top of the file +- Each module that can be imported should have a docstring at the very beginning + describing how it should be imported + - Linter adds it automatically +- No import cycles should be introduced by the changes in the PR + +## Type annotations + +### LLM - For type hints use use `List`, `Dict`, and `Tuple` to provide more explicit type information and help with static type checking @@ -331,7 +421,18 @@ - E.g., `Tuple[int, str]` instead of `tuple` - E.g., `Tuple[str, List[int]]` instead of `tuple` -### Functions +### Linter + +- All functions and methods, including constructors, must have type annotations + for all the parameters and returned structures + - Use `-> None` if a function doesn't return anything + - The only exception are invoke tasks, i.e. 
functions with the `@task` + decorator, they shouldn't have type annotations +- Type annotation `Any` should be avoided, if possible + +## Functions + +### LLM - Avoid pure functions without side effects, i.e., for the same input arguments, the returned value should not change (in contrast to functions that rely upon @@ -367,7 +468,6 @@ items.append(item) return items ``` - - Do not use a boolean parameter as a switch controlling some function behavior; instead, use a string parameter `mode`, which is allowed to take a small well-defined set of values @@ -423,7 +523,31 @@ - Good: `func(10, 20, param3=30)` - Bad: `func(10, 20, 30)` -### Logging +### Linter + +- Make a function private (e.g., `_foo_bar()`) when it is a helper of another + private or public function + +## Scripts + +### LLM + +### Linter + +- Use Python and not bash for scripting +- All Python scripts that are meant to be executed directly should: + - Be marked as executable files with `> chmod +x foo_bar.py` + - Have the standard Unix shebang notation at the top: `#!/usr/bin/env python` + - Use the following idiom at the bottom: + ```python + if __name__ == "__main__": + ... 
+ ``` + - Use `argparse` for argument parsing + +## Logging + +### LLM - Use logging `_LOG.debug()` and not `print()` for tracing execution - Good: `_LOG.debug("value=%s", value)` @@ -441,7 +565,25 @@ hdbg.init_logger(verbosity=logging.DEBUG) ``` -### Unit tests +### Linter + +## Misc + +### LLM + +### Linter + +- If a PR includes renaming a file, variable, parameter, function, class, etc., + then all the instances and references to it throughout the codebase should be + updated + +# ############################################################################# +# Unit tests +# ############################################################################# + +## Rules + +### LLM - A test class should test only one function or class to help understanding test failures @@ -520,9 +662,28 @@ self.check_string(act, fuzzy_match=True) ``` -## Notebooks +### Linter + +- Unit tests should be placed in a `test_*.py` file in the `test` directory, + close to the library / code it tests + - Test file `test_file_name.py` testing the library `file_name.py` +- Every test class should inherit from `hunitest.TestCase` +- We use `pytest` as test harness so do not add the following idiom in the + testing file + ```python + if __name__ == "__main__": + unittest.main() + ``` +- If a unit test is renamed or removed in a PR, the corresponding files in the + `outcomes` dir should also be renamed or removed + +# ############################################################################# +# Notebooks +# ############################################################################# + +## General -### General +### LLM - The code in the notebook should adhere to the same style and formatting guidelines as the code in libraries and scripts @@ -543,7 +704,11 @@ - Progress bars should be added where applicable - Use libraries like `tqdm` to show progress in loops or data processing tasks -### Plotting +### Linter + +## Plotting + +### LLM - Each plot should have a descriptive title to understand the 
context of the plot at a glance @@ -564,7 +729,62 @@ single figure - E.g., use `plt.subplots()` to create a grid of plots within a single figure -## Markdowns +### Linter + +- The name of a notebook should generally be the same as the branch name, unless + it's a Master notebook +- All notebooks should have a table of contents + - Linter automatically adds and updates the table of contents +- At the top of the notebook there should be a Markdown cell `# Description`, + followed by a Markdown cell with an explanation of the notebook's goal, what + it does, etc. +- Immediately below the description, there should be a Markdown cell + `# Imports`, followed by a code cell importing all the needed libraries + - It should include autoreload modules to keep the local code updated in real + time: + ```python + %load_ext autoreload + %autoreload 2 + ``` + - All the imports should be located in a single cell +- Below the cell with the imports, there should be a code cell that configures + the logging and notebook style, and reports execution info: + ```python + # Configure logger. + hdbg.init_logger(verbosity=logging.INFO) + _LOG = logging.getLogger(__name__) + # Print system signature. + _LOG.info("%s", henv.get_system_signature()[0]) + # Configure the notebook style. 
+ hprint.config_notebook() + ``` +- The rest of the notebook should be clearly organized using Markdown cells with + headings of different levels +- There should be no errors in the executed notebook +- Ideally, there should be no warnings in the executed notebook + +## Jupytext + +### LLM + +### Linter + +- Each notebook must have an accompanying Python file, linked via `jupytext`, + which contains a synchronized copy of the notebook's code +- The notebook and its paired Python file should share the same name, differing + only in their file extensions +- Ensure that the code in the notebook and its paired Python file remains + synchronized at all times +- If you update or delete the notebook, you must also update or delete its + paired Python file, and vice versa + +# ############################################################################# +# Markdown +# ############################################################################# + +## Naming + +### LLM - Boldface and italics should be used sparingly - The use of bullet point lists is encouraged @@ -583,3 +803,56 @@ - A list of examples of invocations of a tool, with a comment on the command line, the command line, and its output if possible - A copy-paste version of the tool interface running `-h` + +### Linter + +## General + +### LLM + +### Linter + +- Names of documentation files should follow the format + `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` to help in organizing + and categorizing documentation files effectively + - E.g., `docs/documentation_meta/all.diataxis.explanation.md` + - The `{component}` part specifies the part of the project the documentation + is related to + - The `{audience}` part indicates who the documentation is intended for + - The `{topic}` part describes the subject matter of the documentation + - The `{diataxis_tag}` part categorizes the documentation according to the + Diátaxis framework (e.g., explanation, tutorial) +- All Markdown files should have a table of 
contents + - Linter automatically adds and updates the table of contents +- Items in bullet point lists should not end with a period +- There should be one and only one level 1 heading (with one `#`) in a Markdown + - The level 1 heading serves as the main title of the document + - It should clearly convey the primary topic or purpose of the document + - The level 1 heading should be located above the table of contents +- Wrap file paths, names of variables, functions, and classes in backticks + - E.g., `file_path`, `variable_name`, `function_name()`, `ClassName` +- Use `>` to indicate a command line + - E.g., `> git push` or `docker> pytest` +- Headings should not be boldfaced +- Headings should not be overcapitalized + - E.g., `Data schema` instead of `Data Schema` +- Text should be reflowed to the maximum of 80 columns per line +- Fenced code blocks should always be accompanied by language markers + - E.g., `bash`, `python` + - Fenced code blocks should be indented at the same level as the previous line +- Commands should be prepended by `>` + - Example + ``` + > notes_to_pdf.py \ + --input MSML610/Lesson5-Theory_Statistical_learning.txt \ + --output Lesson5.pdf \ + --type slides \ + --toc_type navigation \ + --debug_on_error \ + --skip_action cleanup_after + ``` +- Commands should be prepended by `docker>` if they need to be run inside Docker +- Avoid using screenshots whenever possible and instead copy-and-paste text with + the right highlighting + - E.g., instead of a screenshot of a terminal command, provide the command + text: `> ls -la` diff --git a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md b/docs/code_guidelines/all.linter_style_review_guidelines.reference.md deleted file mode 100644 index 7503d5b64..000000000 --- a/docs/code_guidelines/all.linter_style_review_guidelines.reference.md +++ /dev/null @@ -1,250 +0,0 @@ -<!-- toc --> - -- [Guidelines for automated PR reviews](#guidelines-for-automated-pr-reviews) - * [Python 
code](#python-code) - + [Naming](#naming) - + [Docstrings](#docstrings) - + [Comments](#comments) - + [Code design](#code-design) - + [Imports](#imports) - + [Type annotations](#type-annotations) - + [Functions](#functions) - + [Scripts](#scripts) - + [Unit tests](#unit-tests) - + [Misc](#misc) - * [Notebooks](#notebooks) - + [General](#general) - + [Jupytext](#jupytext) - * [Markdowns](#markdowns) - * [Spelling](#spelling) - -<!-- tocstop --> - -# Guidelines for PR reviews - -## Python code - -### Naming - -- Name executable Python scripts using verbs and actions - - E.g., `download.py` and not `downloader.py` -- Name non-executable files using nouns - - E.g., `downloader.py` -- Use `dir` and not `directory` or `folder` - - E.g., `dir_path` -- Use `file_name` and not `filename` - - E.g., `file_name` for storing the name of a file -- Use `dir_name` and not `dirname` - - E.g., `dir_name` for storing the name of a directory -- Use `timestamp` and not `ts` or `datetime` - - E.g., `event_timestamp` -- To refer to the name of a column, use `..._col` and not `..._col_name` or - `..._column` - - E.g., `age_col` for a column storing age values - -### Docstrings - -- The first docstring line is followed by a blank line and then, optionally, by - a longer description (possibly on multiple lines) with a more detailed - explanation of what the function does -- The more detailed description is followed by a blank line and then the param - and return description section in REST style -- The more detailed description is followed by a blank line and then the param - and return description section in REST style - - Use lowercase after `:param XYZ: ...` / `:return:` unless the description - starts with a proper noun -- Do not mention default values of parameters in parameter descriptions -- Docstrings should be wrapped in triple quotation marks (`"""`) - - The opening and closing triple quotation marks should be located on their - own separate lines -- Every docstring should start 
with a capital letter -- Every docstring should start with a verb in the imperative form -- Every docstring should begin with a one-line description of what the function - does, fit into a single line and end with a period -- Adding examples (e.g., of input and output) to the docstring is encouraged -- References to variables, file paths, functions, classes, etc. should be - wrapped in backticks - -### Comments - -- Avoid empty comments and line inside the code when possible -- Every comment should start with a capital letter -- Every comment should start with a verb in the imperative form -- Every comment should end with a period -- Comments with TODOs should have the format of `# TODO(username): ...` - -### Code design - -- Order functions / classes in a topological order so that the ones at the top - of the files are the "innermost" and the ones at the end of the files are the - "outermost" -- Use banners to separate large sections of code, e.g.: - ```python - # ############################################################################# - # Read data. - # ############################################################################# - ``` - - The text inside the banner should start with a capital letter and end with a - period - -### Imports - -- All imports should be located at the top of the file -- Each module that can be imported should have a docstring at the very beginning - describing how it should be imported - - Linter adds it automatically -- No import cycles should be introduced by the changes in the PR - -### Type annotations - -- All functions and methods, including constructors, must have type annotations - for all the parameters and returned structures - - Use `-> None` if a function doesn't return anything - - The only exception are invoke tasks, i.e. 
functions with the `@task` - decorator, they shouldn't have type annotations -- Type annotation `Any` should be avoided, if possible - -### Functions - -- Make a function private (e.g., `_foo_bar()`) when it is a helper of another - private or public function - -### Scripts - -- Use Python and not bash for scripting -- All Python scripts that are meant to be executed directly should: - - Be marked as executable files with `> chmod +x foo_bar.py` - - Have the standard Unix shebang notation at the top: `#!/usr/bin/env python` - - Use the following idiom at the bottom: - ```python - if __name__ == "__main__": - ... - ``` - - Use `argparse` for argument parsing - -### Unit tests - -- Unit tests should be placed in a `test_*.py` file in the `test` directory, - close to the library / code it tests - - Test file `test_file_name.py` testing the library `file_name.py` -- Every test class should inherit from `hunitest.TestCase` -- We use `pytest` as test harness so do not add the following idiom in the - testing file - ```python - if __name__ == "__main__": - unittest.main() - ``` -- If a unit test is renamed or removed in a PR, the corresponding files in the - `outcomes` dir should also be renamed or removed - -### Misc - -- If a PR includes renaming a file, variable, parameter, function, class, etc., - then all the instances and references to it throughout the codebase should be - updated - -## Notebooks - -### General - -- The name of a notebook should generally be the same as the branch name, unless - it's a Master notebook -- All notebooks should have a table of contents - - Linter automatically adds and updates the table of contents -- At the top of the notebook there should be a Markdown cell `# Description`, - followed by a Markdown cell with an explanation of the notebook's goal, what - it does, etc. 
-- Immediately below the description, there should be a Markdown cell - `# Imports`, followed by a code cell importing all the needed libraries - - It should include autoreload modules to keep the local code updated in real - time: - ```python - %load_ext autoreload - %autoreload 2 - ``` - - All the imports should be located in a single cell -- Below the cell with the imports, there should be a code cell that configures - the logging and notebook style, and reports execution info: - ```python - # Configure logger. - hdbg.init_logger(verbosity=logging.INFO) - _LOG = logging.getLogger(__name__) - # Print system signature. - _LOG.info("%s", henv.get_system_signature()[0]) - # Configure the notebook style. - hprint.config_notebook() - ``` -- The rest of the notebook should be clearly organized using Markdown cells with - headings of different levels -- There should be no errors in the executed notebook -- Ideally, there should be no warnings in the executed notebook - -### Jupytext - -- Each notebook must have an accompanying Python file, linked via `jupytext`, - which contains a synchronized copy of the notebook's code -- The notebook and its paired Python file should share the same name, differing - only in their file extensions -- Ensure that the code in the notebook and its paired Python file remains - synchronized at all times -- If you update or delete the notebook, you must also update or delete its - paired Python file, and vice versa - -## Markdowns - -- Names of documentation files should follow the format - `docs/{component}/{audience}.{topic}.{diataxis_tag}.md` to help in organizing - and categorizing documentation files effectively - - E.g., `docs/documentation_meta/all.diataxis.explanation.md` - - The `{component}` part specifies the part of the project the documentation - is related to - - The `{audience}` part indicates who the documentation is intended for - - The `{topic}` part describes the subject matter of the documentation - - The `{diataxis_tag}` 
part categorizes the documentation according to the - Diátaxis framework (e.g., explanation, tutorial) -- All Markdown files should have a table of contents - - Linter automatically adds and updates the table of contents -- Items in bullet point lists should not end with a period -- There should be one and only one level 1 heading (with one `#`) in a Markdown - - The level 1 heading serves as the main title of the document - - It should clearly convey the primary topic or purpose of the document - - The level 1 heading should be located above the table of contents -- Wrap file paths, names of variables, functions, and classes in backticks - - E.g., `file_path`, `variable_name`, `function_name()`, `ClassName` -- Use `>` to indicate a command line - - E.g., `> git push` or `docker> pytest` -- Headings should not be boldfaced -- Headings should not be overcapitalized - - E.g., `Data schema` instead of `Data Schema` -- Text should be reflowed to the maximum of 80 columns per line -- Fenced code blocks should always be accompanied by language markers - - E.g., `bash`, `python` - - Fenced code blocks should be indented at the same level as the previous line -- Commands should be prepended by `>` or `docker>` if they need to - - Example - ``` - > notes_to_pdf.py \ - --input MSML610/Lesson5-Theory_Statistical_learning.txt \ - --output Lesson5.pdf \ - --type slides \ - --toc_type navigation \ - --debug_on_error \ - --skip_action cleanup_after - ``` -- Commands should be prepended by `docker>` if they need to be run inside Docker -- Avoid using screenshots whenever possible and instead copy-and-paste text with - the right highlighting - - E.g., instead of a screenshot of a terminal command, provide the command - text: `> ls -la` - -## Spelling - -- Spell commands in lower case and programs with the first letter in upper case - - E.g., `git` as a command, `Git` as a program - - E.g., capitalize the first letter of `Python` -- Spell `Linter` with the first letter in upper case 
def extract_first_level_bullets_from_markdown(text: str) -> List[str]:
    """
    Extract first-level bullet point items from Markdown text.

    Sub-lists nested under a first-level item are kept together with that
    item, and so are plain continuation lines. Blank lines are skipped, and
    any text appearing before the first first-level bullet is discarded.

    E.g., for the input
    ```
    - Item 1
    - Item 2
      - Sub-item 2.1
      - Sub-item 2.2
    - Item 3
    ```
    the output is a list with three elements, one per first-level item, where
    the second element contains "Item 2" together with its two sub-items.

    :param text: Markdown text to process
    :return: extracted first-level bullet points, one string per item
    """
    # Store the completed first-level bullet points.
    bullet_points: List[str] = []
    # Accumulate the current first-level item together with its sub-items and
    # continuation lines.
    current_item = ""
    for line in text.split("\n"):
        line = line.rstrip()
        if not line:
            # Skip blank (or whitespace-only) lines.
            continue
        if re.match(r"^- ", line):
            # A new first-level item starts: flush the previous one, if any.
            if current_item:
                bullet_points.append(current_item)
            current_item = line
        elif re.match(r"^\s+- ", line):
            # An indented sub-item: attach it to the current item.
            current_item += "\n" + line
        elif current_item:
            # A continuation line: attach it to the current item.
            # NOTE: after the blank-line skip above, the line is guaranteed to
            # be non-empty, so no extra emptiness check is needed.
            current_item += "\n" + line
    # Flush the last item, if any.
    if current_item:
        bullet_points.append(current_item)
    return bullet_points
expected: str) -> None: + # Prepare inputs. + text = hprint.dedent(text) + # Call function. + actual = hmarkdo.extract_first_level_bullets_from_markdown(text) + # Check output. + act = "\n".join(actual) + self.assert_equal(act, expected, dedent=True) + + def test_basic_list1(self) -> None: + """ + Test extracting simple first-level bullet points. + """ + text = """ + - Item 1 + - Item 2 + - Item 3 + """ + expected = """ + - Item 1 + - Item 2 + - Item 3 + """ + self.helper(text, expected) + + def test_nested_list1(self) -> None: + """ + Test extracting bullet points with nested sub-items. + """ + text = """ + - Item 1 + - Item 2 + - Sub-item 2.1 + - Sub-item 2.2 + - Item 3 + """ + expected = """ + - Item 1 + - Item 2 + - Sub-item 2.1 + - Sub-item 2.2 + - Item 3 + """ + self.helper(text, expected) + + def test_empty_list1(self) -> None: + """ + Test handling empty input. + """ + text = "" + expected = "" + self.helper(text, expected) + + # ############################################################################# # Test_remove_end_of_line_periods1 # ############################################################################# From 507fc0e7d30b69a0a3697d99795ca8d792457280 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 30 May 2025 21:24:10 -0400 Subject: [PATCH 147/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../extract_headers_from_markdown.py | 8 +- helpers/hmarkdown.py | 160 ++++++++++++------ helpers/test/test_hmarkdown.py | 10 +- 3 files changed, 121 insertions(+), 57 deletions(-) diff --git a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py index 7f0c1240a..38ec14454 100755 --- a/dev_scripts_helpers/documentation/extract_headers_from_markdown.py +++ b/dev_scripts_helpers/documentation/extract_headers_from_markdown.py @@ -39,6 +39,9 @@ def 
_extract_headers_from_markdown( max_level: int, out_file_name: str, ) -> None: + """ + Extract headers from a Markdown file. + """ input_content = hparser.read_file(in_file_name) input_content = "\n".join(input_content) # We don't want to sanity check since we want to show the headers, even @@ -47,6 +50,7 @@ def _extract_headers_from_markdown( header_list = hmarkdo.extract_headers_from_markdown( input_content, max_level=max_level, sanity_check=sanity_check ) + # Print the headers. if mode == "cfile": output_content = hmarkdo.header_list_to_vim_cfile( in_file_name, header_list @@ -54,8 +58,8 @@ def _extract_headers_from_markdown( else: output_content = hmarkdo.header_list_to_markdown(header_list, mode) hparser.write_file(output_content, out_file_name) - # - hmarkdo.check_header_list(header_list) + # Sanity check the headers. + hmarkdo.sanity_check_header_list(header_list) # TODO(gp): _parse() -> _build_parser() everywhere. diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 0f183ed4c..f818dfd8b 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -390,7 +390,69 @@ def extract_section_from_markdown(content: str, header_name: str) -> str: return "\n".join(extracted_lines) -def extract_first_level_bullets_from_markdown(text: str) -> List[str]: +# ############################################################################# +# Guidelines processing. +# ############################################################################# + +# Guidelines are organized in 3 levels by: +# 1) File type +# - E.g., Python, Notebooks, Markdown +# 2) Section +# - E.g., Naming, Comments, Code design, Imports, Type annotations, Functions, ... 
def sanity_check_guidelines(txt: str) -> None:
    """
    Sanity check the content of a guidelines Markdown file.

    The checks performed are:
    1) the file starts with a level 1 header
    2) all level 1 headers are unique
    3) header levels increase by at most one level at a time
    4) level 3 headers are always `LLM` or `Linter`

    :param txt: content of the guidelines Markdown file as a single string
    """
    header_list = extract_headers_from_markdown(txt, max_level=5)
    # Checks 1), 2), and 3) are enforced by the generic header sanity check.
    sanity_check_header_list(header_list)
    # 4) Level 3 headers are always `LLM` or `Linter`.
    for header in header_list:
        if header.level == 3:
            hdbg.dassert_in(header.description, ["LLM", "Linter"])
    # TODO(gp): Check that headers contain no spaces, once the guidelines
    #  comply (e.g., `Type annotations` currently contains a space).
bullet_points = [] # Store the current item including the first level bullet point and all @@ -445,48 +507,6 @@ def extract_first_level_bullets_from_markdown(text: str) -> List[str]: bullet_points.append(current_item) return bullet_points -# Guidelines are organized by -# - File type -# - E.g., Python, Notebooks, Markdown -# - Section -# - E.g., Naming, Comments, Code design, Imports, Type annotations, Functions, ... -# - Target -# - E.g., LLM vs Linter - -# E.g., -# ```` -# - LLM -# - Python code -# - Naming -# - Docstrings -# - Comments -# - Code implementation -# - Code design -# - Type annotations -# - Functions -# - Logging -# - Unit tests -# - Notebooks -# - General -# - Plotting -# - Markdown -# - Linter -# - Python code -# - Naming -# - Docstrings -# - Comments -# - Code design -# - Imports -# - Type annotations -# - Functions -# - Scripts -# - Unit tests -# - Misc -# - Notebooks -# - General -# - Jupytext -# - Markdown -# - Spelling # ############################################################################# # HeaderInfo @@ -532,8 +552,37 @@ def as_tuple(self) -> Tuple[int, str, int]: HeaderList = List[HeaderInfo] -def check_header_list(header_list: HeaderList) -> None: - # The first header should be level 1. +def sanity_check_header_list(header_list: HeaderList) -> None: + """ + Check that the header list is valid. + + 1) The first header should be level 1. + 2) All level 1 headers are unique. + 3) Check that consecutive elements in the header list only increase by at + most one level at a time (even if it can decrease by multiple levels). + - E.g., the following is valid: + ``` + # Header 1 + # Header 2 + ## Header 2.1 + ## Header 2.2 + # Header 3 + ``` + - E.g., the following is valid: + ``` + # Header1 + ## Header 1.1 + ### Header 1.1.1 + # Header 2 + ``` + - E.g., the following is not valid: + ``` + # Header 1 + ### Header 1.0.1 + # Header 2 + ``` + """ + # 1) The first header should be level 1. 
if header_list and header_list[0].level > 1: _LOG.warning( "First header '%s' at line %s is not level 1, but %s", @@ -541,8 +590,13 @@ def check_header_list(header_list: HeaderList) -> None: header_list[0].line_number, header_list[0].level, ) - # Check that consecutive elements in the header list only increase by - # at most one level at a time, but can decrease by multiple levels. + # 2) All level 1 headers are unique. + level_1_headers = [ + header.description for header in header_list if header.level == 1 + ] + hdbg.dassert_no_duplicates(level_1_headers) + # 3) Check that consecutive elements in the header list only increase by at + # most one level at a time (even if it can decrease by multiple levels). if len(header_list) > 1: for i in range(1, len(header_list)): hdbg.dassert_isinstance(header_list[i - 1], HeaderInfo) @@ -556,6 +610,7 @@ def check_header_list(header_list: HeaderList) -> None: raise ValueError(msg) +# TODO(gp): Move sanity check outside? def extract_headers_from_markdown( txt: str, max_level: int, *, sanity_check: bool = True ) -> HeaderList: @@ -565,9 +620,12 @@ def extract_headers_from_markdown( :param txt: content of the input Markdown file. :param max_level: Maximum header levels to parse (e.g., 3 parses all levels included `###`, but not `####`) + :param sanity_check: If True, check that the header list is valid. :return: the generated `HeaderList`, e.g., ``` - [(1, "Chapter 1", 5), (2, "Section 1.1", 10), ...] + [ + (1, "Chapter 1", 5), + (2, "Section 1.1", 10), ...] ``` """ hdbg.dassert_isinstance(txt, str) @@ -586,7 +644,7 @@ def extract_headers_from_markdown( header_list.append(header_info) # Check the header list. 
if sanity_check: - check_header_list(header_list) + sanity_check_header_list(header_list) else: _LOG.debug("Skipping sanity check") return header_list @@ -649,6 +707,8 @@ def header_list_to_markdown(header_list: HeaderList, mode: str) -> str: # ############################################################################# +# Process headers. +# ############################################################################# def format_headers(in_file_name: str, out_file_name: str, max_lev: int) -> None: diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 859725546..8c012d6db 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1499,11 +1499,11 @@ def no_start_python(): # ############################################################################# -# Test_check_header_list1 +# Test_sanity_check_header_list1 # ############################################################################# -class Test_check_header_list1(hunitest.TestCase): +class Test_sanity_check_header_list1(hunitest.TestCase): def test1(self) -> None: """ @@ -1512,7 +1512,7 @@ def test1(self) -> None: # Prepare inputs. header_list = get_header_list1() # Call function. - hmarkdo.check_header_list(header_list) + hmarkdo.sanity_check_header_list(header_list) self.assertTrue(True) def test2(self) -> None: @@ -1524,7 +1524,7 @@ def test2(self) -> None: header_list = get_header_list4() # Call function. with self.assertRaises(ValueError) as err: - hmarkdo.check_header_list(header_list) + hmarkdo.sanity_check_header_list(header_list) # Check output. actual = str(err.exception) self.check_string(actual) @@ -1537,7 +1537,7 @@ def test3(self) -> None: # Prepare inputs. header_list = get_header_list5() # Call function. 
- hmarkdo.check_header_list(header_list) + hmarkdo.sanity_check_header_list(header_list) self.assertTrue(True) From 60cb638670e8d66669333d9581cc765dd34efbf9 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 31 May 2025 16:45:08 -0400 Subject: [PATCH 148/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hmarkdown.py | 409 +++++++++++++++++++++++---------- helpers/test/test_hmarkdown.py | 252 ++++++++++++++++++++ 2 files changed, 543 insertions(+), 118 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index f818dfd8b..7a54f90dc 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -330,6 +330,7 @@ def md_clean_up(txt: str) -> str: # ############################################################################# +# TODO(gp): This could be done with `HeaderList`. def extract_section_from_markdown(content: str, header_name: str) -> str: """ Extract a section of text from a Markdown document based on the header @@ -390,124 +391,6 @@ def extract_section_from_markdown(content: str, header_name: str) -> str: return "\n".join(extracted_lines) -# ############################################################################# -# Guidelines processing. -# ############################################################################# - -# Guidelines are organized in 3 levels by: -# 1) File type -# - E.g., Python, Notebooks, Markdown -# 2) Section -# - E.g., Naming, Comments, Code design, Imports, Type annotations, Functions, ... 
-# 3) Target -# - E.g., LLM vs Linter - -# E.g., -# > extract_headers_from_markdown.py -i docs/code_guidelines/all.coding_style_guidelines.reference.md --max_level 2 -# ``` -# - All -# - Spelling -# - Python -# - Naming -# - Docstrings -# - Comments -# - Code_implementation -# - Code_design -# - Imports -# - Type annotations -# - Functions -# - Scripts -# - Logging -# - Misc -# - Unit_tests -# - Rules -# - Notebooks -# - General -# - Plotting -# - Jupytext -# - Markdown -# - Naming -# - General -# ``` - -# The level 1 is determined by the type of the file. -# The level 2 is specified as `*` (for all), as `Naming,Docstrings` -# The level 3 is specified as `mode=LLM` or `mode=Linter` - - -def sanity_check_guidelines(txt: List[str]) -> None: - """ - Sanity check the guidelines. - """ - lines = txt.split("\n") - header_list = extract_headers_from_markdown( txt, max_level=5) - # 1) Start with level 1 headers. - # 2) All level 1 headers are unique. - # 3) Header levels are increasing / decreasing by at most 1. - sanity_check_header_list(header_list) - # 4) Level 3 headers are always `LLM` or `Linter`. - # for header in header_list: - # if header.level != 3: - # hdbg.dassert_in(header.description, ["LLM", "Linter"]) - # 5) All headers have no spaces. - - -def extract_first_level_bullets_from_markdown(txt: str) -> List[str]: - """ - Extract first-level bullet point list items from text until the next one. - - Sub-lists nested under first-level items are extracted together with - the first-level items. - - :param text: text to process - ``` - - Item 1 - - Item 2 - - Item 3 - - Item 4 - ``` - :return: extracted bullet points, e.g., - ``` - [ - "- Item 1", - ''' - - Item 2 - - Item 3 - ''', - "- Item 4", - ] - ``` - """ - lines = txt.split("\n") - # Store the first-level bullet points. - bullet_points = [] - # Store the current item including the first level bullet point and all - # its sub-items. 
- current_item = "" - for line in lines: - line = line.rstrip() - if not line: - continue - if re.match(r"^- ", line): - # Match first-level bullet point item. - if current_item: - # Store the previous item, if any. - bullet_points.append(current_item) - # Start a new first-level bullet point item. - current_item = line - elif re.match(r"^\s+- ", line): - # Match a sub-item (non first-level bullet point item). - # Append a sub-item to the current item. - current_item += "\n" + line - elif len(line.strip()) != 0 and current_item: - # Append a line to the current item. - current_item += "\n" + line - # Add the last item if there is one. - if current_item: - bullet_points.append(current_item) - return bullet_points - - # ############################################################################# # HeaderInfo # ############################################################################# @@ -532,6 +415,7 @@ def __init__(self, level: int, description: str, line_number: int): self.level = level # hdbg.dassert_isinstance(description, str) + hdbg.dassert_ne(description, "") self.description = description # hdbg.dassert_isinstance(line_number, int) @@ -706,6 +590,293 @@ def header_list_to_markdown(header_list: HeaderList, mode: str) -> str: return output_content +# ############################################################################# +# Rules processing. +# ############################################################################# + +# Rules are organized in 4 levels +# 1) Rule sets (level 1) +# - E.g., Python, Notebooks, Markdown +# - Level 1 is a set of rules determined mainly by the type of the file we are +# processing +# - Note: several set of rules can be applied to a given file type +# - E.g., `Python` and `Notebooks` apply to all Python files +# 2) Sections (level 2) +# - E.g., Naming, Comments, Code_design, Imports, Type_annotations, ... 
+# - Can be specified as: +# - `*` (to select all sections) +# - a list (e.g., `Naming,Docstrings`) +# 3) Targets (level 3) +# - E.g., LLM vs Linter +# - Can be specified as: +# - `*` (to select all targets) +# - `LLM`, `Linter` +# 4) Atomic rules (level 4) +# ``` +# - Spell commands in lower case and programs with the first letter in upper case +# - E.g., `git` as a command, `Git` as a program +# - E.g., capitalize the first letter of `Python` +# ``` + +# E.g., +# > extract_headers_from_markdown.py -i docs/code_guidelines/all.coding_style_guidelines.reference.md --max_level 2 +# ``` +# - General +# - Spelling +# - LLM +# - Linter +# - Python +# - Naming +# - LLM +# - Linter +# - Docstrings +# - ... +# - Comments +# - Code_implementation +# - Code_design +# - Imports +# - Type_annotations +# - Functions +# - Scripts +# - Logging +# - Misc +# - Unit_tests +# - All +# - Notebooks +# - General +# - Plotting +# - Jupytext +# - Markdown +# - Naming +# - General +# ``` + +# - The rules to apply to a Python file are automatically inferred as +# `([`General:*`, `Python:*`], `LLM`)` +# - The rules to apply to a Notebook file are automatically inferred as +# `([`General:*`, `Python:*`, `Notebooks:*`], `LLM`)` +# - A user can specify to apply a subset of rules like +# `([`General:*`, `Python:Naming,Docstrings`], `LLM,Linter`)` +# - Atomic rules are the first-level bullets of the markdown file, e.g., +# ``` +# - Spell commands in lower case and programs with the first letter in upper case +# - E.g., `git` as a command, `Git` as a program +# - E.g., capitalize the first letter of `Python` +# ``` + + +def sanity_check_rules(txt: List[str]) -> None: + """ + Sanity check the rules. + """ + lines = txt.split("\n") + header_list = extract_headers_from_markdown( txt, max_level=5) + # 1) Start with level 1 headers. + # 2) All level 1 headers are unique. + # 3) Header levels are increasing / decreasing by at most 1. 
+ sanity_check_header_list(header_list) + # 4) Level 3 headers are always `LLM` or `Linter`. + # for header in header_list: + # if header.level != 3: + # hdbg.dassert_in(header.description, ["LLM", "Linter"]) + # 5) All headers have no spaces. + + +# A `Rule` is a string separated by `:` characters, where each part is +# - `*` means any string +# - `string` +# - a list of strings separated by `|` +# E.g., +# - `General:*:LLM`, `*:*:Linter|LLM`, `General|Python:*:LLM`, `Python:*:Linter` +# - For a Python file -> `General|Python:*:LLM` +# - For a Notebook file -> `General|Python|Notebooks:*:LLM` +# - `Python:Naming|Docstrings|Comments:LLM` +RuleRegex = str + + +# A "Guidelines" is a header list with only level 1 headers storing the full +# hierarchy of the rules, e.g., +# `(1, "Spelling:All:LLM", xyz)`` +# TODO(gp): Make Guidelines descend from HeaderList. +Guidelines = HeaderList + + +def convert_header_list_into_guidelines(header_list: HeaderList) -> Guidelines: + """ + Expand the header list into a full rule list. + + Expand a header list like: + ``` + - General + - Spelling + - LLM + - Linter + - Python + - Naming + - LLM + - Linter + ``` + represented internally as: + ``` + (1, "General", xyz), + (2, "Spelling", xyz), + (3, "LLM", xyz), + (3, "Linter", xyz), + (1, "Python", xyz), + (2, "Naming", xyz), + (3, "LLM", xyz), + (3, "Linter", xyz), + ``` + into "rulelist" a header list with only level 1 headers like: + ``` + [ + (1, "Spelling:All:LLM", xyz), + (1, "Spelling:All:Linter", xyz), + (1, "Python:Naming:LLM", xyz), + (1, "Python:Naming:Linter", xyz), + ] + ``` + """ + hdbg.dassert_isinstance(header_list, list) + # 1) Extract the level 3 headers. 
+ level_1 = "" + level_2 = "" + level_3_headers = [] + for header_info in header_list: + level, description, line_number = header_info.as_tuple() + if level == 1: + level_1 = description + elif level == 2: + level_2 = description + elif level == 3: + hdbg.dassert_ne(level_1, "") + hdbg.dassert_ne(level_2, "") + full_level_3 = f"{level_1}:{level_2}:{description}" + level_3_headers.append(HeaderInfo(1, full_level_3, line_number)) + return level_3_headers + + +def _convert_rule_into_regex(rule_regex: RuleRegex) -> str: + """ + Convert a rule regex into an actual regular expression. + + E.g., + - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` + - `*:*:Linter|LLM` -> `(\S*):(\S*):(Linter|LLM)` + - `Spelling|Python:*:LLM` -> `Spelling|Python:(\S*):LLM` + - `Python:*:Linter` -> `Python:(\S*):Linter` + """ + hdbg.dassert_isinstance(rule_regex, RuleRegex) + # Parse the rule regex into a list of strings. + rule_regex = rule_regex.split(":") + hdbg.dassert_eq(len(rule_regex), 3) + # Process each part of the rule regex. + for i, rule in enumerate(rule_regex): + hdbg.dassert_not_in(" ", rule) + if rule == "*": + # Convert `*`` into `\S*`` + rule_regex[i] = "(\S*)" + elif "|" in rule: + rule_regex[i] = "(" + rule_regex[i] + ")" + else: + pass + # Join the parts of the rule regex back together. + rule_regex = ":".join(rule_regex) + return rule_regex + + +def extract_rules(guidelines: Guidelines, rule_regexes: List[RuleRegex]) -> Guidelines: + """ + Extract the set of rules from the `guidelines` that match the rule regex. + """ + hdbg.dassert_isinstance(guidelines, list) + hdbg.dassert_isinstance(rule_regexes, list) + # A rule regex is a string separated by `:` characters, where each part is + # - `*` (meaning "any string") + # - a `string` (e.g., `Spelling`) + # - a list of strings separated by `|` (e.g., `LLM|Linter`) + # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. + # Convert each rule regex into a regular expression. 
+ rule_regex_map = {} + for rule_regex_str in rule_regexes: + hdbg.dassert_isinstance(rule_regex_str, RuleRegex) + regex = _convert_rule_into_regex(rule_regex_str) + _LOG.debug(hprint.to_str("rule_regex_str regex")) + hdbg.dassert_not_in(rule_regex_str, rule_regex_map) + rule_regex_map[rule_regex_str] = regex + # Extract the set of rules from the `guidelines` that match the rule regex. + rule_sections = [] + for guideline in guidelines: + # A guideline description is a string separated by `:` characters, where each part is + # (1, "Python:Naming:Linter", xyz), + for k, v in rule_regex_map.items(): + if re.match(v, guideline.description): + _LOG.debug("%s matches %s", k, guideline.description) + rule_sections.append(guideline) + # Select the rules. + _LOG.debug("Selected %s sections:\n%s", len(rule_sections), "\n".join([r.description for r in rule_sections])) + # Return the rules. + return rule_sections + + +# TODO(gp): -> parse_atomic_rules? +def extract_first_level_bullets_from_markdown(txt: str) -> List[str]: + """ + Parse atomic rules from a markdown. + + - Extract first-level bullet point list items from text until the next one. + - Sub-lists nested under first-level items are extracted together with the + first-level items. + + :param text: text to process + ``` + - Item 1 + - Item 2 + - Item 3 + - Item 4 + ``` + :return: extracted bullet points, e.g., + ``` + [ + "- Item 1", + ''' + - Item 2 + - Item 3 + ''', + "- Item 4", + ] + ``` + """ + lines = txt.split("\n") + # Store the first-level bullet points. + bullet_points = [] + # Store the current item including the first level bullet point and all + # its sub-items. + current_item = "" + for line in lines: + line = line.rstrip() + if not line: + continue + if re.match(r"^- ", line): + # Match first-level bullet point item. + if current_item: + # Store the previous item, if any. + bullet_points.append(current_item) + # Start a new first-level bullet point item. 
+ current_item = line + elif re.match(r"^\s+- ", line): + # Match a sub-item (non first-level bullet point item). + # Append a sub-item to the current item. + current_item += "\n" + line + elif len(line.strip()) != 0 and current_item: + # Append a line to the current item. + current_item += "\n" + line + # Add the last item if there is one. + if current_item: + bullet_points.append(current_item) + return bullet_points + + # ############################################################################# # Process headers. # ############################################################################# @@ -1017,6 +1188,8 @@ def inject_todos_from_cfile( # ############################################################################# +# Formatting markdown +# ############################################################################# def capitalize_first_level_bullets(markdown_text: str) -> str: diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 8c012d6db..7ec987a76 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1541,6 +1541,258 @@ def test3(self) -> None: self.assertTrue(True) +# ////////////////////////////////////////////////////////////////////////////// +# Rules processing. 
+# ////////////////////////////////////////////////////////////////////////////// + + +def get_header_list6() -> hmarkdo.HeaderList: + """ + - Spelling + - All + - LLM + - Linter + - Python + - Naming + - LLM + - Linter + - Docstrings + - LLM + - Linter + - Unit_tests + - All + - LLM + - Linter + """ + data = [ + (1, "Spelling"), + (2, "All"), + (3, "LLM"), + (3, "Linter"), + (1, "Python"), + (2, "Naming"), + (3, "LLM"), + (3, "Linter"), + (2, "Docstrings"), + (3, "LLM"), + (3, "Linter"), + (1, "Unit_tests"), + (2, "All"), + (3, "LLM"), + (3, "Linter"), + ] + header_list = _to_header_list(data) + return header_list + + +class Test_convert_header_list_into_guidelines1(hunitest.TestCase): + + def test1(self) -> None: + """ + Test converting a header list into guidelines. + """ + # Prepare inputs. + header_list = get_header_list6() + # Call function. + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Check output. + act = "\n".join(map(str, guidelines)) + exp = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + HeaderInfo(1, 'Python:Naming:LLM', 31) + HeaderInfo(1, 'Python:Naming:Linter', 36) + HeaderInfo(1, 'Python:Docstrings:LLM', 46) + HeaderInfo(1, 'Python:Docstrings:Linter', 51) + HeaderInfo(1, 'Unit_tests:All:LLM', 66) + HeaderInfo(1, 'Unit_tests:All:Linter', 71) + """ + self.assert_equal(act, exp, dedent=True) + + +class Test_extract_rules1(hunitest.TestCase): + + def helper(self, rule_regexes: List[str], exp: str) -> None: + """ + Test extracting rules from a markdown file. + """ + # Prepare inputs. + guidelines = get_header_list6() + guidelines = hmarkdo.convert_header_list_into_guidelines(guidelines) + # Call function. + selected_guidelines = hmarkdo.extract_rules(guidelines, rule_regexes) + # Check output. + act = "\n".join(map(str, selected_guidelines)) + self.assert_equal(act, exp, dedent=True) + + def test1(self) -> None: + """ + Test extracting rules from a markdown file. 
+ """ + rule_regexes = ["Spelling:*:LLM"] + exp = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + """ + self.helper(rule_regexes, exp) + + def test2(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["Spelling:NONE:LLM"] + exp = """ + """ + self.helper(rule_regexes, exp) + + def test3(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["Spelling:All:*"] + exp = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + """ + self.helper(rule_regexes, exp) + + def test4(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["Spelling:All:*", "Python:*:*"] + exp = """ + HeaderInfo(1, 'Spelling:All:LLM', 11) + HeaderInfo(1, 'Spelling:All:Linter', 16) + HeaderInfo(1, 'Python:Naming:LLM', 31) + HeaderInfo(1, 'Python:Naming:Linter', 36) + HeaderInfo(1, 'Python:Docstrings:LLM', 46) + HeaderInfo(1, 'Python:Docstrings:Linter', 51) + """ + self.helper(rule_regexes, exp) + + +def get_guidelines_txt1() -> str: + txt = r""" + # General + + ## Spelling + + ### LLM + + ### Linter + + - Spell commands in lower case and programs with the first letter in upper case + - E.g., `git` as a command, `Git` as a program + - E.g., capitalize the first letter of `Python` + - Capitalize `JSON`, `CSV`, `DB` and other abbreviations + + # Python + + ## Naming + + ### LLM + + - Name functions using verbs and verbs/actions + - Good: `download_data()`, `process_input()`, `calculate_sum()` + - Good: Python internal functions as `__repr__`, `__init__` are valid + - Good: Functions names like `to_dict()`, `_parse()`, `_main()` are valid + - Name classes using nouns + - Good: `Downloader()`, `DataProcessor()`, `User()` + - Bad: `DownloadStuff()`, `ProcessData()`, `UserActions()` + + ### Linter + + - Name executable Python scripts using verbs and actions + - E.g., `download.py` and not `downloader.py` + + # Unit_tests + + ## Rules + + ### LLM + + - A test class 
should test only one function or class to help understanding + test failures + - A test method should only test a single case to ensures clarity and + precision in testing + - E.g., "for these inputs the function responds with this output" + """ + txt = hprint.dedent(txt) + return txt + + +class Test_extract_rules2(hunitest.TestCase): + + def test_get_header_list1(self) -> None: + """ + Test extracting headers from a markdown file. + """ + txt = get_guidelines_txt1() + max_level = 4 + header_list = hmarkdo.extract_headers_from_markdown(txt, max_level) + # Check output. + act = "\n".join(map(str, header_list)) + exp = """ + HeaderInfo(1, 'General', 1) + HeaderInfo(2, 'Spelling', 3) + HeaderInfo(3, 'LLM', 5) + HeaderInfo(3, 'Linter', 7) + HeaderInfo(1, 'Python', 14) + HeaderInfo(2, 'Naming', 16) + HeaderInfo(3, 'LLM', 18) + HeaderInfo(3, 'Linter', 28) + HeaderInfo(1, 'Unit_tests', 33) + HeaderInfo(2, 'Rules', 35) + HeaderInfo(3, 'LLM', 37) + """ + self.assert_equal(act, exp, dedent=True) + # + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + act = "\n".join(map(str, guidelines)) + exp = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.assert_equal(act, exp, dedent=True) + + def helper(self, rule_regexes: List[str], exp: str) -> None: + """ + Test extracting rules from a markdown file. + """ + # Prepare inputs. + txt = get_guidelines_txt1() + max_level = 4 + header_list = hmarkdo.extract_headers_from_markdown(txt, max_level) + guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Call function. + selected_guidelines = hmarkdo.extract_rules(guidelines, rule_regexes) + # Check output. 
+ act = "\n".join(map(str, selected_guidelines)) + self.assert_equal(act, exp, dedent=True) + + def test1(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["General:*:LLM"] + exp = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + """ + self.helper(rule_regexes, exp) + + def test2(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["General:NONE:LLM"] + exp = """ + """ + self.helper(rule_regexes, exp) + + # ############################################################################# # Test_inject_todos_from_cfile1 # ############################################################################# From d282fb596bec354756b080dc57f36f6e3e25ad46 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 31 May 2025 16:49:08 -0400 Subject: [PATCH 149/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hmarkdown.py | 4 +-- helpers/test/test_hmarkdown.py | 52 ++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 7a54f90dc..1d92f88f1 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -812,10 +812,10 @@ def extract_rules(guidelines: Guidelines, rule_regexes: List[RuleRegex]) -> Guid for k, v in rule_regex_map.items(): if re.match(v, guideline.description): _LOG.debug("%s matches %s", k, guideline.description) - rule_sections.append(guideline) + if guideline not in rule_sections: + rule_sections.append(guideline) # Select the rules. _LOG.debug("Selected %s sections:\n%s", len(rule_sections), "\n".join([r.description for r in rule_sections])) - # Return the rules. 
return rule_sections diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 7ec987a76..f784f6d1e 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1792,6 +1792,58 @@ def test2(self) -> None: """ self.helper(rule_regexes, exp) + def test3(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["*:*:LLM"] + exp = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper(rule_regexes, exp) + + def test4(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["*:*:LLM", "General:*:*"] + exp = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper(rule_regexes, exp) + + def test5(self) -> None: + """ + Test extracting rules from a markdown file. + """ + rule_regexes = ["*:*:*"] + exp = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper(rule_regexes, exp) + + def test6(self) -> None: + """ + Test extracting rules from a markdown file. 
+ """ + rule_regexes = ["*:*:*", "General:*:*"] + exp = """ + HeaderInfo(1, 'General:Spelling:LLM', 5) + HeaderInfo(1, 'General:Spelling:Linter', 7) + HeaderInfo(1, 'Python:Naming:LLM', 18) + HeaderInfo(1, 'Python:Naming:Linter', 28) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + """ + self.helper(rule_regexes, exp) # ############################################################################# # Test_inject_todos_from_cfile1 From 9a82ecf2d6d341be2da4697d41ae6465597c7359 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 31 May 2025 17:11:59 -0400 Subject: [PATCH 150/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hmarkdown.py | 129 ++++++++++-------- .../output/test.txt | 4 +- .../Test_process_lines1.test1/output/test.txt | 4 +- .../output/test.txt | 6 +- helpers/test/test_hmarkdown.py | 57 ++++---- 5 files changed, 108 insertions(+), 92 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 1d92f88f1..710ebdfbe 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -594,33 +594,29 @@ def header_list_to_markdown(header_list: HeaderList, mode: str) -> str: # Rules processing. 
# ############################################################################# -# Rules are organized in 4 levels +# Rules are organized in 4 levels of a markdown file: +# # 1) Rule sets (level 1) -# - E.g., Python, Notebooks, Markdown -# - Level 1 is a set of rules determined mainly by the type of the file we are -# processing -# - Note: several set of rules can be applied to a given file type -# - E.g., `Python` and `Notebooks` apply to all Python files +# - E.g., `General`, `Python`, `Notebooks`, `Markdown` +# - Level 1 is a set of rules determined mainly by the type of the file we +# are processing +# - Several sets of rules can be applied to a given file type +# - E.g., rules in `Python` and `Notebooks` apply to all Python files # 2) Sections (level 2) -# - E.g., Naming, Comments, Code_design, Imports, Type_annotations, ... -# - Can be specified as: -# - `*` (to select all sections) -# - a list (e.g., `Naming,Docstrings`) +# - E.g., `Naming`, `Comments`, `Code_design`, `Imports`, `Type_annotations` # 3) Targets (level 3) # - E.g., LLM vs Linter -# - Can be specified as: -# - `*` (to select all targets) -# - `LLM`, `Linter` # 4) Atomic rules (level 4) +# - This is the set of rules that are applied to the file # ``` # - Spell commands in lower case and programs with the first letter in upper case # - E.g., `git` as a command, `Git` as a program # - E.g., capitalize the first letter of `Python` # ``` -# E.g., -# > extract_headers_from_markdown.py -i docs/code_guidelines/all.coding_style_guidelines.reference.md --max_level 2 +# Extract the rules from the markdown file: # ``` +# > extract_headers_from_markdown.py -i docs/code_guidelines/all.coding_style_guidelines.reference.md --max_level 2 # - General # - Spelling # - LLM @@ -651,9 +647,9 @@ def header_list_to_markdown(header_list: HeaderList, mode: str) -> str: # - General # ``` -# - The rules to apply to a Python file are automatically inferred as +# - The rules to apply to a Python file are automatically 
extractedas: # `([`General:*`, `Python:*`], `LLM`)` -# - The rules to apply to a Notebook file are automatically inferred as +# - The rules to apply to a Notebook file are automatically extracted as: # `([`General:*`, `Python:*`, `Notebooks:*`], `LLM`)` # - A user can specify to apply a subset of rules like # `([`General:*`, `Python:Naming,Docstrings`], `LLM,Linter`)` @@ -669,8 +665,7 @@ def sanity_check_rules(txt: List[str]) -> None: """ Sanity check the rules. """ - lines = txt.split("\n") - header_list = extract_headers_from_markdown( txt, max_level=5) + header_list = extract_headers_from_markdown(txt, max_level=5) # 1) Start with level 1 headers. # 2) All level 1 headers are unique. # 3) Header levels are increasing / decreasing by at most 1. @@ -679,31 +674,35 @@ def sanity_check_rules(txt: List[str]) -> None: # for header in header_list: # if header.level != 3: # hdbg.dassert_in(header.description, ["LLM", "Linter"]) + # TODO(gp): Implement this. # 5) All headers have no spaces. + # TODO(gp): Implement this. 
-# A `Rule` is a string separated by `:` characters, where each part is -# - `*` means any string -# - `string` -# - a list of strings separated by `|` -# E.g., +# A `Rule` is a string separated by `:` characters, where each part can be: +# - `*` (which means "match any string") +# - a `string` (e.g., `Spelling`) +# - a list of strings separated by `|` (e.g., `LLM|Linter`) +# +# E.g., valid rules are: # - `General:*:LLM`, `*:*:Linter|LLM`, `General|Python:*:LLM`, `Python:*:Linter` # - For a Python file -> `General|Python:*:LLM` # - For a Notebook file -> `General|Python|Notebooks:*:LLM` # - `Python:Naming|Docstrings|Comments:LLM` -RuleRegex = str +SelectionRule = str -# A "Guidelines" is a header list with only level 1 headers storing the full -# hierarchy of the rules, e.g., -# `(1, "Spelling:All:LLM", xyz)`` +# A `Guidelines`` is a header list with only level 1 headers storing the full +# hierarchy of the rules as a description, e.g., +# `(1, "Spelling:All:LLM", xyz)` # TODO(gp): Make Guidelines descend from HeaderList. Guidelines = HeaderList def convert_header_list_into_guidelines(header_list: HeaderList) -> Guidelines: """ - Expand the header list into a full rule list. + Convert the header list into a `Guidelines` object with only level 1 headers + and full hierarchy of the rules as description. Expand a header list like: ``` @@ -727,7 +726,7 @@ def convert_header_list_into_guidelines(header_list: HeaderList) -> Guidelines: (3, "LLM", xyz), (3, "Linter", xyz), ``` - into "rulelist" a header list with only level 1 headers like: + into: ``` [ (1, "Spelling:All:LLM", xyz), @@ -738,27 +737,34 @@ def convert_header_list_into_guidelines(header_list: HeaderList) -> Guidelines: ``` """ hdbg.dassert_isinstance(header_list, list) - # 1) Extract the level 3 headers. + # Store the last level headers. level_1 = "" level_2 = "" + # Accumulate the last level headers. level_3_headers = [] + # Scan the header list. 
for header_info in header_list: level, description, line_number = header_info.as_tuple() + # Store the headers found at each level. if level == 1: level_1 = description elif level == 2: level_2 = description elif level == 3: + # Store the level 3 header. hdbg.dassert_ne(level_1, "") hdbg.dassert_ne(level_2, "") full_level_3 = f"{level_1}:{level_2}:{description}" - level_3_headers.append(HeaderInfo(1, full_level_3, line_number)) + header_info_tmp = HeaderInfo(1, full_level_3, line_number) + level_3_headers.append(header_info_tmp) + else: + raise ValueError(f"Invalid header info={header_info}") return level_3_headers -def _convert_rule_into_regex(rule_regex: RuleRegex) -> str: +def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: """ - Convert a rule regex into an actual regular expression. + Convert a rule into an actual regular expression. E.g., - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` @@ -766,31 +772,39 @@ def _convert_rule_into_regex(rule_regex: RuleRegex) -> str: - `Spelling|Python:*:LLM` -> `Spelling|Python:(\S*):LLM` - `Python:*:Linter` -> `Python:(\S*):Linter` """ - hdbg.dassert_isinstance(rule_regex, RuleRegex) - # Parse the rule regex into a list of strings. - rule_regex = rule_regex.split(":") - hdbg.dassert_eq(len(rule_regex), 3) + hdbg.dassert_isinstance(selection_rule, SelectionRule) + # Parse the rule into tokens. + selection_rule_parts = selection_rule.split(":") + hdbg.dassert_eq(len(selection_rule_parts), 3) # Process each part of the rule regex. - for i, rule in enumerate(rule_regex): - hdbg.dassert_not_in(" ", rule) - if rule == "*": - # Convert `*`` into `\S*`` - rule_regex[i] = "(\S*)" - elif "|" in rule: - rule_regex[i] = "(" + rule_regex[i] + ")" + rule_parts_out = [] + for rule_part_in in selection_rule_parts: + hdbg.dassert_not_in(" ", rule_part_in) + if rule_part_in == "*": + # Convert `*` into `\S*`. + rule_part_out = "(\S*)" + elif "|" in rule_part_in: + # Convert `LLM|Linter` into `(LLM|Linter)`. 
+ rule_part_out = "(" + rule_part_in + ")" else: - pass - # Join the parts of the rule regex back together. - rule_regex = ":".join(rule_regex) - return rule_regex + # Keep the string as is. + rule_part_out = rule_part_in + rule_parts_out.append(rule_part_out) + # Join the parts of the rule back together. + rule_out = ":".join(rule_parts_out) + return rule_out -def extract_rules(guidelines: Guidelines, rule_regexes: List[RuleRegex]) -> Guidelines: +def extract_rules(guidelines: Guidelines, selection_rules: List[SelectionRule]) -> Guidelines: """ Extract the set of rules from the `guidelines` that match the rule regex. + + :param guidelines: The guidelines to extract the rules from. + :param selection_rules: The selection rules to use to extract the rules. + :return: The extracted rules. """ hdbg.dassert_isinstance(guidelines, list) - hdbg.dassert_isinstance(rule_regexes, list) + hdbg.dassert_isinstance(selection_rules, list) # A rule regex is a string separated by `:` characters, where each part is # - `*` (meaning "any string") # - a `string` (e.g., `Spelling`) @@ -798,8 +812,8 @@ def extract_rules(guidelines: Guidelines, rule_regexes: List[RuleRegex]) -> Guid # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. # Convert each rule regex into a regular expression. rule_regex_map = {} - for rule_regex_str in rule_regexes: - hdbg.dassert_isinstance(rule_regex_str, RuleRegex) + for rule_regex_str in selection_rules: + hdbg.dassert_isinstance(rule_regex_str, SelectionRule) regex = _convert_rule_into_regex(rule_regex_str) _LOG.debug(hprint.to_str("rule_regex_str regex")) hdbg.dassert_not_in(rule_regex_str, rule_regex_map) @@ -819,16 +833,15 @@ def extract_rules(guidelines: Guidelines, rule_regexes: List[RuleRegex]) -> Guid return rule_sections -# TODO(gp): -> parse_atomic_rules? -def extract_first_level_bullets_from_markdown(txt: str) -> List[str]: +def parse_rules_from_txt(txt: str) -> List[str]: """ - Parse atomic rules from a markdown. 
+ Parse rules from a chunk of markdown text. - Extract first-level bullet point list items from text until the next one. - Sub-lists nested under first-level items are extracted together with the - first-level items. + first-level items. - :param text: text to process + :param txt: text to process ``` - Item 1 - Item 2 diff --git a/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt b/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt index c52ba8b00..38f3146a7 100644 --- a/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt +++ b/helpers/test/outcomes/Test_process_code_block1.test1/output/test.txt @@ -4,13 +4,13 @@ ```python def print_integers(values): - + def _is_integer(value): try: return value == int(value) except: return False - + for v in values: if _is_integer(v): print(v) diff --git a/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt b/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt index 8f082d49f..dacb761b7 100644 --- a/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt +++ b/helpers/test/outcomes/Test_process_lines1.test1/output/test.txt @@ -4,13 +4,13 @@ 3: ```python 4: def print_integers(values): -5: +5: 6: def _is_integer(value): 7: try: 8: return value == int(value) 9: except: 10: return False -11: +11: 12: for v in values: 13: if _is_integer(v): 14: print(v) diff --git a/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt b/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt index 37c27efc2..de229ba17 100644 --- a/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt +++ b/helpers/test/outcomes/Test_remove_code_delimiters1.test4/output/test.txt @@ -1,6 +1,6 @@ - Functions can be declared in the body of another function - E.g., to hide utility functions in the scope of the function that uses them - + def print_integers(values): def _is_integer(value): @@ -12,5 +12,5 @@ for v in values: if 
_is_integer(v): print(v) - -- Hello + +- Hello \ No newline at end of file diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index f784f6d1e..0cca6531b 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -549,7 +549,7 @@ def helper(self, text: str, expected: str) -> None: # Prepare inputs. text = hprint.dedent(text) # Call function. - actual = hmarkdo.extract_first_level_bullets_from_markdown(text) + actual = hmarkdo.parse_rules_from_txt(text) # Check output. act = "\n".join(actual) self.assert_equal(act, expected, dedent=True) @@ -1612,7 +1612,7 @@ def test1(self) -> None: class Test_extract_rules1(hunitest.TestCase): - def helper(self, rule_regexes: List[str], exp: str) -> None: + def helper(self, selection_rules: List[str], exp: str) -> None: """ Test extracting rules from a markdown file. """ @@ -1620,7 +1620,7 @@ def helper(self, rule_regexes: List[str], exp: str) -> None: guidelines = get_header_list6() guidelines = hmarkdo.convert_header_list_into_guidelines(guidelines) # Call function. - selected_guidelines = hmarkdo.extract_rules(guidelines, rule_regexes) + selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) # Check output. act = "\n".join(map(str, selected_guidelines)) self.assert_equal(act, exp, dedent=True) @@ -1629,37 +1629,37 @@ def test1(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["Spelling:*:LLM"] + selection_rules = ["Spelling:*:LLM"] exp = """ HeaderInfo(1, 'Spelling:All:LLM', 11) """ - self.helper(rule_regexes, exp) + self.helper(selection_rules, exp) def test2(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["Spelling:NONE:LLM"] + selection_rules = ["Spelling:NONE:LLM"] exp = """ """ - self.helper(rule_regexes, exp) + self.helper(selection_rules, exp) def test3(self) -> None: """ Test extracting rules from a markdown file. 
""" - rule_regexes = ["Spelling:All:*"] + selection_rules = ["Spelling:All:*"] exp = """ HeaderInfo(1, 'Spelling:All:LLM', 11) HeaderInfo(1, 'Spelling:All:Linter', 16) """ - self.helper(rule_regexes, exp) + self.helper(selection_rules, exp) def test4(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["Spelling:All:*", "Python:*:*"] + selection_rules = ["Spelling:All:*", "Python:*:*"] exp = """ HeaderInfo(1, 'Spelling:All:LLM', 11) HeaderInfo(1, 'Spelling:All:Linter', 16) @@ -1668,7 +1668,7 @@ def test4(self) -> None: HeaderInfo(1, 'Python:Docstrings:LLM', 46) HeaderInfo(1, 'Python:Docstrings:Linter', 51) """ - self.helper(rule_regexes, exp) + self.helper(selection_rules, exp) def get_guidelines_txt1() -> str: @@ -1727,8 +1727,10 @@ def test_get_header_list1(self) -> None: """ Test extracting headers from a markdown file. """ + # Prepare inputs. txt = get_guidelines_txt1() max_level = 4 + # Run function. header_list = hmarkdo.extract_headers_from_markdown(txt, max_level) # Check output. act = "\n".join(map(str, header_list)) @@ -1746,8 +1748,9 @@ def test_get_header_list1(self) -> None: HeaderInfo(3, 'LLM', 37) """ self.assert_equal(act, exp, dedent=True) - # + # Run function. guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) + # Check output. act = "\n".join(map(str, guidelines)) exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) @@ -1758,9 +1761,9 @@ def test_get_header_list1(self) -> None: """ self.assert_equal(act, exp, dedent=True) - def helper(self, rule_regexes: List[str], exp: str) -> None: + def helper_extract_rules(self, selection_rules: List[str], exp: str) -> None: """ - Test extracting rules from a markdown file. + Helper function to test extracting rules from a markdown file. """ # Prepare inputs. 
txt = get_guidelines_txt1() @@ -1768,7 +1771,7 @@ def helper(self, rule_regexes: List[str], exp: str) -> None: header_list = hmarkdo.extract_headers_from_markdown(txt, max_level) guidelines = hmarkdo.convert_header_list_into_guidelines(header_list) # Call function. - selected_guidelines = hmarkdo.extract_rules(guidelines, rule_regexes) + selected_guidelines = hmarkdo.extract_rules(guidelines, selection_rules) # Check output. act = "\n".join(map(str, selected_guidelines)) self.assert_equal(act, exp, dedent=True) @@ -1777,51 +1780,51 @@ def test1(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["General:*:LLM"] + selection_rules = ["General:*:LLM"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) """ - self.helper(rule_regexes, exp) + self.helper_extract_rules(selection_rules, exp) def test2(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["General:NONE:LLM"] + selection_rules = ["General:NONE:LLM"] exp = """ """ - self.helper(rule_regexes, exp) + self.helper_extract_rules(selection_rules, exp) def test3(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["*:*:LLM"] + selection_rules = ["*:*:LLM"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'Python:Naming:LLM', 18) HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ - self.helper(rule_regexes, exp) + self.helper_extract_rules(selection_rules, exp) def test4(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["*:*:LLM", "General:*:*"] + selection_rules = ["*:*:LLM", "General:*:*"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ - self.helper(rule_regexes, exp) + self.helper_extract_rules(selection_rules, exp) def test5(self) -> None: """ Test extracting rules from a markdown file. 
""" - rule_regexes = ["*:*:*"] + selection_rules = ["*:*:*"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) @@ -1829,13 +1832,13 @@ def test5(self) -> None: HeaderInfo(1, 'Python:Naming:Linter', 28) HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ - self.helper(rule_regexes, exp) + self.helper_extract_rules(selection_rules, exp) def test6(self) -> None: """ Test extracting rules from a markdown file. """ - rule_regexes = ["*:*:*", "General:*:*"] + selection_rules = ["*:*:*", "General:*:*"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) @@ -1843,7 +1846,7 @@ def test6(self) -> None: HeaderInfo(1, 'Python:Naming:Linter', 28) HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ - self.helper(rule_regexes, exp) + self.helper_extract_rules(selection_rules, exp) # ############################################################################# # Test_inject_todos_from_cfile1 From b300683b8b205b6f330de878ddd09a353bf329d7 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 31 May 2025 17:12:46 -0400 Subject: [PATCH 151/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../Test_sanity_check_header_list1.test2/output/test.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt diff --git a/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt b/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt new file mode 100644 index 000000000..9f8585df5 --- /dev/null +++ b/helpers/test/outcomes/Test_sanity_check_header_list1.test2/output/test.txt @@ -0,0 +1,3 @@ +Consecutive headers increase by more than one level: + HeaderInfo(1, 'Chapter 1', 1) + HeaderInfo(3, 'Subsection 1.1.1', 6) \ No newline at end of file From 57bb492aa647918126e9d62f61d8f6e927c9c232 Mon Sep 
17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 1 Jun 2025 16:37:15 -0400 Subject: [PATCH 152/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hmarkdown.py | 26 ++++++ helpers/test/test_hmarkdown.py | 159 +++++++++++++++------------------ 2 files changed, 98 insertions(+), 87 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 710ebdfbe..b95761d0b 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -890,6 +890,32 @@ def parse_rules_from_txt(txt: str) -> List[str]: return bullet_points +def extract_rules_from_section(txt: str, line_number: int) -> List[str]: + """ + Extract rules from a section of a markdown file. + + :param txt: The markdown text to extract the rules from. + :param line_number: The line number of the section to start extracting the + rules from. + :return: The extracted rules. + """ + # Find the line number of the next header. + i = line_number + while True: + hdbg.dassert_lt(i, len(txt)) + line = txt[i] + if line.startswith("#"): + break + i += 1 + # Parse the markdown text into a list of bullet points. + bullet_points = parse_rules_from_txt(txt) + # Extract the rules from the bullet points. + rules = [] + for bullet_point in bullet_points: + rules.append(bullet_point) + return rules + + # ############################################################################# # Process headers. 
# ############################################################################# diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 0cca6531b..27a8c5867 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -538,67 +538,6 @@ def test_no_headers(self) -> None: self.assert_equal(str(act), str(exp)) -# ############################################################################# -# Test_extract_headers_from_markdown1 -# ############################################################################# - - -class Test_extract_first_level_bullets_from_markdown1(hunitest.TestCase): - - def helper(self, text: str, expected: str) -> None: - # Prepare inputs. - text = hprint.dedent(text) - # Call function. - actual = hmarkdo.parse_rules_from_txt(text) - # Check output. - act = "\n".join(actual) - self.assert_equal(act, expected, dedent=True) - - def test_basic_list1(self) -> None: - """ - Test extracting simple first-level bullet points. - """ - text = """ - - Item 1 - - Item 2 - - Item 3 - """ - expected = """ - - Item 1 - - Item 2 - - Item 3 - """ - self.helper(text, expected) - - def test_nested_list1(self) -> None: - """ - Test extracting bullet points with nested sub-items. - """ - text = """ - - Item 1 - - Item 2 - - Sub-item 2.1 - - Sub-item 2.2 - - Item 3 - """ - expected = """ - - Item 1 - - Item 2 - - Sub-item 2.1 - - Sub-item 2.2 - - Item 3 - """ - self.helper(text, expected) - - def test_empty_list1(self) -> None: - """ - Test handling empty input. 
- """ - text = "" - expected = "" - self.helper(text, expected) - - # ############################################################################# # Test_remove_end_of_line_periods1 # ############################################################################# @@ -1671,6 +1610,67 @@ def test4(self) -> None: self.helper(selection_rules, exp) +# ############################################################################# +# Test_parse_rules_from_txt1 +# ############################################################################# + + +class Test_parse_rules_from_txt1(hunitest.TestCase): + + def helper(self, text: str, expected: str) -> None: + # Prepare inputs. + text = hprint.dedent(text) + # Call function. + actual = hmarkdo.parse_rules_from_txt(text) + # Check output. + act = "\n".join(actual) + self.assert_equal(act, expected, dedent=True) + + def test_basic_list1(self) -> None: + """ + Test extracting simple first-level bullet points. + """ + text = """ + - Item 1 + - Item 2 + - Item 3 + """ + expected = """ + - Item 1 + - Item 2 + - Item 3 + """ + self.helper(text, expected) + + def test_nested_list1(self) -> None: + """ + Test extracting bullet points with nested sub-items. + """ + text = """ + - Item 1 + - Item 2 + - Sub-item 2.1 + - Sub-item 2.2 + - Item 3 + """ + expected = """ + - Item 1 + - Item 2 + - Sub-item 2.1 + - Sub-item 2.2 + - Item 3 + """ + self.helper(text, expected) + + def test_empty_list1(self) -> None: + """ + Test handling empty input. 
+ """ + text = "" + expected = "" + self.helper(text, expected) + + def get_guidelines_txt1() -> str: txt = r""" # General @@ -1721,7 +1721,7 @@ def get_guidelines_txt1() -> str: return txt -class Test_extract_rules2(hunitest.TestCase): +class Test_end_to_end_rules1(hunitest.TestCase): def test_get_header_list1(self) -> None: """ @@ -1776,7 +1776,7 @@ def helper_extract_rules(self, selection_rules: List[str], exp: str) -> None: act = "\n".join(map(str, selected_guidelines)) self.assert_equal(act, exp, dedent=True) - def test1(self) -> None: + def test_extract_rules1(self) -> None: """ Test extracting rules from a markdown file. """ @@ -1786,65 +1786,50 @@ def test1(self) -> None: """ self.helper_extract_rules(selection_rules, exp) - def test2(self) -> None: - """ - Test extracting rules from a markdown file. - """ + def test_extract_rules2(self) -> None: selection_rules = ["General:NONE:LLM"] exp = """ """ self.helper_extract_rules(selection_rules, exp) - def test3(self) -> None: - """ - Test extracting rules from a markdown file. - """ + def test_extract_rules3(self) -> None: selection_rules = ["*:*:LLM"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) - def test4(self) -> None: - """ - Test extracting rules from a markdown file. - """ + def test_extract_rules4(self) -> None: selection_rules = ["*:*:LLM", "General:*:*"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) - def test5(self) -> None: - """ - Test extracting rules from a markdown file. 
- """ + def test_extract_rules5(self) -> None: selection_rules = ["*:*:*"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) - def test6(self) -> None: - """ - Test extracting rules from a markdown file. - """ + def test_extract_rules6(self) -> None: selection_rules = ["*:*:*", "General:*:*"] exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) + HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) From a01a09639b4a3a3b147c2db86df815042101ad73 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 1 Jun 2025 16:33:12 -0400 Subject: [PATCH 153/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 43 +++++++++++++++++++---- dev_scripts_helpers/llms/llm_transform.py | 4 +-- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index ff990c6ff..b43d81db4 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -439,9 +439,7 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. - - You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. 
""" system = _CODING_CONTEXT system += r""" @@ -1227,13 +1225,30 @@ def slide_add_figure() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def slide_check() -> _PROMPT_OUT: + system = _MD_CONTEXT + system += r""" + - Do not print the content of the slide, but only the comment. + + - Is the content of the slide clear and correct? + - Answer with "The slide is clear" or "The slide is not clear" + + - Is there anything that can be clarified? + - Respond with at most 5 short bullet points about what can be clarified. + - Do not report things that you are not sure about. + """ + pre_transforms: Set[str] = set() + post_transforms: Set[str] = set() + post_container_transforms = ["format_markdown", "append_to_text"] + return system, pre_transforms, post_transforms, post_container_transforms + # ############################################################################# # Text. # ############################################################################# # Operate on pure text, not markdown. -# def text_expand() -> _PROMPT_OUT: +#def text_expand() -> _PROMPT_OUT: # """ # """ # system = hio.from_file("text_expand2.txt") @@ -1243,12 +1258,26 @@ def slide_add_figure() -> _PROMPT_OUT: # return system, pre_transforms, post_transforms, post_container_transforms +def text_idea() -> _PROMPT_OUT: + """ + """ + file = "text_idea.txt" + if os.path.exists(file): + system = hio.from_file(file) + else: + system = "" + pre_transforms: Set[str] = set() + post_transforms: Set[str] = set() + post_container_transforms = ["format_markdown"] + return system, pre_transforms, post_transforms, post_container_transforms + + def text_rephrase() -> _PROMPT_OUT: """ - Apply complex transformations to the text. 
""" - if os.path.exists("text_rephrase.txt"): - system = hio.from_file("text_rephrase.txt") + file = "text_rephrase.txt" + if os.path.exists(file): + system = hio.from_file(file) else: system = "" pre_transforms: Set[str] = set() diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 05ec07f08..bf7a30287 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -300,11 +300,11 @@ def _main(parser: argparse.ArgumentParser) -> None: out_txt_tmp = [] # Append the original text. txt = hio.from_file(tmp_in_file_name) - txt = hmarkdo.format_markdown(txt) - txt = hmarkdo.md_clean_up(txt) out_txt_tmp.append(txt) # Append the transformed text. + out_txt_tmp.append("\n#### Comments ####") out_txt_tmp.append(out_txt) + # Join everything. out_txt = "\n".join(out_txt_tmp) # Check that all post-transforms were run. hdbg.dassert_eq( From dee39a1dbea4b4d330031bfd5bf7181c608dec15 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 1 Jun 2025 17:00:07 -0400 Subject: [PATCH 154/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hsystem.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helpers/hsystem.py b/helpers/hsystem.py index e42779044..644d819ea 100644 --- a/helpers/hsystem.py +++ b/helpers/hsystem.py @@ -798,6 +798,7 @@ def find_file_in_repo(file_name: str, *, root_dir: Optional[str] = None) -> str: return file_name_out +# TODO(gp): Use find_file def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: """ Find a file in a directory and report its absolute path. 
@@ -819,6 +820,7 @@ def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: return None +# TODO(gp): -> find_path_greedily def find_path(path: str, *, dir_name: str = ".", abort_on_error: bool = False) -> str: """ Find a path in a directory and report its absolute path. From 446bde88d7b9d78f12af19ac9fa0b501ee9c6a10 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 2 Jun 2025 05:32:11 -0400 Subject: [PATCH 155/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 11 +---------- helpers/hmarkdown.py | 4 ++-- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index b43d81db4..5d5beee74 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -972,16 +972,7 @@ def review_llm() -> _PROMPT_OUT: Review the code using LLMs. """ # Load the reference file. - file_name = hgit.find_file("all.llm_style_review_guidelines.reference.md") - return _review_from_file(file_name) - - -def review_linter() -> _PROMPT_OUT: - """ - Review the code for linter style (still using LLMs). - """ - # Load the reference file. - file_name = hgit.find_file("all.linter_style_review_guidelines.reference.md") + file_name = hgit.find_file("all.coding_style_guidelines.reference.md") return _review_from_file(file_name) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index b95761d0b..f21612d6f 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -763,7 +763,7 @@ def convert_header_list_into_guidelines(header_list: HeaderList) -> Guidelines: def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: - """ + r""" Convert a rule into an actual regular expression. 
E.g., @@ -782,7 +782,7 @@ def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: hdbg.dassert_not_in(" ", rule_part_in) if rule_part_in == "*": # Convert `*` into `\S*`. - rule_part_out = "(\S*)" + rule_part_out = r"(\S*)" elif "|" in rule_part_in: # Convert `LLM|Linter` into `(LLM|Linter)`. rule_part_out = "(" + rule_part_in + ")" From 81d8c435adf8e9a3038ce81e669dfb6e87e2d290 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 2 Jun 2025 16:29:02 -0400 Subject: [PATCH 156/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 35 ++- dev_scripts_helpers/system_tools/path | 4 +- .../all.coding_style_guidelines.reference.md | 93 ++++-- helpers/hdbg.py | 2 +- helpers/hgit.py | 36 ++- helpers/hmarkdown.py | 139 ++------- helpers/lib_tasks_git.py | 32 +- helpers/lib_tasks_lint.py | 25 +- helpers/test/test_hmarkdown.py | 275 ++---------------- .../Test_show_imports.test1/output/output.txt | 8 +- 10 files changed, 225 insertions(+), 424 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 5d5beee74..a77521e2c 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -62,10 +62,15 @@ def get_prompt_tags() -> List[str]: def get_post_container_transforms( transform_name: str, -) -> Dict[str, List[str]]: +) -> List[str]: + """ + Return the transformations for `transform_name`. + """ global _POST_CONTAINER_TRANSFORMS + # Initialize the dictionary, on the first call. if not _POST_CONTAINER_TRANSFORMS: valid_prompts = get_prompt_tags() + # Call all the functions and register their `post_container_transforms`. 
for prompt in valid_prompts: _, _, _, post_container_transforms = eval(f"{prompt}()") hdbg.dassert_not_in(prompt, _POST_CONTAINER_TRANSFORMS) @@ -104,9 +109,31 @@ def test() -> _PROMPT_OUT: # ############################################################################# +# Apply_cfile. + + +def code_apply_cfile() -> _PROMPT_OUT: + """ + Apply a cfile to the code. + """ + system = _CODING_CONTEXT + system += r""" + Replace any Python "from import" statement like `from X import Y` with the + form `import X` and then replace the uses of `Y` with `X.Y` + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + post_container_transforms: List[str] = [] + return system, pre_transforms, post_transforms, post_container_transforms + + # Fix +# TODO(gp): The code fixes are superseded by the llm_review.py approach using +# the guideline file. + + def code_fix_from_imports() -> _PROMPT_OUT: """ Fix code to use imports instead of "from import" statements. @@ -920,10 +947,10 @@ def _review_from_file(file: str) -> _PROMPT_OUT: - `- Bad:` followed by inline or code block examples Example: - - All functions must have a docstring + - All functions must have a docstring - Good: ```python - def foo(): + def foo(): pass ``` - Bad: @@ -1223,7 +1250,7 @@ def slide_check() -> _PROMPT_OUT: - Is the content of the slide clear and correct? - Answer with "The slide is clear" or "The slide is not clear" - + - Is there anything that can be clarified? - Respond with at most 5 short bullet points about what can be clarified. - Do not report things that you are not sure about. 
diff --git a/dev_scripts_helpers/system_tools/path b/dev_scripts_helpers/system_tools/path index a15869a5e..f5430361e 100755 --- a/dev_scripts_helpers/system_tools/path +++ b/dev_scripts_helpers/system_tools/path @@ -39,7 +39,7 @@ def _find_file(filename: str, *, search_path: str = ".") -> Optional[str]: else: return None - + def _find_path(path: str, *, dir_name: str = ".", abort_on_error: bool = False) -> str: """ Find a path in a directory and report its absolute path. @@ -75,4 +75,4 @@ if __name__ == "__main__": else: dir_name = "." path_out = _find_path(path, dir_name=dir_name, abort_on_error=abort_on_error) - print(path_out, end="") \ No newline at end of file + print(path_out, end="") diff --git a/docs/code_guidelines/all.coding_style_guidelines.reference.md b/docs/code_guidelines/all.coding_style_guidelines.reference.md index 114991b20..646f4c218 100644 --- a/docs/code_guidelines/all.coding_style_guidelines.reference.md +++ b/docs/code_guidelines/all.coding_style_guidelines.reference.md @@ -1,10 +1,68 @@ <!-- toc --> +- [General](#general) + * [Spelling](#spelling) + + [LLM](#llm) + + [Linter](#linter) +- [Python](#python) + * [Naming](#naming) + + [LLM](#llm-1) + + [Linter](#linter-1) + * [Docstrings](#docstrings) + + [LLM](#llm-2) + + [Linter](#linter-2) + * [Comments](#comments) + + [LLM](#llm-3) + + [Linter](#linter-3) + * [Code implementation](#code-implementation) + + [LLM](#llm-4) + + [Linter](#linter-4) + * [Code design](#code-design) + + [LLM](#llm-5) + + [Linter](#linter-5) + * [Imports](#imports) + + [LLM](#llm-6) + + [Linter](#linter-6) + * [Type annotations](#type-annotations) + + [LLM](#llm-7) + + [Linter](#linter-7) + * [Functions](#functions) + + [LLM](#llm-8) + + [Linter](#linter-8) + * [Scripts](#scripts) + + [LLM](#llm-9) + + [Linter](#linter-9) + * [Logging](#logging) + + [LLM](#llm-10) + + [Linter](#linter-10) + * [Misc](#misc) + + [LLM](#llm-11) + + [Linter](#linter-11) +- [Unit tests](#unit-tests) + * [Rules](#rules) + + 
[LLM](#llm-12) + + [Linter](#linter-12) +- [Notebooks](#notebooks) + * [General](#general-1) + + [LLM](#llm-13) + + [Linter](#linter-13) + * [Plotting](#plotting) + + [LLM](#llm-14) + + [Linter](#linter-14) + * [Jupytext](#jupytext) + + [LLM](#llm-15) + + [Linter](#linter-15) +- [Markdown](#markdown) + * [Naming](#naming-1) + + [LLM](#llm-16) + + [Linter](#linter-16) + * [General](#general-2) + + [LLM](#llm-17) + + [Linter](#linter-17) + <!-- tocstop --> -# ############################################################################# # General -# ############################################################################# ## Spelling @@ -46,7 +104,7 @@ - Good: `data` - Bad: `data_dict` - Abbreviations in the names of variables and functions should be avoided - - Exceptions are the following + - Exceptions are the following - `df` for dataframe - `srs` for series - `idx` for index @@ -113,6 +171,7 @@ :return: dictionary containing repository settings """ ``` + - The docstring must use imperative form, whenever possible - Good: "Calculate the sum of two numbers and return the result." - Bad: "Calculates the sum of two numbers and returns the result." @@ -182,18 +241,20 @@ function1() # Then do something else. function2() + ``` - Bad: ``` function1() function2() ``` + - Do not use inline comments; every comment should be on its own separate line, before the line it refers to - Good: ``` # Grant access to admin panel access_admin_panel(). 
- if user.is_admin(): + if user.is_admin(): ``` - Bad: ``` @@ -202,13 +263,13 @@ - In `if-elif-else` statements, the comments are placed underneath each statement in order to explain the code that belongs to each statement in particular - Good: - ``` - if ...: - # Do this - else: - # Do that - ``` + - Good: + ``` + if ...: + # Do this + else: + # Do that + ``` - Avoid referring to the type of a variable in the comments - Keeps comments focused on functionality rather than implementation specifics - Good: "Store the user's age for validation." @@ -288,8 +349,8 @@ for w in parts ] ``` - to: + to: - Good: ``` if i == 0: @@ -309,6 +370,7 @@ (i > 0 and not re.search(r'\w', tokens[i - 1])) or (i < len(tokens) - 1 and not re.search(r'\w', tokens[i + 1]))) ``` + - Provide clear and informative error messages in exceptions using f-strings - Good: `raise ValueError(f"Invalid server_name='{server_name}'")` - Good: `raise TypeError(f"Expected type int, but got {type(var).__name__}")` @@ -556,6 +618,7 @@ - Good: `_LOG.debug("cmd=%s", cmd1)` - Bad: `_LOG.debug(f"cmd={cmd1}")` - Use the following idiom to configure logging: + ```python import helpers.hdbg as hdbg @@ -577,9 +640,7 @@ then all the instances and references to it throughout the codebase should be updated -# ############################################################################# # Unit tests -# ############################################################################# ## Rules @@ -677,9 +738,7 @@ - If a unit test is renamed or removed in a PR, the corresponding files in the `outcomes` dir should also be renamed or removed -# ############################################################################# # Notebooks -# ############################################################################# ## General @@ -778,9 +837,7 @@ - If you update or delete the notebook, you must also update or delete its paired Python file, and vice versa -# 
############################################################################# # Markdown -# ############################################################################# ## Naming diff --git a/helpers/hdbg.py b/helpers/hdbg.py index 734b63652..54b4d318e 100644 --- a/helpers/hdbg.py +++ b/helpers/hdbg.py @@ -856,7 +856,7 @@ def dassert_is_path_abs( """ dassert_isinstance(path, str) dassert_ne(path, "") - dassert_eq(os.path.isabs(path), True, "Path '%s' is not absolute", path, + dassert(os.path.isabs(path), "Path '%s' is not absolute", path, only_warning=only_warning) diff --git a/helpers/hgit.py b/helpers/hgit.py index c84dadf12..82de76a7a 100644 --- a/helpers/hgit.py +++ b/helpers/hgit.py @@ -262,12 +262,15 @@ def find_file(file_name: str, *, dir_path: Optional[str] = None) -> str: if dir_path is None: dir_path = find_git_root() _LOG.debug(hprint.to_str("dir_path")) - cmd = (rf'find {dir_path} ' + - r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune " + - rf'-o -name "{file_name}" -print') + cmd = ( + rf"find {dir_path} " + + r"\( -path '*/.git' -o -path '*/.mypy_cache' \) -prune " + + rf'-o -name "{file_name}" -print' + ) _LOG.debug(hprint.to_str("cmd")) _, res = hsystem.system_to_one_line(cmd) hdbg.dassert_ne(res, "Can't find file '%s' in '%s'", file_name, dir_path) + res = cast(str, res) return res @@ -296,7 +299,9 @@ def find_helpers_root(dir_path: str = ".") -> str: # Make sure the dir and that `helpers` subdir exists. hdbg.dassert_dir_exists(helpers_root) hdbg.dassert_dir_exists(os.path.join(helpers_root), "helpers") - return helpers_root + # TODO(gp): Unclear why this happens. + helpers_root_ = cast(str, helpers_root) + return helpers_root_ # ############################################################################# @@ -306,8 +311,8 @@ def resolve_git_client_dir(git_client_name: str) -> str: """ Resolve the absolute path of the Git client directory. 
- :param git_client_name: the name of the Git client (e.g., "helpers1" or - "/Users/saggese/src/helpers1") + :param git_client_name: the name of the Git client (e.g., "helpers1" + or "/Users/saggese/src/helpers1") :return: the absolute path of the Git client directory """ if not os.path.isabs(git_client_name): @@ -322,11 +327,17 @@ def resolve_git_client_dir(git_client_name: str) -> str: return git_client_dir -def project_file_name_in_git_client(file_name: str, git_src_dir: str, git_dst_dir: str, - *, check_src_file_exists: bool = False, - check_dst_file_exists: bool = False) -> str: +def project_file_name_in_git_client( + file_name: str, + git_src_dir: str, + git_dst_dir: str, + *, + check_src_file_exists: bool = False, + check_dst_file_exists: bool = False, +) -> str: """ - Find the file corresponding to `file_name` in `git_src_dir` for the client `git_dst_dir`. + Find the file corresponding to `file_name` in `git_src_dir` for the client + `git_dst_dir`. This is useful when we want to find the file in a destination Git client directory corresponding to a file in a source Git client directory. @@ -434,7 +445,8 @@ def _is_repo(repo_short_name: str) -> bool: Return whether we are inside the module `repo_short_name`. """ curr_repo_short_name = hrecouti.get_repo_config().get_repo_short_name() - return curr_repo_short_name == repo_short_name + is_repo = bool(curr_repo_short_name == repo_short_name) + return is_repo def is_helpers() -> bool: @@ -761,7 +773,7 @@ def get_repo_full_name_from_client(super_module: bool) -> str: return repo_name -def is_cwd_git_repo(): +def is_cwd_git_repo() -> bool: """ Return whether the current working directory is a Git repo root. 
""" diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index f21612d6f..53429795f 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -7,11 +7,10 @@ import dataclasses import logging import re -from typing import Generator, List, Optional, Tuple, cast +from typing import Dict, Generator, List, Optional, Tuple, cast import dev_scripts_helpers.documentation.lint_notes as dshdlino import helpers.hdbg as hdbg -import helpers.hio as hio import helpers.hparser as hparser import helpers.hprint as hprint @@ -665,7 +664,8 @@ def sanity_check_rules(txt: List[str]) -> None: """ Sanity check the rules. """ - header_list = extract_headers_from_markdown(txt, max_level=5) + txt_tmp = "\n".join(txt) + header_list = extract_headers_from_markdown(txt_tmp, max_level=5) # 1) Start with level 1 headers. # 2) All level 1 headers are unique. # 3) Header levels are increasing / decreasing by at most 1. @@ -701,8 +701,8 @@ def sanity_check_rules(txt: List[str]) -> None: def convert_header_list_into_guidelines(header_list: HeaderList) -> Guidelines: """ - Convert the header list into a `Guidelines` object with only level 1 headers - and full hierarchy of the rules as description. + Convert the header list into a `Guidelines` object with only level 1 + headers and full hierarchy of the rules as description. Expand a header list like: ``` @@ -766,7 +766,7 @@ def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: r""" Convert a rule into an actual regular expression. 
- E.g., + E.g., - `Spelling:*:LLM` -> `Spelling:(\S*):LLM` - `*:*:Linter|LLM` -> `(\S*):(\S*):(Linter|LLM)` - `Spelling|Python:*:LLM` -> `Spelling|Python:(\S*):LLM` @@ -795,12 +795,15 @@ def _convert_rule_into_regex(selection_rule: SelectionRule) -> str: return rule_out -def extract_rules(guidelines: Guidelines, selection_rules: List[SelectionRule]) -> Guidelines: +def extract_rules( + guidelines: Guidelines, selection_rules: List[SelectionRule] +) -> Guidelines: """ Extract the set of rules from the `guidelines` that match the rule regex. :param guidelines: The guidelines to extract the rules from. - :param selection_rules: The selection rules to use to extract the rules. + :param selection_rules: The selection rules to use to extract the + rules. :return: The extracted rules. """ hdbg.dassert_isinstance(guidelines, list) @@ -811,7 +814,7 @@ def extract_rules(guidelines: Guidelines, selection_rules: List[SelectionRule]) # - a list of strings separated by `|` (e.g., `LLM|Linter`) # E.g., `Spelling:*:LLM`, `*:*:Linter|LLM`, `Spelling|Python:*:LLM`. # Convert each rule regex into a regular expression. - rule_regex_map = {} + rule_regex_map: Dict[str, str] = {} for rule_regex_str in selection_rules: hdbg.dassert_isinstance(rule_regex_str, SelectionRule) regex = _convert_rule_into_regex(rule_regex_str) @@ -829,7 +832,11 @@ def extract_rules(guidelines: Guidelines, selection_rules: List[SelectionRule]) if guideline not in rule_sections: rule_sections.append(guideline) # Select the rules. - _LOG.debug("Selected %s sections:\n%s", len(rule_sections), "\n".join([r.description for r in rule_sections])) + _LOG.debug( + "Selected %s sections:\n%s", + len(rule_sections), + "\n".join([r.description for r in rule_sections]), + ) return rule_sections @@ -839,7 +846,7 @@ def parse_rules_from_txt(txt: str) -> List[str]: - Extract first-level bullet point list items from text until the next one. 
- Sub-lists nested under first-level items are extracted together with the - first-level items. + first-level items. :param txt: text to process ``` @@ -849,16 +856,6 @@ def parse_rules_from_txt(txt: str) -> List[str]: - Item 4 ``` :return: extracted bullet points, e.g., - ``` - [ - "- Item 1", - ''' - - Item 2 - - Item 3 - ''', - "- Item 4", - ] - ``` """ lines = txt.split("\n") # Store the first-level bullet points. @@ -895,8 +892,8 @@ def extract_rules_from_section(txt: str, line_number: int) -> List[str]: Extract rules from a section of a markdown file. :param txt: The markdown text to extract the rules from. - :param line_number: The line number of the section to start extracting the - rules from. + :param line_number: The line number of the section to start + extracting the rules from. :return: The extracted rules. """ # Find the line number of the next header. @@ -1141,91 +1138,6 @@ def selected_navigation_to_str( return txt -# ############################################################################# - - -def inject_todos_from_cfile( - cfile_txt: str, todo_user: str, comment_prefix: str -) -> None: - """ - Inject the TODOs from a cfile in the corresponding files. - - Given a cfile with the following content: - ``` - dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py:101: The logic ... - ``` - the function will inject the TODO in the corresponding file and line - - :param cfile_txt: The content of the cfile. - :param todo_user: The user to use in the TODO. - :param comment_prefix: The prefix to use for the comment (e.g., "#") - """ - # For each file, store - # - the current file content - # - the offset (i.e., how many lines we inserted in the file so far, so - # we can inject the TODO at the correct line number) - # - the index of the last line modified to make sure the TODOs are for - # increasing line numbers. 
- file_content = {} - for todo_line in cfile_txt.split("\n"): - _LOG.debug("\n%s", hprint.frame("todo line='%s'" % todo_line)) - if todo_line.strip() == "": - continue - # dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py:101: The logic for extracting required status checks and pull request reviews is repeated. Consider creating a helper function to handle this extraction to reduce redundancy. - m = re.match(r"^\s*(\S+):(\d+):\s*(.*)$", todo_line) - if not m: - _LOG.warning("Can't parse line='%s': skipping", todo_line) - continue - file_name, todo_line_number, todo = m.groups() - todo_line_number = int(todo_line_number) - _LOG.debug(hprint.to_str("file_name todo_line_number todo")) - # Update the state if needed. - if file_name not in file_content: - _LOG.debug("Reading %s", file_name) - hdbg.dassert_path_exists(file_name) - txt = hio.from_file(file_name).split("\n") - offset = 0 - last_line_modified = 0 - file_content[file_name] = (txt, offset, last_line_modified) - # Extract the info for the file to process. - txt, offset, last_line_modified = file_content[file_name] - _LOG.debug(hprint.to_str("offset last_line_modified")) - hdbg.dassert_lt( - last_line_modified, - todo_line_number, - "The TODOs don't look like they are increasing line numbers: " - "TODO at line %d is before the last line modified %d", - todo_line_number, - last_line_modified, - ) - # We subtract 1 from the line number since TODOs count from 1, while - # Python arrays count from 0. - act_line_number = todo_line_number - 1 + offset - hdbg.dassert_lte(0, act_line_number) - hdbg.dassert_lt(act_line_number, len(txt)) - insert_line = txt[act_line_number] - _LOG.debug(hprint.to_str("act_line_number insert_line")) - # Extract how many spaces there are at place where the line to insert - # the TODO. - m = re.match(r"^(\s*)\S", insert_line) - hdbg.dassert(m, "Can't parse insert_line='%s'", insert_line) - spaces = len(m.group(1)) * " " - # Build the new line to insert. 
- new_line = spaces + f"{comment_prefix} TODO({todo_user}): {todo}" - _LOG.debug(hprint.to_str("new_line")) - # Insert the new line in txt at the correct position. - txt = txt[:act_line_number] + [new_line] + txt[act_line_number:] - # Update the state. - offset += 1 - file_content[file_name] = (txt, offset, todo_line_number) - # Write updated files back. - for file_name, (txt, offset, last_line_modified) in file_content.items(): - _ = last_line_modified - _LOG.info("Writing %d lines in %s", offset, file_name) - txt = "\n".join(txt) - hio.to_file(file_name, txt) - - # ############################################################################# # Formatting markdown # ############################################################################# @@ -1298,9 +1210,10 @@ def bold_first_level_bullets(markdown_text: str, *, max_length: int = 30) -> str # First-level bullet, add bold markers. m = re.match(r"^(\s*-\s+)(.*)", line) hdbg.dassert(m, "Can't parse line='%s'", line) - bullet_text = m.group(2) + bullet_text = m.group(2) # type: ignore[union-attr] if max_length > -1 and len(bullet_text) <= max_length: - line = m.group(1) + "**" + bullet_text + "**" + spaces = m.group(1) # type: ignore[union-attr] + line = spaces + "**" + bullet_text + "**" result.append(line) return "\n".join(result) @@ -1399,7 +1312,8 @@ def prettier_markdown(txt: str) -> str: """ file_type = "md" txt = dshdlino.prettier_on_str(txt, file_type) - return txt + txt_ = cast(str, txt) + return txt_ def format_markdown(txt: str) -> str: @@ -1428,4 +1342,5 @@ def format_markdown_slide(txt: str) -> str: def format_latex(txt: str) -> str: file_type = "tex" txt = dshdlino.prettier_on_str(txt, file_type) - return txt + txt_ = cast(str, txt) + return txt_ diff --git a/helpers/lib_tasks_git.py b/helpers/lib_tasks_git.py index effa02430..25de1d204 100644 --- a/helpers/lib_tasks_git.py +++ b/helpers/lib_tasks_git.py @@ -212,6 +212,7 @@ def git_patch_create( # type: ignore cmd = f"tar czvf {dst_file} 
{files_as_str}" cmd_inv = "tar xvzf" elif mode == "diff": + opts: str if modified: opts = "HEAD" elif branch: @@ -219,12 +220,14 @@ def git_patch_create( # type: ignore elif last_commit: opts = "HEAD^" else: - hdbg.dfatal( + raise ValueError( "You need to specify one among -modified, --branch, " "--last-commit" ) cmd = f"git diff {opts} --binary {files_as_str} >{dst_file}" cmd_inv = "git apply" + else: + raise ValueError(f"Invalid cmd='{cmd}'") # Execute patch command. _LOG.info("Creating the patch into %s", dst_file) hdbg.dassert_ne(cmd, "") @@ -236,16 +239,17 @@ def git_patch_create( # type: ignore remote_file = os.path.basename(dst_file) abs_path_dst_file = os.path.abspath(dst_file) msg = f""" -# To apply the patch and execute: -> git checkout {hash_} -> {cmd_inv} {abs_path_dst_file} + # To apply the patch and execute: + > git checkout {hash_} + > {cmd_inv} {abs_path_dst_file} -# To apply the patch to a remote client: -> export SERVER="server" -> export CLIENT_PATH="~/src" -> scp {dst_file} $SERVER: -> ssh $SERVER 'cd $CLIENT_PATH && {cmd_inv} ~/{remote_file}'" + # To apply the patch to a remote client: + > export SERVER="server" + > export CLIENT_PATH="~/src" + > scp {dst_file} $SERVER: + > ssh $SERVER 'cd $CLIENT_PATH && {cmd_inv} ~/{remote_file}'" """ + msg = hprint.dedent(msg) print(msg) @@ -936,9 +940,13 @@ def git_repo_copy(ctx, file_name, src_git_dir, dst_git_dir): # type: ignore _ = ctx src_git_dir = hgit.resolve_git_client_dir(src_git_dir) dst_git_dir = hgit.resolve_git_client_dir(dst_git_dir) - dst_file_path = hgit.project_file_name_in_git_client(file_name, src_git_dir, dst_git_dir, - check_src_file_exists=True, - check_dst_file_exists=False) + dst_file_path = hgit.project_file_name_in_git_client( + file_name, + src_git_dir, + dst_git_dir, + check_src_file_exists=True, + check_dst_file_exists=False, + ) _LOG.info("Copying code from '%s' to '%s' ...", file_name, dst_git_dir) # Copy the file. 
hsystem.system_to_string(f"cp {file_name} {dst_file_path}") diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index b80e0e63b..ecf15c039 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -286,7 +286,7 @@ def lint( # type: ignore @task -def lint_check_if_it_was_run(ctx): +def lint_check_if_it_was_run(ctx): # type: ignore """ Check if the linter was run in the current branch. @@ -351,19 +351,22 @@ def _get_lint_docker_cmd( @task def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): # type: ignore """ - Sync the code needed to run linter and ai_review.py from a client to the current one. + Sync the code needed to run linter and ai_review.py from a client to the + current one. :param git_client_name: the name of the git client to sync from. It can be - something like "helpers1" and it will be used from "$HOME/src" or can + something like "helpers1" and it will be used from "$HOME/src" or can be a full path. :param revert_to_original: if `True`, revert the changes to the original """ _ = ctx hlitauti.report_task() + # Copy the code from the src git client to the current one. + src_git_dir = hgit.resolve_git_client_dir(git_client_name) # files_to_copy = [ - #"hgit.py", - #"hmarkdown.py", + # "hgit.py", + # "hmarkdown.py", "llm_prompts.py", "llm_transform.py", "inject_todos.py", @@ -380,8 +383,6 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): hsystem.system(cmd) _LOG.info("Done") return - # Copy the code from the src git client to the current one. - src_git_dir = hgit.resolve_git_client_dir(git_client_name) # Get the path to the helpers repo. 
src_helpers_dir = hgit.find_helpers_root(src_git_dir) hdbg.dassert_ne(src_helpers_dir, "") @@ -391,7 +392,9 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): hdbg.dassert_dir_exists(dst_helpers_dir) _LOG.debug(hprint.to_str("src_helpers_dir dst_helpers_dir")) # - _LOG.info("Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir) + _LOG.info( + "Copying files from '%s' to '%s' ...", src_helpers_dir, dst_helpers_dir + ) # Find the files to copy. for file_name in files_to_copy: _LOG.debug(hprint.to_str("file_name")) @@ -401,7 +404,9 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): _LOG.debug(hprint.to_str("src_file_path")) hdbg.dassert_file_exists(src_file_path) # Get the path to the file in the dst Git client. - dst_file_path = hgit.project_file_name_in_git_client(src_file_path, src_helpers_dir, dst_helpers_dir) + dst_file_path = hgit.project_file_name_in_git_client( + src_file_path, src_helpers_dir, dst_helpers_dir + ) _LOG.debug(hprint.to_str("dst_file_path")) # Copy the file. _LOG.debug(hprint.to_str("src_file_path dst_file_path")) @@ -411,4 +416,4 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): _LOG.debug(hprint.to_str("cmd")) _LOG.info("Copying file '%s' to '%s' ...", src_file_path, dst_file_path) hsystem.system(cmd) - _LOG.info("Done") \ No newline at end of file + _LOG.info("Done") diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 27a8c5867..66126bf26 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -1,7 +1,7 @@ import logging import os import pprint -from typing import Any, List, Tuple +from typing import Any, List, Tuple, cast import helpers.hio as hio import helpers.hmarkdown as hmarkdo @@ -314,6 +314,7 @@ def _get_markdown_example1() -> str: Content under header 3. 
""" content = hprint.dedent(content) + content = cast(str, content) return content @@ -325,6 +326,7 @@ def _get_markdown_example2() -> str: Content under subheader 2. """ content = hprint.dedent(content) + content = cast(str, content) return content @@ -333,6 +335,7 @@ def _get_markdown_example3() -> str: This is some content without any headers. """ content = hprint.dedent(content) + content = cast(str, content) return content @@ -408,6 +411,7 @@ def greet(name): Stay curious and keep exploring! """ content = hprint.dedent(content) + content = cast(str, content) return content @@ -423,6 +427,7 @@ def _get_markdown_example5() -> hmarkdo.HeaderList: ## Linear models """ content = hprint.dedent(content) + content = cast(str, content) return content @@ -1524,6 +1529,11 @@ def get_header_list6() -> hmarkdo.HeaderList: return header_list +# ############################################################################# +# Test_convert_header_list_into_guidelines1 +# ############################################################################# + + class Test_convert_header_list_into_guidelines1(hunitest.TestCase): def test1(self) -> None: @@ -1549,6 +1559,11 @@ def test1(self) -> None: self.assert_equal(act, exp, dedent=True) +# ############################################################################# +# Test_extract_rules1 +# ############################################################################# + + class Test_extract_rules1(hunitest.TestCase): def helper(self, selection_rules: List[str], exp: str) -> None: @@ -1718,9 +1733,15 @@ def get_guidelines_txt1() -> str: - E.g., "for these inputs the function responds with this output" """ txt = hprint.dedent(txt) + txt = cast(str, txt) return txt +# ############################################################################# +# Test_end_to_end_rules1 +# ############################################################################# + + class Test_end_to_end_rules1(hunitest.TestCase): def test_get_header_list1(self) -> 
None: @@ -1797,7 +1818,7 @@ def test_extract_rules3(self) -> None: exp = """ HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) @@ -1807,7 +1828,7 @@ def test_extract_rules4(self) -> None: HeaderInfo(1, 'General:Spelling:LLM', 5) HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) - HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) @@ -1818,7 +1839,7 @@ def test_extract_rules5(self) -> None: HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) @@ -1829,254 +1850,10 @@ def test_extract_rules6(self) -> None: HeaderInfo(1, 'General:Spelling:Linter', 7) HeaderInfo(1, 'Python:Naming:LLM', 18) HeaderInfo(1, 'Python:Naming:Linter', 28) - HeaderInfo(1, 'Unit_test_extract_ruless:Rules:LLM', 37) + HeaderInfo(1, 'Unit_tests:Rules:LLM', 37) """ self.helper_extract_rules(selection_rules, exp) -# ############################################################################# -# Test_inject_todos_from_cfile1 -# ############################################################################# - - -class Test_inject_todos_from_cfile1(hunitest.TestCase): - - def test1(self) -> None: - """ - Test injecting TODOs from a cfile into a Python file. - """ - # Create a test file. - test_file_content = """ - def hello(msg): - print(msg) - - def world(): - print("world") - """ - file_path = self._create_test_file("test.py", test_file_content) - # Create cfile with TODOs. 
- cfile_content = [ - f"{file_path}:1: Add type hints.", - f"{file_path}:4: Add docstring.", - ] - self._create_cfile(cfile_content) - # Run the function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. - actual = hio.from_file(file_path) - expected = """ - # TODO(user): Add type hints. - def hello(msg): - print(msg) - - # TODO(user): Add docstring. - def world(): - print("world") - """ - self.assert_equal(actual, expected, dedent=True) - - def test_one_line_file(self) -> None: - """ - Test injecting TODOs into an empty file. - """ - # Create an empty test file - test_file_content = """ - print("hello") - """ - file_path = self._create_test_file("empty.py", test_file_content) - # Create cfile with TODOs - cfile_content = [f"{file_path}:1: Add content to empty file."] - self._create_cfile(cfile_content) - # Run the function under test - self._inject_todos("\n".join(cfile_content)) - # Check output - actual = hio.from_file(file_path) - expected = """ - # TODO(user): Add content to empty file. - print("hello") - """ - self.assert_equal(actual, expected, dedent=True) - - def test_invalid_line_numbers(self) -> None: - """ - Test handling of TODOs with invalid line numbers. - """ - # Create a test file - test_file_content = """ - line1 - line2 - """ - file_path = self._create_test_file("test.py", test_file_content) - # Create cfile with invalid line numbers - cfile_content = [ - f"{file_path}:999: This line number doesn't exist.", - ] - self._create_cfile(cfile_content) - # This should raise an assertion error due to invalid line numbers - with self.assertRaises(AssertionError) as err: - self._inject_todos("\n".join(cfile_content)) - # Check output. 
- expected = """ - ################################################################################ - * Failed assertion * - 998 < 2 - ################################################################################ - """ - self.assert_equal( - str(err.exception), expected, dedent=True, fuzzy_match=True - ) - - def test2(self) -> None: - """ - Test injecting TODOs from a cfile into a Python file with a complex - class. - """ - # Create a test file. - test_file_content = """ - import logging - from typing import List, Optional - - class DataProcessor: - def __init__(self): - self.logger = logging.getLogger(__name__) - self.data = [] - - def process_batch(self, items): - for item in items: - self.data.append(self._transform(item)) - - def _transform(self, item): - return item.upper() - - def get_results(self): - return self.data - - def clear(self): - self.data = [] - """ - file_path = self._create_test_file("test.py", test_file_content) - # Create cfile with TODOs. - cfile_content = [ - f"{file_path}:4: Add class docstring explaining purpose and usage", - f"{file_path}:5: Add type hints for instance variables", - f"{file_path}:9: Add type hints for items parameter", - f"{file_path}:10: Consider adding batch size validation", - f"{file_path}:13: Add error handling for non-string inputs", - f"{file_path}:16: Add return type hint and docstring", - f"{file_path}:19: Add docstring explaining clear behavior", - ] - self._create_cfile(cfile_content) - # Run function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. 
- actual = hio.from_file(file_path) - expected = """ - import logging - from typing import List, Optional - - # TODO(user): Add class docstring explaining purpose and usage - class DataProcessor: - # TODO(user): Add type hints for instance variables - def __init__(self): - self.logger = logging.getLogger(__name__) - self.data = [] - - # TODO(user): Add type hints for items parameter - def process_batch(self, items): - # TODO(user): Consider adding batch size validation - for item in items: - self.data.append(self._transform(item)) - - # TODO(user): Add error handling for non-string inputs - def _transform(self, item): - return item.upper() - - # TODO(user): Add return type hint and docstring - def get_results(self): - return self.data - - # TODO(user): Add docstring explaining clear behavior - def clear(self): - self.data = [] - """ - self.assert_equal(actual, expected, dedent=True) - - def test3(self) -> None: - """ - Test injecting TODOs from a cfile into multiple Python files. - """ - # Create first test file. - test_file1_content = """ - def foo(): - pass - """ - file_path1 = self._create_test_file("test1.py", test_file1_content) - # Create second test file. - test_file2_content = """ - def bar(): - return None - """ - file_path2 = self._create_test_file("test2.py", test_file2_content) - # Create cfile. - cfile_content = [ - f"{file_path1}:1: Add docstring for foo.", - f"{file_path2}:1: Add docstring for bar.", - f"{file_path2}:2: Add type hint for return.", - ] - self._create_cfile(cfile_content) - # Run function under test. - self._inject_todos("\n".join(cfile_content)) - # Check output. - actual1 = hio.from_file(file_path1) - expected1 = """ - # TODO(user): Add docstring for foo. - def foo(): - pass - """ - self.assert_equal(actual1, expected1, dedent=True) - # - actual2 = hio.from_file(file_path2) - expected2 = """ - # TODO(user): Add docstring for bar. - def bar(): - # TODO(user): Add type hint for return. 
- return None - """ - self.assert_equal(actual2, expected2, dedent=True) - - def _create_test_file(self, filename: str, content: str) -> str: - """ - Create a test file with given content in the scratch directory. - - :param scratch_dir: Directory to create file in - :param filename: Name of file to create - :param content: Content to write to file - :return: Full path to created file - """ - scratch_dir = self.get_scratch_space() - file_path = os.path.join(scratch_dir, filename) - content = hprint.dedent(content) - hio.to_file(file_path, content) - return file_path - - def _create_cfile(self, cfile_content: List[str]) -> str: - """ - Create a cfile with TODOs in the scratch directory. - - :param scratch_dir: Directory to create file in - :param cfile_content: List of TODO lines to write - :return: Full path to created cfile - """ - content = "\n".join(cfile_content) - return self._create_test_file("cfile.txt", content) - - def _inject_todos(self, cfile_content: str) -> None: - """ - Helper to inject TODOs with standard parameters. 
- """ - todo_user = "user" - comment_prefix = "#" - hmarkdo.inject_todos_from_cfile(cfile_content, todo_user, comment_prefix) - # ############################################################################# # Test_colorize_bold_text1 diff --git a/import_check/test/outcomes/Test_show_imports.test1/output/output.txt b/import_check/test/outcomes/Test_show_imports.test1/output/output.txt index 8c2bcfc54..34f420c6a 100644 --- a/import_check/test/outcomes/Test_show_imports.test1/output/output.txt +++ b/import_check/test/outcomes/Test_show_imports.test1/output/output.txt @@ -19,7 +19,7 @@ "input.file2" ], "imports": null, - "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/__init__.py", + "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/__init__.py", "truncated": false, "is_external": false, "is_file": false @@ -31,7 +31,7 @@ "input.file2" ], "imports": null, - "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/file1.py", + "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/file1.py", "truncated": false, "is_external": false, "is_file": true @@ -45,9 +45,9 @@ "input", "input.file1" ], - "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/file2.py", + "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/file2.py", "truncated": false, "is_external": false, "is_file": true } -} \ No newline at end of file +} From 47645a5397d1385ff6e5f97aa35a0822c664c8bf Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 3 Jun 2025 12:41:43 -0400 Subject: [PATCH 157/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_transform.py | 53 ++++++++++------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_transform.py 
b/dev_scripts_helpers/llms/llm_transform.py index 4901c5184..93b488f93 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -5,9 +5,15 @@ an LLM, and then write the output to either stdout or a file. It is particularly useful for integrating with editors like Vim. -The script `dockerized_llm_transform.py` is executed within a Docker container to ensure -all dependencies are met. The Docker container is built dynamically if -necessary. The script requires an OpenAI API key to be set in the environment. +The script `dockerized_llm_transform.py` is executed within a Docker container +to ensure all dependencies are met. The Docker container is built dynamically if +necessary. + +There are different modes to run this script: +- Process a chunk of code through vim +- Process input and write transformed output +- Process input and extract a cfile to be used to review the points in the code + or for further processing with `llm_apply.py` Examples # Basic Usage @@ -23,10 +29,6 @@ > llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring """ -# TODO(gp): There are different modes to run the script -# - run the script to process input and write transformed output -# - run the script to process input and extract a cfile - import argparse import logging import os @@ -67,20 +69,13 @@ def _parse() -> argparse.ArgumentParser: "-c", "--compare", action="store_true", - help="Print the original and transformed", - ) - # TODO(gp): Remove this. - parser.add_argument( - "-b", - "--bold_first_level_bullets", - action="store_true", - help="Bold the first level bullets", + help="Report the original and transformed text in the same response", ) parser.add_argument( "-s", "--skip-post-transforms", action="store_true", - help="Skip the post-transforms", + help="Skip the post-transforms outside the container", ) # Use CRITICAL to avoid logging anything. 
hparser.add_verbosity_arg(parser, log_level="CRITICAL") @@ -98,7 +93,7 @@ def _run_dockerized_llm_transform( suppress_output: bool = False, ) -> Optional[str]: """ - Run dockerized_llm_transform.py in a Docker container with all its + Run `dockerized_llm_transform.py` in a Docker container with all its dependencies. """ _LOG.debug(hprint.func_signature_to_str()) @@ -159,6 +154,7 @@ def _run_dockerized_llm_transform( is_caller_host=is_caller_host, use_sibling_container_for_callee=use_sibling_container_for_callee, ) + # Run the script inside the container. git_root = hgit.find_git_root() script = hsystem.find_file_in_repo( "dockerized_llm_transform.py", root_dir=git_root @@ -192,6 +188,7 @@ def _run_dockerized_llm_transform( return ret +# TODO(gp): Move this to somewhere else, `hdocker_utils.py`? def _convert_file_names(in_file_name: str, out_file_name: str) -> None: """ Convert the files from inside the container to outside. @@ -269,30 +266,23 @@ def _main(parser: argparse.ArgumentParser) -> None: # out_txt = hio.from_file(tmp_out_file_name) if dshlllpr.to_run("prettier_markdown", post_container_transforms): - out_txt = hmarkdo.prettier_markdown(out_txt) - # - if dshlllpr.to_run("format_markdown", post_container_transforms): # Note that we need to run this outside the `llm_transform` # container to avoid to do docker-in-docker in the `llm_transform` # container (which doesn't support that). + out_txt = hmarkdo.prettier_markdown(out_txt) + # + if dshlllpr.to_run("format_markdown", post_container_transforms): + # Same as `prettier_markdown`. out_txt = hmarkdo.md_clean_up(out_txt) out_txt = hmarkdo.format_markdown(out_txt) - if args.bold_first_level_bullets: - out_txt = hmarkdo.bold_first_level_bullets(out_txt) # if dshlllpr.to_run("format_latex", post_container_transforms): - # Note that we need to run this outside the `llm_transform` - # container to avoid to do docker-in-docker in the `llm_transform` - # container (which doesn't support that). 
+ # Same as `prettier_markdown`. out_txt = hmarkdo.md_clean_up(out_txt) out_txt = hmarkdo.format_markdown(out_txt) - if args.bold_first_level_bullets: - out_txt = hmarkdo.bold_first_level_bullets(out_txt) # if dshlllpr.to_run("format_slide", post_container_transforms): - # Note that we need to run this outside the `llm_transform` - # container to avoid to do docker-in-docker in the `llm_transform` - # container (which doesn't support that). + # Same as `prettier_markdown`. out_txt = hmarkdo.md_clean_up(out_txt) out_txt = hmarkdo.format_markdown_slide(out_txt) # @@ -313,7 +303,8 @@ def _main(parser: argparse.ArgumentParser) -> None: "Not all post_transforms were run: %s", post_container_transforms, ) - # Save the original and transformed text on file and a script to compare them. + # Save the original and transformed text on file and a script to compare + # them. txt = hio.from_file(tmp_in_file_name) hio.to_file("original.txt", txt) hio.to_file("transformed.txt", out_txt) From 235a8ec004fcb40ccc335ea8e5e4843b38ac15ea Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 3 Jun 2025 12:41:51 -0400 Subject: [PATCH 158/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/ai_review.py | 32 ++------------------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/dev_scripts_helpers/llms/ai_review.py b/dev_scripts_helpers/llms/ai_review.py index 3a4cd1653..8ee9f01aa 100755 --- a/dev_scripts_helpers/llms/ai_review.py +++ b/dev_scripts_helpers/llms/ai_review.py @@ -1,32 +1,5 @@ #!/usr/bin/env python3 -""" -Read input from either stdin or a file, apply a specified transformation using -an LLM, and then write the output to either stdout or a file. It is -particularly useful for integrating with editors like Vim. 
- -The script `dockerized_llm_transform.py` is executed within a Docker container to ensure -all dependencies are met. The Docker container is built dynamically if -necessary. The script requires an OpenAI API key to be set in the environment. - -Examples -# Basic Usage -> llm_transform.py -i input.txt -o output.txt -p uppercase - -# List of transforms -> llm_transform.py -i input.txt -o output.txt -p list - -# Code review -> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_review - -# Propose refactoring -> llm_transform.py -i dev_scripts_helpers/documentation/render_images.py -o cfile -p code_propose_refactoring -""" - -# TODO(gp): There are different modes to run the script -# - run the script to process input and write transformed output -# - run the script to process input and extract a cfile - import argparse import logging import os @@ -43,7 +16,7 @@ # TODO(gp): -> _parser() or _get_parser() everywhere. def _parse() -> argparse.ArgumentParser: """ - Use the same argparse parser for `dockerized_llm_transform.py`. + Use the same argparse parser for `dockerized_ai_review.py`. """ parser = argparse.ArgumentParser( description=__doc__, @@ -83,7 +56,7 @@ def _main(parser: argparse.ArgumentParser) -> None: ) if out_file_name != "cfile": _LOG.warning( - "The output file name is %s, so it will be converted to `cfile`", + "The output file name is '%s': using `cfile`", out_file_name, ) out_file_name = "cfile" @@ -121,7 +94,6 @@ def _main(parser: argparse.ArgumentParser) -> None: # if dshlllpr.to_run("convert_file_names", post_container_transforms): dshllltr._convert_file_names(in_file_name, tmp_out_file_name) - # # Check that all post-transforms were run. 
hdbg.dassert_eq( len(post_container_transforms), From 6020a23d27a89b9f7e9b8b8142db14d7b7f7a790 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 3 Jun 2025 17:33:42 -0400 Subject: [PATCH 159/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .claude/settings.local.json | 10 ++ CLAUDE.md | 127 ++++++++++++++++++++++ dev_scripts_helpers/llms/llm_transform.py | 4 +- helpers/hdocker.py | 6 +- 4 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 CLAUDE.md diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..4034a3004 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,10 @@ +{ + "permissions": { + "allow": [ + "Bash(find:*)", + "Bash(invoke --list)", + "Bash(grep:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..cb22daae1 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,127 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Architecture Overview + +This is the `helpers` repository - a foundational Python library providing utilities, development tools, and infrastructure components for a larger ecosystem. The codebase follows a modular architecture with these key components: + +### Core Structure + +- **`helpers/`** - Core utility modules (hdbg, hio, hpandas, etc.) following `h<name>` naming convention. Each module provides focused functionality (debugging, I/O, pandas extensions, etc.) +- **`config_root/`** - Configuration system with `Config` class and builders for hierarchical configuration management +- **`linters/`** - Pluggable linting framework with custom linters for code quality (amp_black, amp_isort, etc.) 
+- **`dev_scripts_helpers/`** - Development automation scripts organized by functionality (git, docker, documentation, etc.) + +### Task System Architecture + +The repository uses `pyinvoke` for task automation with a modular task system: +- **`tasks.py`** - Entry point that imports all task modules +- **`helpers/lib_tasks_*.py`** - Task modules organized by domain (docker, git, pytest, lint, etc.) +- Tasks are decorated with `@task` and accessible via `invoke <task_name>` + +### Testing Architecture + +- Uses pytest with custom markers: `slow`, `superslow`, `requires_docker_in_docker` +- **`helpers/hunit_test.py`** - Base test class with helpers for golden file testing and test utilities +- Tests are categorized by speed and infrastructure requirements +- Timeout-based test classification with different timeouts per category + +## Common Development Commands + +### Testing +```bash +# Run fast tests only +invoke run_fast_tests + +# Run all tests +invoke run_tests + +# Run specific test categories +invoke run_slow_tests +invoke run_superslow_tests + +# Run tests with coverage +invoke run_coverage + +# Run single test file +pytest path/to/test_file.py::TestClass::test_method +``` + +### Linting and Code Quality +```bash +# Lint all modified files +invoke lint --modified + +# Lint specific files +invoke lint --files "file1.py file2.py" + +# Check Python files compilation +invoke lint_check_python_files --modified +``` + +### Docker Development +```bash +# Start bash shell in development container +invoke docker_bash + +# Build local development image +invoke docker_build_local_image + +# Run Jupyter in container +invoke docker_jupyter +``` + +### Git and Branch Management +```bash +# Create new branch following naming convention +invoke git_branch_create --name "HelpersTask123_Description" + +# Show files in current branch vs master +invoke git_branch_files + +# Merge master into current branch +invoke git_merge_master +``` + +## Key Configuration + +- 
**`repo_config.yaml`** - Repository metadata including Docker image names, S3 buckets, GitHub settings +- **`pytest.ini`** - Test configuration with custom markers and options +- **`mypy.ini`** - Type checking configuration with library-specific ignore rules +- **`invoke.yaml`** - Invoke task configuration + +## Development Patterns + +### Module Import Conventions +```python +import helpers.hdbg as hdbg +import helpers.hio as hio +import config_root.config.config_ as crococon +``` + +### Task Implementation +- Tasks in `lib_tasks_*.py` files use `@task` decorator +- Minimize dependencies in task functions (they run outside Docker) +- Call `hlitauti.report_task()` at start of each task + +### Testing Patterns +- Inherit from `hunitest.TestCase` for enhanced test utilities +- Use golden file pattern via `check_string()` method +- Mark tests with appropriate speed markers +- Use `pytest.mark.requires_docker_in_docker` for Docker-dependent tests + +### Configuration Management +- Use `Config` class from `config_root.config.config_` for hierarchical configs +- Support config versioning (currently v3) +- Use `DUMMY` placeholder for multi-phase config building + +## Linting Framework + +The custom linting system in `linters/` provides: +- Modular linter plugins (`amp_*.py` files) +- Base framework in `linters/base.py` +- Integration with invoke tasks for automated linting +- Support for parallel execution via joblib + +When running `invoke lint`, it executes appropriate linters based on file types and applies fixes automatically where possible. 
\ No newline at end of file diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 93b488f93..8934647eb 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -181,6 +181,8 @@ def _run_dockerized_llm_transform( ] ) docker_cmd = " ".join(docker_cmd) + if suppress_output: + mode = "system_without_output" ret = hdocker.process_docker_cmd( docker_cmd, container_image, dockerfile, mode ) @@ -217,7 +219,7 @@ def _convert_file_names(in_file_name: str, out_file_name: str) -> None: def _main(parser: argparse.ArgumentParser) -> None: args = parser.parse_args() - hparser.init_logger_for_input_output_transform(args) + hparser.init_logger_for_input_output_transform(args, verbose=False) # if args.prompt == "list": print("# Available prompt tags:") diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 7d7e2c54b..989f8c58c 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -78,7 +78,11 @@ def process_docker_cmd( ret = docker_cmd elif mode == "system": # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
- hsystem.system(docker_cmd, suppress_output=False) + #hsystem.system(docker_cmd, suppress_output=False) + hsystem.system(docker_cmd) + ret = "" + elif mode == "system_without_output": + hsystem.system(docker_cmd) ret = "" elif mode == "save_to_file": file_name = f"tmp.process_docker_cmd.{container_image}.txt" From c995c948ee74a60fd6bd918ae4f11f765ca8349b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 5 Jun 2025 11:55:11 -0400 Subject: [PATCH 160/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 1 + .../all.coding_style_guidelines.reference.md | 96 ++++++++++++------- helpers/hdocker.py | 5 +- 3 files changed, 66 insertions(+), 36 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index ac95a0341..240be75d0 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -702,6 +702,7 @@ def md_rewrite() -> _PROMPT_OUT: - Rewrite the text passed to increase clarity and readability. - Maintain the structure of the text as much as possible, in terms of bullet points and their indentation. 
+ - Whenever possible use "you" instead of "I" or "we" """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} diff --git a/docs/code_guidelines/all.coding_style_guidelines.reference.md b/docs/code_guidelines/all.coding_style_guidelines.reference.md index 646f4c218..2c6237e2d 100644 --- a/docs/code_guidelines/all.coding_style_guidelines.reference.md +++ b/docs/code_guidelines/all.coding_style_guidelines.reference.md @@ -839,30 +839,6 @@ # Markdown -## Naming - -### LLM - -- Boldface and italics should be used sparingly -- The use of bullet point lists is encouraged - - For the items, `-` should be used instead of `*` or circles -- Use active voice most of the time and use passive voice sparingly - - Good: "The user updates the file." - - Bad: "The file is updated by the user." -- Be efficient - - Do not explain things in a repetitive way - - Rewrite long-winded AI-generated texts in a concise way - - E.g., instead of "The process of updating the software can be done by - following these steps," use "Update the software by following these steps" - -- When describing a tool the format should be the following - - A description of what the tool does - - A list of examples of invocations of a tool, with a comment on the command - line, the command line, and its output if possible - - A copy-paste version of the tool interface running `-h` - -### Linter - ## General ### LLM @@ -879,24 +855,19 @@ - The `{topic}` part describes the subject matter of the documentation - The `{diataxis_tag}` part categorizes the documentation according to the Diátaxis framework (e.g., explanation, tutorial) + - All Markdown files should have a table of contents - - Linter automatically adds and updates the table of contents -- Items in bullet point lists should not end with a period + - The linter automatically adds and updates the table of contents + - There should be one and only one level 1 heading (with one `#`) in a Markdown - The level 1 heading serves as the 
main title of the document - It should clearly convey the primary topic or purpose of the document - The level 1 heading should be located above the table of contents + - Wrap file paths, names of variables, functions, and classes in backticks - E.g., `file_path`, `variable_name`, `function_name()`, `ClassName` - Use `>` to indicate a command line - E.g., `> git push` or `docker> pytest` -- Headings should not be boldfaced -- Headings should not be overcapitalized - - E.g., `Data schema` instead of `Data Schema` -- Text should be reflowed to the maximum of 80 columns per line -- Fenced code blocks should always be accompanied by language markers - - E.g., `bash`, `python` - - Fenced code blocks should be indented at the same level as the previous line - Commands should be prepended by `>` - Example ``` @@ -913,3 +884,62 @@ the right highlighting - E.g., instead of a screenshot of a terminal command, provide the command text: `> ls -la` + +## Headers + +### LLM + +- Do not use bold or italics in headings +- Headings should not be overcapitalized + - E.g., `Data schema` instead of `Data Schema` + - TODO(gp): Need to decide +- Use headers so that it's easy to refer to something by link +- We want to use level 2 headers + +### Linter + +## Text + +### LLM + +- We use bullet point lists + - For the items, `-` should be used instead of `*` or circles + - Items in bullet point lists should not end with a period + +- Boldface and italics should be used sparingly throughout the text + +- Structure the text so that bullet points of higher level correspond to + "nesting" int he concept + +- Examples should go in a sub-bullet + - Good + ``` + - We typically increment the revision, likely a minor one + - E.g., from `v0.3` to `v0.3.1` + ``` + +- Use "you" and not "we" or "one" + - Let's just be direct: no need to be passive-aggressive + +- Text should be reflowed to the maximum of 80 columns per line + - The linter performs this operation automatically +- Fenced code blocks 
should always be accompanied by language markers + - E.g., `bash`, `python` + - Fenced code blocks should be indented at the same level as the previous line + +- Use active voice most of the time and use passive voice sparingly + - Good: "The user updates the file." + - Bad: "The file is updated by the user." +- Be efficient + - Do not explain things in a repetitive way + - Rewrite long-winded AI-generated texts in a concise way + - E.g., instead of "The process of updating the software can be done by + following these steps," use "Update the software by following these steps" + +- When describing a tool the format should be the following + - A description of what the tool does + - A list of examples of invocations of a tool, with a comment on the command + line, the command line, and its output if possible + - A copy-paste version of the tool interface running `-h` + +### Linter diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 989f8c58c..e2a134dd8 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -78,11 +78,10 @@ def process_docker_cmd( ret = docker_cmd elif mode == "system": # TODO(gp): Note that `suppress_output=False` seems to hang the call. 
- #hsystem.system(docker_cmd, suppress_output=False) - hsystem.system(docker_cmd) + hsystem.system(docker_cmd, suppress_output=False) ret = "" elif mode == "system_without_output": - hsystem.system(docker_cmd) + hsystem.system(docker_cmd, suppress_output=True) ret = "" elif mode == "save_to_file": file_name = f"tmp.process_docker_cmd.{container_image}.txt" From 6f333754554ac93022717b81a771850b5e87411a Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 6 Jun 2025 08:53:21 -0400 Subject: [PATCH 161/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 7 ++++--- dev_scripts_helpers/llms/llm_transform.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 240be75d0..859619a0b 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -644,12 +644,13 @@ def latex_rewrite() -> _PROMPT_OUT: system = _LATEX_CONTEXT system += r""" - Rewrite the text passed to increase clarity and readability. 
- - Maintain the structure of the text as much as possible, in terms of bullet - points and their indentation + - Maintain the structure of the text as much as possible, in terms of items + and their indentation + - The output should be a valid Latex code (e.g., using itemize) """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - post_container_transforms = [] + post_container_transforms = ["format_latex"] return system, pre_transforms, post_transforms, post_container_transforms diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 8934647eb..7cb3b71bc 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -231,6 +231,17 @@ def _main(parser: argparse.ArgumentParser) -> None: tmp_in_file_name, tmp_out_file_name = ( hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) ) + if args.prompt == "md_to_latex": + import helpers.hlatex as hlatex + # Read the input. + txt = hparser.read_file(tmp_in_file_name) + txt = "\n".join(txt) + #txt = hmarkdo.format_markdown(txt) + txt = hlatex.convert_pandoc_md_to_latex(txt) + txt = hmarkdo.format_latex(txt) + hparser.write_file(txt, out_file_name) + return + # TODO(gp): We should just automatically pass-through the options. 
cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] if args.fast_model: From a73d4734736527e9e7c828768099ab416d09445f Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 8 Jun 2025 17:01:17 -0400 Subject: [PATCH 162/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../Causify_dev_system.md | 72 +---- .../Causify_development_system/lint_latex.sh | 9 + .../Causify_development_system/run_latex.sh | 26 ++ papers/arxiv_template/arxiv.sty | 262 ++++++++++++++++++ papers/arxiv_template/arxiv_template.tex | 199 +++++++++++++ papers/arxiv_template/lint_latex.sh | 9 + papers/arxiv_template/references.bib | 27 ++ papers/arxiv_template/run_latex.sh | 31 +++ 8 files changed, 576 insertions(+), 59 deletions(-) create mode 100755 papers/Causify_development_system/lint_latex.sh create mode 100755 papers/Causify_development_system/run_latex.sh create mode 100644 papers/arxiv_template/arxiv.sty create mode 100644 papers/arxiv_template/arxiv_template.tex create mode 100755 papers/arxiv_template/lint_latex.sh create mode 100644 papers/arxiv_template/references.bib create mode 100755 papers/arxiv_template/run_latex.sh diff --git a/papers/Causify_development_system/Causify_dev_system.md b/papers/Causify_development_system/Causify_dev_system.md index 6fbb2b907..2a36a2b63 100644 --- a/papers/Causify_development_system/Causify_dev_system.md +++ b/papers/Causify_development_system/Causify_dev_system.md @@ -1,49 +1,3 @@ -<!-- toc --> - -- [Runnable Directories: The Solution to the Monorepo vs. Multi-repo Debate](#runnable-directories-the-solution-to-the-monorepo-vs-multi-repo-debate) - * [1. Introduction](#1-introduction) - * [2. Current landscape](#2-current-landscape) - + [2.1. Monorepo](#21-monorepo) - + [2.2. Multi-repo](#22-multi-repo) - + [2.3. What is needed](#23-what-is-needed) - * [3. 
Proposed solution](#3-proposed-solution) - * [Design goals](#design-goals) - + [Development functionalities](#development-functionalities) - + [Python package management](#python-package-management) - + [Testing](#testing) - + [DevOps functionalities](#devops-functionalities) - + [3.1. Runnable directory](#31-runnable-directory) - + [3.2. Docker](#32-docker) - + [3.3. Thin environment](#33-thin-environment) - + [3.4. Submodule of "helpers"](#34-submodule-of-helpers) - - [3.4.1. Git hooks](#341-git-hooks) - + [3.5. Executing tests](#35-executing-tests) - + [3.6. Dockerized executables](#36-dockerized-executables) - * [4. Discussion](#4-discussion) - * [Future directions](#future-directions) - * [References](#references) -- [Buildmeister: Daily Accountability for CI Stability](#buildmeister-daily-accountability-for-ci-stability) - * [Motivation](#motivation) - * [Core Responsibilities](#core-responsibilities) - * [Handover and Daily Reporting](#handover-and-daily-reporting) - * [Workflow in Practice](#workflow-in-practice) - * [Tools and Analysis](#tools-and-analysis) - + [Buildmeister Dashboard](#buildmeister-dashboard) - + [Allure Reports](#allure-reports) - + [Post-Mortem Log](#post-mortem-log) - * [Why It Matters](#why-it-matters) -- [Coverage Tracking with Codecov: A Layer of Continuous Accountability](#coverage-tracking-with-codecov-a-layer-of-continuous-accountability) - * [Motivation](#motivation-1) - * [Structured Coverage by Test Category](#structured-coverage-by-test-category) - * [CI Integration and Workflow Behavior](#ci-integration-and-workflow-behavior) - * [Enforced Thresholds and Quality Gates](#enforced-thresholds-and-quality-gates) - * [Visibility and Developer Experience](#visibility-and-developer-experience) - * [Best Practices and Operational Consistency](#best-practices-and-operational-consistency) - * [Beyond the Basics](#beyond-the-basics) - * [Summary](#summary) - -<!-- tocstop --> - # Runnable Directories: The Solution to the Monorepo vs. 
Multi-repo Debate ## 1. Introduction @@ -487,19 +441,6 @@ could be introduced for runnable directories in order to safeguard sensitive parts of the codebase. These steps will bolster both the security and efficiency of our workflows as the projects continue to scale. -## References - -- [1] - [Mono vs. multi-repo](https://free.gitkraken.com/hubfs/Mono_v_Multi-Repo_debate_2023.pdf) -- [2] - [Why Google stores billions of lines of code in a single repository](https://dl.acm.org/doi/10.1145/2854146) -- [3] - [What it is like to work in Meta's (Facebook's) monorepo](https://blog.3d-logic.com/2024/09/02/what-it-is-like-to-work-in-metas-facebooks-monorepo/) -- [4] - [Microsoft: How "Mono-repo" and "One Infra" Help Us Deliver a Better Developer Experience](https://devblogs.microsoft.com/appcenter/how-mono-repo-and-one-infra-help-us-deliver-a-better-developer-experience/) -- [5] - [Uber: Faster Together: Uber Engineering's iOS Monorepo](https://www.uber.com/blog/ios-monorepo/) - # Buildmeister: Daily Accountability for CI Stability ## Motivation @@ -683,3 +624,16 @@ feedback loop, and a source of engineering discipline. With structured test categories, resilient workflows, and project-level gates, our Codecov-based system transforms coverage data into actionable insights, reinforcing test quality across all levels of the stack. + +## References + +- [1] + [Mono vs. 
multi-repo](https://free.gitkraken.com/hubfs/Mono_v_Multi-Repo_debate_2023.pdf) +- [2] + [Why Google stores billions of lines of code in a single repository](https://dl.acm.org/doi/10.1145/2854146) +- [3] + [What it is like to work in Meta's (Facebook's) monorepo](https://blog.3d-logic.com/2024/09/02/what-it-is-like-to-work-in-metas-facebooks-monorepo/) +- [4] + [Microsoft: How "Mono-repo" and "One Infra" Help Us Deliver a Better Developer Experience](https://devblogs.microsoft.com/appcenter/how-mono-repo-and-one-infra-help-us-deliver-a-better-developer-experience/) +- [5] + [Uber: Faster Together: Uber Engineering's iOS Monorepo](https://www.uber.com/blog/ios-monorepo/) diff --git a/papers/Causify_development_system/lint_latex.sh b/papers/Causify_development_system/lint_latex.sh new file mode 100755 index 000000000..1d6fa3350 --- /dev/null +++ b/papers/Causify_development_system/lint_latex.sh @@ -0,0 +1,9 @@ +#!/bin/bash -xe +export GIT_ROOT=$(pwd) +if [[ -z $GIT_ROOT ]]; then + echo "Can't find GIT_ROOT=$GIT_ROOT" + exit -1 +fi; +FILE_NAME=$GIT_ROOT/papers/KaizenFlow/kaizen_flow.paper.tex + +dev_scripts/latex/lint_latex.sh $FILE_NAME diff --git a/papers/Causify_development_system/run_latex.sh b/papers/Causify_development_system/run_latex.sh new file mode 100755 index 000000000..ab07af416 --- /dev/null +++ b/papers/Causify_development_system/run_latex.sh @@ -0,0 +1,26 @@ +#!/bin/bash -xe +#GIT_ROOT="/Users/saggese/src/cmamp1" +export GIT_ROOT=$(pwd) +if [[ -z $GIT_ROOT ]]; then + echo "Can't find GIT_ROOT=$GIT_ROOT" + exit -1 +fi; + +# Relative to papers, without '.tex'. 
+FILE_NAME=$GIT_ROOT/papers/KaizenFlow/kaizen_flow.paper.tex + +PDF_FILE_NAME=$(basename $FILE_NAME).pdf + +dockerized_latex.py -i ${FILE_NAME} -o $PDF_FILE_NAME +dockerized_latex.py -i ${FILE_NAME} -o $PDF_FILE_NAME + +# From open_file_cmd.sh +/usr/bin/osascript << EOF +set theFile to POSIX file "$PDF_FILE_NAME" as alias +tell application "Skim" +activate +set theDocs to get documents whose path is (get POSIX path of theFile) +if (count of theDocs) > 0 then revert theDocs +open theFile +end tell +EOF diff --git a/papers/arxiv_template/arxiv.sty b/papers/arxiv_template/arxiv.sty new file mode 100644 index 000000000..081d21c8a --- /dev/null +++ b/papers/arxiv_template/arxiv.sty @@ -0,0 +1,262 @@ +\NeedsTeXFormat{LaTeX2e} + +\ProcessOptions\relax + +% fonts +\renewcommand{\rmdefault}{ptm} +\renewcommand{\sfdefault}{phv} + +% set page geometry +\usepackage[verbose=true,letterpaper]{geometry} +\AtBeginDocument{ + \newgeometry{ + textheight=9in, + textwidth=6.5in, + top=1in, + headheight=14pt, + headsep=25pt, + footskip=30pt + } +} + +\widowpenalty=10000 +\clubpenalty=10000 +\flushbottom +\sloppy + + + +\newcommand{\headeright}{A Preprint} +\newcommand{\undertitle}{A Preprint} +\newcommand{\shorttitle}{\@title} + +\usepackage{fancyhdr} +\fancyhf{} +\pagestyle{fancy} +\renewcommand{\headrulewidth}{0.4pt} +\fancyheadoffset{0pt} +\rhead{\scshape \footnotesize \headeright} +\chead{\shorttitle} +\cfoot{\thepage} + + +%Handling Keywords +\def\keywordname{{\bfseries \emph{Keywords}}}% +\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm +\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$ +}\noindent\keywordname\enspace\ignorespaces#1\par}} + +% font sizes with reduced leading +\renewcommand{\normalsize}{% + \@setfontsize\normalsize\@xpt\@xipt + \abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@ + \abovedisplayshortskip \z@ \@plus 3\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@ +} +\normalsize +\renewcommand{\small}{% + 
\@setfontsize\small\@ixpt\@xpt + \abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@ + \abovedisplayshortskip \z@ \@plus 2\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@ +} +\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt} +\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt} +\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt} +\renewcommand{\large}{\@setfontsize\large\@xiipt{14}} +\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}} +\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}} +\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}} +\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}} + +% sections with less space +\providecommand{\section}{} +\renewcommand{\section}{% + \@startsection{section}{1}{\z@}% + {-2.0ex \@plus -0.5ex \@minus -0.2ex}% + { 1.5ex \@plus 0.3ex \@minus 0.2ex}% + {\large\bf\raggedright}% +} +\providecommand{\subsection}{} +\renewcommand{\subsection}{% + \@startsection{subsection}{2}{\z@}% + {-1.8ex \@plus -0.5ex \@minus -0.2ex}% + { 0.8ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\subsubsection}{} +\renewcommand{\subsubsection}{% + \@startsection{subsubsection}{3}{\z@}% + {-1.5ex \@plus -0.5ex \@minus -0.2ex}% + { 0.5ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\paragraph}{} +\renewcommand{\paragraph}{% + \@startsection{paragraph}{4}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subparagraph}{} +\renewcommand{\subparagraph}{% + \@startsection{subparagraph}{5}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subsubsubsection}{} +\renewcommand{\subsubsubsection}{% + \vskip5pt{\noindent\normalsize\rm\raggedright}% +} + +% float placement +\renewcommand{\topfraction }{0.85} +\renewcommand{\bottomfraction }{0.4} +\renewcommand{\textfraction }{0.1} +\renewcommand{\floatpagefraction}{0.7} + 
+\newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@} +\newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@} + +\setlength{\abovecaptionskip}{\@abovecaptionskip} +\setlength{\belowcaptionskip}{\@belowcaptionskip} + +% swap above/belowcaptionskip lengths for tables +\renewenvironment{table} + {\setlength{\abovecaptionskip}{\@belowcaptionskip}% + \setlength{\belowcaptionskip}{\@abovecaptionskip}% + \@float{table}} + {\end@float} + +% footnote formatting +\setlength{\footnotesep }{6.65\p@} +\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@} +\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@} +\setcounter{footnote}{0} + +% paragraph formatting +\setlength{\parindent}{\z@} +\setlength{\parskip }{5.5\p@} + +% list formatting +\setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@} +\setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@} +\setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\leftmargin }{3pc} +\setlength{\leftmargini }{\leftmargin} +\setlength{\leftmarginii }{2em} +\setlength{\leftmarginiii}{1.5em} +\setlength{\leftmarginiv }{1.0em} +\setlength{\leftmarginv }{0.5em} +\def\@listi {\leftmargin\leftmargini} +\def\@listii {\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@ + \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \parsep \z@ + \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@ + \itemsep \topsep} +\def\@listiv {\leftmargin\leftmarginiv + \labelwidth\leftmarginiv + \advance\labelwidth-\labelsep} +\def\@listv {\leftmargin\leftmarginv + \labelwidth\leftmarginv + \advance\labelwidth-\labelsep} +\def\@listvi {\leftmargin\leftmarginvi + \labelwidth\leftmarginvi + \advance\labelwidth-\labelsep} + +% create title 
+\providecommand{\maketitle}{} +\renewcommand{\maketitle}{% + \par + \begingroup + \renewcommand{\thefootnote}{\fnsymbol{footnote}} + % for perfect author name centering + %\renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}} + % The footnote-mark was overlapping the footnote-text, + % added the following to fix this problem (MK) + \long\def\@makefntext##1{% + \parindent 1em\noindent + \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1 + } + \thispagestyle{empty} + \@maketitle + \@thanks + %\@notice + \endgroup + \let\maketitle\relax + \let\thanks\relax +} + +% rules for title box at top of first page +\newcommand{\@toptitlebar}{ + \hrule height 2\p@ + \vskip 0.25in + \vskip -\parskip% +} +\newcommand{\@bottomtitlebar}{ + \vskip 0.29in + \vskip -\parskip + \hrule height 2\p@ + \vskip 0.09in% +} + +% create title (includes both anonymized and non-anonymized versions) +\providecommand{\@maketitle}{} +\renewcommand{\@maketitle}{% + \vbox{% + \hsize\textwidth + \linewidth\hsize + \vskip 0.1in + \@toptitlebar + \centering + {\LARGE\sc \@title\par} + \@bottomtitlebar + \textsc{\undertitle}\\ + \vskip 0.1in + \def\And{% + \end{tabular}\hfil\linebreak[0]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \def\AND{% + \end{tabular}\hfil\linebreak[4]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}% + \vskip 0.4in \@minus 0.1in \center{\@date} \vskip 0.2in + } +} + +% add conference notice to bottom of first page +\newcommand{\ftype@noticebox}{8} +\newcommand{\@notice}{% + % give a bit of extra room back to authors on first page + \enlargethispage{2\baselineskip}% + \@float{noticebox}[b]% + \footnotesize\@noticestring% + \end@float% +} + +% abstract styling +\renewenvironment{abstract} +{ + \centerline + {\large \bfseries \scshape Abstract} + \begin{quote} +} +{ + \end{quote} +} + +\endinput diff --git a/papers/arxiv_template/arxiv_template.tex 
b/papers/arxiv_template/arxiv_template.tex new file mode 100644 index 000000000..2e3863075 --- /dev/null +++ b/papers/arxiv_template/arxiv_template.tex @@ -0,0 +1,199 @@ +% From https://github.com/kourgeorge/arxiv-style + +\documentclass{article} + + + +\usepackage{arxiv} + +\usepackage[utf8]{inputenc} % allow utf-8 input +\usepackage[T1]{fontenc} % use 8-bit T1 fonts +\usepackage{hyperref} % hyperlinks +\usepackage{url} % simple URL typesetting +\usepackage{booktabs} % professional-quality tables +\usepackage{amsfonts} % blackboard math symbols +\usepackage{nicefrac} % compact symbols for 1/2, etc. +\usepackage{microtype} % microtypography +\usepackage{lipsum} % Can be removed after putting your text content +\usepackage{graphicx} +\usepackage{natbib} +\usepackage{doi} + + + +\title{A template for the \emph{arxiv} style} + +%\date{September 9, 1985} % Here you can change the date presented in the paper title +%\date{} % Or removing it + +\author{ David S.~Hippocampus\thanks{Use footnote for providing further + information about author (webpage, alternative + address)---\emph{not} for acknowledging funding agencies.} \\ + Department of Computer Science\\ + Cranberry-Lemon University\\ + Pittsburgh, PA 15213 \\ + \texttt{hippo@cs.cranberry-lemon.edu} \\ + %% examples of more authors + \And + Elias D.~Striatum \\ + Department of Electrical Engineering\\ + Mount-Sheikh University\\ + Santa Narimana, Levand \\ + \texttt{stariate@ee.mount-sheikh.edu} \\ + %% \AND + %% Coauthor \\ + %% Affiliation \\ + %% Address \\ + %% \texttt{email} \\ + %% \And + %% Coauthor \\ + %% Affiliation \\ + %% Address \\ + %% \texttt{email} \\ + %% \And + %% Coauthor \\ + %% Affiliation \\ + %% Address \\ + %% \texttt{email} \\ +} + +% Uncomment to remove the date +%\date{} + +% Uncomment to override the `A preprint' in the header +%\renewcommand{\headeright}{Technical Report} +%\renewcommand{\undertitle}{Technical Report} +\renewcommand{\shorttitle}{\textit{arXiv} Template} + +%%% Add PDF 
metadata to help others organize their library +%%% Once the PDF is generated, you can check the metadata with +%%% $ pdfinfo template.pdf +\hypersetup{ +pdftitle={A template for the arxiv style}, +pdfsubject={q-bio.NC, q-bio.QM}, +pdfauthor={David S.~Hippocampus, Elias D.~Striatum}, +pdfkeywords={First keyword, Second keyword, More}, +} + +\begin{document} +\maketitle + +\begin{abstract} + \lipsum[1] +\end{abstract} + + +% keywords can be removed +\keywords{First keyword \and Second keyword \and More} + + +\section{Introduction} +\lipsum[2] +\lipsum[3] + + +\section{Headings: first level} +\label{sec:headings} + +\lipsum[4] See Section \ref{sec:headings}. + +\subsection{Headings: second level} +\lipsum[5] +\begin{equation} + \xi _{ij}(t)=P(x_{t}=i,x_{t+1}=j|y,v,w;\theta)= {\frac {\alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}{\sum _{i=1}^{N} \sum _{j=1}^{N} \alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}} +\end{equation} + +\subsubsection{Headings: third level} +\lipsum[6] + +\paragraph{Paragraph} +\lipsum[7] + + + +\section{Examples of citations, figures, tables, references} +\label{sec:others} + +\subsection{Citations} +Citations use \verb+natbib+. The documentation may be found at +\begin{center} + \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf} +\end{center} + +Here is an example usage of the two main commands (\verb+citet+ and \verb+citep+): Some people thought a thing \citep{kour2014real, hadash2018estimate} but other people thought something else \citep{kour2014fast}. Many people have speculated that if we knew exactly why \citet{kour2014fast} thought this\dots + +\subsection{Figures} +\lipsum[10] +See Figure \ref{fig:fig1}. Here is how you add footnotes. 
\footnote{Sample of the first footnote.} +\lipsum[11] + +\begin{figure} + \centering + \fbox{\rule[-.5cm]{4cm}{4cm} \rule[-.5cm]{4cm}{0cm}} + \caption{Sample figure caption.} + \label{fig:fig1} +\end{figure} + +\subsection{Tables} +See awesome Table~\ref{tab:table}. + +The documentation for \verb+booktabs+ (`Publication quality tables in LaTeX') is available from: +\begin{center} + \url{https://www.ctan.org/pkg/booktabs} +\end{center} + + +\begin{table} + \caption{Sample table title} + \centering + \begin{tabular}{lll} + \toprule + \multicolumn{2}{c}{Part} \\ + \cmidrule(r){1-2} + Name & Description & Size ($\mu$m) \\ + \midrule + Dendrite & Input terminal & $\sim$100 \\ + Axon & Output terminal & $\sim$10 \\ + Soma & Cell body & up to $10^6$ \\ + \bottomrule + \end{tabular} + \label{tab:table} +\end{table} + +\subsection{Lists} +\begin{itemize} + \item Lorem ipsum dolor sit amet + \item consectetur adipiscing elit. + \item Aliquam dignissim blandit est, in dictum tortor gravida eget. In ac rutrum magna. +\end{itemize} + + +\bibliographystyle{unsrtnat} +\bibliography{references} %%% Uncomment this line and comment out the ``thebibliography'' section below to use the external .bib file (using bibtex) . + + +%%% Uncomment this section and comment out the \bibliography{references} line above to use inline references. +% \begin{thebibliography}{1} + +% \bibitem{kour2014real} +% George Kour and Raid Saabne. +% \newblock Real-time segmentation of on-line handwritten arabic script. +% \newblock In {\em Frontiers in Handwriting Recognition (ICFHR), 2014 14th +% International Conference on}, pages 417--422. IEEE, 2014. + +% \bibitem{kour2014fast} +% George Kour and Raid Saabne. +% \newblock Fast classification of handwritten on-line arabic characters. +% \newblock In {\em Soft Computing and Pattern Recognition (SoCPaR), 2014 6th +% International Conference of}, pages 312--318. IEEE, 2014. 
+ +% \bibitem{hadash2018estimate} +% Guy Hadash, Einat Kermany, Boaz Carmeli, Ofer Lavi, George Kour, and Alon +% Jacovi. +% \newblock Estimate and replace: A novel approach to integrating deep neural +% networks with existing applications. +% \newblock {\em arXiv preprint arXiv:1804.09028}, 2018. + +% \end{thebibliography} + +\end{document} diff --git a/papers/arxiv_template/lint_latex.sh b/papers/arxiv_template/lint_latex.sh new file mode 100755 index 000000000..1d6fa3350 --- /dev/null +++ b/papers/arxiv_template/lint_latex.sh @@ -0,0 +1,9 @@ +#!/bin/bash -xe +export GIT_ROOT=$(pwd) +if [[ -z $GIT_ROOT ]]; then + echo "Can't find GIT_ROOT=$GIT_ROOT" + exit -1 +fi; +FILE_NAME=$GIT_ROOT/papers/KaizenFlow/kaizen_flow.paper.tex + +dev_scripts/latex/lint_latex.sh $FILE_NAME diff --git a/papers/arxiv_template/references.bib b/papers/arxiv_template/references.bib new file mode 100644 index 000000000..32e75dd21 --- /dev/null +++ b/papers/arxiv_template/references.bib @@ -0,0 +1,27 @@ +@inproceedings{kour2014real, + title={Real-time segmentation of on-line handwritten arabic script}, + author={Kour, George and Saabne, Raid}, + booktitle={Frontiers in Handwriting Recognition (ICFHR), 2014 14th International Conference on}, + pages={417--422}, + year={2014}, + organization={IEEE} +} + +@inproceedings{kour2014fast, + title={Fast classification of handwritten on-line Arabic characters}, + author={Kour, George and Saabne, Raid}, + booktitle={Soft Computing and Pattern Recognition (SoCPaR), 2014 6th International Conference of}, + pages={312--318}, + year={2014}, + organization={IEEE}, + doi={10.1109/SOCPAR.2014.7008025} +} + +@inproceedings{keshet2016prediction, + title={Prediction-Based, Prioritized Market-Share Insight Extraction}, + author={Keshet, Renato and Maor, Alina and Kour, George}, + booktitle={Advanced Data Mining and Applications: 12th International Conference, ADMA 2016, Gold Coast, QLD, Australia, December 12-15, 2016, Proceedings 12}, + pages={81--94}, + 
year={2016}, + organization={Springer} +} diff --git a/papers/arxiv_template/run_latex.sh b/papers/arxiv_template/run_latex.sh new file mode 100755 index 000000000..479a03c83 --- /dev/null +++ b/papers/arxiv_template/run_latex.sh @@ -0,0 +1,31 @@ +#!/bin/bash -xe +#GIT_ROOT="/Users/saggese/src/cmamp1" +export GIT_ROOT=$(pwd) +if [[ -z $GIT_ROOT ]]; then + echo "Can't find GIT_ROOT=$GIT_ROOT" + exit -1 +fi; + +PWD=$(pwd) + +cd $GIT_ROOT/papers/arxiv_template + +FILE_NAME=arxiv_template.tex + +PDF_FILE_NAME=$(basename $FILE_NAME).pdf + +dockerized_latex.py -i ${FILE_NAME} -o $PDF_FILE_NAME +dockerized_latex.py -i ${FILE_NAME} -o $PDF_FILE_NAME + +# From open_file_cmd.sh +/usr/bin/osascript << EOF +set theFile to POSIX file "$PDF_FILE_NAME" as alias +tell application "Skim" +activate +set theDocs to get documents whose path is (get POSIX path of theFile) +if (count of theDocs) > 0 then revert theDocs +open theFile +end tell +EOF + +cd $PWD From 8d38219b75028477cdbbfe70873b7458b11e168f Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 8 Jun 2025 17:14:52 -0400 Subject: [PATCH 163/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/{ => OLD}/lint_latex.sh | 0 .../documentation/{ => OLD}/lint_latex2.sh | 0 .../{ => OLD}/test_lint_latex.sh | 0 .../Causify_dev_system.tex | 204 ++++++++++++++ papers/Causify_development_system/arxiv.sty | 262 ++++++++++++++++++ .../Causify_development_system/lint_latex.sh | 2 +- 6 files changed, 467 insertions(+), 1 deletion(-) rename dev_scripts_helpers/documentation/{ => OLD}/lint_latex.sh (100%) rename dev_scripts_helpers/documentation/{ => OLD}/lint_latex2.sh (100%) rename dev_scripts_helpers/documentation/{ => OLD}/test_lint_latex.sh (100%) create mode 100644 papers/Causify_development_system/Causify_dev_system.tex create mode 100644 papers/Causify_development_system/arxiv.sty diff --git 
a/dev_scripts_helpers/documentation/lint_latex.sh b/dev_scripts_helpers/documentation/OLD/lint_latex.sh similarity index 100% rename from dev_scripts_helpers/documentation/lint_latex.sh rename to dev_scripts_helpers/documentation/OLD/lint_latex.sh diff --git a/dev_scripts_helpers/documentation/lint_latex2.sh b/dev_scripts_helpers/documentation/OLD/lint_latex2.sh similarity index 100% rename from dev_scripts_helpers/documentation/lint_latex2.sh rename to dev_scripts_helpers/documentation/OLD/lint_latex2.sh diff --git a/dev_scripts_helpers/documentation/test_lint_latex.sh b/dev_scripts_helpers/documentation/OLD/test_lint_latex.sh similarity index 100% rename from dev_scripts_helpers/documentation/test_lint_latex.sh rename to dev_scripts_helpers/documentation/OLD/test_lint_latex.sh diff --git a/papers/Causify_development_system/Causify_dev_system.tex b/papers/Causify_development_system/Causify_dev_system.tex new file mode 100644 index 000000000..52f0bc0c2 --- /dev/null +++ b/papers/Causify_development_system/Causify_dev_system.tex @@ -0,0 +1,204 @@ +% From https://github.com/kourgeorge/arxiv-style + +\documentclass{article} + +\usepackage{arxiv} + +% allow utf-8 input +\usepackage[utf8]{inputenc} +% use 8-bit T1 fonts +\usepackage[T1]{fontenc} +% hyperlinks +\usepackage{hyperref} +% simple URL typesetting +\usepackage{url} +% professional-quality tables +\usepackage{booktabs} +% blackboard math symbols +\usepackage{amsfonts} +% compact symbols for 1/2, etc. 
+\usepackage{nicefrac} +% microtypography +\usepackage{microtype} +% Can be removed after putting your text content +\usepackage{lipsum} +\usepackage{graphicx} +\usepackage{natbib} +\usepackage{doi} + +\title{The Causify Dev System} + +%\date{September 9, 1985} % Here you can change the date presented in the paper title +%\date{} % Or removing it + +\author{ David S.~Hippocampus + \thanks{Use footnote for providing further + information about author (webpage, alternative + address)---\emph{not} for acknowledging funding agencies.} \\ + Department of Computer Science\\ + Cranberry-Lemon University\\ + Pittsburgh, PA 15213 \\ + \texttt{hippo@cs.cranberry-lemon.edu} \\ + %% examples of more authors + \And + Elias D.~Striatum \\ + Department of Electrical Engineering\\ + Mount-Sheikh University\\ + Santa Narimana, Levand \\ + \texttt{stariate@ee.mount-sheikh.edu} \\ + %% \AND + %% Coauthor \\ + %% Affiliation \\ + %% Address \\ + %% \texttt{email} \\ + %% \And + %% Coauthor \\ + %% Affiliation \\ + %% Address \\ + %% \texttt{email} \\ + %% \And + %% Coauthor \\ + %% Affiliation \\ + %% Address \\ + %% \texttt{email} \\ +} + +% Uncomment to remove the date +%\date{} + +% Uncomment to override the `A preprint' in the header +%\renewcommand{\headeright}{Technical Report} +%\renewcommand{\undertitle}{Technical Report} +\renewcommand{\shorttitle}{\textit{arXiv} Template} + +%%% Add PDF metadata to help others organize their library +%%% Once the PDF is generated, you can check the metadata with +%%% $ pdfinfo template.pdf +\hypersetup{ +pdftitle={A template for the arxiv style}, +pdfsubject={q-bio.NC, q-bio.QM}, +pdfauthor={David S.~Hippocampus, Elias D.~Striatum}, +pdfkeywords={First keyword, Second keyword, More}, +} + +\begin{document} +\maketitle + +\begin{abstract} + \lipsum[1] +\end{abstract} + + +% keywords can be removed +\keywords{First keyword \and Second keyword \and More} + + +\section{Introduction} +\lipsum[2] +\lipsum[3] + + +\section{Headings: first level} 
+\label{sec:headings} + +\lipsum[4] See Section \ref{sec:headings}. + +\subsection{Headings: second level} +\lipsum[5] +\begin{equation} + \xi _{ij}(t)=P(x_{t}=i,x_{t+1}=j|y,v,w;\theta)= {\frac {\alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}{\sum _{i=1}^{N} \sum _{j=1}^{N} \alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}} +\end{equation} + +\subsubsection{Headings: third level} +\lipsum[6] + +\paragraph{Paragraph} +\lipsum[7] + + + +\section{Examples of citations, figures, tables, references} +\label{sec:others} + +\subsection{Citations} +Citations use \verb+natbib+. The documentation may be found at +\begin{center} + \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf} +\end{center} + +Here is an example usage of the two main commands (\verb+citet+ and \verb+citep+): Some people thought a thing \citep{kour2014real, hadash2018estimate} but other people thought something else \citep{kour2014fast}. Many people have speculated that if we knew exactly why \citet{kour2014fast} thought this\dots + +\subsection{Figures} +\lipsum[10] +See Figure \ref{fig:fig1}. Here is how you add footnotes. \footnote{Sample of the first footnote.} +\lipsum[11] + +\begin{figure} + \centering + \fbox{\rule[-.5cm]{4cm}{4cm} \rule[-.5cm]{4cm}{0cm}} + \caption{Sample figure caption.} + \label{fig:fig1} +\end{figure} + +\subsection{Tables} +See awesome Table~\ref{tab:table}. 
+ +The documentation for \verb+booktabs+ (`Publication quality tables in LaTeX') is available from: +\begin{center} + \url{https://www.ctan.org/pkg/booktabs} +\end{center} + + +\begin{table} + \caption{Sample table title} + \centering + \begin{tabular}{lll} + \toprule + \multicolumn{2}{c}{Part} \\ + \cmidrule(r){1-2} + Name & Description & Size ($\mu$m) \\ + \midrule + Dendrite & Input terminal & $\sim$100 \\ + Axon & Output terminal & $\sim$10 \\ + Soma & Cell body & up to $10^6$ \\ + \bottomrule + \end{tabular} + \label{tab:table} +\end{table} + +\subsection{Lists} +\begin{itemize} + \item Lorem ipsum dolor sit amet + \item consectetur adipiscing elit. + \item Aliquam dignissim blandit est, in dictum tortor gravida eget. In ac rutrum magna. +\end{itemize} + + +\bibliographystyle{unsrtnat} +\bibliography{references} %%% Uncomment this line and comment out the ``thebibliography'' section below to use the external .bib file (using bibtex) . + +%%% Uncomment this section and comment out the \bibliography{references} line above to use inline references. +% \begin{thebibliography}{1} + +% \bibitem{kour2014real} +% George Kour and Raid Saabne. +% \newblock Real-time segmentation of on-line handwritten arabic script. +% \newblock In {\em Frontiers in Handwriting Recognition (ICFHR), 2014 14th +% International Conference on}, pages 417--422. IEEE, 2014. + +% \bibitem{kour2014fast} +% George Kour and Raid Saabne. +% \newblock Fast classification of handwritten on-line arabic characters. +% \newblock In {\em Soft Computing and Pattern Recognition (SoCPaR), 2014 6th +% International Conference of}, pages 312--318. IEEE, 2014. + +% \bibitem{hadash2018estimate} +% Guy Hadash, Einat Kermany, Boaz Carmeli, Ofer Lavi, George Kour, and Alon +% Jacovi. +% \newblock Estimate and replace: A novel approach to integrating deep neural +% networks with existing applications. +% \newblock {\em arXiv preprint arXiv:1804.09028}, 2018. 
+ +% \end{thebibliography} + +\end{document} diff --git a/papers/Causify_development_system/arxiv.sty b/papers/Causify_development_system/arxiv.sty new file mode 100644 index 000000000..081d21c8a --- /dev/null +++ b/papers/Causify_development_system/arxiv.sty @@ -0,0 +1,262 @@ +\NeedsTeXFormat{LaTeX2e} + +\ProcessOptions\relax + +% fonts +\renewcommand{\rmdefault}{ptm} +\renewcommand{\sfdefault}{phv} + +% set page geometry +\usepackage[verbose=true,letterpaper]{geometry} +\AtBeginDocument{ + \newgeometry{ + textheight=9in, + textwidth=6.5in, + top=1in, + headheight=14pt, + headsep=25pt, + footskip=30pt + } +} + +\widowpenalty=10000 +\clubpenalty=10000 +\flushbottom +\sloppy + + + +\newcommand{\headeright}{A Preprint} +\newcommand{\undertitle}{A Preprint} +\newcommand{\shorttitle}{\@title} + +\usepackage{fancyhdr} +\fancyhf{} +\pagestyle{fancy} +\renewcommand{\headrulewidth}{0.4pt} +\fancyheadoffset{0pt} +\rhead{\scshape \footnotesize \headeright} +\chead{\shorttitle} +\cfoot{\thepage} + + +%Handling Keywords +\def\keywordname{{\bfseries \emph{Keywords}}}% +\def\keywords#1{\par\addvspace\medskipamount{\rightskip=0pt plus1cm +\def\and{\ifhmode\unskip\nobreak\fi\ $\cdot$ +}\noindent\keywordname\enspace\ignorespaces#1\par}} + +% font sizes with reduced leading +\renewcommand{\normalsize}{% + \@setfontsize\normalsize\@xpt\@xipt + \abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@ + \abovedisplayshortskip \z@ \@plus 3\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@ +} +\normalsize +\renewcommand{\small}{% + \@setfontsize\small\@ixpt\@xpt + \abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@ + \abovedisplayshortskip \z@ \@plus 2\p@ + \belowdisplayskip \abovedisplayskip + \belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@ +} +\renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt} +\renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt} +\renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt} 
+\renewcommand{\large}{\@setfontsize\large\@xiipt{14}} +\renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}} +\renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}} +\renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}} +\renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}} + +% sections with less space +\providecommand{\section}{} +\renewcommand{\section}{% + \@startsection{section}{1}{\z@}% + {-2.0ex \@plus -0.5ex \@minus -0.2ex}% + { 1.5ex \@plus 0.3ex \@minus 0.2ex}% + {\large\bf\raggedright}% +} +\providecommand{\subsection}{} +\renewcommand{\subsection}{% + \@startsection{subsection}{2}{\z@}% + {-1.8ex \@plus -0.5ex \@minus -0.2ex}% + { 0.8ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\subsubsection}{} +\renewcommand{\subsubsection}{% + \@startsection{subsubsection}{3}{\z@}% + {-1.5ex \@plus -0.5ex \@minus -0.2ex}% + { 0.5ex \@plus 0.2ex}% + {\normalsize\bf\raggedright}% +} +\providecommand{\paragraph}{} +\renewcommand{\paragraph}{% + \@startsection{paragraph}{4}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subparagraph}{} +\renewcommand{\subparagraph}{% + \@startsection{subparagraph}{5}{\z@}% + {1.5ex \@plus 0.5ex \@minus 0.2ex}% + {-1em}% + {\normalsize\bf}% +} +\providecommand{\subsubsubsection}{} +\renewcommand{\subsubsubsection}{% + \vskip5pt{\noindent\normalsize\rm\raggedright}% +} + +% float placement +\renewcommand{\topfraction }{0.85} +\renewcommand{\bottomfraction }{0.4} +\renewcommand{\textfraction }{0.1} +\renewcommand{\floatpagefraction}{0.7} + +\newlength{\@abovecaptionskip}\setlength{\@abovecaptionskip}{7\p@} +\newlength{\@belowcaptionskip}\setlength{\@belowcaptionskip}{\z@} + +\setlength{\abovecaptionskip}{\@abovecaptionskip} +\setlength{\belowcaptionskip}{\@belowcaptionskip} + +% swap above/belowcaptionskip lengths for tables +\renewenvironment{table} + {\setlength{\abovecaptionskip}{\@belowcaptionskip}% + \setlength{\belowcaptionskip}{\@abovecaptionskip}% + \@float{table}} 
+ {\end@float} + +% footnote formatting +\setlength{\footnotesep }{6.65\p@} +\setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@} +\renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@} +\setcounter{footnote}{0} + +% paragraph formatting +\setlength{\parindent}{\z@} +\setlength{\parskip }{5.5\p@} + +% list formatting +\setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@} +\setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@} +\setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} +\setlength{\leftmargin }{3pc} +\setlength{\leftmargini }{\leftmargin} +\setlength{\leftmarginii }{2em} +\setlength{\leftmarginiii}{1.5em} +\setlength{\leftmarginiv }{1.0em} +\setlength{\leftmarginv }{0.5em} +\def\@listi {\leftmargin\leftmargini} +\def\@listii {\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@ + \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ + \parsep \z@ + \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@ + \itemsep \topsep} +\def\@listiv {\leftmargin\leftmarginiv + \labelwidth\leftmarginiv + \advance\labelwidth-\labelsep} +\def\@listv {\leftmargin\leftmarginv + \labelwidth\leftmarginv + \advance\labelwidth-\labelsep} +\def\@listvi {\leftmargin\leftmarginvi + \labelwidth\leftmarginvi + \advance\labelwidth-\labelsep} + +% create title +\providecommand{\maketitle}{} +\renewcommand{\maketitle}{% + \par + \begingroup + \renewcommand{\thefootnote}{\fnsymbol{footnote}} + % for perfect author name centering + %\renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}} + % The footnote-mark was overlapping the footnote-text, + % added the following to fix this problem (MK) + \long\def\@makefntext##1{% + \parindent 1em\noindent + \hbox to 1.8em{\hss $\m@th 
^{\@thefnmark}$}##1 + } + \thispagestyle{empty} + \@maketitle + \@thanks + %\@notice + \endgroup + \let\maketitle\relax + \let\thanks\relax +} + +% rules for title box at top of first page +\newcommand{\@toptitlebar}{ + \hrule height 2\p@ + \vskip 0.25in + \vskip -\parskip% +} +\newcommand{\@bottomtitlebar}{ + \vskip 0.29in + \vskip -\parskip + \hrule height 2\p@ + \vskip 0.09in% +} + +% create title (includes both anonymized and non-anonymized versions) +\providecommand{\@maketitle}{} +\renewcommand{\@maketitle}{% + \vbox{% + \hsize\textwidth + \linewidth\hsize + \vskip 0.1in + \@toptitlebar + \centering + {\LARGE\sc \@title\par} + \@bottomtitlebar + \textsc{\undertitle}\\ + \vskip 0.1in + \def\And{% + \end{tabular}\hfil\linebreak[0]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \def\AND{% + \end{tabular}\hfil\linebreak[4]\hfil% + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% + } + \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}% + \vskip 0.4in \@minus 0.1in \center{\@date} \vskip 0.2in + } +} + +% add conference notice to bottom of first page +\newcommand{\ftype@noticebox}{8} +\newcommand{\@notice}{% + % give a bit of extra room back to authors on first page + \enlargethispage{2\baselineskip}% + \@float{noticebox}[b]% + \footnotesize\@noticestring% + \end@float% +} + +% abstract styling +\renewenvironment{abstract} +{ + \centerline + {\large \bfseries \scshape Abstract} + \begin{quote} +} +{ + \end{quote} +} + +\endinput diff --git a/papers/Causify_development_system/lint_latex.sh b/papers/Causify_development_system/lint_latex.sh index 1d6fa3350..2c037fe6f 100755 --- a/papers/Causify_development_system/lint_latex.sh +++ b/papers/Causify_development_system/lint_latex.sh @@ -4,6 +4,6 @@ if [[ -z $GIT_ROOT ]]; then echo "Can't find GIT_ROOT=$GIT_ROOT" exit -1 fi; -FILE_NAME=$GIT_ROOT/papers/KaizenFlow/kaizen_flow.paper.tex +FILE_NAME=$GIT_ROOT/papers/Causify_development_system/Causify_dev_system.tex 
dev_scripts/latex/lint_latex.sh $FILE_NAME From db22ce4290c5e47e56e52033c835c457982ece1e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 8 Jun 2025 17:39:00 -0400 Subject: [PATCH 164/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../Causify_dev_system.md | 181 --- .../Causify_dev_system.tex | 1023 ++++++++++++++--- .../Causify_development_system/lint_latex.sh | 2 +- .../Causify_development_system/run_latex.sh | 9 +- papers/arxiv_template/arxiv_template.tex | 199 ---- papers/arxiv_template/lint_latex.sh | 4 +- papers/arxiv_template/run_latex.sh | 2 +- papers/arxiv_template/template.tex | 191 +++ 8 files changed, 1095 insertions(+), 516 deletions(-) delete mode 100644 papers/arxiv_template/arxiv_template.tex create mode 100644 papers/arxiv_template/template.tex diff --git a/papers/Causify_development_system/Causify_dev_system.md b/papers/Causify_development_system/Causify_dev_system.md index 2a36a2b63..485659e5f 100644 --- a/papers/Causify_development_system/Causify_dev_system.md +++ b/papers/Causify_development_system/Causify_dev_system.md @@ -75,187 +75,6 @@ coordination. Tools like [Jenkins](https://www.jenkins.io/) and [GitHub Actions](https://github.com/features/actions) help streamline CI/CD pipelines, but they often struggle when dealing with heterogeneous environments. -### 2.3. What is needed - -An ideal strategy would combine the best of both worlds: - -- The modularity of multi-repos, to keep the codebase scalable and simplify - day-to-day development processes. -- The environment consistency of monorepos, to avoid synchronization issues and - prevent errors that arise from executing code in misaligned environments. - -Both are achieved through the hybrid approach proposed in this paper, which will -be discussed in Section 3. - -## 3. 
Proposed solution - -- This section describes the design principles in our approach to create Git - repos that contain code that can be: - - Composed through different Git sub-module - - Tested, built, run, and released (on a per-directory basis or not) - -- The technologies that this approach relies on are: - - Git for source control - - Python virtual environment and `poetry` (or similar) to control Python - packages - - `pytest` for unit and end-to-end testing - - Docker for managing containers - -- The approach described in this paper is not strictly dependent of the specific - package (e.g., `poetry` can be replaced by `Conda` or another package manager) - -## Design goals - -The proposed development system supports the following functionalities - -### Development functionalities - -- Support composing code using a GitHub sub-module approach -- Make it easy to add the development tool chain to a "new project" by simply - adding the Git sub-module `//helpers` to the project -- Create complex workflows (e.g., for dev and devops functionalities) using - makefile-like tools based on Python `invoke` package -- Have a simple way to maintain common files across different repos in sync - through links and automatically diff-ing files -- Support for both local and remote development using IDEs (e.g., PyCharm, - Visual Studio Code) - -### Python package management - -- Carefully manage and control dependencies using Python managers (such as - `poetry`) and virtual environments -- Code and containers can be versioned and kept in sync automatically since a - certain version of the code can require a certain version of the container to - run properly - - Code is versioned through Git - - Each container has a `changelog.txt` that contains the current version and - the history - -### Testing - -- Run end-to-end tests using `pytest` by automatically discover tests based on - dependencies and test lists, supporting the dependencies needed by different - directories -- Native 
support for both children-containers (i.e., Docker-in-Docker) and - sibling containers - -### DevOps functionalities - -- Support automatically different stages for container development - - E.g., `test` / `local`, `dev`, `prod` -- Standardize ways of building, testing, retrieving, and deploying containers -- Ensure alignment between development environment, deployment, and CI/CD - systems (e.g., GitHub Actions) -- Bootstrap the development system using a "thin environment", which has the - minimum number of dependencies to allow development and deployment in exactly - the same way in different setups (e.g., server, personal laptop, CI/CD) -- Manage dependencies in a way that is uniform across platforms and OSes, using - Docker containers -- Separate the need to: - - Build and deploy containers (by devops) - - Use containers to develop and test (by developers) -- Built-in support for multi-architecture builds (e.g, for Intel `x86` and Arm) - across different OSes supporting containers (e.g., Linux, MacOS, Windows - Subsystem for Linux WSL) -- Support for developing, testing, and deploying multi-container applications - -### 3.1. Runnable directory - -The core concept of the proposed approach is a **runnable directory** — a -self-contained, independently executable directory with code, equipped with a -dedicated DevOps setup. A repository is thus a special case of a runnable -directory. Developers typically work within a single runnable directory for a -given application, enabling them to test and deploy code without affecting other -parts of the codebase. - -A runnable directory can contain other runnable directories as subdirectories. -For example, Figure 1 depicts three runnable directories: `A`, `B`, and `C`. -Here, `A` and `C` are repositories, with `C` incorporated into `A` as a -submodule, while `B` is a subdirectory within `A`. This setup provides the same -accessibility as if all the code were hosted in a single monorepo. 
Note that -each of `A`, `B`, and `C` has its own DevOps pipeline — a key feature of our -approach, which is discussed further in Section 3.2. - -```mermaid -graph RL - subgraph A [Runnable Dir A] - direction TB - subgraph C1 [Runnable Dir C] - DevOpsC1[DevOps C] - end - subgraph B [Runnable Dir B] - DevOpsB[DevOps B] - end - DevOpsA[DevOps A] - end - subgraph C [Runnable Dir C] - DevOpsC[DevOps C] - end - - C -->|Submodule| C1 - - style A fill:#FFF3CD - style C fill:#FFF3CD,stroke:#9E9D24 -``` - -Figure 1. Sample architecture of Causify's runnable directories. - -### 3.2. Docker - -Docker is the backbone of our containerized development environment. Every -runnable directory contains Dockerfiles that allow it to build and run its own -Docker containers, which include the code, its dependencies, and the runtime -system. - -This Docker-based approach addresses two important challenges. First, it ensures -consistency by isolating the application from variations in the host operating -system or underlying infrastructure. Second, a specific package (or package -version) can be added to the container of a particular runnable directory -without affecting other parts of the codebase. This prevents "bloating" the -environment with packages required by all applications — a common issue in -monorepos — while also effectively mitigating the risk of conflicting -dependencies, which can arise in a multi-repo setup. - -Our approach supports multiple stages for container release: - -- Local: used to work on updates to the container; accessible only to the - developer who built it. -- Development: used by all team members in day-to-day development of new - features. -- Production: used to run the system by end users. - -This multi-stage workflow enables seamless progression from testing to system -deployment. - -It is also possible to run a container within another container's environment in -a Docker-in-Docker setup. 
In this case, children containers are started directly -inside a parent container, allowing nested workflows or builds. Alternatively, -sibling containers can run side by side and share resources such as the host's -Docker daemon, enabling inter-container communication and orchestration. - -```mermaid -graph TD - host[Host] - docker_engine[Docker Engine] - subgraph sibling_container["Sibling Containers"] - container_1[Container 1] - container_2[Container 2] - end - subgraph children_container["Children Containers"] - container_1a[Container 1a] - container_1b[Container 1b] - end - host --> docker_engine - docker_engine --> container_1 - docker_engine --> container_2 - container_1 --> container_1a - container_1 --> container_1b - - style sibling_container fill:#FFF3CD,stroke:#9E9D24 - style children_container fill:#FFF3CD,stroke:#9E9D24 -``` - -Figure 2. Docker container flow. ### 3.3. Thin environment diff --git a/papers/Causify_development_system/Causify_dev_system.tex b/papers/Causify_development_system/Causify_dev_system.tex index 52f0bc0c2..e824d9fe0 100644 --- a/papers/Causify_development_system/Causify_dev_system.tex +++ b/papers/Causify_development_system/Causify_dev_system.tex @@ -4,63 +4,63 @@ \usepackage{arxiv} -% allow utf-8 input +% Allow utf-8 input. \usepackage[utf8]{inputenc} -% use 8-bit T1 fonts +% Use 8-bit T1 fonts. \usepackage[T1]{fontenc} -% hyperlinks +% Hyperlinks. \usepackage{hyperref} -% simple URL typesetting +% Simple URL typesetting. \usepackage{url} -% professional-quality tables +% Professional-quality tables. \usepackage{booktabs} -% blackboard math symbols +% Blackboard math symbols. \usepackage{amsfonts} -% compact symbols for 1/2, etc. +% Compact symbols for 1/2, etc. \usepackage{nicefrac} -% microtypography +% Microtypography. \usepackage{microtype} -% Can be removed after putting your text content +% Can be removed after putting your text content. 
\usepackage{lipsum} \usepackage{graphicx} \usepackage{natbib} \usepackage{doi} -\title{The Causify Dev System} +\title{The Causify Dev System (v0.1)} %\date{September 9, 1985} % Here you can change the date presented in the paper title %\date{} % Or removing it -\author{ David S.~Hippocampus - \thanks{Use footnote for providing further - information about author (webpage, alternative - address)---\emph{not} for acknowledging funding agencies.} \\ - Department of Computer Science\\ - Cranberry-Lemon University\\ - Pittsburgh, PA 15213 \\ - \texttt{hippo@cs.cranberry-lemon.edu} \\ - %% examples of more authors - \And - Elias D.~Striatum \\ - Department of Electrical Engineering\\ - Mount-Sheikh University\\ - Santa Narimana, Levand \\ - \texttt{stariate@ee.mount-sheikh.edu} \\ - %% \AND - %% Coauthor \\ - %% Affiliation \\ - %% Address \\ - %% \texttt{email} \\ - %% \And - %% Coauthor \\ - %% Affiliation \\ - %% Address \\ - %% \texttt{email} \\ - %% \And - %% Coauthor \\ - %% Affiliation \\ - %% Address \\ - %% \texttt{email} \\ +\author{ The Causify Team +\thanks{Use footnote for providing further information about author (webpage, +alternative address)---\emph{not} for acknowledging funding agencies.} \\ +Department of Computer Science\\ +Cranberry-Lemon University\\ +Pittsburgh, PA +15213 +\\ \texttt{hippo@cs.cranberry-lemon.edu} \\ +%% examples of more authors +\And +Elias D.~Striatum \\ +Department of Electrical Engineering\\ +Mount-Sheikh University\\ +Santa Narimana, Levand \\ +\texttt{stariate@ee.mount-sheikh.edu} \\ +%% \AND +%% Coauthor \\ +%% Affiliation \\ +%% Address \\ +%% \texttt{email} \\ +%% \And +%% Coauthor \\ +%% Affiliation \\ +%% Address \\ +%% \texttt{email} \\ +%% \And +%% Coauthor \\ +%% Affiliation \\ +%% Address \\ +%% \texttt{email} \\ } % Uncomment to remove the date @@ -75,130 +75,893 @@ %%% Once the PDF is generated, you can check the metadata with %%% $ pdfinfo template.pdf \hypersetup{ -pdftitle={A template for the arxiv style}, 
-pdfsubject={q-bio.NC, q-bio.QM}, -pdfauthor={David S.~Hippocampus, Elias D.~Striatum}, -pdfkeywords={First keyword, Second keyword, More}, + pdftitle={A template for the arxiv style}, + pdfsubject={q-bio.NC, q-bio.QM}, + pdfauthor={David S.~Hippocampus, Elias D.~Striatum}, + pdfkeywords={First keyword, Second keyword, More}, } \begin{document} -\maketitle + \maketitle + + \begin{abstract} + \end{abstract} + + % keywords can be removed + \keywords{First keyword \and Second keyword \and More} + + \section{Introduction} + Software development workflows are becoming more complex as they adapt to the + demands of large-scale systems and modern collaborative development practices. + As teams and codebases grow, companies face the challenge of organizing both + effectively. When it comes to structuring the codebase, two main approaches + emerge: monorepos and multi-repos[1]. Monorepos consolidate all code into a + single repository, simplifying version control but carrying a risk of + scalability and maintainability issues. Conversely, multi-repos store the code + in logically separated repositories, easier to manage and deploy but more + difficult to keep in sync. + + In this paper, we propose Causify dev system, an alternative hybrid solution: a + modular system architecture built around runnable directories. Although + independent, these directories maintain cohesion through shared tooling and + environments, offering a straightforward and scalable way to organize the + codebase while ensuring reliability in development, testing, and deployment. + + %In this paper, we first outline the current state-of-the-art (Section 2), then + %describe our approach, with a particular focus on the containerized workflows + %that support it (Section 3). We then discuss the strengths and limitations of + %our approach compared to existing practices (Section 4), and conclude by + %presenting potential avenues for future improvement (Section 5). 
+
+  \section{Runnable directories}
+
+  \subsection{The monorepo approach}
+
+  The monorepo approach involves storing all code for multiple applications within
+  a single repository. This strategy has been popularized by large tech companies
+  like Google[2], Meta[3], Microsoft[4] and Uber[5], proving that even codebases
+  with billions of lines of code can be effectively managed in a single
+  repository. The key benefits of this approach include:
+
+  \begin{itemize}
+  \item Consistency in environment: with everything housed in one repository, there's
+  no risk of projects becoming incompatible due to conflicting versions of
+  third-party packages.
+  \item Simplified version control: there is a single commit history, which makes it
+  easy to track and, if needed, revert changes globally.
+  \item Reduced coordination overhead: developers work within the same repository,
+  with easy access to all code, shared knowledge, tools and consistent coding
+  standards.
+  \end{itemize}
+
+  However, as monorepo setups scale, users often face significant challenges. A
+  major downside is long CI/CD build times, as even small changes can trigger
+  massive rebuilds and tests throughout the entire codebase. To cope with this,
+  extra tooling, such as \href{https://buck2.build/}{Buck} or
+  \href{https://bazel.build/}{Bazel}, must be configured, adding complexity to
+  workflows. Even something as simple as searching and browsing the code becomes
+  more difficult, often requiring specialized tools and IDE plug-ins.
+
+  Additionally, when everything is located in one place, it is harder to separate
+  concerns and maintain clear boundaries between projects. Managing permissions
+  also becomes more difficult when only selected developers should have access to
+  specific parts of the codebase.
+
+\subsection{Multi-repo approach}
+
+The multi-repo approach involves splitting code across several repositories,
+with each one dedicated to a specific module or service. 
This modularity allows
+teams to work independently on different parts of a system, making it easier to
+manage changes and releases for individual components. Each repository can
+evolve at its own pace, and developers can focus on smaller, more manageable
+codebases.
+
+However, the multi-repo strategy comes with its own set of challenges,
+particularly when it comes to managing dependencies and ensuring version
+compatibility across repositories. For instance, different repositories might
+rely on two different versions of a third-party package, or even conflicting
+packages, making synchronization complex or, in some cases, nearly impossible.
+In general, propagating changes from one repository to another requires careful
+coordination. Tools like \href{https://www.jenkins.io/}{Jenkins} and
+\href{https://github.com/features/actions}{GitHub Actions} help streamline CI/CD
+pipelines, but they often struggle when dealing with heterogeneous environments.
+
+\subsection{Runnable directories}
+
+An ideal strategy would combine the best of both worlds:
+
+\begin{itemize}
+  \item The modularity of multi-repos, to keep the codebase scalable and simplify
+  day-to-day development processes.
+  \item The environment consistency of monorepos, to avoid synchronization issues and
+  prevent errors that arise from executing code in misaligned environments.
+\end{itemize}
+
+Both are achieved through the hybrid approach proposed in this paper, which will
+be discussed in Section 3. 
-\begin{abstract}
- \lipsum[1]
-\end{abstract}
+\begin{itemize}
+
+  \item This section describes the design principles in our approach to create Git
+  repos that contain code that can be:
+
+  \begin{itemize}
+
+  \item Composed through different Git sub-module
+
+  \item Tested, built, run, and released (on a per-directory basis or not)
+  \end{itemize}
+
+  \item The technologies that this approach relies on are:
+
+  \begin{itemize}
+
+  \item Git for source control
+
+  \item Python virtual environment and \texttt{poetry} (or similar) to
+  control Python packages
+
+  \item \texttt{pytest} for unit and end-to-end testing
+
+  \item Docker for managing containers
+  \end{itemize}
+
+  \item The approach described in this paper is not strictly dependent on the specific
+  package (e.g., \texttt{poetry} can be replaced by \texttt{Conda} or another
+  package manager)
+\end{itemize}
+
+\subsection{Design goals}
+
+The proposed development system supports the following functionalities.
+
+\subsubsection{Development functionalities}
+
+\begin{itemize}
+
+  \item Support composing code using a GitHub sub-module approach
+
+  \item Make it easy to add the development tool chain to a ``new project'' by simply
+  adding the Git sub-module \texttt{//helpers} to the project
+
+  \item Create complex workflows (e.g., for dev and devops functionalities) using
+  makefile-like tools based on Python \texttt{invoke} package
+
+  \item Have a simple way to maintain common files across different repos in sync
+  through links and automatically diff-ing files
+
+  \item Support for both local and remote development using IDEs (e.g., PyCharm,
+  Visual Studio Code)
+\end{itemize}
+
+\subsubsection{Python package management}
+\label{python-package-management}
+
+\begin{itemize}
+
+  \item Carefully manage and control dependencies using Python managers (such as
+  \texttt{poetry}) and virtual environments
+
+  \item Code and containers can be versioned and kept in sync automatically since
+  a certain version 
of the code can require a certain version of the container + to run properly + + \begin{itemize} + + \item Code is versioned through Git + + \item Each container has a \texttt{changelog.txt} that contains the + current version and the history + \end{itemize} +\end{itemize} + +\subsubsection{Testing} +\label{testing} + +\begin{itemize} + + \item Run end-to-end tests using \texttt{pytest} by automatically discover + tests based on dependencies and test lists, supporting the dependencies needed + by different directories + + \item Native support for both children-containers (i.e., Docker-in-Docker) and + sibling containers +\end{itemize} +\subsubsection{DevOps functionalities} +\label{devops-functionalities} -% keywords can be removed -\keywords{First keyword \and Second keyword \and More} +\begin{itemize} + + \item Support automatically different stages for container development + + \begin{itemize} + + \item E.g., \texttt{test} / \texttt{local}, \texttt{dev}, \texttt{prod} + \end{itemize} + + \item Standardize ways of building, testing, retrieving, and deploying + containers + \item Ensure alignment between development environment, deployment, and CI/CD systems + (e.g., GitHub Actions) -\section{Introduction} -\lipsum[2] -\lipsum[3] + \item Bootstrap the development system using a ``thin environment'', which has + the minimum number of dependencies to allow development and deployment in + exactly the same way in different setups (e.g., server, personal laptop, CI/CD) + \item Manage dependencies in a way that is uniform across platforms and OSes, + using Docker containers -\section{Headings: first level} -\label{sec:headings} + \item Separate the need to: -\lipsum[4] See Section \ref{sec:headings}. 
+ \begin{itemize} -\subsection{Headings: second level} -\lipsum[5] -\begin{equation} - \xi _{ij}(t)=P(x_{t}=i,x_{t+1}=j|y,v,w;\theta)= {\frac {\alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}{\sum _{i=1}^{N} \sum _{j=1}^{N} \alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}} -\end{equation} + \item Build and deploy containers (by devops) -\subsubsection{Headings: third level} -\lipsum[6] + \item Use containers to develop and test (by developers) + \end{itemize} -\paragraph{Paragraph} -\lipsum[7] + \item Built-in support for multi-architecture builds (e.g, for Intel \texttt{x86} + and Arm) across different OSes supporting containers (e.g., Linux, MacOS, + Windows Subsystem for Linux WSL) + \item Support for developing, testing, and deploying multi-container applications +\end{itemize} +\subsubsection{3.1. Runnable directory} +\label{runnable-directory} + +The core concept of the proposed approach is a \textbf{runnable directory} --- a +self-contained, independently executable directory with code, equipped with a +dedicated DevOps setup. A repository is thus a special case of a runnable directory. +Developers typically work within a single runnable directory for a given +application, enabling them to test and deploy code without affecting other parts +of the codebase. + +A runnable directory can contain other runnable directories as subdirectories. +For example, Figure 1 depicts three runnable directories: \texttt{A}, \texttt{B}, +and \texttt{C}. Here, \texttt{A} and \texttt{C} are repositories, with \texttt{C} +incorporated into \texttt{A} as a submodule, while \texttt{B} is a subdirectory +within \texttt{A}. This setup provides the same accessibility as if all the code +were hosted in a single monorepo. Note that each of \texttt{A}, \texttt{B}, and \texttt{C} +has its own DevOps pipeline --- a key feature of our approach, which is discussed +further in Section 3.2. 
+ +%```mermaid +%graph RL +% subgraph A [Runnable Dir A] +% direction TB +% subgraph C1 [Runnable Dir C] +% DevOpsC1[DevOps C] +% end +% subgraph B [Runnable Dir B] +% DevOpsB[DevOps B] +% end +% DevOpsA[DevOps A] +% end +% subgraph C [Runnable Dir C] +% DevOpsC[DevOps C] +% end +% +% C -->|Submodule| C1 +% +% style A fill:#FFF3CD +% style C fill:#FFF3CD,stroke:#9E9D24 +%``` +% Figure 1. Sample architecture of Causify's runnable directories. + +\subsubsection{3.2. Docker} +\label{docker} + +Docker is the backbone of our containerized development environment. Every +runnable directory contains Dockerfiles that allow it to build and run its own +Docker containers, which include the code, its dependencies, and the runtime system. + +This Docker-based approach addresses two important challenges. First, it ensures +consistency by isolating the application from variations in the host operating system +or underlying infrastructure. Second, a specific package (or package version) can +be added to the container of a particular runnable directory without affecting other +parts of the codebase. This prevents ``bloating'' the environment with packages +required by all applications --- a common issue in monorepos --- while also +effectively mitigating the risk of conflicting dependencies, which can arise in a +multi-repo setup. + +Our approach supports multiple stages for container release: -\section{Examples of citations, figures, tables, references} -\label{sec:others} +\begin{itemize} -\subsection{Citations} -Citations use \verb+natbib+. The documentation may be found at -\begin{center} - \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf} -\end{center} + \item Local: used to work on updates to the container; accessible only to the developer + who built it. 
-Here is an example usage of the two main commands (\verb+citet+ and \verb+citep+): Some people thought a thing \citep{kour2014real, hadash2018estimate} but other people thought something else \citep{kour2014fast}. Many people have speculated that if we knew exactly why \citet{kour2014fast} thought this\dots + \item Development: used by all team members in day-to-day development of new + features. -\subsection{Figures} -\lipsum[10] -See Figure \ref{fig:fig1}. Here is how you add footnotes. \footnote{Sample of the first footnote.} -\lipsum[11] + \item Production: used to run the system by end users. +\end{itemize} -\begin{figure} - \centering - \fbox{\rule[-.5cm]{4cm}{4cm} \rule[-.5cm]{4cm}{0cm}} - \caption{Sample figure caption.} - \label{fig:fig1} -\end{figure} +This multi-stage workflow enables seamless progression from testing to system +deployment. + +It is also possible to run a container within another container's environment in +a Docker-in-Docker setup. In this case, children containers are started directly +inside a parent container, allowing nested workflows or builds. Alternatively, sibling +containers can run side by side and share resources such as the host's Docker daemon, +enabling inter-container communication and orchestration. + +%```mermaid +%graph TD +% host[Host] +% docker_engine[Docker Engine] +% subgraph sibling_container["Sibling Containers"] +% container_1[Container 1] +% container_2[Container 2] +% end +% subgraph children_container["Children Containers"] +% container_1a[Container 1a] +% container_1b[Container 1b] +% end +% host --> docker_engine +% docker_engine --> container_1 +% docker_engine --> container_2 +% container_1 --> container_1a +% container_1 --> container_1b +% +% style sibling_container fill:#FFF3CD,stroke:#9E9D24 +% style children_container fill:#FFF3CD,stroke:#9E9D24 +%``` + +% Figure 2. Docker container flow. + +\subsubsection{2.3. 
What is needed} +\label{what-is-needed} + +An ideal strategy would combine the best of both worlds: -\subsection{Tables} -See awesome Table~\ref{tab:table}. +\begin{itemize} -The documentation for \verb+booktabs+ (`Publication quality tables in LaTeX') is available from: -\begin{center} - \url{https://www.ctan.org/pkg/booktabs} -\end{center} + \item The modularity of multi-repos, to keep the codebase scalable and + simplify day-to-day development processes. + \item The environment consistency of monorepos, to avoid synchronization + issues and prevent errors that arise from executing code in misaligned + environments. +\end{itemize} -\begin{table} - \caption{Sample table title} - \centering - \begin{tabular}{lll} - \toprule - \multicolumn{2}{c}{Part} \\ - \cmidrule(r){1-2} - Name & Description & Size ($\mu$m) \\ - \midrule - Dendrite & Input terminal & $\sim$100 \\ - Axon & Output terminal & $\sim$10 \\ - Soma & Cell body & up to $10^6$ \\ - \bottomrule - \end{tabular} - \label{tab:table} -\end{table} +Both are achieved through the hybrid approach proposed in this paper, which will +be discussed in Section 3. + +\subsubsection{3.3. Thin environment} +\label{thin-environment} + +To bootstrap development workflows, we use a thin client that installs a minimal +set of essential dependencies, such as Docker and invoke, in a lightweight +virtual environment. A single thin environment is shared across all runnable directories +which minimizes setup overhead (see Figure 3). This environment contains +everything that is needed to start development containers, which are in turn specific +to each runnable directory. With this approach, we ensure that development and deployment +remain consistent across different systems (e.g., server, personal laptop, CI/CD). 
+ +%```mermaid +%graph RL +% thin_env[thin environment] +% subgraph A [Runnable Dir A] +% direction TB +% B[Runnable Dir B] +% C1[Runnable Dir C] +% end +% subgraph C [Runnable Dir C] +% end +% +% C -->|Submodule| C1 +% A -.-> thin_env +% B -.-> thin_env +% C1 -.-> thin_env +% C -.-> thin_env +% +% style A fill:#FFF3CD +% style C fill:#FFF3CD,stroke:#9E9D24 +%``` +% +%Figure 3. Thin environment shared across multiple runnable directories. + +\subsubsection{3.4. Submodule of ``helpers''} +\label{submodule-of-helpers} + +All Causify repositories include a dedicated ``helpers'' repository as a submodule. +This repository contains common utilities and development toolchains, such as the +thin environment, Linter, Docker, and invoke workflows. By centralizing these resources, +we eliminate code duplication and ensure that all teams, regardless of the project, +use the same tools and procedures. + +Additionally, it hosts symbolic link targets for files that must technically reside +in each repository but are identical across all of them (e.g., license and +certain configuration files). Manually keeping them in sync can be difficult and +error-prone over time. In our approach, these files are stored exclusively in ``helpers'', +and all other repositories utilize read-only symbolic links pointing to them. +This way, we avoid file duplication and reduce the risk of introducing accidental +discrepancies. + +%```mermaid +%graph RL +% subgraph A [Runnable Dir A] +% direction TB +% B[Runnable Dir B] +% H1[Helpers] +% end +% subgraph H [Helpers] +% end +% +% H -->|Submodule| H1 +% +% style A fill:#FFF3CD +% style H fill:#FFF3CD,stroke:#9E9D24 +%``` +% +%Figure 4. "Helpers" submodule integrated into a repository. + +\paragraph{3.4.1. Git hooks} +\label{git-hooks} + +Our ``helpers'' submodule includes a set of Git hooks used to enforce policies +across our development process, including Git workflow rules, coding standards, security +and compliance, and other quality checks. 
These hooks are installed by default when +the user activates the thin environment. They perform essential checks such as +verifying the branch, author information, file size limits, forbidden words, +Python file compilation, and potential secret leaks\ldots etc. + +\subsubsection{3.5. Executing tests} +\label{executing-tests} + +Our system supports robust testing workflows that leverage the containerized environment +for comprehensive code validation. Tests are executed inside Docker containers +to ensure consistency across development and production environments, preventing +discrepancies caused by variations in host system configurations. In the case of +nested runnable directories, tests are executed recursively within each directory's +corresponding container, which is automatically identified (see Figure 5). As a result, +the entire test suite can be run with a single command, while still allowing tests +in subdirectories to use dependencies that may not be compatible with the parent +directory's environment. + +%```mermaid +%graph LR +% start((start)) +% start --> A +% subgraph A[Runnable Dir A] +% direction LR +% pytest_1((pytest)) +% B[Runnable Dir B / Container B] +% C[Runnable Dir C / Container C] +% dirA1[dir1 / Container A] +% dirA2[dir2 / Container A] +% dirA11[dir1.1 / Container A] +% dirA12[dir1.2 / Container A] +% pytest_1 --> B +% pytest_1 --> C +% pytest_1 --> dirA1 +% pytest_1 --> dirA2 +% dirA1 --> dirA11 +% dirA1 --> dirA12 +% end +% +%style A fill:#FFF3CD,stroke:#9E9D24 +%style B font-size:15px +%style C font-size:15px +%``` +% +%Figure 5. Recursive test execution in dedicated containers. + +\subsubsection{3.6. Dockerized executables} +\label{dockerized-executables} + +Sometimes, installing a package within a development container may not be +justified, particularly if it is large and will only be used occasionally. 
In
+such cases, we use \emph{dockerized executables}: when the package is needed, a
+Docker container is dynamically created with only the specific dependencies
+required for its installation. The package is then installed and executed within
+the container, which is discarded once the task is complete. This prevents the development
+environment from becoming bloated with dependencies that are rarely used. If
+necessary, for example during test execution, a dockerized executable can be run
+inside another Docker container, whether using the children or sibling container
+approach, as discussed in Section 3.2.
+
+\subsection{Discussion}
+\label{discussion}
+
+Causify's approach presents a strong alternative to existing code organization solutions,
+offering scalability and efficiency for both small and large systems.
+
+The proposed modular architecture is centered around runnable directories, which
+operate as independent units with their own build and release lifecycles. This
+design bypasses the bottlenecks common in large monorepos, where centralized workflows
+can slow down CI/CD processes unless specialized tools like Buck or Bazel are used.
+By leveraging Docker containers, we ensure consistent application behavior across
+development, testing, and production environments, avoiding problems caused by
+system configuration discrepancies. Dependencies are isolated within each directory's
+dedicated container, reducing the risks of issues that tight coupling or package
+incompatibility might create in a monorepo or a multi-repo setup.
+
+Unlike multi-repos, runnable directories can utilize shared utilities from ``helpers''
+submodules, eliminating code duplication and promoting consistent workflows
+across projects. They can even reside under a unified repository structure which
+simplifies codebase management and reduces the overhead of maintaining multiple repositories. 
+With support for recursive test execution spanning all components, runnable directories +allow for end-to-end validation of the whole codebase through a single command, +removing the need for testing each repository separately. + +There are, however, several challenges that might arise in the adoption of our +approach. Teams that are unfamiliar with containerized environments may need time +and training to effectively transition to the new workflows. The reliance on +Docker may introduce additional resource demands, particularly when running multiple +containers concurrently on development machines. This would require further +optimization, possibly aided by customized tooling. These adjustments, while ultimately +beneficial, can add complexity to the system's rollout and necessitate ongoing +maintenance to ensure seamless integration with existing CI/CD pipelines and +development practices. + +\subsection{Future directions} +\label{future-directions} + +Looking ahead, there are several areas where the proposed approach can be +improved. One direction is the implementation of dependency-aware caching to ensure +that only the necessary components are rebuilt or retested when changes are made. +This would reduce the time spent on development tasks, making the overall process +more efficient. Further optimization could involve designing our CI/CD pipelines +to execute builds, tests, and deployments for multiple runnable directories in +parallel, which would allow us to take full advantage of available compute resources. + +Additional measures can also be taken to enhance security. Integrating automated +container image scanning and validation before deployment would help guarantee +compliance with organizational policies and prevent vulnerabilities from +entering production environments. In addition, fine-grained access controls +could be introduced for runnable directories in order to safeguard sensitive +parts of the codebase. 
These steps will bolster both the security and efficiency +of our workflows as the projects continue to scale. + +\section{Buildmeister: Daily Accountability for CI Stability} +\label{buildmeister-daily-accountability-for-ci-stability} + +\subsection{Motivation} +\label{motivation} + +Automated test pipelines are essential, but without accountability, they often fall +into disrepair. The Buildmeister routine introduces a rotating, human-in-the-loop +system designed to enforce green builds, identify root causes, and ensure high-quality +CI/CD hygiene. This mechanism aligns technical execution with team +responsibility, fostering a culture of operational ownership. + +\subsection{Core Responsibilities} +\label{core-responsibilities} + +The Buildmeister is a rotating role assigned to a team member each week. Their primary +duties are: -\subsection{Lists} \begin{itemize} - \item Lorem ipsum dolor sit amet - \item consectetur adipiscing elit. - \item Aliquam dignissim blandit est, in dictum tortor gravida eget. In ac rutrum magna. + + \item Monitor build health daily via the Buildmeister Dashboard + + \item Investigate failures and ensure GitHub Issues are filed promptly + + \item Push responsible team members to fix or revert breaking code + + \item Maintain test quality by analyzing trends in Allure reports + + \item Document breakage through a structured post-mortem log \end{itemize} +The Buildmeister ensures builds are never ``temporarily broken'', our policy is: +``Fix it or revert within one hour.'' -\bibliographystyle{unsrtnat} -\bibliography{references} %%% Uncomment this line and comment out the ``thebibliography'' section below to use the external .bib file (using bibtex) . +\subsection{Handover and Daily Reporting} +\label{handover-and-daily-reporting} -%%% Uncomment this section and comment out the \bibliography{references} line above to use inline references. 
-% \begin{thebibliography}{1} +The routine begins each day with a status email to the team detailing: -% \bibitem{kour2014real} -% George Kour and Raid Saabne. -% \newblock Real-time segmentation of on-line handwritten arabic script. -% \newblock In {\em Frontiers in Handwriting Recognition (ICFHR), 2014 14th -% International Conference on}, pages 417--422. IEEE, 2014. +\begin{itemize} + + \item Overall build status (green/red) + + \item Failing test names and owners -% \bibitem{kour2014fast} -% George Kour and Raid Saabne. -% \newblock Fast classification of handwritten on-line arabic characters. -% \newblock In {\em Soft Computing and Pattern Recognition (SoCPaR), 2014 6th -% International Conference of}, pages 312--318. IEEE, 2014. + \item GitHub issue references -% \bibitem{hadash2018estimate} -% Guy Hadash, Einat Kermany, Boaz Carmeli, Ofer Lavi, George Kour, and Alon -% Jacovi. -% \newblock Estimate and replace: A novel approach to integrating deep neural -% networks with existing applications. -% \newblock {\em arXiv preprint arXiv:1804.09028}, 2018. + \item Expected resolution timelines + + \item A screenshot of the Buildmeister dashboard +\end{itemize} -% \end{thebibliography} +At the end of each rotation, the outgoing Buildmeister must confirm handover by receiving +an ``Acknowledged'' reply from the incoming one, ensuring continuity and +awareness. 
+
+\subsection{Workflow in Practice}
+\label{workflow-in-practice}
+
+When a build breaks:
+
+\begin{itemize}
+
+  \item The team is alerted via Slack (\#build-notifications) through our GitHub
+  Actions bot
+
+  \item The Buildmeister triages the issue:
+
+  \begin{itemize}
+
+    \item Quickly reruns or replicates the failed tests if uncertain
+
+    \item Blames commits to identify the responsible party
+
+    \item Notifies the team and files a structured GitHub Issue
+  \end{itemize}
+
+  \item All information, including test names, logs, and the responsible engineer, is transparently
+  shared and tracked
+\end{itemize}
+
+If the issue is not resolved within one hour, the Buildmeister must escalate and,
+if needed, disable the test with explicit owner consent.
+
+\subsection{Tools and Analysis}
+\label{tools-and-analysis}
+
+\subsubsection{Buildmeister Dashboard}
+\label{buildmeister-dashboard}
+
+A centralized UI provides a real-time view of all builds across repos and branches.
+It is the Buildmeister's daily launchpad.
+
+\subsubsection{Allure Reports}
+\label{allure-reports}
+
+\begin{itemize}
+
+  \item Every week, the Buildmeister reviews trends in skipped/failing tests, duration
+  anomalies, and retry spikes
+
+  \item This process:
+
+  \begin{itemize}
+
+    \item Surfaces hidden test instability
+
+    \item Provides historical context to new breaks
+
+    \item Enables preventive action before regressions cascade
+  \end{itemize}
+\end{itemize}
+
+\subsubsection{Post-Mortem Log}
+\label{post-mortem-log}
+
+Every build break is logged in a shared spreadsheet, capturing:
+
+\begin{itemize}
+
+  \item Repo and test type
+
+  \item Link to the failing GitHub run
+
+  \item Root cause
+
+  \item Owner and fix timeline
+
+  \item Whether the issue was fixed or test was disabled
+\end{itemize}
+
+This living record forms the basis for failure mode analysis and future automation
+improvements.
+
+\subsection{Why It Matters}
+\label{why-it-matters}
+
+The Buildmeister is not just a rotating duty; it is a system of shared accountability.
+It transforms test stability from an abstract ideal into a daily operational
+habit, backed by clear expectations, defined processes, and human enforcement.
+By combining automation with ownership, we achieve sustainable reliability in a complex,
+multi-repo ecosystem.
+
+\section{Coverage Tracking with Codecov: A Layer of Continuous Accountability}
+\label{coverage-tracking-with-codecov-a-layer-of-continuous-accountability}
+
+\subsection{Motivation}
+\label{motivation-1}
+
+Maintaining comprehensive test coverage across a growing codebase requires more
+than just writing tests; it demands visibility, automation, and enforcement. Our
+integration with Codecov provides a system-wide view of test coverage, structured
+into fast, slow, and superslow test suites. This setup ensures that all code
+paths are exercised and that test coverage regressions are identified early and
+reliably.
+
+\subsection{Structured Coverage by Test Category}
+\label{structured-coverage-by-test-category}
+
+We categorize coverage tests into three suites based on runtime and scope:
+
+\begin{itemize}
+
+  \item Fast tests run frequently (e.g., daily) and provide immediate feedback on
+  high-priority code paths
+
+  \item Slow tests cover broader logic and data scenarios
+
+  \item Superslow tests are comprehensive, long-running regressions executed on
+  a weekly cadence or on-demand
+\end{itemize}
+
+Each suite produces its own coverage report, which is flagged and uploaded independently
+to Codecov, enabling targeted inspection and carryforward of data when some
+suites are skipped.
+
+\subsection{CI Integration and Workflow Behavior}
+\label{ci-integration-and-workflow-behavior}
+
+Coverage reports are generated and uploaded automatically as part of our CI pipelines.
+The workflow: + +\begin{itemize} + + \item Fails immediately on critical setup errors (e.g., dependency or + configuration issues) + + \item Continues gracefully if fast or slow tests fail mid-pipeline, but + surfaces those failures in a final gating step + + \item Treats superslow failures as critical, immediately halting the workflow +\end{itemize} + +This behavior ensures resilience while preventing silent test degradation. + +\subsection{Enforced Thresholds and Quality Gates} +\label{enforced-thresholds-and-quality-gates} + +Coverage checks are enforced at both project and patch levels: + +\begin{itemize} + + \item Project-level threshold: Pull requests fail if overall coverage drops + beyond a configured margin (e.g., \textgreater1\%) + + \item Patch-level checks: Changes are required to maintain or improve coverage + on modified lines + + \item Flags and branches: Checks are scoped per test suite and only enforced + on critical branches +\end{itemize} + +Together, these gates maintain coverage integrity while avoiding noise from +unrelated code paths. + +\subsection{Visibility and Developer Experience} +\label{visibility-and-developer-experience} + +Codecov is integrated tightly into the developer workflow: + +\begin{itemize} + + \item PRs show inline coverage status and file-level diffs + + \item Optional summary comments detail total coverage, changes, and affected files + + \item Reports can be viewed in Codecov's UI or served locally as HTML + + \item Carryforward settings retain historical data when full test suites aren't + executed +\end{itemize} + +Developers can also generate and inspect local reports for any test suite using +standard coverage commands. 
+ +\subsection{Best Practices and Operational Consistency} +\label{best-practices-and-operational-consistency} + +To ensure effective usage: + +\begin{itemize} + + \item Coverage is always uploaded---even if tests fail---ensuring no blind spots + + \item Developers are encouraged to monitor coverage deltas in PRs + + \item The system defaults to global configuration, but supports fine-tuning + via repo-specific overrides + + \item Weekly reviews of coverage trends and flags help spot regressions and low-tested + areas +\end{itemize} + +\subsection{Beyond the Basics} +\label{beyond-the-basics} + +Our setup also supports: + +\begin{itemize} + + \item PR commenting: Optional automated comments on test impact + + \item Badges: Live indicators of coverage status + + \item Custom reporting: Layouts and thresholds can be adjusted to align with + evolving policies +\end{itemize} +\subsection{Summary} +\label{summary} + +Coverage tracking is more than a checkbox---it's an enforcement mechanism, a +feedback loop, and a source of engineering discipline. With structured test +categories, resilient workflows, and project-level gates, our Codecov-based system +transforms coverage data into actionable insights, reinforcing test quality +across all levels of the stack. + + \section{} + +% \begin{figure} +% \centering +% \fbox{\rule[-.5cm]{4cm}{4cm} \rule[-.5cm]{4cm}{0cm}} +% \caption{Sample figure caption.} +% \label{fig:fig1} +% \end{figure} + + \subsection{Tables} + See awesome Table~\ref{tab:table}. 
+ + The documentation for \verb+booktabs+ (`Publication quality tables in LaTeX') + is available from: + \begin{center} + \url{https://www.ctan.org/pkg/booktabs} + \end{center} + + \begin{table} + \caption{Sample table title} + \centering + \begin{tabular}{lll} + \toprule \multicolumn{2}{c}{Part} \\ + \cmidrule(r){1-2} Name & Description & Size ($\mu$m) \\ + \midrule Dendrite & Input terminal & $\sim$100 \\ + Axon & Output terminal & $\sim$10 \\ + Soma & Cell body & up to $10^{6}$ \\ + \bottomrule + \end{tabular} + \label{tab:table} + \end{table} + + \bibliographystyle{unsrtnat} + %%% Uncomment this line and comment out the ``thebibliography'' section below to use the external .bib file (using bibtex) . + \bibliography{references} + +%- [1] +% [Mono vs. multi-repo](https://free.gitkraken.com/hubfs/Mono_v_Multi-Repo_debate_2023.pdf) +%- [2] +% [Why Google stores billions of lines of code in a single repository](https://dl.acm.org/doi/10.1145/2854146) +%- [3] +% [What it is like to work in Meta's (Facebook's) monorepo](https://blog.3d-logic.com/2024/09/02/what-it-is-like-to-work-in-metas-facebooks-monorepo/) +%- [4] +% [Microsoft: How "Mono-repo" and "One Infra" Help Us Deliver a Better Developer Experience](https://devblogs.microsoft.com/appcenter/how-mono-repo-and-one-infra-help-us-deliver-a-better-developer-experience/) +%- [5] +% [Uber: Faster Together: Uber Engineering's iOS Monorepo](https://www.uber.com/blog/ios-monorepo/) + + + %%% Uncomment this section and comment out the \bibliography{references} line above to use inline references. + % \begin{thebibliography}{1} + + % \bibitem{kour2014real} + % George Kour and Raid Saabne. + % \newblock Real-time segmentation of on-line handwritten arabic script. + % \newblock In {\em Frontiers in Handwriting Recognition (ICFHR), 2014 14th + % International Conference on}, pages 417--422. IEEE, 2014. + + % \bibitem{kour2014fast} + % George Kour and Raid Saabne. 
+ % \newblock Fast classification of handwritten on-line arabic characters. + % \newblock In {\em Soft Computing and Pattern Recognition (SoCPaR), 2014 6th + % International Conference of}, pages 312--318. IEEE, 2014. + + % \bibitem{hadash2018estimate} + % Guy Hadash, Einat Kermany, Boaz Carmeli, Ofer Lavi, George Kour, and Alon + % Jacovi. + % \newblock Estimate and replace: A novel approach to integrating deep neural + % networks with existing applications. + % \newblock {\em arXiv preprint arXiv:1804.09028}, 2018. + + % \end{thebibliography} \end{document} diff --git a/papers/Causify_development_system/lint_latex.sh b/papers/Causify_development_system/lint_latex.sh index 2c037fe6f..9286037cc 100755 --- a/papers/Causify_development_system/lint_latex.sh +++ b/papers/Causify_development_system/lint_latex.sh @@ -6,4 +6,4 @@ if [[ -z $GIT_ROOT ]]; then fi; FILE_NAME=$GIT_ROOT/papers/Causify_development_system/Causify_dev_system.tex -dev_scripts/latex/lint_latex.sh $FILE_NAME +lint_notes.py -i $FILE_NAME --use_dockerized_prettier diff --git a/papers/Causify_development_system/run_latex.sh b/papers/Causify_development_system/run_latex.sh index ab07af416..697f0a853 100755 --- a/papers/Causify_development_system/run_latex.sh +++ b/papers/Causify_development_system/run_latex.sh @@ -6,8 +6,11 @@ if [[ -z $GIT_ROOT ]]; then exit -1 fi; -# Relative to papers, without '.tex'. 
-FILE_NAME=$GIT_ROOT/papers/KaizenFlow/kaizen_flow.paper.tex +PWD=$(pwd) + +cd $GIT_ROOT/papers/Causify_development_system + +FILE_NAME=Causify_dev_system.tex PDF_FILE_NAME=$(basename $FILE_NAME).pdf @@ -24,3 +27,5 @@ if (count of theDocs) > 0 then revert theDocs open theFile end tell EOF + +cd $PWD diff --git a/papers/arxiv_template/arxiv_template.tex b/papers/arxiv_template/arxiv_template.tex deleted file mode 100644 index 2e3863075..000000000 --- a/papers/arxiv_template/arxiv_template.tex +++ /dev/null @@ -1,199 +0,0 @@ -% From https://github.com/kourgeorge/arxiv-style - -\documentclass{article} - - - -\usepackage{arxiv} - -\usepackage[utf8]{inputenc} % allow utf-8 input -\usepackage[T1]{fontenc} % use 8-bit T1 fonts -\usepackage{hyperref} % hyperlinks -\usepackage{url} % simple URL typesetting -\usepackage{booktabs} % professional-quality tables -\usepackage{amsfonts} % blackboard math symbols -\usepackage{nicefrac} % compact symbols for 1/2, etc. -\usepackage{microtype} % microtypography -\usepackage{lipsum} % Can be removed after putting your text content -\usepackage{graphicx} -\usepackage{natbib} -\usepackage{doi} - - - -\title{A template for the \emph{arxiv} style} - -%\date{September 9, 1985} % Here you can change the date presented in the paper title -%\date{} % Or removing it - -\author{ David S.~Hippocampus\thanks{Use footnote for providing further - information about author (webpage, alternative - address)---\emph{not} for acknowledging funding agencies.} \\ - Department of Computer Science\\ - Cranberry-Lemon University\\ - Pittsburgh, PA 15213 \\ - \texttt{hippo@cs.cranberry-lemon.edu} \\ - %% examples of more authors - \And - Elias D.~Striatum \\ - Department of Electrical Engineering\\ - Mount-Sheikh University\\ - Santa Narimana, Levand \\ - \texttt{stariate@ee.mount-sheikh.edu} \\ - %% \AND - %% Coauthor \\ - %% Affiliation \\ - %% Address \\ - %% \texttt{email} \\ - %% \And - %% Coauthor \\ - %% Affiliation \\ - %% Address \\ - %% 
\texttt{email} \\ - %% \And - %% Coauthor \\ - %% Affiliation \\ - %% Address \\ - %% \texttt{email} \\ -} - -% Uncomment to remove the date -%\date{} - -% Uncomment to override the `A preprint' in the header -%\renewcommand{\headeright}{Technical Report} -%\renewcommand{\undertitle}{Technical Report} -\renewcommand{\shorttitle}{\textit{arXiv} Template} - -%%% Add PDF metadata to help others organize their library -%%% Once the PDF is generated, you can check the metadata with -%%% $ pdfinfo template.pdf -\hypersetup{ -pdftitle={A template for the arxiv style}, -pdfsubject={q-bio.NC, q-bio.QM}, -pdfauthor={David S.~Hippocampus, Elias D.~Striatum}, -pdfkeywords={First keyword, Second keyword, More}, -} - -\begin{document} -\maketitle - -\begin{abstract} - \lipsum[1] -\end{abstract} - - -% keywords can be removed -\keywords{First keyword \and Second keyword \and More} - - -\section{Introduction} -\lipsum[2] -\lipsum[3] - - -\section{Headings: first level} -\label{sec:headings} - -\lipsum[4] See Section \ref{sec:headings}. - -\subsection{Headings: second level} -\lipsum[5] -\begin{equation} - \xi _{ij}(t)=P(x_{t}=i,x_{t+1}=j|y,v,w;\theta)= {\frac {\alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}{\sum _{i=1}^{N} \sum _{j=1}^{N} \alpha _{i}(t)a^{w_t}_{ij}\beta _{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}} -\end{equation} - -\subsubsection{Headings: third level} -\lipsum[6] - -\paragraph{Paragraph} -\lipsum[7] - - - -\section{Examples of citations, figures, tables, references} -\label{sec:others} - -\subsection{Citations} -Citations use \verb+natbib+. The documentation may be found at -\begin{center} - \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf} -\end{center} - -Here is an example usage of the two main commands (\verb+citet+ and \verb+citep+): Some people thought a thing \citep{kour2014real, hadash2018estimate} but other people thought something else \citep{kour2014fast}. 
Many people have speculated that if we knew exactly why \citet{kour2014fast} thought this\dots - -\subsection{Figures} -\lipsum[10] -See Figure \ref{fig:fig1}. Here is how you add footnotes. \footnote{Sample of the first footnote.} -\lipsum[11] - -\begin{figure} - \centering - \fbox{\rule[-.5cm]{4cm}{4cm} \rule[-.5cm]{4cm}{0cm}} - \caption{Sample figure caption.} - \label{fig:fig1} -\end{figure} - -\subsection{Tables} -See awesome Table~\ref{tab:table}. - -The documentation for \verb+booktabs+ (`Publication quality tables in LaTeX') is available from: -\begin{center} - \url{https://www.ctan.org/pkg/booktabs} -\end{center} - - -\begin{table} - \caption{Sample table title} - \centering - \begin{tabular}{lll} - \toprule - \multicolumn{2}{c}{Part} \\ - \cmidrule(r){1-2} - Name & Description & Size ($\mu$m) \\ - \midrule - Dendrite & Input terminal & $\sim$100 \\ - Axon & Output terminal & $\sim$10 \\ - Soma & Cell body & up to $10^6$ \\ - \bottomrule - \end{tabular} - \label{tab:table} -\end{table} - -\subsection{Lists} -\begin{itemize} - \item Lorem ipsum dolor sit amet - \item consectetur adipiscing elit. - \item Aliquam dignissim blandit est, in dictum tortor gravida eget. In ac rutrum magna. -\end{itemize} - - -\bibliographystyle{unsrtnat} -\bibliography{references} %%% Uncomment this line and comment out the ``thebibliography'' section below to use the external .bib file (using bibtex) . - - -%%% Uncomment this section and comment out the \bibliography{references} line above to use inline references. -% \begin{thebibliography}{1} - -% \bibitem{kour2014real} -% George Kour and Raid Saabne. -% \newblock Real-time segmentation of on-line handwritten arabic script. -% \newblock In {\em Frontiers in Handwriting Recognition (ICFHR), 2014 14th -% International Conference on}, pages 417--422. IEEE, 2014. - -% \bibitem{kour2014fast} -% George Kour and Raid Saabne. -% \newblock Fast classification of handwritten on-line arabic characters. 
-% \newblock In {\em Soft Computing and Pattern Recognition (SoCPaR), 2014 6th -% International Conference of}, pages 312--318. IEEE, 2014. - -% \bibitem{hadash2018estimate} -% Guy Hadash, Einat Kermany, Boaz Carmeli, Ofer Lavi, George Kour, and Alon -% Jacovi. -% \newblock Estimate and replace: A novel approach to integrating deep neural -% networks with existing applications. -% \newblock {\em arXiv preprint arXiv:1804.09028}, 2018. - -% \end{thebibliography} - -\end{document} diff --git a/papers/arxiv_template/lint_latex.sh b/papers/arxiv_template/lint_latex.sh index 1d6fa3350..e046cddbe 100755 --- a/papers/arxiv_template/lint_latex.sh +++ b/papers/arxiv_template/lint_latex.sh @@ -4,6 +4,6 @@ if [[ -z $GIT_ROOT ]]; then echo "Can't find GIT_ROOT=$GIT_ROOT" exit -1 fi; -FILE_NAME=$GIT_ROOT/papers/KaizenFlow/kaizen_flow.paper.tex +FILE_NAME=$GIT_ROOT/papers/arxiv_template/template.tex -dev_scripts/latex/lint_latex.sh $FILE_NAME +lint_notes.py -i $FILE_NAME --use_dockerized_prettier diff --git a/papers/arxiv_template/run_latex.sh b/papers/arxiv_template/run_latex.sh index 479a03c83..647bed2bd 100755 --- a/papers/arxiv_template/run_latex.sh +++ b/papers/arxiv_template/run_latex.sh @@ -10,7 +10,7 @@ PWD=$(pwd) cd $GIT_ROOT/papers/arxiv_template -FILE_NAME=arxiv_template.tex +FILE_NAME=template.tex PDF_FILE_NAME=$(basename $FILE_NAME).pdf diff --git a/papers/arxiv_template/template.tex b/papers/arxiv_template/template.tex new file mode 100644 index 000000000..36db61aa7 --- /dev/null +++ b/papers/arxiv_template/template.tex @@ -0,0 +1,191 @@ +% From https://github.com/kourgeorge/arxiv-style + +\documentclass{article} + +\usepackage{arxiv} + +% Allow utf-8 input. +\usepackage[utf8]{inputenc} +% Use 8-bit T1 fonts. +\usepackage[T1]{fontenc} +% Hyperlinks. +\usepackage{hyperref} +% Simple URL typesetting. +\usepackage{url} +% Professional-quality tables. +\usepackage{booktabs} +% Blackboard math symbols. +\usepackage{amsfonts} +% Compact symbols for 1/2, etc. 
+\usepackage{nicefrac} +% Microtypography. +\usepackage{microtype} +% Can be removed after putting your text content. +\usepackage{lipsum} +\usepackage{graphicx} +\usepackage{natbib} +\usepackage{doi} + +\title{The Causify Dev System} + +%\date{September 9, 1985} % Here you can change the date presented in the paper title +%\date{} % Or removing it + +\author{ David S.~Hippocampus +\thanks{Use footnote for providing further information about author (webpage, +alternative address)---\emph{not} for acknowledging funding agencies.} +\\ Department of Computer Science\\ Cranberry-Lemon University\\ Pittsburgh, PA +15213 \\ \texttt{hippo@cs.cranberry-lemon.edu} \\ +%% examples of more authors +\And Elias D.~Striatum \\ Department of Electrical Engineering\\ Mount-Sheikh University\\ +Santa Narimana, Levand \\ \texttt{stariate@ee.mount-sheikh.edu} \\ +%% \AND +%% Coauthor \\ +%% Affiliation \\ +%% Address \\ +%% \texttt{email} \\ +%% \And +%% Coauthor \\ +%% Affiliation \\ +%% Address \\ +%% \texttt{email} \\ +%% \And +%% Coauthor \\ +%% Affiliation \\ +%% Address \\ +%% \texttt{email} \\ +} + +% Uncomment to remove the date +%\date{} + +% Uncomment to override the `A preprint' in the header +%\renewcommand{\headeright}{Technical Report} +%\renewcommand{\undertitle}{Technical Report} +\renewcommand{\shorttitle}{\textit{arXiv} Template} + +%%% Add PDF metadata to help others organize their library +%%% Once the PDF is generated, you can check the metadata with +%%% $ pdfinfo template.pdf +\hypersetup{ + pdftitle={A template for the arxiv style}, + pdfsubject={q-bio.NC, q-bio.QM}, + pdfauthor={David S.~Hippocampus, Elias D.~Striatum}, + pdfkeywords={First keyword, Second keyword, More}, +} + +\begin{document} + \maketitle + + \begin{abstract} + \lipsum[1] + \end{abstract} + + % keywords can be removed + \keywords{First keyword \and Second keyword \and More} + + \section{Introduction} + \lipsum[2] \lipsum[3] + + \section{Headings: first level} + \label{sec:headings} + + 
\lipsum[4] See Section \ref{sec:headings}. + + \subsection{Headings: second level} + \lipsum[5] + \begin{equation} + \xi_{ij}(t)=P(x_{t}=i,x_{t+1}=j|y,v,w;\theta)={\frac{\alpha_{i}(t)a^{w_t}_{ij}\beta_{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}{\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_{i}(t)a^{w_t}_{ij}\beta_{j}(t+1)b^{v_{t+1}}_{j}(y_{t+1})}} + \end{equation} + + \subsubsection{Headings: third level} + \lipsum[6] + + \paragraph{Paragraph} + \lipsum[7] + + \section{Examples of citations, figures, tables, references} + \label{sec:others} + + \subsection{Citations} + Citations use \verb+natbib+. The documentation may be found at + \begin{center} + \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf} + \end{center} + + Here is an example usage of the two main commands (\verb+citet+ and + \verb+citep+): Some people thought a thing \citep{kour2014real, hadash2018estimate} + but other people thought something else \citep{kour2014fast}. Many people have + speculated that if we knew exactly why \citet{kour2014fast} thought this\dots + + \subsection{Figures} + \lipsum[10] See Figure \ref{fig:fig1}. Here is how you add footnotes. \footnote{Sample + of the first footnote.} \lipsum[11] + + \begin{figure} + \centering + \fbox{\rule[-.5cm]{4cm}{4cm} \rule[-.5cm]{4cm}{0cm}} + \caption{Sample figure caption.} + \label{fig:fig1} + \end{figure} + + \subsection{Tables} + See awesome Table~\ref{tab:table}. 
+ + The documentation for \verb+booktabs+ (`Publication quality tables in LaTeX') + is available from: + \begin{center} + \url{https://www.ctan.org/pkg/booktabs} + \end{center} + + \begin{table} + \caption{Sample table title} + \centering + \begin{tabular}{lll} + \toprule \multicolumn{2}{c}{Part} \\ + \cmidrule(r){1-2} Name & Description & Size ($\mu$m) \\ + \midrule Dendrite & Input terminal & $\sim$100 \\ + Axon & Output terminal & $\sim$10 \\ + Soma & Cell body & up to $10^{6}$ \\ + \bottomrule + \end{tabular} + \label{tab:table} + \end{table} + + \subsection{Lists} + \begin{itemize} + \item Lorem ipsum dolor sit amet + + \item consectetur adipiscing elit. + + \item Aliquam dignissim blandit est, in dictum tortor gravida eget. In ac rutrum + magna. + \end{itemize} + + \bibliographystyle{unsrtnat} + \bibliography{references} %%% Uncomment this line and comment out the ``thebibliography'' section below to use the external .bib file (using bibtex) . + + %%% Uncomment this section and comment out the \bibliography{references} line above to use inline references. + % \begin{thebibliography}{1} + + % \bibitem{kour2014real} + % George Kour and Raid Saabne. + % \newblock Real-time segmentation of on-line handwritten arabic script. + % \newblock In {\em Frontiers in Handwriting Recognition (ICFHR), 2014 14th + % International Conference on}, pages 417--422. IEEE, 2014. + + % \bibitem{kour2014fast} + % George Kour and Raid Saabne. + % \newblock Fast classification of handwritten on-line arabic characters. + % \newblock In {\em Soft Computing and Pattern Recognition (SoCPaR), 2014 6th + % International Conference of}, pages 312--318. IEEE, 2014. + + % \bibitem{hadash2018estimate} + % Guy Hadash, Einat Kermany, Boaz Carmeli, Ofer Lavi, George Kour, and Alon + % Jacovi. + % \newblock Estimate and replace: A novel approach to integrating deep neural + % networks with existing applications. + % \newblock {\em arXiv preprint arXiv:1804.09028}, 2018. 
+ + % \end{thebibliography} +\end{document} From 8e4553e3b89b68503cedd8233ae69411e5dbe007 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 8 Jun 2025 17:57:59 -0400 Subject: [PATCH 165/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../Causify_dev_system.md | 458 ------------------ .../Causify_dev_system.tex | 15 +- 2 files changed, 10 insertions(+), 463 deletions(-) delete mode 100644 papers/Causify_development_system/Causify_dev_system.md diff --git a/papers/Causify_development_system/Causify_dev_system.md b/papers/Causify_development_system/Causify_dev_system.md deleted file mode 100644 index 485659e5f..000000000 --- a/papers/Causify_development_system/Causify_dev_system.md +++ /dev/null @@ -1,458 +0,0 @@ -# Runnable Directories: The Solution to the Monorepo vs. Multi-repo Debate - -## 1. Introduction - -Software development workflows are becoming more complex as they adapt to the -demands of large-scale systems and modern collaborative development practices. -As teams and codebases grow, companies face the challenge of organizing both -effectively. When it comes to structuring the codebase, two main approaches -emerge: monorepos and multi-repos[1]. Monorepos consolidate all code into a -single repository, simplifying version control but carrying a risk of -scalability and maintainability issues. Conversely, multi-repos store the code -in logically separated repositories, easier to manage and deploy but more -difficult to keep in sync. - -In this paper, we propose Causify dev system, an alternative hybrid solution: a -modular system architecture built around _runnable directories_. Although -independent, these directories maintain cohesion through shared tooling and -environments, offering a straightforward and scalable way to organize the -codebase while ensuring reliability in development, testing, and deployment. 
- -In this paper, we first outline the current state-of-the-art (Section 2), then -describe our approach, with a particular focus on the containerized workflows -that support it (Section 3). We then discuss the strengths and limitations of -our approach compared to existing practices (Section 4), and conclude by -presenting potential avenues for future improvement (Section 5). - -## 2. Current landscape - -### 2.1. Monorepo - -The monorepo approach involves storing all code for multiple applications within -a single repository. This strategy has been popularized by large tech companies -like Google[2], Meta[3], Microsoft[4] and Uber[5], proving that even codebases -with billions of lines of code can be effectively managed in a single -repository. The key benefits of this approach include: - -- Consistency in environment: with everything housed in one repository, there's - no risk of projects becoming incompatible due to conflicting versions of - third-party packages. -- Simplified version control: there is a single commit history, which makes it - easy to track and, if needed, revert changes globally. -- Reduced coordination overhead: developers work within the same repository, - with easy access to all code, shared knowledge, tools and consistent coding - standards. - -However, as monorepo setups scale, users often face significant challenges. A -major downside is long CI/CD build times, as even small changes can trigger -massive rebuilds and tests throughout the entire codebase. To cope with this, -extra tooling, such as [Buck](https://buck2.build/) or -[Bazel](https://bazel.build/), must be configured, adding complexity to -workflows. Even something as simple as searching and browsing the code becomes -more difficult, often requiring specialized tools and IDE plug-ins. - -Additionally, when everything is located in one place, it is harder to separate -concerns and maintain clear boundaries between projects. 
Managing permissions -also becomes more difficult when only selected developers should have access to -specific parts of the codebase. - -### 2.2. Multi-repo - -The multi-repo approach involves splitting code across several repositories, -with each one dedicated to a specific module or service. This modularity allows -teams to work independently on different parts of a system, making it easier to -manage changes and releases for individual components. Each repository can -evolve at its own pace, and developers can focus on smaller, more manageable -codebases. - -However, the multi-repo strategy comes with its own set of challenges, -particularly when it comes to managing dependencies and ensuring version -compatibility across repositories. For instance, different repositories might -rely on two different versions of a third-party package, or even conflicting -packages, making synchronization complex or, in some cases, nearly impossible. -In general, propagating changes from one repository to another requires careful -coordination. Tools like [Jenkins](https://www.jenkins.io/) and -[GitHub Actions](https://github.com/features/actions) help streamline CI/CD -pipelines, but they often struggle when dealing with heterogeneous environments. - - -### 3.3. Thin environment - -To bootstrap development workflows, we use a thin client that installs a minimal -set of essential dependencies, such as Docker and invoke, in a lightweight -virtual environment. A single thin environment is shared across all runnable -directories which minimizes setup overhead (see Figure 3). This environment -contains everything that is needed to start development containers, which are in -turn specific to each runnable directory. With this approach, we ensure that -development and deployment remain consistent across different systems (e.g., -server, personal laptop, CI/CD). 
- -```mermaid -graph RL - thin_env[thin environment] - subgraph A [Runnable Dir A] - direction TB - B[Runnable Dir B] - C1[Runnable Dir C] - end - subgraph C [Runnable Dir C] - end - - C -->|Submodule| C1 - A -.-> thin_env - B -.-> thin_env - C1 -.-> thin_env - C -.-> thin_env - - style A fill:#FFF3CD - style C fill:#FFF3CD,stroke:#9E9D24 -``` - -Figure 3. Thin environment shared across multiple runnable directories. - -### 3.4. Submodule of "helpers" - -All Causify repositories include a dedicated "helpers" repository as a -submodule. This repository contains common utilities and development toolchains, -such as the thin environment, Linter, Docker, and invoke workflows. By -centralizing these resources, we eliminate code duplication and ensure that all -teams, regardless of the project, use the same tools and procedures. - -Additionally, it hosts symbolic link targets for files that must technically -reside in each repository but are identical across all of them (e.g., license -and certain configuration files). Manually keeping them in sync can be difficult -and error-prone over time. In our approach, these files are stored exclusively -in "helpers", and all other repositories utilize read-only symbolic links -pointing to them. This way, we avoid file duplication and reduce the risk of -introducing accidental discrepancies. - -```mermaid -graph RL - subgraph A [Runnable Dir A] - direction TB - B[Runnable Dir B] - H1[Helpers] - end - subgraph H [Helpers] - end - - H -->|Submodule| H1 - - style A fill:#FFF3CD - style H fill:#FFF3CD,stroke:#9E9D24 -``` - -Figure 4. "Helpers" submodule integrated into a repository. - -#### 3.4.1. Git hooks - -Our "helpers" submodule includes a set of Git hooks used to enforce policies -across our development process, including Git workflow rules, coding standards, -security and compliance, and other quality checks. These hooks are installed by -default when the user activates the thin environment. 
They perform essential -checks such as verifying the branch, author information, file size limits, -forbidden words, Python file compilation, and potential secret leaks...etc. - -### 3.5. Executing tests - -Our system supports robust testing workflows that leverage the containerized -environment for comprehensive code validation. Tests are executed inside Docker -containers to ensure consistency across development and production environments, -preventing discrepancies caused by variations in host system configurations. In -the case of nested runnable directories, tests are executed recursively within -each directory's corresponding container, which is automatically identified (see -Figure 5). As a result, the entire test suite can be run with a single command, -while still allowing tests in subdirectories to use dependencies that may not be -compatible with the parent directory's environment. - -```mermaid -graph LR - start((start)) - start --> A - subgraph A[Runnable Dir A] - direction LR - pytest_1((pytest)) - B[Runnable Dir B / Container B] - C[Runnable Dir C / Container C] - dirA1[dir1 / Container A] - dirA2[dir2 / Container A] - dirA11[dir1.1 / Container A] - dirA12[dir1.2 / Container A] - pytest_1 --> B - pytest_1 --> C - pytest_1 --> dirA1 - pytest_1 --> dirA2 - dirA1 --> dirA11 - dirA1 --> dirA12 - end - -style A fill:#FFF3CD,stroke:#9E9D24 -style B font-size:15px -style C font-size:15px -``` - -Figure 5. Recursive test execution in dedicated containers. - -### 3.6. Dockerized executables - -Sometimes, installing a package within a development container may not be -justified, particularly if it is large and will only be used occasionally. In -such cases, we use _dockerized executables_: when the package is needed, a -Docker container is dynamically created with only the specific dependencies -required for its installation. The package is then installed and executed within -the container, which is discarded once the task is complete. 
This prevents the -development environment from becoming bloated with dependencies that are rarely -used. If necessary, for example during test execution, a dockerized executable -can be run inside another Docker container, whether using the children or -sibling container approach, as discussed in Section 3.2. - -## 4. Discussion - -Causify's approach presents a strong alternative to existing code organization -solutions, offering scalability and efficiency for both small and large systems. - -The proposed modular architecture is centered around runnable directories, which -operate as independent units with their own build and release lifecycles. This -design bypasses the bottlenecks common in large monorepos, where centralized -workflows can slow down CI/CD processes unless specialized tools like Buck or -Bazel are used. By leveraging Docker containers, we ensure consistent -application behavior across development, testing, and production environments, -avoiding problems caused by system configuration discrepancies. Dependencies are -isolated within each directory's dedicated container, reducing the risks of -issues that tight coupling or package incompatibility might create in a monorepo -or a multi-repo setup. - -Unlike multi-repos, runnable directories can utilize shared utilities from -"helper" submodules, eliminating code duplication and promoting consistent -workflows across projects. They can even reside under a unified repository -structure which simplifies codebase management and reduces the overhead of -maintaining multiple repositories. With support for recursive test execution -spanning all components, runnable directories allow for end-to-end validation of -the whole codebase through a single command, removing the need for testing each -repository separately. - -There are, however, several challenges that might arise in the adoption of our -approach. 
Teams that are unfamiliar with containerized environments may need
-time and training to effectively transition to the new workflows. The reliance
-on Docker may introduce additional resource demands, particularly when running
-multiple containers concurrently on development machines. This would require
-further optimization, possibly aided by customized tooling. These adjustments,
-while ultimately beneficial, can add complexity to the system's rollout and
-necessitate ongoing maintenance to ensure seamless integration with existing
-CI/CD pipelines and development practices.
-
-## 5. Future directions
-
-Looking ahead, there are several areas where the proposed approach can be
-improved. One direction is the implementation of dependency-aware caching to
-ensure that only the necessary components are rebuilt or retested when changes
-are made. This would reduce the time spent on development tasks, making the
-overall process more efficient. Further optimization could involve designing our
-CI/CD pipelines to execute builds, tests, and deployments for multiple runnable
-directories in parallel, which would allow us to take full advantage of
-available compute resources.
-
-Additional measures can also be taken to enhance security. Integrating automated
-container image scanning and validation before deployment would help guarantee
-compliance with organizational policies and prevent vulnerabilities from
-entering production environments. In addition, fine-grained access controls
-could be introduced for runnable directories in order to safeguard sensitive
-parts of the codebase. These steps will bolster both the security and efficiency
-of our workflows as the projects continue to scale.
-
-# Buildmeister: Daily Accountability for CI Stability
-
-## Motivation
-
-Automated test pipelines are essential, but without accountability, they often
-fall into disrepair. 
The Buildmeister routine introduces a rotating,
-human-in-the-loop system designed to enforce green builds, identify root causes,
-and ensure high-quality CI/CD hygiene. This mechanism aligns technical execution
-with team responsibility, fostering a culture of operational ownership.
-
-## Core Responsibilities
-
-The Buildmeister is a rotating role assigned to a team member each week. Their
-primary duties are:
-
-- Monitor build health daily via the Buildmeister Dashboard
-- Investigate failures and ensure GitHub Issues are filed promptly
-- Push responsible team members to fix or revert breaking code
-- Maintain test quality by analyzing trends in Allure reports
-- Document breakage through a structured post-mortem log
-
-The Buildmeister ensures builds are never "temporarily broken"; our policy is:
-"Fix it or revert within one hour."
-
-## Handover and Daily Reporting
-
-The routine begins each day with a status email to the team detailing:
-
-- Overall build status (green/red)
-- Failing test names and owners
-- GitHub issue references
-- Expected resolution timelines
-- A screenshot of the Buildmeister dashboard
-
-At the end of each rotation, the outgoing Buildmeister must confirm handover by
-receiving an "Acknowledged" reply from the incoming one, ensuring continuity and
-awareness.
-
-## Workflow in Practice
-
-When a build breaks:
-
-- The team is alerted via Slack (#build-notifications) through our GitHub
-  Actions bot
-- The Buildmeister triages the issue:
-  - Quickly reruns or replicates the failed tests if uncertain
-  - Blames commits to identify the responsible party
-  - Notifies the team and files a structured GitHub Issue
-- All information, including test names, logs, and the responsible engineer, is
-  transparently shared and tracked
-
-If the issue is not resolved within one hour, the Buildmeister must escalate
-and, if needed, disable the test with explicit owner consent. 
-
-## Tools and Analysis
-
-### Buildmeister Dashboard
-
-A centralized UI provides a real-time view of all builds across repos and
-branches. It is the Buildmeister's daily launchpad.
-
-### Allure Reports
-
-- Every week, the Buildmeister reviews trends in skipped/failing tests, duration
-  anomalies, and retry spikes
-- This process:
-  - Surfaces hidden test instability
-  - Provides historical context to new breaks
-  - Enables preventive action before regressions cascade
-
-### Post-Mortem Log
-
-Every build break is logged in a shared spreadsheet, capturing:
-
-- Repo and test type
-- Link to the failing GitHub run
-- Root cause
-- Owner and fix timeline
-- Whether the issue was fixed or test was disabled
-
-This living record forms the basis for failure mode analysis and future
-automation improvements.
-
-## Why It Matters
-
-The Buildmeister is not just a rotating duty; it is a system of shared
-accountability. It transforms test stability from an abstract ideal into a daily
-operational habit, backed by clear expectations, defined processes, and human
-enforcement. By combining automation with ownership, we achieve sustainable
-reliability in a complex, multi-repo ecosystem.
-
-# Coverage Tracking with Codecov: A Layer of Continuous Accountability
-
-## Motivation
-
-Maintaining comprehensive test coverage across a growing codebase requires more
-than just writing tests; it demands visibility, automation, and enforcement. Our
-integration with Codecov provides a system-wide view of test coverage,
-structured into fast, slow, and superslow test suites. This setup ensures that
-all code paths are exercised and that test coverage regressions are identified
-early and reliably. 
- -## Structured Coverage by Test Category - -We categorize coverage tests into three suites based on runtime and scope: - -- Fast tests run frequently (e.g., daily) and provide immediate feedback on - high-priority code paths -- Slow tests cover broader logic and data scenarios -- Superslow tests are comprehensive, long-running regressions executed on a - weekly cadence or on-demand - -Each suite produces its own coverage report, which is flagged and uploaded -independently to Codecov, enabling targeted inspection and carryforward of data -when some suites are skipped. - -## CI Integration and Workflow Behavior - -Coverage reports are generated and uploaded automatically as part of our CI -pipelines. The workflow: - -- Fails immediately on critical setup errors (e.g., dependency or configuration - issues) -- Continues gracefully if fast or slow tests fail mid-pipeline, but surfaces - those failures in a final gating step -- Treats superslow failures as critical, immediately halting the workflow - -This behavior ensures resilience while preventing silent test degradation. - -## Enforced Thresholds and Quality Gates - -Coverage checks are enforced at both project and patch levels: - -- Project-level threshold: Pull requests fail if overall coverage drops beyond a - configured margin (e.g., >1%) -- Patch-level checks: Changes are required to maintain or improve coverage on - modified lines -- Flags and branches: Checks are scoped per test suite and only enforced on - critical branches - -Together, these gates maintain coverage integrity while avoiding noise from -unrelated code paths. 
- -## Visibility and Developer Experience - -Codecov is integrated tightly into the developer workflow: - -- PRs show inline coverage status and file-level diffs -- Optional summary comments detail total coverage, changes, and affected files -- Reports can be viewed in Codecov's UI or served locally as HTML -- Carryforward settings retain historical data when full test suites aren't - executed - -Developers can also generate and inspect local reports for any test suite using -standard coverage commands. - -## Best Practices and Operational Consistency - -To ensure effective usage: - -- Coverage is always uploaded—even if tests fail—ensuring no blind spots -- Developers are encouraged to monitor coverage deltas in PRs -- The system defaults to global configuration, but supports fine-tuning via - repo-specific overrides -- Weekly reviews of coverage trends and flags help spot regressions and - low-tested areas - -## Beyond the Basics - -Our setup also supports: - -- PR commenting: Optional automated comments on test impact -- Badges: Live indicators of coverage status -- Custom reporting: Layouts and thresholds can be adjusted to align with - evolving policies - -## Summary - -Coverage tracking is more than a checkbox—it's an enforcement mechanism, a -feedback loop, and a source of engineering discipline. With structured test -categories, resilient workflows, and project-level gates, our Codecov-based -system transforms coverage data into actionable insights, reinforcing test -quality across all levels of the stack. - -## References - -- [1] - [Mono vs. 
multi-repo](https://free.gitkraken.com/hubfs/Mono_v_Multi-Repo_debate_2023.pdf) -- [2] - [Why Google stores billions of lines of code in a single repository](https://dl.acm.org/doi/10.1145/2854146) -- [3] - [What it is like to work in Meta's (Facebook's) monorepo](https://blog.3d-logic.com/2024/09/02/what-it-is-like-to-work-in-metas-facebooks-monorepo/) -- [4] - [Microsoft: How "Mono-repo" and "One Infra" Help Us Deliver a Better Developer Experience](https://devblogs.microsoft.com/appcenter/how-mono-repo-and-one-infra-help-us-deliver-a-better-developer-experience/) -- [5] - [Uber: Faster Together: Uber Engineering's iOS Monorepo](https://www.uber.com/blog/ios-monorepo/) diff --git a/papers/Causify_development_system/Causify_dev_system.tex b/papers/Causify_development_system/Causify_dev_system.tex index e824d9fe0..11bacc3e5 100644 --- a/papers/Causify_development_system/Causify_dev_system.tex +++ b/papers/Causify_development_system/Causify_dev_system.tex @@ -95,7 +95,7 @@ demands of large-scale systems and modern collaborative development practices. As teams and codebases grow, companies face the challenge of organizing both effectively. When it comes to structuring the codebase, two main approaches - emerge: monorepos and multi-repos[1]. Monorepos consolidate all code into a + emerge: monorepos and multi-repos. Monorepos consolidate all code into a single repository, simplifying version control but carrying a risk of scalability and maintainability issues. Conversely, multi-repos store the code in logically separated repositories, easier to manage and deploy but more @@ -170,10 +170,15 @@ \subsection{Runnable directories} An ideal strategy would combine the best of both worlds: -- The modularity of multi-repos, to keep the codebase scalable and simplify - day-to-day development processes. -- The environment consistency of monorepos, to avoid synchronization issues and - prevent errors that arise from executing code in misaligned environments. 
+\begin{itemize} + + \item The modularity of multi-repos, to keep the codebase scalable and + simplify day-to-day development processes. + + \item The environment consistency of monorepos, to avoid synchronization + issues and prevent errors that arise from executing code in misaligned + environments. +\end{itemize} Both are achieved through the hybrid approach proposed in this paper, which will be discussed in Section 3. From ccf92ea09c0162cb827b1d4d490e12856f4729a1 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 8 Jun 2025 17:58:42 -0400 Subject: [PATCH 166/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 859619a0b..cab5bd89b 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -654,6 +654,19 @@ def latex_rewrite() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def latex_check() -> _PROMPT_OUT: + system = _LATEX_CONTEXT + system += r""" + Check the Latex code is correct and doesn't have errors. + + Print the errors in one line. + """ + pre_transforms: Set[str] = set() + post_transforms = set() + post_container_transforms = [] + return system, pre_transforms, post_transforms, post_container_transforms + + # ############################################################################# # Markdown. 
# ############################################################################# From 175696de7f4047e6d44850c47a58b87985a1a91e Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 9 Jun 2025 07:06:18 -0400 Subject: [PATCH 167/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 12 ++++++++---- dev_scripts_helpers/llms/llm_transform.py | 10 +++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index cab5bd89b..423727896 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1263,12 +1263,16 @@ def slide_check() -> _PROMPT_OUT: system += r""" - Do not print the content of the slide, but only the comment. - - Is the content of the slide clear and correct? + - Is the content of the slide clear? - Answer with "The slide is clear" or "The slide is not clear" - - Is there anything that can be clarified? - - Respond with at most 5 short bullet points about what can be clarified. - - Do not report things that you are not sure about. + - Is the content of the slide correct? + - Answer with "The slide is correct" or "The slide is not correct" + + - What can be clarified or improved? + - Respond with at most 3 short bullet points about what can be clarified + or improved. + - You MUST report only things that you are sure about. 
""" pre_transforms: Set[str] = set() post_transforms: Set[str] = set() diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 7cb3b71bc..9fc5ff159 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -45,6 +45,7 @@ import helpers.hprint as hprint import helpers.hserver as hserver import helpers.hsystem as hsystem +import helpers.hlatex as hlatex _LOG = logging.getLogger(__name__) @@ -232,7 +233,6 @@ def _main(parser: argparse.ArgumentParser) -> None: hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) ) if args.prompt == "md_to_latex": - import helpers.hlatex as hlatex # Read the input. txt = hparser.read_file(tmp_in_file_name) txt = "\n".join(txt) @@ -241,6 +241,14 @@ def _main(parser: argparse.ArgumentParser) -> None: txt = hmarkdo.format_latex(txt) hparser.write_file(txt, out_file_name) return + elif args.prompt == "md_clean_up": + # Read the input. + txt = hparser.read_file(tmp_in_file_name) + txt = "\n".join(txt) + txt = hmarkdo.md_clean_up(txt) + txt = hmarkdo.format_markdown(txt) + hparser.write_file(txt, out_file_name) + return # TODO(gp): We should just automatically pass-through the options. cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] From fe0036023b480c26fe77a2164275b7e171504602 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 9 Jun 2025 10:34:20 -0400 Subject: [PATCH 168/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hdocker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 28268f712..760e27414 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -681,7 +681,7 @@ def run_dockerized_prettier( is_caller_host = not hserver.is_inside_docker() # TODO(gp): After fix for CmampTask10710 enable this. 
# use_sibling_container_for_callee = hserver.use_docker_sibling_containers() - use_sibling_container_for_callee = True + use_sibling_container_for_callee = False caller_mount_path, callee_mount_path, mount = get_docker_mount_info( is_caller_host, use_sibling_container_for_callee ) From 46c377f115f2d930b5255ff203e30da3f0b1f790 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 10 Jun 2025 08:50:22 -0400 Subject: [PATCH 169/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/sync_ai_review.sh | 19 ------ .../git/all.ai_review.how_to_guide.md | 59 +++++++++++++------ helpers/lib_tasks_lint.py | 23 ++++++-- 3 files changed, 58 insertions(+), 43 deletions(-) delete mode 100755 dev_scripts_helpers/llms/sync_ai_review.sh diff --git a/dev_scripts_helpers/llms/sync_ai_review.sh b/dev_scripts_helpers/llms/sync_ai_review.sh deleted file mode 100755 index aa376d6e6..000000000 --- a/dev_scripts_helpers/llms/sync_ai_review.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -xe -HELPERS_ROOT_DIR=$(find . -name "helpers_root" -type d | grep -v git) || true -if [[ -z $HELPERS_ROOT_DIR ]]; then - HELPERS_ROOT_DIR="." 
-fi; -echo HELPERS_ROOT_DIR=$HELPERS_ROOT_DIR - -ls $HELPERS_ROOT_DIR - -\cp -rf /Users/saggese/src/helpers1/helpers/hgit.py $HELPERS_ROOT_DIR/helpers - -ls $HELPERS_ROOT_DIR/dev_scripts_helpers/llms -\cp -rf /Users/saggese/src/helpers1/dev_scripts_helpers/llms/{ai_review.py,llm_prompts.py,llm_transform.py,inject_todos.py} $HELPERS_ROOT_DIR/dev_scripts_helpers/llms - -ls $HELPERS_ROOT_DIR/helpers -\cp -rf /Users/saggese/src/helpers1/helpers/hmarkdown.py $HELPERS_ROOT_DIR/helpers - -ls $HELPERS_ROOT_DIR/docs/code_guidelines -\cp -rf /Users/saggese/src/helpers1/docs/code_guidelines/*guidelines* $HELPERS_ROOT_DIR/docs/code_guidelines diff --git a/docs/work_tools/git/all.ai_review.how_to_guide.md b/docs/work_tools/git/all.ai_review.how_to_guide.md index cff7bdca3..4c1a7cdf2 100644 --- a/docs/work_tools/git/all.ai_review.how_to_guide.md +++ b/docs/work_tools/git/all.ai_review.how_to_guide.md @@ -155,10 +155,11 @@ - The goal is to make these tools robust enough so that they can be used directly by the author and potentially integrated in the `linter` flow itself - - Initially, reviewers use these tools as part of initial dog-fooding of the + - Initially, reviewers use these tools as part of initial dogfooding of the flows -- Go to the Git branch with the code +- Go to the Git branch with the code to review + - Check which files are modified ```bash > invoke git_branch_diff_with -t base --only-print-files @@ -187,29 +188,19 @@ - You should always commit your code and apply the automatic transforms that modify a file in a separate commit, so that it's easy to review -## How to change the logic in place while reviewing - -- A common problem is that we might want to adjust one of our tools (e.g., - `linter.py`, `ai_review.py`) while reviewing somebody's else code - -- The approach is to copy files from a different Git client in the one with the - code being tested using one of the scripts - ``` - > ai_review.py -i template_code.py - ``` - ``` - > llm_transform.py -i 
template_code.py -p code_fix_code - ``` - +- There are multiple targets for the `ai_review.py` ```bash > PROMPT=review_llm > PROMPT=review_correctness > PROMPT=review_linter > PROMPT=review_architecture - > + > FILE=dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py - > \cp -f /Users/saggese/src/helpers1/dev_scripts_helpers/llms/sync_ai_review.sh $HELPERS_ROOT_DIR/dev_scripts_helpers/llms && sync_ai_review.sh && ai_review.py -i $FILE -p $PROMPT + > ai_review.py -i $FILE -p $PROMPT + + # To copy all the reviewer code. + > \cp -f /Users/saggese/src/helpers1/helpers/lib_tasks_lint.py helpers && i lint_sync_code && ai_review.py -i $FILE -p $PROMPT > vi -c "cfile cfile" @@ -217,3 +208,35 @@ > llm_transform.py -i dev_scripts_helpers/github/dockerized_sync_gh_repo_settings.py -p code_fix_code ``` + + ``` + > ai_review.py -i template_code.py + ``` + ``` + > llm_transform.py -i template_code.py -p code_fix_code + ``` + +## How to improve the code in place while reviewing + +- A common problem is that we might want to adjust one of our tools (e.g., + `linter.py`, `ai_review.py`) while reviewing somebody's else code + +- The approach is to copy files from a different Git client in the one with the + code being tested using one of the scripts + +- There are two use cases + 1. When the code to review is in repo including `//helpers` + - In this case we can simply create a branch in `//helpers` and modify the + code for the tools in place + 2. 
When the code to review is in the repo `//helpers` + - In this case, we can use a different Git client to develop and "sync" the + `linter.py` / `ai_review.py` code from one client to another + ```bash + > \cp -f /Users/saggese/src/helpers1/helpers/lib_tasks_lint.py helpers && i lint_sync_code + ``` + - Before committing the review, we then revert the `linter.py / + ai_review.py` code + ```bash + > i lint_sync_code -r + ``` + diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index 8d216bccf..10250baec 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -5,6 +5,7 @@ """ import datetime +import filecmp import logging import os @@ -359,17 +360,16 @@ def _get_lint_docker_cmd( @task def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): # type: ignore """ - Sync the code needed to run linter and ai_review.py from a client to the - current one. + Sync code needed to run linter / ai_review from a Git client to the current one. - :param git_client_name: the name of the git client to sync from. It can be + :param git_client_name: the name of the Git client to sync from. It can be something like "helpers1" and it will be used from "$HOME/src" or can be a full path. :param revert_to_original: if `True`, revert the changes to the original """ _ = ctx hlitauti.report_task() - # Copy the code from the src git client to the current one. + # Copy the code from the src Git client to the current one. src_git_dir = hgit.resolve_git_client_dir(git_client_name) # files_to_copy = [ @@ -378,8 +378,7 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): "llm_prompts.py", "llm_transform.py", "inject_todos.py", - "all.linter_style_review_guidelines.reference.md", - "all.llm_style_review_guidelines.reference.md", + "all.coding_style_guidelines.reference.md", ] # Revert the files in the current git client to the original code. 
if revert_to_original: @@ -387,6 +386,8 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): for file_name in files_to_copy: _LOG.debug("Reverting %s to original code", file_name) src_file_path = hgit.find_file(file_name, dir_path=src_git_dir) + git_root_dir = hgit.find_git_root(src_git_dir) + src_file_path = os.path.relpath(src_file_path, git_root_dir) cmd = "git checkout -- %s" % src_file_path hsystem.system(cmd) _LOG.info("Done") @@ -419,6 +420,16 @@ def lint_sync_code(ctx, git_client_name="helpers1", revert_to_original=False): # Copy the file. _LOG.debug(hprint.to_str("src_file_path dst_file_path")) dir_name = os.path.dirname(dst_file_path) + # Check that the files are different. + if os.path.exists(src_file_path) and os.path.isdir(dst_file_path): + if filecmp.cmp(src_file_path, dst_file_path, shallow=False): + _LOG.info( + "File '%s' is identical to '%s', skipping", + src_file_path, + dst_file_path, + ) + continue + # Copy the file. hio.create_dir(dir_name, incremental=True) cmd = f"cp -f {src_file_path} {dst_file_path}" _LOG.debug(hprint.to_str("cmd")) From 3e9dda6315e0a4fec0a72a45aa4d656fd709f9a3 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 10 Jun 2025 18:06:47 -0400 Subject: [PATCH 170/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/notes_to_pdf.py | 27 ++++++- .../documentation/render_images.py | 6 +- dev_scripts_helpers/llms/llm_transform.py | 24 +++---- helpers/hmarkdown.py | 37 +++++++++- helpers/test/test_hmarkdown.py | 70 ++++++++++++++++++- 5 files changed, 145 insertions(+), 19 deletions(-) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index 0df86a3d8..e27ee8238 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -154,6 +154,25 @@ def 
_filter_by_lines(file_name: str, filter_by_lines: str, prefix: str) -> str: return file_out +def _filter_by_slides(file_name: str, filter_by_slides: str, prefix: str) -> str: + """ + Filter the lines of a file in [start_slide, end_slide[. + + :param file_name: The input file to be processed + :param filter_by_slides: a string like `1:10` or `1:None` or `None:10` + :param prefix: The prefix used for the output file (e.g., `tmp.pandoc`) + :return: The path to the processed file + """ + # Read the file. + txt = hio.from_file(file_name) + # Filter by header. + txt = hmarkdo.extract_section_from_markdown(txt, header) + # Save the file. + file_out = f"{prefix}.filter_by_slides.txt" + hio.to_file(file_out, txt) + return file_out + + # ############################################################################# @@ -570,7 +589,8 @@ def _run_all(args: argparse.Namespace) -> None: file_name = _filter_by_header(file_name, args.filter_by_header, prefix) if args.filter_by_lines: file_name = _filter_by_lines(file_name, args.filter_by_lines, prefix) - # E.g., file_='/app/helpers_root/tmp.notes_to_pdf.render_image2.txt' + if args.filter_by_slides: + file_name = _filter_by_slides(file_name, args.filter_by_slides, prefix) # - Preprocess_notes action = "preprocess_notes" to_execute, actions = _mark_action(action, actions) @@ -698,6 +718,11 @@ def _parse() -> argparse.ArgumentParser: action="store", help="Filter by lines (e.g., `0:10`, `1:None`, `None:10`)", ) + parser.add_argument( + "--filter_by_slides", + action="store", + help="Filter by slides (e.g., `0:10`, `1:None`, `None:10`)", + ) # TODO(gp): -> out_action_script parser.add_argument( "--script", diff --git a/dev_scripts_helpers/documentation/render_images.py b/dev_scripts_helpers/documentation/render_images.py index 6a4b2e797..fb24fc4f5 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -250,6 +250,8 @@ def _render_image_code( \usepackage{tikz} 
\usepackage{amsmath} \usepackage{pgfplots} + \usepackage{mathrsfs} % For script font + \usepackage{xcolor} \pgfplotsset{compat=1.17} \begin{document} \begin{tikzpicture} @@ -265,11 +267,11 @@ def _render_image_code( \usepackage{tabularx} \usepackage{enumitem} \usepackage{booktabs} % Optional: For nicer tables - \begin{document} + %\begin{document} """) end_tag = hprint.dedent(r""" - \end{document} + %\end{document} """) image_code_txt = "\n".join([start_tag, image_code_txt, end_tag]) # Get paths for rendered files. diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 9fc5ff159..cab8f8a6a 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -232,21 +232,21 @@ def _main(parser: argparse.ArgumentParser) -> None: tmp_in_file_name, tmp_out_file_name = ( hparser.adapt_input_output_args_for_dockerized_scripts(in_file_name, tag) ) - if args.prompt == "md_to_latex": + if args.prompt in ("md_to_latex", "md_clean_up", "md_bold_bullets"): # Read the input. txt = hparser.read_file(tmp_in_file_name) txt = "\n".join(txt) - #txt = hmarkdo.format_markdown(txt) - txt = hlatex.convert_pandoc_md_to_latex(txt) - txt = hmarkdo.format_latex(txt) - hparser.write_file(txt, out_file_name) - return - elif args.prompt == "md_clean_up": - # Read the input. 
- txt = hparser.read_file(tmp_in_file_name) - txt = "\n".join(txt) - txt = hmarkdo.md_clean_up(txt) - txt = hmarkdo.format_markdown(txt) + if args.prompt == "md_to_latex": + txt = hlatex.convert_pandoc_md_to_latex(txt) + txt = hmarkdo.format_latex(txt) + elif args.prompt == "md_clean_up": + txt = hmarkdo.md_clean_up(txt) + txt = hmarkdo.format_markdown(txt) + elif args.prompt == "md_bold_bullets": + txt = hmarkdo.bold_first_level_bullets(txt) + txt = hmarkdo.format_markdown(txt) + else: + raise ValueError(f"Invalid prompt='{args.prompt}'") hparser.write_file(txt, out_file_name) return diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 765a755c3..e8a355a23 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -329,7 +329,7 @@ def md_clean_up(txt: str) -> str: # ############################################################################# -# TODO(gp): This could be done with `HeaderList`. +# TODO(gp): This could be done by processing `HeaderList`. def extract_section_from_markdown(content: str, header_name: str) -> str: """ Extract a section of text from a Markdown document based on the header @@ -535,6 +535,39 @@ def extract_headers_from_markdown( return header_list +def extract_slides_from_markdown( + txt: str, +) -> HeaderList: + """ + Extract slides (i.e., sections prepended by `*`) from Markdown file and + return an `HeaderList`. + + :param txt: content of the input Markdown file. + :return: the generated `HeaderList`, e.g., + ``` + [ + (1, "Slide 1", 5), + (1, "Slide 2", 10), ...] + ``` + """ + hdbg.dassert_isinstance(txt, str) + header_list: HeaderList = [] + # Process the input file to extract headers. + for line_number, line in enumerate(txt.splitlines(), start=1): + # TODO(gp): Use the iterator. + # Skip the visual separators. + if is_markdown_line_separator(line): + continue + # Get the header level and title. 
+ m = re.match(r"^\* (.*)$", line) + is_slide = m is not None + if is_slide: + title = m.group(1) + header_info = HeaderInfo(1, title, line_number) + header_list.append(header_info) + return header_list + + def header_list_to_vim_cfile(markdown_file: str, header_list: HeaderList) -> str: """ Convert a list of headers into a Vim cfile format. @@ -1350,4 +1383,4 @@ def format_latex(txt: str) -> str: file_type = "tex" txt = dshdlino.prettier_on_str(txt, file_type) txt_ = cast(str, txt) - return txt_ + return txt_ \ No newline at end of file diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 55598addd..218355e60 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -327,7 +327,7 @@ def _get_markdown_example2() -> str: return content -def _get_markdown_example3() -> str: +def _get_markdown_no_header_example1() -> str: content = r""" This is some content without any headers. """ @@ -428,6 +428,38 @@ def _get_markdown_example5() -> hmarkdo.HeaderList: return content +def _get_markdown_slides_example1() -> str: + content = r""" + # Header1 + + * Slide 1 + Content 1. + + ## Header2 + + * Slide 2 + Content 2. + + * Slide 3 + Content 3. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + +def _get_markdown_slides_example2() -> str: + content = r""" + # Header1 + + * Slide1 + Content 1. + """ + content = hprint.dedent(content) + content = cast(str, content) + return content + + # ############################################################################# # Test_extract_section_from_markdown1 # ############################################################################# @@ -491,7 +523,7 @@ def test4(self) -> None: def test_no_header(self) -> None: # Prepare inputs. - content = _get_markdown_example3() + content = _get_markdown_no_header_example1() # Call tested function. 
with self.assertRaises(ValueError) as fail: hmarkdo.extract_section_from_markdown(content, "Header4") @@ -538,6 +570,40 @@ def test_no_headers(self) -> None: self.assert_equal(str(act), str(exp)) +# ############################################################################# +# Test_extract_slides_from_markdown1 +# ############################################################################# + + +class Test_extract_slides_from_markdown1(hunitest.TestCase): + def test_multiple_slides(self) -> None: + # Prepare inputs. + content = _get_markdown_slides_example1() + # Call function. + act = hmarkdo.extract_slides_from_markdown(content) + # Check output. + exp = r"""[HeaderInfo(1, 'Slide 1', 1), HeaderInfo(1, 'Slide 2', 3), HeaderInfo(1, 'Slide 3', 5)]""" + self.assert_equal(str(act), exp) + + def test_single_slides(self) -> None: + # Prepare inputs. + content = _get_markdown_slides_example2() + # Call function. + act = hmarkdo.extract_slides_from_markdown(content) + # Check output. + exp = r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" + self.assert_equal(str(act), exp) + + def test_no_slides(self) -> None: + # Prepare inputs. + content = _get_markdown_no_header_example1() + # Call function. + act = hmarkdo.extract_slides_from_markdown(content) + # Check output. 
+ exp: List[str] = [] + self.assert_equal(str(act), str(exp)) + + # ############################################################################# # Test_remove_end_of_line_periods1 # ############################################################################# From 4e4ce09316863d828fb7689039b2a332f0008ce9 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 10 Jun 2025 18:18:47 -0400 Subject: [PATCH 171/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hdocker.py | 4 ++-- helpers/hserver.py | 2 -- helpers/test/test_hmarkdown.py | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 760e27414..e2a134dd8 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -681,7 +681,7 @@ def run_dockerized_prettier( is_caller_host = not hserver.is_inside_docker() # TODO(gp): After fix for CmampTask10710 enable this. # use_sibling_container_for_callee = hserver.use_docker_sibling_containers() - use_sibling_container_for_callee = False + use_sibling_container_for_callee = True caller_mount_path, callee_mount_path, mount = get_docker_mount_info( is_caller_host, use_sibling_container_for_callee ) @@ -1851,7 +1851,7 @@ def run_dockerized_graphviz( ) # Convert files to Docker paths. 
is_caller_host = not hserver.is_inside_docker() - use_sibling_container_for_callee = True + use_sibling_container_for_callee = False caller_mount_path, callee_mount_path, mount = get_docker_mount_info( is_caller_host, use_sibling_container_for_callee ) diff --git a/helpers/hserver.py b/helpers/hserver.py index 2e8dbc06c..b867836da 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -645,8 +645,6 @@ def docker_needs_sudo() -> bool: """ if not has_docker(): return False - if not has_dind_support() and not use_docker_sibling_containers(): - return False # Another way to check is to see if your user is in the docker group: # > groups | grep docker rc = os.system("docker run hello-world 2>&1 >/dev/null") diff --git a/helpers/test/test_hmarkdown.py b/helpers/test/test_hmarkdown.py index 218355e60..bf7db32b6 100644 --- a/helpers/test/test_hmarkdown.py +++ b/helpers/test/test_hmarkdown.py @@ -582,7 +582,7 @@ def test_multiple_slides(self) -> None: # Call function. act = hmarkdo.extract_slides_from_markdown(content) # Check output. - exp = r"""[HeaderInfo(1, 'Slide 1', 1), HeaderInfo(1, 'Slide 2', 3), HeaderInfo(1, 'Slide 3', 5)]""" + exp = r"""[HeaderInfo(1, 'Slide 1', 3), HeaderInfo(1, 'Slide 2', 8), HeaderInfo(1, 'Slide 3', 11)]""" self.assert_equal(str(act), exp) def test_single_slides(self) -> None: @@ -591,7 +591,7 @@ def test_single_slides(self) -> None: # Call function. act = hmarkdo.extract_slides_from_markdown(content) # Check output. 
- exp = r"""[HeaderInfo(1, 'Header1', 1), HeaderInfo(2, 'Header2', 3)]""" + exp = r"""[HeaderInfo(1, 'Slide1', 3)]""" self.assert_equal(str(act), exp) def test_no_slides(self) -> None: From b95957f9d4c16cabf40d94e628fa50e128388503 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 10 Jun 2025 18:33:55 -0400 Subject: [PATCH 172/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/notes_to_pdf.py | 49 ++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index e27ee8238..ecd3559bb 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -101,6 +101,7 @@ def _cleanup_before(prefix: str) -> None: # ############################################################################# +# TODO(gp): Move to hmarkdown and test it. def _filter_by_header(file_name: str, header: str, prefix: str) -> str: """ Extract a specific header from a file. @@ -120,6 +121,29 @@ def _filter_by_header(file_name: str, header: str, prefix: str) -> str: return file_out +# TODO(gp): Move to hmarkdown and test it. +def _parse_range(range_as_str: str, max_value: int) -> tuple[int, int]: + """ + Parse a line range string like '1:10' into start and end line numbers. 
+ + :param range_as_str: String in format 'start:end' where start/end can be numbers or 'None' + :param max_value: Maximum value to use when 'None' is specified for end + :return: Tuple of (start_line, end_line) as integers + """ + m = re.match(r"^(\S+):(\S+)$", range_as_str) + hdbg.dassert(m, "Invalid range_as_str='%s'", range_as_str) + start_value, end_value = m.groups() + if start_value.lower() == "none": + start_value = 1 + else: + start_value = int(start_value) + if end_value.lower() == "none": + end_value = max_value + 1 + else: + end_value = int(end_value) + return start_value, end_value + + def _filter_by_lines(file_name: str, filter_by_lines: str, prefix: str) -> str: """ Filter the lines of a file in [start_line, end_line[. @@ -133,17 +157,7 @@ def _filter_by_lines(file_name: str, filter_by_lines: str, prefix: str) -> str: txt = hio.from_file(file_name) txt = txt.split("\n") # E.g., filter_by_lines='1:10'. - m = re.match(r"^(\S+):(\S+)$", filter_by_lines) - hdbg.dassert(m, "Invalid filter_by_lines='%s'", filter_by_lines) - start_line, end_line = m.groups() - if start_line.lower() == "none": - start_line = 1 - else: - start_line = int(start_line) - if end_line.lower() == "none": - end_line = len(txt) + 1 - else: - end_line = int(end_line) + start_line, end_line = _parse_range(filter_by_lines, len(txt)) # Filter by header. hdbg.dassert_lte(start_line, end_line) txt = txt[start_line - 1 : end_line - 1] @@ -154,6 +168,7 @@ def _filter_by_lines(file_name: str, filter_by_lines: str, prefix: str) -> str: return file_out +# TODO(gp): Move to hmarkdown and test it. def _filter_by_slides(file_name: str, filter_by_slides: str, prefix: str) -> str: """ Filter the lines of a file in [start_slide, end_slide[. @@ -166,7 +181,17 @@ def _filter_by_slides(file_name: str, filter_by_slides: str, prefix: str) -> str # Read the file. txt = hio.from_file(file_name) # Filter by header. 
- txt = hmarkdo.extract_section_from_markdown(txt, header) + slides_info = hmarkdo.extract_slides_from_markdown(txt) + # E.g., filter_by_lines='1:10'. + start_slide, end_slide = _parse_range(filter_by_slides, len(slides_info)) + hdbg.dassert_lte(start_slide, end_slide) + hdbg.dassert_lt(end_slide, len(slides_info)) + start_slide_line = slides_info[start_slide].line_number + end_slide_line = slides_info[end_slide].line_number + # Filter by slides. + txt = txt.split("\n") + txt = txt[start_slide_line - 1 : end_slide_line - 1] + txt = "\n".join(txt) # Save the file. file_out = f"{prefix}.filter_by_slides.txt" hio.to_file(file_out, txt) From 3dde2f64a2152b070cd02e55c1a534cfe39ea68b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 10 Jun 2025 21:54:29 -0400 Subject: [PATCH 173/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/documentation/notes_to_pdf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dev_scripts_helpers/documentation/notes_to_pdf.py b/dev_scripts_helpers/documentation/notes_to_pdf.py index ecd3559bb..80df66ba0 100755 --- a/dev_scripts_helpers/documentation/notes_to_pdf.py +++ b/dev_scripts_helpers/documentation/notes_to_pdf.py @@ -162,6 +162,7 @@ def _filter_by_lines(file_name: str, filter_by_lines: str, prefix: str) -> str: hdbg.dassert_lte(start_line, end_line) txt = txt[start_line - 1 : end_line - 1] txt = "\n".join(txt) + _LOG.warning("filter_by_lines='%s' -> lines=[%s:%s]", filter_by_lines, start_line, end_line) # file_out = f"{prefix}.filter_by_lines.txt" hio.to_file(file_out, txt) @@ -186,11 +187,12 @@ def _filter_by_slides(file_name: str, filter_by_slides: str, prefix: str) -> str start_slide, end_slide = _parse_range(filter_by_slides, len(slides_info)) hdbg.dassert_lte(start_slide, end_slide) hdbg.dassert_lt(end_slide, len(slides_info)) - start_slide_line = 
slides_info[start_slide].line_number - end_slide_line = slides_info[end_slide].line_number + start_line = slides_info[start_slide].line_number + end_line = slides_info[end_slide].line_number + _LOG.warning("filter_by_slides='%s' -> lines=[%s:%s]", filter_by_slides, start_line, end_line) # Filter by slides. txt = txt.split("\n") - txt = txt[start_slide_line - 1 : end_slide_line - 1] + txt = txt[start_line - 1 : end_line - 1] txt = "\n".join(txt) # Save the file. file_out = f"{prefix}.filter_by_slides.txt" From 15794a0823e597afbe0c44749b529266644bd3a7 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Wed, 11 Jun 2025 14:43:29 -0400 Subject: [PATCH 174/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 3 ++- helpers/hlatex.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 423727896..a24a24c1b 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -650,7 +650,8 @@ def latex_rewrite() -> _PROMPT_OUT: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} - post_container_transforms = ["format_latex"] + #post_container_transforms = ["format_latex"] + post_container_transforms = [] return system, pre_transforms, post_transforms, post_container_transforms diff --git a/helpers/hlatex.py b/helpers/hlatex.py index 33fe3b270..2be5d41de 100644 --- a/helpers/hlatex.py +++ b/helpers/hlatex.py @@ -27,6 +27,8 @@ def convert_pandoc_md_to_latex(txt: str) -> str: hdocker.run_dockerized_pandoc(cmd, container_type) # Read tmp file. res = hio.from_file(out_file_name) + # Remove lines that contain \tightlist. 
+ res = "\n".join([line for line in res.splitlines() if "\\tightlist" not in line]) return res From 28b2d33602023e6e7d18ceb1b4c338daa903a58b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 14 Jun 2025 06:23:25 -0400 Subject: [PATCH 175/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hmarkdown.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index e8a355a23..3a6d938a3 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -414,7 +414,7 @@ def __init__(self, level: int, description: str, line_number: int): self.level = level # hdbg.dassert_isinstance(description, str) - hdbg.dassert_ne(description, "") + hdbg.dassert_ne(description, "", "Invalid HeaderInfo: %s, %s, %s", level, description, line_number) self.description = description # hdbg.dassert_isinstance(line_number, int) @@ -1383,4 +1383,4 @@ def format_latex(txt: str) -> str: file_type = "tex" txt = dshdlino.prettier_on_str(txt, file_type) txt_ = cast(str, txt) - return txt_ \ No newline at end of file + return txt_ From 553f591b41c129e13a267efde28f168f15f06c1a Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 07:05:27 -0400 Subject: [PATCH 176/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 3 ++- .../dockerized_extract_notebook_images.py | 14 ++++---------- .../notebooks/extract_notebook_images.py | 9 ++++++--- ...nb => test_notebook_image_extractor.ipynb} | 0 ...test_dockerized_extract_notebook_images.py | 5 ++++- .../test/test_extract_notebook_images.py | 11 ++++++----- .../all.extract_notes.reference.md | 19 +++++++++++++++++++ 7 files changed, 41 insertions(+), 20 deletions(-) rename 
dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/{test_images.ipynb => test_notebook_image_extractor.ipynb} (100%) create mode 100644 docs/tools/documentation_toolchain/all.extract_notes.reference.md diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index a24a24c1b..80eb1fed2 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1119,11 +1119,12 @@ def slide_reduce() -> _PROMPT_OUT: - Keep all the figures - Make sure that the text is clean and readable - Remove all the words that are not needed - - Minimize the changes to the text + - Use "you" instead of "we" - Use `E.g.,` instead of `Example` Print only the markdown without any explanation. """ + # - Minimize the changes to the text pre_transforms: Set[str] = set() post_transforms = { "remove_code_delimiters", diff --git a/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py b/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py index 4a50bc636..8acdb6f52 100755 --- a/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py @@ -1,14 +1,5 @@ #!/usr/bin/env python3 -""" -This script is designed to run a transformation script using LLMs. It requires -certain dependencies to be present (e.g., `openai`) and thus it is executed -within a Docker container. - -To use this script, you need to provide the input file, output file, and -the type of transformation to apply. -""" - import argparse import logging import os @@ -33,8 +24,11 @@ class _NotebookImageExtractor: """ Extract marked regions from a Jupyter notebook, convert them to HTML and - captures screenshots. + capture screenshots. Initialize with input notebook path and output directory. 
+ + See documentation at: + //helpers/docs/tools/documentation_toolchain/all.extract_notebook_images.* """ def __init__(self, notebook_path: str, output_dir: str) -> None: diff --git a/dev_scripts_helpers/notebooks/extract_notebook_images.py b/dev_scripts_helpers/notebooks/extract_notebook_images.py index 984e498f1..9349d3d82 100755 --- a/dev_scripts_helpers/notebooks/extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/extract_notebook_images.py @@ -3,10 +3,13 @@ Extract images from a Jupyter notebook by running inside a Docker container. This script builds the container dynamically if necessary and extracts images -from the specified Jupyter notebook using the NotebookImageExtractor module. +from the specified Jupyter notebook using the `NotebookImageExtractor` module. -Extract images from notebook test_images.ipynb and save them to `screenshots` -directory. +See documentation at: +//helpers/docs/tools/documentation_toolchain/all.extract_notebook_images.* + +# Extract images from notebook `test_images.ipynb` and save them to +# `screenshots` directory. 
```bash > dev_scripts_helpers/notebooks/extract_notebook_images.py \ -i dev_scripts_helpers/notebooks/test_images.ipynb \ diff --git a/dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/test_images.ipynb b/dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/test_notebook_image_extractor.ipynb similarity index 100% rename from dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/test_images.ipynb rename to dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/test_notebook_image_extractor.ipynb diff --git a/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py b/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py index 8c5674160..df9891633 100644 --- a/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py @@ -13,9 +13,12 @@ class TestNotebookImageExtractor1(hunitest.TestCase): def test1(self) -> None: + # Prepare the input. input_dir = self.get_input_dir() - src_test_notebook = os.path.join(input_dir, "test_images.ipynb") + src_test_notebook = os.path.join(input_dir, "test_notebook_image_extractor.ipynb") + # Run the function. act = dshndb._NotebookImageExtractor._extract_regions_from_notebook( src_test_notebook ) + # Check the output. 
print(act) diff --git a/dev_scripts_helpers/notebooks/test/test_extract_notebook_images.py b/dev_scripts_helpers/notebooks/test/test_extract_notebook_images.py index 61d4d462e..50795bf32 100644 --- a/dev_scripts_helpers/notebooks/test/test_extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/test/test_extract_notebook_images.py @@ -21,14 +21,15 @@ class Test_run_dockerized_notebook_image_extractor1(hunitest.TestCase): def test1(self) -> None: """ - Test the `run_dockerized_notebook_image_extractor` function. + Test the `_run_dockerized_notebook_image_extractor()` function. - Get the test notebook ('test_images.ipynb') from the input directory, - run the Docker container to extract images, and verify that the expected - output files are produced. + - Get the test notebook from the input directory + - Run the Docker container to extract images + - Verify that the expected output files are produced """ + # Prepare the input and output. input_dir = self.get_input_dir() - src_test_notebook = os.path.join(input_dir, "test_images.ipynb") + src_test_notebook = os.path.join(input_dir, "test_notebook_image_extractor.ipynb") output_dir = self.get_output_dir() # Run the container. 
dshnbe._run_dockerized_extract_notebook_images( diff --git a/docs/tools/documentation_toolchain/all.extract_notes.reference.md b/docs/tools/documentation_toolchain/all.extract_notes.reference.md new file mode 100644 index 000000000..f3c9aba48 --- /dev/null +++ b/docs/tools/documentation_toolchain/all.extract_notes.reference.md @@ -0,0 +1,19 @@ +# + +- The documentation is: + - `docs/tools/documentation_toolchain/all.extract_notebook_images.how_to_guide.md` + - `docs/tools/documentation_toolchain/all.extract_notebook_images.reference.md` + +- This is implemented as a dockerized executables and thus there are two scripts: + - `dev_scripts_helpers/notebooks/extract_notebook_images.py` + - Parses the command line options + - Create a Docker image with all the dependencies and run it + - `dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py` + - Instantiate `_NotebookImageExtractor` + - Run the image extraction + +- The testing of the scripts are: + - `dev_scripts_helpers/notebooks/test/test_extract_notebook_images.py` + - Test the end-to-end script with a handcrafted notebook + `dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/test_notebook_image_extractor.ipynb` + - `dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py` From f8990523f376913644eb3c6006434a3ed4451397 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 08:14:36 -0400 Subject: [PATCH 177/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../dockerized_extract_notebook_images.py | 80 ++++++++----- .../notebooks/extract_notebook_images.py | 113 +++++++++++------- ...test_dockerized_extract_notebook_images.py | 6 +- ...ll.extract_notebook_images.how_to_guide.md | 25 ++-- 4 files changed, 140 insertions(+), 84 deletions(-) diff --git 
a/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py b/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py index 8acdb6f52..14489c0b2 100755 --- a/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/dockerized_extract_notebook_images.py @@ -12,6 +12,7 @@ import helpers.hdbg as hdbg import helpers.hio as hio import helpers.hparser as hparser +import helpers.hprint as hprint _LOG = logging.getLogger(__name__) @@ -25,13 +26,20 @@ class _NotebookImageExtractor: """ Extract marked regions from a Jupyter notebook, convert them to HTML and capture screenshots. - Initialize with input notebook path and output directory. See documentation at: //helpers/docs/tools/documentation_toolchain/all.extract_notebook_images.* """ def __init__(self, notebook_path: str, output_dir: str) -> None: + """ + Initialize the notebook image extractor. + + :param notebook_path: path to the input notebook + :param output_dir: path to the output directory + """ + _LOG.debug(hprint.to_str("notebook_path output_dir")) + hdbg.dassert_file_exists(notebook_path) self.notebook_path = notebook_path self.output_dir = output_dir @@ -76,8 +84,12 @@ def test_func(): :return: tuples (mode, out_filename, region_cells) for each extraction region. """ + _LOG.debug(hprint.to_str("notebook_path")) + hdbg.dassert_file_exists(notebook_path) # Read notebook. nb = nbformat.read(notebook_path, as_version=4) + hdbg.dassert_in("cells", nb) + _LOG.debug("cells=\n%s", "\n".join(map(str, nb.cells))) # Define the regex for the start / endmarker. start_marker_regex = re.compile( r""" @@ -99,25 +111,33 @@ def test_func(): end_marker_regex = re.compile(r"#\s*end_extract\s*") # Initialize variables. regions = [] + # Store if we are inside an extraction region. in_extract = False + # Store the current extraction mode (e.g., `only_input`, `only_output`, + # or `all`). current_mode = None + # Store the current output filename. 
current_out_filename = None + # List of the cells in the current extraction region. current_cells = [] # Iterate over the cells in the notebook. for cell_idx, cell in enumerate(nb.cells): + _LOG.debug("\n" + hprint.frame(hprint.to_str("cell_idx cell"))) + _LOG.debug("-> " + hprint.to_str("in_extract")) if cell.cell_type != "code": continue # Check if the cell contains a start marker. m = start_marker_regex.search(cell.source) if m: + # A start marker was found. + _LOG.debug("Found a start marker") hdbg.dassert( not in_extract, - "Found a start marker while in an extraction region at cell %s\n%s", + "Found a start marker inside an extraction region: %s\n%s", cell_idx, cell.source, ) - # A start marker was found. - # Capture the mode and output filename + # Capture the mode and output filename. current_mode = m.group(1) hdbg.dassert_in( current_mode, @@ -125,30 +145,36 @@ def test_func(): ) current_out_filename = m.group(2) in_extract = True - # Remove the start marker from the cell. + _LOG.debug(hprint.to_str("current_mode current_out_filename in_extract")) + # Remove the start marker from the cell, since we don't want + # to show it in the captured picture. cell.source = start_marker_regex.sub("", cell.source).strip() + # We are inside an extraction region, so: + # - end the extraction region; or + # - continue adding cells to the region + m = end_marker_regex.search(cell.source) + if m: + _LOG.debug("Found an end marker") + hdbg.dassert( + in_extract, + "Found an end marker outside an extraction region: %s\n%s", + cell_idx, + cell.source, + ) + # Add the current region to the list of regions. + current_cells.append(cell) + regions.append( + (current_mode, current_out_filename, current_cells) + ) + # Reset the state. + current_cells = [] + in_extract = False + # Remove the end marker from the cell. + cell.source = end_marker_regex.sub("", cell.source).strip() else: - # We are inside an extraction region, so continue adding cells - # to the region. 
- m = end_marker_regex.search(cell.source) - if m: - hdbg.dassert( - in_extract, - "Found an end marker while not in an extraction region at cell %s\n%s", - cell_idx, - cell.source, - ) - current_cells.append(cell) - regions.append( - (current_mode, current_out_filename, current_cells) - ) - current_cells = [] - in_extract = False - # Remove the end marker from the cell. - cell.source = end_marker_regex.sub("", cell.source).strip() - else: - # If there's no end marker, just keep adding cells to the current region. - current_cells.append(cell) + # If there's no end marker, just keep adding cells to the current region. + _LOG.debug("Continuing to add cells to the current region") + current_cells.append(cell) if not regions: _LOG.warning("No extraction markers found in the notebook.") return regions @@ -218,7 +244,7 @@ def extract_and_capture(self) -> list: :return: list of paths to the screenshot files. """ - regions = self._extract_regions_from_notebook() + regions = self._extract_regions_from_notebook(self.notebook_path) screenshot_files = [] # Create screenshots folder if it doesn't exist. screenshots_folder = self.output_dir diff --git a/dev_scripts_helpers/notebooks/extract_notebook_images.py b/dev_scripts_helpers/notebooks/extract_notebook_images.py index 9349d3d82..6c2e553ec 100755 --- a/dev_scripts_helpers/notebooks/extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/extract_notebook_images.py @@ -71,49 +71,76 @@ def _run_dockerized_extract_notebook_images( _LOG.debug(hprint.func_signature_to_str()) # Build the container image, if needed. container_image = "tmp.extract_notebook_images" - dockerfile = r""" - # This seems to be flaky on ARM64 architectures. - #FROM python:3.10-slim - FROM python:3.10 - - # Install required system libraries for Chromium and Playwright. 
- RUN apt-get update && apt-get install -y \ - libglib2.0-0 \ - libnss3 \ - libnspr4 \ - libdbus-1-3 \ - libatk1.0-0 \ - libatk-bridge2.0-0 \ - libexpat1 \ - libatspi2.0-0 \ - libdbus-glib-1-2 \ - libxcomposite1 \ - libxdamage1 \ - libxfixes3 \ - libxrandr2 \ - libgbm1 \ - libxkbcommon0 \ - libasound2 \ - libcups2 \ - libpango-1.0-0 \ - libcairo2 \ - && rm -rf /var/lib/apt/lists/* - - # Set the environment variable for Playwright to install browsers in a known - # location. - ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright - - # Create the directory for Playwright browsers and ensure it's writable. - RUN mkdir -p /ms-playwright && chmod -R 777 /ms-playwright - - # Install required packages. - RUN pip install nbconvert nbformat playwright pyyaml - - # Install Playwright browsers. - RUN python -m playwright install - - WORKDIR /app - """ + if False: + container_image = "tmp.extract_notebook_images" + dockerfile = r""" + # This seems to be flaky on ARM64 architectures. + #FROM python:3.10-slim + FROM python:3.10 + + # # Install required system libraries for Chromium and Playwright. + # RUN apt-get update && apt-get install -y \ + # libglib2.0-0 \ + # libnss3 \ + # libnspr4 \ + # libdbus-1-3 \ + # libatk1.0-0 \ + # libatk-bridge2.0-0 \ + # libexpat1 \ + # libatspi2.0-0 \ + # libdbus-glib-1-2 \ + # libxcomposite1 \ + # libxdamage1 \ + # libxfixes3 \ + # libxrandr2 \ + # libgbm1 \ + # libxkbcommon0 \ + # libasound2 \ + # libcups2 \ + # libpango-1.0-0 \ + # libcairo2 + + RUN rm -rf /var/lib/apt/lists/* + RUN apt-get update + + # Set the environment variable for Playwright to install browsers in a known + # location. + ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright + + # Create the directory for Playwright browsers and ensure it's writable. + RUN mkdir -p /ms-playwright && chmod -R 777 /ms-playwright + + # Install required packages. + RUN pip install nbconvert nbformat playwright pyyaml + + # Install Playwright browsers. 
+ RUN python -m playwright install + + RUN playwright install-deps + + RUN playwrite --version + + WORKDIR /app + """ + if True: + dockerfile = r""" + FROM mcr.microsoft.com/playwright:v1.53.0-noble + + WORKDIR /app + + #RUN playwright --version + + # Install Python and pip + RUN apt-get update && apt-get install -y \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + + # Copy your requirements and install Python dependencies + RUN pip install nbconvert nbformat playwright pyyaml + + RUN python --version + """ container_image = hdocker.build_container_image( container_image, dockerfile, force_rebuild, use_sudo ) diff --git a/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py b/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py index df9891633..e7e0d5a07 100644 --- a/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/test/test_dockerized_extract_notebook_images.py @@ -2,6 +2,7 @@ import os import helpers.hunit_test as hunitest +import helpers.hprint as hprint import dev_scripts_helpers.notebooks.dockerized_extract_notebook_images as dshndb _LOG = logging.getLogger(__name__) @@ -14,7 +15,10 @@ class TestNotebookImageExtractor1(hunitest.TestCase): def test1(self) -> None: # Prepare the input. - input_dir = self.get_input_dir() + input_dir = self.get_input_dir( + test_class_name="Test_run_dockerized_notebook_image_extractor1", + test_method_name="test_run_dockerized_notebook_image_extractor") + _LOG.info(hprint.to_str("input_dir")) src_test_notebook = os.path.join(input_dir, "test_notebook_image_extractor.ipynb") # Run the function. 
act = dshndb._NotebookImageExtractor._extract_regions_from_notebook( diff --git a/docs/tools/documentation_toolchain/all.extract_notebook_images.how_to_guide.md b/docs/tools/documentation_toolchain/all.extract_notebook_images.how_to_guide.md index fd0543e56..c5a016877 100644 --- a/docs/tools/documentation_toolchain/all.extract_notebook_images.how_to_guide.md +++ b/docs/tools/documentation_toolchain/all.extract_notebook_images.how_to_guide.md @@ -1,17 +1,16 @@ -Extract images from a Jupyter notebook by running inside a Docker container. -This script builds the container dynamically if necessary and extracts images -from the specified Jupyter notebook using the NotebookImageExtractor module. +This script extracts images from a Jupyter notebook annotated with tags to +determine which cells and how they need to be saved: -Extract images from notebook test_images.ipynb and save them to `screenshots` -directory. ```bash +# Extract images from notebook and save them to `screenshots` directory: > dev_scripts_helpers/notebooks/extract_notebook_images.py \ - -i dev_scripts_helpers/notebooks/test_images.ipynb \ - -o dev_scripts_helpers/notebooks/screenshots + --in_notebook_filename ./dev_scripts_helpers/notebooks/test/outcomes/Test_run_dockerized_notebook_image_extractor1.test_run_dockerized_notebook_image_extractor/input/test_notebook_image_extractor.ipynb \ + --out_image_dir screenshots ``` -``` -# start_extract(mode)=<output_filename> +The notebook contains tags inside the cells in the format below: +```text +# start_extract(<mode>)=<output_filename> ... # end_extract ``` @@ -19,19 +18,19 @@ directory. Example: 1. To extract only the input code: - # start_extract(only_input)=input_code.py ```python + # start_extract(only_input)=input_code.py def test_func(): return "Test" - ``` # end_extract + ``` 2. 
To extract only the output of code: - # start_extract(only_output)=output.png ```python + # start_extract(only_output)=output.png print("This is the output") - ``` # end_extract + ``` 3. To extract both code and output: # start_extract(all)=full_output.html From f86fa93668cea13afc31ae1ce754038b45d1db74 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 09:06:16 -0400 Subject: [PATCH 178/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/notebooks/extract_notebook_images.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dev_scripts_helpers/notebooks/extract_notebook_images.py b/dev_scripts_helpers/notebooks/extract_notebook_images.py index 6c2e553ec..09336dd89 100755 --- a/dev_scripts_helpers/notebooks/extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/extract_notebook_images.py @@ -128,7 +128,6 @@ def _run_dockerized_extract_notebook_images( WORKDIR /app - #RUN playwright --version # Install Python and pip RUN apt-get update && apt-get install -y \ @@ -137,9 +136,13 @@ def _run_dockerized_extract_notebook_images( && rm -rf /var/lib/apt/lists/* # Copy your requirements and install Python dependencies - RUN pip install nbconvert nbformat playwright pyyaml + RUN pip install --break-system-packages nbconvert nbformat pyyaml playwright + + RUN npx playwright --version + + RUN python3 --version - RUN python --version + RUN python3 -c 'import importlib.metadata; print(importlib.metadata.version("playwright"))' """ container_image = hdocker.build_container_image( container_image, dockerfile, force_rebuild, use_sudo From ccaa2b8cdd8d5d3f6490392741dfee90c06386df Mon Sep 17 00:00:00 2001 From: saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 14:16:33 +0000 Subject: [PATCH 179/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit 
checks: All checks passed ✅ --- .../notebooks/extract_notebook_images.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dev_scripts_helpers/notebooks/extract_notebook_images.py b/dev_scripts_helpers/notebooks/extract_notebook_images.py index 09336dd89..993215b36 100755 --- a/dev_scripts_helpers/notebooks/extract_notebook_images.py +++ b/dev_scripts_helpers/notebooks/extract_notebook_images.py @@ -71,7 +71,7 @@ def _run_dockerized_extract_notebook_images( _LOG.debug(hprint.func_signature_to_str()) # Build the container image, if needed. container_image = "tmp.extract_notebook_images" - if False: + if True: container_image = "tmp.extract_notebook_images" dockerfile = r""" # This seems to be flaky on ARM64 architectures. @@ -114,15 +114,15 @@ def _run_dockerized_extract_notebook_images( RUN pip install nbconvert nbformat playwright pyyaml # Install Playwright browsers. - RUN python -m playwright install - RUN playwright install-deps + RUN python -m playwright install - RUN playwrite --version + #RUN npx playwright --version + RUN python --version WORKDIR /app """ - if True: + if False: dockerfile = r""" FROM mcr.microsoft.com/playwright:v1.53.0-noble From ca5578e25eed9f4ca689bac72c7cead1fca91af9 Mon Sep 17 00:00:00 2001 From: saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 14:29:31 +0000 Subject: [PATCH 180/193] Improve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hserver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index b867836da..b3b01653e 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -913,10 +913,13 @@ def use_docker_sibling_containers() -> bool: """ Return whether to use Docker sibling containers. 
- Using sibling containers requires that all Docker containers in the + Using sibling containers requires that all Docker containers are in the same network so that they can communicate with each other. """ - val = is_dev4() or _is_mac_version_with_sibling_containers() + if is_dev_csfy(): + val = True + else: + val = is_dev4() or _is_mac_version_with_sibling_containers() return val From 47c043789515186c00c07bf8b0b5c2bc284120a4 Mon Sep 17 00:00:00 2001 From: saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 14:55:57 +0000 Subject: [PATCH 181/193] Improve --- helpers/hserver.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index b3b01653e..06c351b33 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -916,10 +916,11 @@ def use_docker_sibling_containers() -> bool: Using sibling containers requires that all Docker containers are in the same network so that they can communicate with each other. """ - if is_dev_csfy(): - val = True - else: - val = is_dev4() or _is_mac_version_with_sibling_containers() + return has_docker_sibling_containers_support() + # if is_dev_csfy(): + # val = True + # else: + val = is_dev4() or _is_mac_version_with_sibling_containers() return val @@ -1165,4 +1166,4 @@ def config_func_to_str() -> str: ret.append(msg) # Package. ret = "\n".join(ret) - return ret + t return ret From c37d9ccec91a3320d9dcf9c9bedfaf73bcc6b7ac Mon Sep 17 00:00:00 2001 From: saggese <saggese@gmail.com> Date: Sun, 15 Jun 2025 14:59:39 +0000 Subject: [PATCH 182/193] Improve --- helpers/hserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helpers/hserver.py b/helpers/hserver.py index 06c351b33..68c4281d6 100644 --- a/helpers/hserver.py +++ b/helpers/hserver.py @@ -1166,4 +1166,4 @@ def config_func_to_str() -> str: ret.append(msg) # Package. 
ret = "\n".join(ret) - t return ret + return ret From 3f57aef34f14b6b80d0739012c9d9eed36e07a01 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Tue, 17 Jun 2025 06:42:15 -0400 Subject: [PATCH 183/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_transform.py | 1 - helpers/hmarkdown.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index cab8f8a6a..529b141c7 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -249,7 +249,6 @@ def _main(parser: argparse.ArgumentParser) -> None: raise ValueError(f"Invalid prompt='{args.prompt}'") hparser.write_file(txt, out_file_name) return - # TODO(gp): We should just automatically pass-through the options. cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] if args.fast_model: diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index 3a6d938a3..c9111a085 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -554,6 +554,7 @@ def extract_slides_from_markdown( header_list: HeaderList = [] # Process the input file to extract headers. for line_number, line in enumerate(txt.splitlines(), start=1): + _LOG.debug("%d: %s", line_number, line) # TODO(gp): Use the iterator. # Skip the visual separators. 
if is_markdown_line_separator(line): From e894113443124a2b10c616c9924bb2124f539a12 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 19 Jun 2025 13:45:45 -0400 Subject: [PATCH 184/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/lib_tasks_gh.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/helpers/lib_tasks_gh.py b/helpers/lib_tasks_gh.py index 327c4dcb3..58bcdce1e 100644 --- a/helpers/lib_tasks_gh.py +++ b/helpers/lib_tasks_gh.py @@ -906,3 +906,9 @@ def render_repo_workflow_status_table( subset=["conclusion"], ) ) + +# ############################################################################# + +# def gh_get_pr_title(pr_url: str) -> str: +# > gh pr view https://github.com/causify-ai/helpers/pull/754 --json title -q .title +# HelpersTask705_Extend_coverage_in_pytest_to_cover_when_we_run_through_system \ No newline at end of file From 2218f7d8d1a598c82c6a81b6bc7d141fc7d8d7bd Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sat, 21 Jun 2025 08:20:39 -0400 Subject: [PATCH 185/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- docs/work_tools/git/all.ai_review.how_to_guide.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/work_tools/git/all.ai_review.how_to_guide.md b/docs/work_tools/git/all.ai_review.how_to_guide.md index 4c1a7cdf2..492d24e7d 100644 --- a/docs/work_tools/git/all.ai_review.how_to_guide.md +++ b/docs/work_tools/git/all.ai_review.how_to_guide.md @@ -66,6 +66,7 @@ - `text_*`: process free form (not markdown) text - You can list the available transformations with: + ```bash > llm_transform.py -p list # Available prompt tags: @@ -119,6 +120,7 @@ - These transformations don't need LLMs and are implemented as code - You can see the available transforms with: + ```bash > 
transform_notes.py -a list test: compute the hash of a string to test the flow @@ -161,9 +163,11 @@ - Go to the Git branch with the code to review - Check which files are modified + ```bash > invoke git_branch_diff_with -t base --only-print-files ``` + - Run `ai_review.py` on each file to generate a list of comments on the code - This is equivalent to running a `review` target with `llm_transform.py` (e.g., `llm_transform.py -p review_*`) but it is a separated flow for @@ -175,6 +179,7 @@ ```bash > vim -c "cfile cfile" ``` + - You can fix the code according to the TODOs - Discard a TODO as a false positive or not important @@ -189,6 +194,7 @@ modify a file in a separate commit, so that it's easy to review - There are multiple targets for the `ai_review.py` + ```bash > PROMPT=review_llm > PROMPT=review_correctness @@ -212,6 +218,7 @@ ``` > ai_review.py -i template_code.py ``` + ``` > llm_transform.py -i template_code.py -p code_fix_code ``` @@ -231,12 +238,14 @@ 2. When the code to review is in the repo `//helpers` - In this case, we can use a different Git client to develop and "sync" the `linter.py` / `ai_review.py` code from one client to another + ```bash > \cp -f /Users/saggese/src/helpers1/helpers/lib_tasks_lint.py helpers && i lint_sync_code ``` + - Before committing the review, we then revert the `linter.py / ai_review.py` code + ```bash > i lint_sync_code -r - ``` - + ``` \ No newline at end of file From b77126b9a11d64b92524be0ab6844a439e20dd9b Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Mon, 23 Jun 2025 17:49:52 -0400 Subject: [PATCH 186/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/render_images.py | 3 +- dev_scripts_helpers/llms/llm_prompts.py | 60 ++++++++++++++++--- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/dev_scripts_helpers/documentation/render_images.py 
b/dev_scripts_helpers/documentation/render_images.py index fb24fc4f5..b2e49f8f0 100755 --- a/dev_scripts_helpers/documentation/render_images.py +++ b/dev_scripts_helpers/documentation/render_images.py @@ -252,7 +252,8 @@ def _render_image_code( \usepackage{pgfplots} \usepackage{mathrsfs} % For script font \usepackage{xcolor} - \pgfplotsset{compat=1.17} + \usetikzlibrary{positioning} + \pgfplotsset{compat=newest} \begin{document} \begin{tikzpicture} """) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 80eb1fed2..f7a0bce01 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1066,11 +1066,20 @@ def review_refactoring() -> _PROMPT_OUT: # ############################################################################# +_SLIDE_CONTEXT = r""" + You are a proficient technical writer who writes slides in markdown. + + - If a line starts with an asterisk *, consider it as an header andleave it unchanged + + I will pass you a chunk of markdown code. + """ + + def slide_to_bullet_points() -> _PROMPT_OUT: """ Convert the markdown text into bullet points. 
""" - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will: - Convert the following markdown text into bullet points @@ -1089,7 +1098,7 @@ def slide_to_bullet_points() -> _PROMPT_OUT: def slide_expand() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will: - Maintain the structure of the text and keep the content of the existing @@ -1112,10 +1121,11 @@ def slide_expand() -> _PROMPT_OUT: def slide_reduce() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will: - Maintain the structure of the text + - If a line starts with an asterisk *, consider it as an header andleave it unchanged - Keep all the figures - Make sure that the text is clean and readable - Remove all the words that are not needed @@ -1136,7 +1146,7 @@ def slide_reduce() -> _PROMPT_OUT: def slide_reduce_bullets() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will: - Maintain the structure of the text @@ -1156,8 +1166,42 @@ def slide_reduce_bullets() -> _PROMPT_OUT: return system, pre_transforms, post_transforms, post_container_transforms +def slide_reduce2() -> _PROMPT_OUT: + system = _SLIDE_CONTEXT + system += r""" + You will: + - Maintain the structure of the text + - Keep all the figures + - Make sure that the text is clean and readable + - Use "you" instead of "we" + + - 1 idea per bullet: Keep each point focused on a single concept + - Max 5-6 bullets per slide: Avoid cognitive overload + - Max 6-8 words per bullet: Short phrases, not full sentences + - Parallel structure: Start each bullet with the same part of speech (e.g., verbs) + - No full stops (unless it's a complete sentence needing emphasis) + + - Be concise: Drop filler words ("the", "that", etc.) + - Use active voice: "Improve accuracy" instead of "Accuracy can be improved." 
+ - Use verbs for actions: e.g., "Collect feedback", "Analyze results" + - Highlight outcomes: Emphasize value, not process: + "Boost retention" > "Use spaced repetition." + + Print only the markdown without any explanation. + """ + # - Minimize the changes to the text + pre_transforms: Set[str] = set() + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_slide"] + return system, pre_transforms, post_transforms, post_container_transforms + + def slide_bold() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will: - Not change the text or the structure of the text @@ -1176,7 +1220,7 @@ def slide_bold() -> _PROMPT_OUT: def slide_smart_colorize() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will: - Not change the text or the structure of the text @@ -1216,7 +1260,7 @@ def slide_smart_colorize() -> _PROMPT_OUT: def slide_add_figure() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" You will create a figure that illustrates the text using Graphviz dot. @@ -1261,7 +1305,7 @@ def slide_add_figure() -> _PROMPT_OUT: def slide_check() -> _PROMPT_OUT: - system = _MD_CONTEXT + system = _SLIDE_CONTEXT system += r""" - Do not print the content of the slide, but only the comment. 
From 2cae6f02e0df94dcff16fbb41f5ac0a649fb6a85 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 26 Jun 2025 20:21:30 -0400 Subject: [PATCH 187/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 76 ++++++++++++++++++++----- helpers/hmarkdown.py | 2 +- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index f7a0bce01..9eb3962d3 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -1069,8 +1069,6 @@ def review_refactoring() -> _PROMPT_OUT: _SLIDE_CONTEXT = r""" You are a proficient technical writer who writes slides in markdown. - - If a line starts with an asterisk *, consider it as an header andleave it unchanged - I will pass you a chunk of markdown code. """ @@ -1121,16 +1119,70 @@ def slide_expand() -> _PROMPT_OUT: def slide_reduce() -> _PROMPT_OUT: + """ + Reduce the text in a slide. + """ system = _SLIDE_CONTEXT system += r""" You will: - - Maintain the structure of the text - - If a line starts with an asterisk *, consider it as an header andleave it unchanged + - Maintain the structure of the text in terms of bullet and sub-bullet points - Keep all the figures - - Make sure that the text is clean and readable - - Remove all the words that are not needed + - Make the text clean and readable + - Remove all the words that are not needed and that are not important - Use "you" instead of "we" - - Use `E.g.,` instead of `Example` + - Be concise: Drop filler words ("the", "that", etc.) + - Use active voice: "Improve accuracy" instead of "Accuracy can be improved." 
+ + - If a line starts with an asterisk *, it's the slide title and leave it + unchanged + Examples: + <input> + * Slide title + - This is a very long bullet point that is not clear and should be removed + - This is a clear bullet point that should be kept + </input> + + <output> + * Slide title + - This is a clear bullet point that should be kept + </output> + + Print only the markdown without any explanation. + """ + # - Minimize the changes to the text + pre_transforms: Set[str] = set() + post_transforms = { + "remove_code_delimiters", + "remove_end_of_line_periods", + "remove_empty_lines", + } + post_container_transforms = ["format_slide"] + return system, pre_transforms, post_transforms, post_container_transforms + + +def slide_definition() -> _PROMPT_OUT: + """ + Highlight the definitions in the text. + """ + system = _SLIDE_CONTEXT + system += r""" + - If there is a definition in the text, add bold to the term being defined + - Do not bold the term if it is already bold + - Use the latex `\defeq` in the definition instead of `=` + - Move the symbol of the definition to the left of the term being defined + + Example1: + Input: + - Entropy of a discrete random variable $X$ is defined as + $$H(X) = -\sum_x p(x) \log p(x)$$ + - Entropy quantifies the average level of information / surprise / uncertainty + inherent in the variable's possible outcomes + + Output: + - The **entropy** $H(X)$ of a discrete random variable $X$ is defined as: + $$H(X) \defeq -\sum_x p(x) \log p(x)$$ + - Entropy quantifies the average level of information / surprise / uncertainty + inherent in the variable's possible outcomes Print only the markdown without any explanation. 
""" @@ -1146,6 +1198,9 @@ def slide_reduce() -> _PROMPT_OUT: def slide_reduce_bullets() -> _PROMPT_OUT: + """ + Remove the bullet points that are + """ system = _SLIDE_CONTEXT system += r""" You will: @@ -1169,12 +1224,7 @@ def slide_reduce_bullets() -> _PROMPT_OUT: def slide_reduce2() -> _PROMPT_OUT: system = _SLIDE_CONTEXT system += r""" - You will: - - Maintain the structure of the text - - Keep all the figures - - Make sure that the text is clean and readable - - Use "you" instead of "we" - + You will make sure that the text has the following characteristics: - 1 idea per bullet: Keep each point focused on a single concept - Max 5-6 bullets per slide: Avoid cognitive overload - Max 6-8 words per bullet: Short phrases, not full sentences diff --git a/helpers/hmarkdown.py b/helpers/hmarkdown.py index c9111a085..3c65ebd29 100644 --- a/helpers/hmarkdown.py +++ b/helpers/hmarkdown.py @@ -289,7 +289,7 @@ def md_clean_up(txt: str) -> str: # Replace \[ ... \] math syntax with $$ ... $$, handling multiline equations. txt = re.sub(r"\\\[(.*?)\\\]", r"$$\1$$", txt, flags=re.DOTALL) # Replace `P(.)`` with `\Pr(.)`. - txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) + # txt = re.sub(r"P\((.*?)\)", r"\\Pr(\1)", txt) # Replace \left[, \right]. 
txt = re.sub(r"\\left\[", r"[", txt) txt = re.sub(r"\\right\]", r"]", txt) From 7c6e1577afe9b95846578568dc46af3bbc4b1198 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 27 Jun 2025 06:21:25 -0400 Subject: [PATCH 188/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hcoverage.py | 78 ++++++++++++++++++++++++-------------------- helpers/hdocker.py | 10 ++++-- 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/helpers/hcoverage.py b/helpers/hcoverage.py index 3ea7a74c5..26b3909d4 100644 --- a/helpers/hcoverage.py +++ b/helpers/hcoverage.py @@ -27,6 +27,7 @@ def _detect_site_packages() -> pathlib.Path: if purelib: return pathlib.Path(purelib) except (KeyError, IOError): + # TODO(Maddy): _LOG.debug pass try: sp_dirs = site.getsitepackages() @@ -40,12 +41,14 @@ def _detect_site_packages() -> pathlib.Path: def inject(coveragerc: str = ".coveragerc") -> None: """ - Install the coverage startup hook into this envs site-packages using sudo - tee. + Install the coverage startup hook into this env site-packages. """ rc = pathlib.Path(coveragerc).resolve() + # TODO(Maddy): -> dassert. if not rc.is_file(): raise FileNotFoundError(f".coveragerc not found at {rc}") + # TODO(Maddy): IMO this doesn't work since the var is created in a bash + # that is then killed. It's not persistent. hsystem.system(f"export COVERAGE_PROCESS_START={rc}") sp = _detect_site_packages() target = sp / "coverage.pth" @@ -53,16 +56,16 @@ def inject(coveragerc: str = ".coveragerc") -> None: cmd = f'echo "{hook_line}" | sudo tee "{target}" > /dev/null' try: hsystem.system(cmd) - _LOG.info("Installed coverage hook to %s via sudo tee", target) + _LOG.debug("Installed coverage hook to %s via sudo tee", target) except Exception as e: + # TODO(Maddy): Just assert here, no reason to continue. 
_LOG.error("Failed to install coverage hook via sudo tee: %s", e) raise e def remove() -> None: """ - Remove the coverage startup hook from this envs site-packages using sudo - rm. + Remove the coverage startup hook from this env site-packages. """ sp = _detect_site_packages() target = sp / "coverage.pth" @@ -75,14 +78,15 @@ def remove() -> None: _LOG.error("Failed to remove coverage hook via sudo rm: %s", e) raise else: + # TODO(Maddy): Is this acceptable? _LOG.warning("No coverage.pth found in %s", sp) - # Remove coverage environment variables. + # Remove coverage environment variables. try: if "COVERAGE_PROCESS_START" in os.environ: hsystem.system("unset COVERAGE_PROCESS_START") _LOG.info("Removed COVERAGE_PROCESS_START from environment") else: - _LOG.info("COVERAGE_PROCESS_START not found in environment") + _LOG.debug("COVERAGE_PROCESS_START not found in environment") except Exception as e: _LOG.error("Failed to remove COVERAGE_PROCESS_START: %s", e) raise @@ -90,39 +94,41 @@ def remove() -> None: def generate_coverage_dockerfile() -> str: """ - Build a Dockerfile string that appends coverage support: - 1. Installs coverage, pytest, pytest-cov at build time - 2. Creates /coverage_data and writes .coveragerc - 3. Sets ENV COVERAGE_PROCESS_START to /coverage_data/.coveragerc - 4. Writes a coverage.pth into site-packages so coverage auto-starts + Build a Dockerfile string that appends coverage support. 
""" - return """ -# Install coverage and testing dependencies -RUN pip install --no-cache-dir coverage pytest pytest-cov - -# Create coverage data directory with proper permissions -RUN mkdir -p /app/coverage_data && chmod 777 /app/coverage_data - -# Setup coverage configuration -COPY .coveragerc /app/coverage_data/.coveragerc -ENV COVERAGE_PROCESS_START=/app/coverage_data/.coveragerc - -# Create coverage.pth file for automatic startup -# This ensures coverage tracking starts automatically when Python runs -RUN python - <<PYCODE -import site, os -site_dir = site.getsitepackages()[0] -pth_file = os.path.join(site_dir, 'coverage.pth') -with open(pth_file, 'w') as f: - f.write('import coverage; coverage.process_startup()') -PYCODE -""" + # This requires to: + # - Install coverage, pytest, pytest-cov at build time + # - Create /coverage_data and writes .coveragerc + # - Set ENV COVERAGE_PROCESS_START to /coverage_data/.coveragerc + # - Write a coverage.pth into site-packages so coverage auto-starts + txt = """ + # Install coverage and testing dependencies. + RUN pip install --no-cache-dir coverage pytest pytest-cov + + # Create coverage data directory with proper permissions. + RUN mkdir -p /app/coverage_data && chmod 777 /app/coverage_data + + # Setup coverage configuration. + COPY .coveragerc /app/coverage_data/.coveragerc + ENV COVERAGE_PROCESS_START=/app/coverage_data/.coveragerc + + # Create coverage.pth file for automatic startup. + # This ensures coverage tracking starts automatically when Python runs. + RUN python - <<PYCODE + import site, os + site_dir = site.getsitepackages()[0] + pth_file = os.path.join(site_dir, 'coverage.pth') + with open(pth_file, 'w') as f: + f.write('import coverage; coverage.process_startup()') + PYCODE + """ + txt = hprint.dedent(txt) + return txt def coverage_commands_subprocess() -> None: """ - Return a list of shell commands to run coverage steps in a Docker - container. 
+ Execute shell commands to run coverage steps in a Docker container. Assumes: - A valid .coveragerc exists in the current working directory. @@ -138,7 +144,7 @@ def coverage_commands_subprocess() -> None: def coverage_combine() -> None: """ - Return a list of shell commands to combine coverage data. + Execute shell commands to combine coverage data. Assumes: - .coverage.* files are present in the current directory. diff --git a/helpers/hdocker.py b/helpers/hdocker.py index 553dab3d3..a8061d5ef 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -369,6 +369,7 @@ def get_docker_base_cmd(use_sudo: bool) -> List[str]: vars_to_pass_as_str, ] if os.environ.get("COVERAGE_PROCESS_START"): + _LOG.debug("Enabling coverage") host_cov_dir = os.path.abspath("coverage_data") os.makedirs(host_cov_dir, exist_ok=True) os.chmod(host_cov_dir, 0o777) @@ -411,9 +412,12 @@ def build_container_image( # dockerfile = hprint.dedent(dockerfile) # Add install coverage and hook to the Dockerfile. - dockerfile = ( - dockerfile.strip() + "\n" + hcovera.generate_coverage_dockerfile() - ) + # TODO(Maddy): Pass a var. + if False: + _LOG.debug("Enabling coverage") + dockerfile = ( + dockerfile.strip() + "\n" + hcovera.generate_coverage_dockerfile() + ) _LOG.debug("Dockerfile:\n%s", dockerfile) # Get the current architecture. 
current_arch = get_current_arch() From d76644a2ce1166dff636bc514fa0bb806e96a1d1 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Fri, 27 Jun 2025 18:36:13 -0400 Subject: [PATCH 189/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../documentation/lint_notes.py | 59 ++++++++++++++++++- .../documentation/test/test_lint_notes.py | 33 +++++++++++ .../all.notes_toolchain.how_to_guide.md | 10 ++-- 3 files changed, 95 insertions(+), 7 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index 8e03b3ff3..a28d4ab91 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ -Lint "notes" files. +Lint md / tex / txt files > lint_notes.py -i foo.md -o bar.md \ --use_dockerized_prettier \ @@ -13,7 +13,7 @@ ``` """ -# TODO(gp): -> lint_md.py +# TODO(gp): -> lint_md.py? import argparse import logging @@ -340,6 +340,54 @@ def _refresh_toc( return txt # type: ignore +def _improve_header_and_slide_titles(txt: str) -> str: + """ + Improve the header and slide titles. + + - Headers start with one or more `#`s. + - Slide titles start with one `*` + - The title is transformed to title case as below: + - ML theory -> ML Theory + - A map of machine learning -> A Map of Machine Learning + """ + txt_new: List[str] = [] + for i, line in enumerate(txt.split("\n")): + # Parse header (starting with `#`) and slide title (starting with `*`). + m = re.match(r"^(\#+|\*) (.*)$", line) + if m: + # Parse the title. + title = m.group(2) + # Transform to title case, leaving words that are all capitalized + # and conjunctions as is. + non_cap_words = {'a', 'an', 'and', 'as', 'at', 'but', 'by', 'for', + 'in', 'of', 'on', 'or', 'the', 'to', 'vs', 'with'} + # Split into words + words = title.split() + # Process each word. 
+ for i, word in enumerate(words): + if i == 0 and not word.isupper(): + # Capitalize the first word. + words[i] = word.title() + elif word.isupper(): + # Skip words that are all caps (e.g. ML, API). + continue + elif word.lower() in non_cap_words: + # Don't capitalize conjunctions and other minor words. + words[i] = word.lower() + else: + # Capitalize other words. + words[i] = word.title() + + title = ' '.join(words) + # Reconstruct the line. + line = m.group(1) + " " + title + txt_new.append(line) + else: + txt_new.append(line) + txt_new_as_str = "\n".join(txt_new) + return txt_new_as_str + + # ############################################################################# @@ -388,8 +436,14 @@ def _process( # Frame chapters. action = "frame_chapters" if _to_execute_action(action, actions): + # For markdown files, we don't use the frame since it's not rendered + # correctly. if not is_md_file: txt = _frame_chapters(txt) + # Improve header and slide titles. + action = "improve_header_and_slide_titles" + if _to_execute_action(action, actions): + txt = _improve_header_and_slide_titles(txt) # Refresh table of content. action = "refresh_toc" if _to_execute_action(action, actions): @@ -405,6 +459,7 @@ def _process( "prettier", "postprocess", "frame_chapters", + "improve_header_and_slide_titles", "refresh_toc", ] diff --git a/dev_scripts_helpers/documentation/test/test_lint_notes.py b/dev_scripts_helpers/documentation/test/test_lint_notes.py index e3e8af6ff..6dffdf5ef 100644 --- a/dev_scripts_helpers/documentation/test/test_lint_notes.py +++ b/dev_scripts_helpers/documentation/test/test_lint_notes.py @@ -461,3 +461,36 @@ def test2(self) -> None: output_txt = self.run_lint_notes(in_file, type_, cmd_opts) # Check. self.check_string(output_txt) + + +class Test_improve_header_and_slide_titles1(hunitest.TestCase): + """ + Test the function `_improve_header_and_slide_titles`. 
+ """ + + def helper(self, txt: str, exp: str) -> None: + txt = hprint.dedent(txt) + exp = hprint.dedent(exp) + act = dshdlino._improve_header_and_slide_titles(txt) + self.assert_equal(act, exp) + + def test1(self) -> None: + txt = r""" + * ML theory + """ + exp = r""" + * ML Theory + """ + self.helper(txt, exp) + + def test2(self) -> None: + """ + Test the function `_improve_header_and_slide_titles`. + """ + txt = r""" + * A map of machine learning + """ + exp = r""" + * A Map of Machine Learning + """ + self.helper(txt, exp) \ No newline at end of file diff --git a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md index f0ed8f077..7b7dee884 100644 --- a/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md +++ b/docs/tools/documentation_toolchain/all.notes_toolchain.how_to_guide.md @@ -300,14 +300,14 @@ The supported File types and code blocks are: ### What it does - Tidy up Markdown/LaTeX/txt notes by: - - Normalising G‑Doc artifacts - - Running Prettier + - Normalising Google Doc artifacts + - Running `prettier` - Fixing bullet/heading quirks - - Refreshing the Table of Contents + - Refreshing the table of contents ### Examples -- Prettify with Dockerised Prettier and TOC rebuild +- Prettify with Dockerized Prettier and TOC rebuild ```bash > lint_notes.py -i Lesson10.md \ @@ -318,7 +318,7 @@ The supported File types and code blocks are: - Custom print width and selective actions ```bash > lint_notes.py -i draft.txt -o tidy.txt -w 100 \ - --action preprocess,prettier,postprocess + --action preprocess,prettier,postprocess ``` ### Interface From afdc21059b2a9da4de89a1e9d8c2a61c06e5bf32 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 29 Jun 2025 10:12:46 -0400 Subject: [PATCH 190/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- 
.../documentation/lint_notes.py | 10 +- .../documentation/preprocess_notes.py | 121 +++++++++++++----- .../test/test_preprocess_notes.py | 46 ++++++- 3 files changed, 144 insertions(+), 33 deletions(-) diff --git a/dev_scripts_helpers/documentation/lint_notes.py b/dev_scripts_helpers/documentation/lint_notes.py index a28d4ab91..feccf9db9 100755 --- a/dev_scripts_helpers/documentation/lint_notes.py +++ b/dev_scripts_helpers/documentation/lint_notes.py @@ -1,7 +1,13 @@ #!/usr/bin/env python """ -Lint md / tex / txt files +Lint md / tex / txt files by applying a series of actions: +- preprocess: remove some artifacts when copying from gdoc" +- prettier: run prettier to format the text +- postprocess: remove empty lines before ```, before higher level bullets, ... +- frame_chapters: add a frame around each chapter +- improve_header_and_slide_titles: improve the header and slide titles +- refresh_toc: refresh the table of content, if needed > lint_notes.py -i foo.md -o bar.md \ --use_dockerized_prettier \ @@ -478,6 +484,7 @@ def _parser() -> argparse.ArgumentParser: action="store", type=str, default="", + help="The type of the input file, e.g., `md`, `tex`, `txt`", ) parser.add_argument( "-w", @@ -485,6 +492,7 @@ def _parser() -> argparse.ArgumentParser: action="store", type=int, default=80, + help="The maximum line width for the formatted text. If None, 80 is used" ) parser.add_argument( "--use_dockerized_prettier", diff --git a/dev_scripts_helpers/documentation/preprocess_notes.py b/dev_scripts_helpers/documentation/preprocess_notes.py index bee0f7783..ed003ca86 100755 --- a/dev_scripts_helpers/documentation/preprocess_notes.py +++ b/dev_scripts_helpers/documentation/preprocess_notes.py @@ -56,6 +56,31 @@ def _process_abbreviations(in_line: str) -> str: return line +# Define colors and their LaTeX equivalents. 
+_COLORS = { + "red": "red", + "orange": "orange", + "yellow": "yellow", + "lime": "lime", + # + "green": "darkgreen", + "teal": "teal", + "cyan": "cyan", + "blue": "blue", + "purple": "purple", + "violet": "violet", + "magenta": "magenta", + "pink": "pink", + "brown": "brown", + "olive": "olive", + "gray": "gray", + "darkgray": "darkgray", + "lightgray": "lightgray", + "black": "black", + "white": "white", +} + + def _process_color_commands(in_line: str) -> str: r""" Transform color commands like `\red{xyz}` into valid LaTeX syntax. @@ -66,29 +91,7 @@ def _process_color_commands(in_line: str) -> str: - \red{abc} -> \textcolor{red}{\text{abc}} - \blue{x + y} -> \textcolor{blue}{x + y} """ - # Define supported colors - colors = { - "red": "red", - "orange": "orange", - "yellow": "yellow", - "lime": "lime", - "green": "darkgreen", - "teal": "teal", - "cyan": "cyan", - "blue": "blue", - "purple": "purple", - "violet": "violet", - "magenta": "magenta", - "pink": "pink", - "brown": "brown", - "olive": "olive", - "gray": "gray", - "darkgray": "darkgray", - "lightgray": "lightgray", - "black": "black", - "white": "white", - } - for color, value in colors.items(): + for color, value in _COLORS.items(): # This regex matches LaTeX color commands like \red{content}, \blue{content}, etc. pattern = re.compile( rf""" @@ -112,6 +115,62 @@ def _replacement(match: re.Match, value: str) -> str: # Replace the color command with the LaTeX color command. in_line = re.sub(pattern, lambda m: _replacement(m, value), in_line) return in_line + + +def _has_color_command(line: str) -> bool: + hdbg.dassert_isinstance(line, str) + hdbg.dassert_not_in("\n", line) + for color in _COLORS.keys(): + # This regex matches LaTeX color commands like \red{content}, \blue{content}, etc. + pattern = re.compile( + rf""" + \\{color} # Match the color command (e.g., \red, \blue, etc.). + \{{ # Match the opening curly brace. + ([^}}]*) # Capture everything inside the curly braces. 
+            \}} # Match the closing curly brace.
+            """,
+            re.VERBOSE,
+        )
+        if re.search(pattern, line):
+            return True
+    return False
+
+
+def _colorize_bullet_points(txt: str) -> str:
+    """
+    Given a string with bold text (but no color), colorize the bold text.
+    """
+    # Scan the text line by line and count how many bold items there are.
+    tot_bold = 0
+    for line in txt.split("\n"):
+        # Count the number of bold items.
+        tot_bold += len(re.findall(r"\*\*", line))
+    _LOG.debug("tot_bold=%s", tot_bold)
+    if tot_bold == 0:
+        return txt
+    hdbg.dassert_eq(tot_bold % 2, 0, "tot_bold=%s needs to be even", tot_bold)
+    # Use the colors in the order of the list of colors.
+    num_bolds = tot_bold // 2
+    hdbg.dassert_lte(num_bolds, len(_COLORS))
+    colors = list(_COLORS.keys())[:num_bolds]
+    _LOG.debug("colors=%s", colors)
+    # Colorize the bold items.
+    color_idx = 0
+    out_txt = ""
+    for line in txt.split("\n"):
+        # Replace the strings like "**foo**" with a string like "**\red{foo}**".
+        # Find all bold text patterns and wrap them with color commands
+        # Keep track of which color to use for each match
+        def color_replacer(match):
+            nonlocal color_idx
+            text = match.group(1)
+            hdbg.dassert_lt(color_idx, len(colors))
+            color_to_use = colors[color_idx]
+            color_idx += 1
+            return f"**\\{color_to_use}{{{text}}}**"
+        line = re.sub(r"\*\*([^*]+)\*\*", color_replacer, line)
+        out_txt += line + "\n"
+    return out_txt
 
 
 def _process_enumerated_list(in_line: str) -> str:
@@ -198,7 +257,7 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str:
         _LOG.debug("%s:line=%s", i, line)
         # 1) Remove comment block.
         if _TRACE:
-            _LOG.debug("# 1) Process comment block.")
+            _LOG.debug("# Process comment block.")
         do_continue, in_skip_block = hmarkdo.process_comment_block(
             line, in_skip_block
         )
@@ -208,7 +267,7 @@
             continue
         # 2) Remove code block. 
if _TRACE: - _LOG.debug("# 2) Process code block.") + _LOG.debug("# Process code block.") # TODO(gp): Not sure why this is needed. For sure the extra spacing # creates a problem with the Python code blocks rendered by pandoc beamer. if False: @@ -220,25 +279,25 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: continue # 3) Remove single line comment. if _TRACE: - _LOG.debug("# 3) Process single line comment.") + _LOG.debug("# Process single line comment.") do_continue = hmarkdo.process_single_line_comment(line) if do_continue: continue # 4) Expand abbreviations. if _TRACE: - _LOG.debug("# 4) Process abbreviations.") + _LOG.debug("# Process abbreviations.") line = _process_abbreviations(line) # 5) Process enumerated list. if _TRACE: - _LOG.debug("# 5) Process enumerated list.") + _LOG.debug("# Process enumerated list.") line = _process_enumerated_list(line) # 6) Process color commands. if _TRACE: - _LOG.debug("# 6) Process color commands.") + _LOG.debug("# Process color commands.") line = _process_color_commands(line) # 7) Process question. if _TRACE: - _LOG.debug("# 7) Process question.") + _LOG.debug("# Process question.") if type_ == "slides": do_continue, line = _process_question_to_slides(line) else: @@ -248,7 +307,7 @@ def _transform_lines(txt: str, type_: str, *, is_qa: bool = False) -> str: continue # 8) Process empty lines in the questions and answers. 
if _TRACE: - _LOG.debug("# 8) Process empty lines in the questions and answers.") + _LOG.debug("# Process empty lines in the questions and answers.") if not is_qa: out.append(line) else: diff --git a/dev_scripts_helpers/documentation/test/test_preprocess_notes.py b/dev_scripts_helpers/documentation/test/test_preprocess_notes.py index 3874950e9..abd64948c 100644 --- a/dev_scripts_helpers/documentation/test/test_preprocess_notes.py +++ b/dev_scripts_helpers/documentation/test/test_preprocess_notes.py @@ -90,6 +90,50 @@ def test_nested_braces1(self) -> None: self.assert_equal(act, exp) +# ############################################################################# +# Test_colorize_bullet_points1 +# ############################################################################# + + +class Test_colorize_bullet_points1(hunitest.TestCase): + def helper(self, txt_in: str, exp: str) -> None: + """ + Test colorize bullet points. + """ + txt_in = hprint.dedent(txt_in) + act = dshdprno._colorize_bullet_points(txt_in) + exp = hprint.dedent(exp) + self.assert_equal(act, exp) + + def test1(self) -> None: + """ + Test colorize bullet points. 
+ """ + txt_in = r""" + - **VC Theory** + - Measures model + + - **Bias-Variance Decomposition** + - Prediction error + - **Bias** + - **Variance** + + - **Computation Complexity** + - Balances model + - Related to + - E.g., Minimum + + - **Bayesian Approach** + - Treats ML as probability + - Combines prior knowledge with observed data to update belief about a model + + - **Problem in ML Theory:** + - Assumptions may not align with practical problems + """ + exp = r""" + """ + self.helper(txt_in, exp) + # ############################################################################# # Test_preprocess_notes1 # ############################################################################# @@ -245,4 +289,4 @@ def _is_integer(value): ``` """ exp = hprint.dedent(exp, remove_lead_trail_empty_lines_=True) - self.assert_equal(act, exp) + self.assert_equal(act, exp) \ No newline at end of file From d812756da1a0fbfc76cf0e16f8862dc866b795f9 Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Sun, 29 Jun 2025 18:06:57 -0400 Subject: [PATCH 191/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/llms/llm_apply_cfile.py | 3 +- dev_scripts_helpers/llms/llm_prompts.py | 119 ++++++++++++++---- dev_scripts_helpers/llms/llm_transform.py | 3 +- .../llms/test/test_llm_prompts.py | 1 + 4 files changed, 103 insertions(+), 23 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_apply_cfile.py b/dev_scripts_helpers/llms/llm_apply_cfile.py index 6e60e7457..b28b0af96 100755 --- a/dev_scripts_helpers/llms/llm_apply_cfile.py +++ b/dev_scripts_helpers/llms/llm_apply_cfile.py @@ -153,7 +153,8 @@ def _main(parser: argparse.ArgumentParser) -> None: ) if args.prompt == "list": print("# Available prompt tags:") - print("\n".join(dshlllpr.get_prompt_tags())) + prompt_tags = dshlllpr.get_prompt_tags() + print(dshlllpr.prompt_tags_to_str(prompt_tags)) return # TODO(gp): We 
should just automatically pass-through the options. cmd_line_opts = [f"-p {args.prompt}", f"-v {args.log_level}"] diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 7ac67ced9..5fdfabe33 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -21,15 +21,15 @@ @functools.lru_cache(maxsize=1) -def get_prompt_tags() -> List[str]: +def get_prompt_tags() -> List[Tuple[str, str]]: """ - Return the list of functions in this file that can be called as a prompt. + Return the list of prompt functions in this file and their docstrings. """ # Read current file. curr_path = os.path.abspath(__file__) file_content = hio.from_file(curr_path) # - matched_functions = [] + functions = [] # Parse the file content into an AST. tree = ast.parse(file_content) for node in ast.walk(tree): @@ -46,10 +46,29 @@ def get_prompt_tags() -> List[str]: return_type_str = ast.unparse(node.returns) _LOG.debug(hprint.to_str("node.name args return_type_str")) if has_no_args and return_type_str == "_PROMPT_OUT": + # Add function name to list if it matches the expected signature. _LOG.debug(" -> matched") - matched_functions.append(node.name) - matched_functions = sorted(matched_functions) - return matched_functions + # Extract docstring if it exists. + docstring = ast.get_docstring(node) + _LOG.debug("docstring='%s'", docstring) + docstring = "" if docstring is None else docstring + functions.append((node.name, docstring)) + functions = sorted(functions) + return functions + + +def prompt_tags_to_str(prompt_tags: List[Tuple[str, str]]) -> str: + """ + Return a string representation of the prompt tags. + """ + # Find the longest tag length. + max_tag_len = max(len(tag) for tag, _ in prompt_tags) + # Format each line with aligned docstrings. 
+ lines = [] + for tag, docstring in prompt_tags: + padding = " " * (max_tag_len - len(tag)) + lines.append(f"{tag}{padding} {docstring}") + return "\n".join(lines) # ############################################################################# @@ -69,7 +88,7 @@ def get_post_container_transforms( global _POST_CONTAINER_TRANSFORMS # Initialize the dictionary, on the first call. if not _POST_CONTAINER_TRANSFORMS: - valid_prompts = get_prompt_tags() + valid_prompts = list(zip(*get_prompt_tags()))[0] # Call all the functions and register their `post_container_transforms`. for prompt in valid_prompts: _, _, _, post_container_transforms = eval(f"{prompt}()") @@ -175,6 +194,9 @@ def process_data(data: List[float], threshold: float = 0.5) -> List[float]: def code_fix_function_type_hints() -> _PROMPT_OUT: + """ + Add type hints to the function definitions, if they are missing. + """ system = _CODING_CONTEXT system += r""" Add type hints only to the function definitions, if they are missing. @@ -207,13 +229,6 @@ def process_data(data: List[float], threshold: float = 0.5) -> List[float]: def code_fix_docstrings() -> _PROMPT_OUT: """ Add or complete a REST docstring to Python code. - - Each function should have a docstring that describes the function, - its parameters, and its return value. - - Create examples of the values in input and output of each function, - only when you are sure of the types and values of variables. If you - are not sure, do not add any information. """ system = _CODING_CONTEXT system += r''' @@ -244,6 +259,9 @@ def _format_greeting(name: str, *, greeting: str = "Hello") -> str: def code_fix_complex_assignments() -> _PROMPT_OUT: + """ + Convert complex assignments into if-then-else statements. + """ system = _CODING_CONTEXT system += r""" Convert complex assignments into if-then-else statements. 
@@ -437,9 +455,7 @@ def code_fix_log_string() -> _PROMPT_OUT: def code_fix_by_using_f_strings() -> _PROMPT_OUT: """ - Fix code to use f-strings, like `f"Hello, {name}. - - You are {age} years old."`. + Fix code to use f-strings instead of % formatting. """ system = _CODING_CONTEXT system += r""" @@ -466,7 +482,7 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. + Use % formatting instead of f-strings (formatted string literals). """ system = _CODING_CONTEXT system += r""" @@ -497,8 +513,7 @@ def code_fix_unit_test() -> _PROMPT_OUT: def code_fix_code() -> _PROMPT_OUT: """ - Apply all the transformations required to write code according to the - Causify conventions. + Apply all the transformations required for the Causify code conventions. """ # > grep "def code_fix" ./dev_scripts_helpers/llms/llm_prompts.py | awk '{print $2 }' function_names = [ @@ -536,6 +551,9 @@ def code_fix_code() -> _PROMPT_OUT: def code_transform_remove_redundancy() -> _PROMPT_OUT: + """ + Remove redundancy in the code. + """ system = _CODING_CONTEXT system += r""" You will review the code and look for opportunities to refactor the code, @@ -641,6 +659,9 @@ def code_write_1_unit_test() -> _PROMPT_OUT: def latex_rewrite() -> _PROMPT_OUT: + """ + Rewrite the Latex code to increase clarity and readability. + """ system = _LATEX_CONTEXT system += r""" - Rewrite the text passed to increase clarity and readability. @@ -656,6 +677,9 @@ def latex_rewrite() -> _PROMPT_OUT: def latex_check() -> _PROMPT_OUT: + """ + Check the Latex code is correct and doesn't have errors. + """ system = _LATEX_CONTEXT system += r""" Check the Latex code is correct and doesn't have errors. @@ -680,6 +704,9 @@ def latex_check() -> _PROMPT_OUT: def md_add_good_bad_examples() -> _PROMPT_OUT: + """ + Add good and bad examples to a markdown text. 
+ """ system = _MD_CONTEXT system += r""" You will: @@ -712,6 +739,9 @@ def md_add_good_bad_examples() -> _PROMPT_OUT: def md_rewrite() -> _PROMPT_OUT: + """ + Rewrite the markdown text to increase clarity and readability. + """ system = _MD_CONTEXT system += r""" - Rewrite the text passed to increase clarity and readability. @@ -726,6 +756,9 @@ def md_rewrite() -> _PROMPT_OUT: def md_summarize_short() -> _PROMPT_OUT: + """ + Summarize the markdown text in less than 30 words. + """ system = _MD_CONTEXT system += r""" Summarize the text in less than 30 words. @@ -737,6 +770,9 @@ def md_summarize_short() -> _PROMPT_OUT: def md_expand() -> _PROMPT_OUT: + """ + Expand the markdown text by adding bullet points and examples. + """ system = _MD_CONTEXT system += r""" You will: @@ -761,6 +797,9 @@ def md_expand() -> _PROMPT_OUT: # TODO(gp): Move to template. def md_clean_up_how_to_guide() -> _PROMPT_OUT: + """ + Format the text passed as a how-to guide. + """ system = _MD_CONTEXT system += r""" Format the text passed as a how-to guide. @@ -785,6 +824,9 @@ def md_clean_up_how_to_guide() -> _PROMPT_OUT: def md_convert_text_to_bullet_points() -> _PROMPT_OUT: + """ + Convert the text passed to bullet points. + """ system = _MD_CONTEXT system += r""" - Convert the text passed to bullet points using multiple levels of bullets. @@ -799,6 +841,9 @@ def md_convert_text_to_bullet_points() -> _PROMPT_OUT: def md_convert_table_to_bullet_points() -> _PROMPT_OUT: + """ + Convert the table passed to bullet points. + """ system = _MD_CONTEXT system += r""" - Convert the table passed to bullet points using multiple levels of bullets. @@ -813,6 +858,9 @@ def md_convert_table_to_bullet_points() -> _PROMPT_OUT: def md_format() -> _PROMPT_OUT: + """ + Format the markdown text. 
+ """ system = _MD_CONTEXT system += r""" - Replace `*` with `-` for bullet points @@ -825,6 +873,9 @@ def md_format() -> _PROMPT_OUT: def md_remove_formatting() -> _PROMPT_OUT: + """ + Remove the formatting (bold, italic, etc.) from the markdown text. + """ system = _MD_CONTEXT system += r""" You will: @@ -841,6 +892,9 @@ def md_remove_formatting() -> _PROMPT_OUT: def md_create_bullets() -> _PROMPT_OUT: + """ + Create bullet points from the markdown text. + """ system = _MD_CONTEXT system += r""" You will: @@ -1096,6 +1150,9 @@ def slide_to_bullet_points() -> _PROMPT_OUT: def slide_expand() -> _PROMPT_OUT: + """ + Expand the slide text by adding bullet points and examples. + """ system = _SLIDE_CONTEXT system += r""" You will: @@ -1222,6 +1279,9 @@ def slide_reduce_bullets() -> _PROMPT_OUT: def slide_reduce2() -> _PROMPT_OUT: + """ + Reduce the slide text to a maximum of 5-6 bullets per slide. + """ system = _SLIDE_CONTEXT system += r""" You will make sure that the text has the following characteristics: @@ -1251,6 +1311,9 @@ def slide_reduce2() -> _PROMPT_OUT: def slide_bold() -> _PROMPT_OUT: + """ + Highlight the most important phrases in the text. + """ system = _SLIDE_CONTEXT system += r""" You will: @@ -1270,6 +1333,9 @@ def slide_bold() -> _PROMPT_OUT: def slide_smart_colorize() -> _PROMPT_OUT: + """ + Colorize the most important phrases in the text. + """ system = _SLIDE_CONTEXT system += r""" You will: @@ -1310,6 +1376,9 @@ def slide_smart_colorize() -> _PROMPT_OUT: def slide_add_figure() -> _PROMPT_OUT: + """ + Add a figure to the slide. + """ system = _SLIDE_CONTEXT system += r""" You will create a figure that illustrates the text using Graphviz dot. @@ -1355,6 +1424,9 @@ def slide_add_figure() -> _PROMPT_OUT: def slide_check() -> _PROMPT_OUT: + """ + Check the slide is clear and correct. + """ system = _SLIDE_CONTEXT system += r""" - Do not print the content of the slide, but only the comment. 
@@ -1393,6 +1465,7 @@ def slide_check() -> _PROMPT_OUT: def text_idea() -> _PROMPT_OUT: """ + Come up with suggestions and variations to make it interesting. """ file = "text_idea.txt" if os.path.exists(file): @@ -1407,6 +1480,7 @@ def text_idea() -> _PROMPT_OUT: def text_rephrase() -> _PROMPT_OUT: """ + Rephrase the text using text_rephrase.txt. """ file = "text_rephrase.txt" if os.path.exists(file): @@ -1420,6 +1494,9 @@ def text_rephrase() -> _PROMPT_OUT: def text_rewrite() -> _PROMPT_OUT: + """ + Rewrite the text to increase clarity and readability. + """ system = "" system += r""" - Rewrite the text passed to increase clarity and readability. @@ -1556,7 +1633,7 @@ def run_prompt( """ _LOG.debug(hprint.func_signature_to_str()) # Get the info corresponding to the prompt tag. - prompt_tags = get_prompt_tags() + prompt_tags = list(zip(*get_prompt_tags()))[0] hdbg.dassert_in(prompt_tag, prompt_tags) python_cmd = f"{prompt_tag}()" ( diff --git a/dev_scripts_helpers/llms/llm_transform.py b/dev_scripts_helpers/llms/llm_transform.py index 529b141c7..9fd3cf618 100755 --- a/dev_scripts_helpers/llms/llm_transform.py +++ b/dev_scripts_helpers/llms/llm_transform.py @@ -224,7 +224,8 @@ def _main(parser: argparse.ArgumentParser) -> None: # if args.prompt == "list": print("# Available prompt tags:") - print("\n".join(dshlllpr.get_prompt_tags())) + prompt_tags = dshlllpr.get_prompt_tags() + print(dshlllpr.prompt_tags_to_str(prompt_tags)) return # Parse files. 
in_file_name, out_file_name = hparser.parse_input_output_args(args) diff --git a/dev_scripts_helpers/llms/test/test_llm_prompts.py b/dev_scripts_helpers/llms/test/test_llm_prompts.py index 9ae879d4a..23a82b88e 100644 --- a/dev_scripts_helpers/llms/test/test_llm_prompts.py +++ b/dev_scripts_helpers/llms/test/test_llm_prompts.py @@ -57,6 +57,7 @@ class Test_prompt_tags1(hunitest.TestCase): def test1(self) -> None: prompt_tags = dshlllpr.get_prompt_tags() _LOG.debug(hprint.to_str("prompt_tags")) + _LOG.debug(dshlllpr.prompt_tags_to_str(prompt_tags)) # self.assertGreater(len(prompt_tags), 0) From dccfe212e8da571545be011938d57b130df9eaaf Mon Sep 17 00:00:00 2001 From: GP Saggese <saggese@gmail.com> Date: Thu, 3 Jul 2025 17:54:37 -0400 Subject: [PATCH 192/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .claude/settings.local.json | 3 +- CLAUDE.md | 60 ++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 4034a3004..4eed94d5f 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -3,7 +3,8 @@ "allow": [ "Bash(find:*)", "Bash(invoke --list)", - "Bash(grep:*)" + "Bash(grep:*)", + "Bash(ls:*)" ], "deny": [] } diff --git a/CLAUDE.md b/CLAUDE.md index cb22daae1..c8059aa8f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,7 +8,12 @@ This is the `helpers` repository - a foundational Python library providing utili ### Core Structure -- **`helpers/`** - Core utility modules (hdbg, hio, hpandas, etc.) following `h<name>` naming convention. Each module provides focused functionality (debugging, I/O, pandas extensions, etc.) 
+- **`helpers/`** - Core utility modules (65+ modules) following `h<name>` naming convention organized into categories: + - **Core Infrastructure**: `hdbg`, `hio`, `hsystem`, `hserver`, `henv` - debugging, I/O, system operations + - **Data Processing**: `hpandas`, `hdataframe`, `hnumpy`, `hparquet`, `hcsv` - data manipulation and analysis + - **Testing Framework**: `hunit_test`, `hpytest`, `hcoverage`, `hplayback` - comprehensive testing utilities + - **External Services**: `haws`, `hs3`, `hgit`, `hdocker`, `hchatgpt`, `hllm` - cloud and tool integrations + - **Caching & Performance**: `hcache`, `hcache_simple`, `hjoblib`, `htimer` - performance optimization - **`config_root/`** - Configuration system with `Config` class and builders for hierarchical configuration management - **`linters/`** - Pluggable linting framework with custom linters for code quality (amp_black, amp_isort, etc.) - **`dev_scripts_helpers/`** - Development automation scripts organized by functionality (git, docker, documentation, etc.) 
@@ -86,10 +91,11 @@ invoke git_merge_master
 
 ## Key Configuration
 
-- **`repo_config.yaml`** - Repository metadata including Docker image names, S3 buckets, GitHub settings
-- **`pytest.ini`** - Test configuration with custom markers and options
+- **`repo_config.yaml`** - Repository metadata including Docker image names, S3 buckets, GitHub settings, ECR configuration
+- **`pytest.ini`** - Test configuration with custom markers (`slow`, `superslow`, `requires_docker_in_docker`, `requires_ck_infra`) and options
+- **`pyproject.toml`** - Ruff linting configuration (line length 81, Python 3.11 target) and Fixit settings
 - **`mypy.ini`** - Type checking configuration with library-specific ignore rules
-- **`invoke.yaml`** - Invoke task configuration
+- **`invoke.yaml`** - Invoke task configuration (auto_dash_names: false, echo: true)
 
 ## Development Patterns
 
@@ -108,8 +114,11 @@ import config_root.config.config_ as crococon
 ### Testing Patterns
 - Inherit from `hunitest.TestCase` for enhanced test utilities
 - Use golden file pattern via `check_string()` method
-- Mark tests with appropriate speed markers
-- Use `pytest.mark.requires_docker_in_docker` for Docker-dependent tests
+- Mark tests with appropriate speed markers: `@pytest.mark.slow`, `@pytest.mark.superslow`
+- Use `pytest.mark.requires_docker_in_docker` for tests requiring Docker child/sibling containers
+- Use `pytest.mark.requires_ck_infra` for tests requiring CK infrastructure
+- Use `pytest.mark.no_container` for invoke target tests that run outside containers
+- Test outcomes are stored in `test/outcomes/` directories following the module structure
 
 ### Configuration Management
 - Use `Config` class from `config_root.config.config_` for hierarchical configs
@@ -124,4 +133,41 @@ The custom linting system in `linters/` provides:
 - Integration with invoke tasks for automated linting
 - Support for parallel execution via joblib
 
-When running `invoke lint`, it executes appropriate linters based on file types and applies fixes automatically where possible.
\ No newline at end of file
+When running `invoke lint`, it executes appropriate linters based on file types and applies fixes automatically where possible.
+
+## Helper Module Categories
+
+The 65+ helper modules are organized into functional categories:
+
+### Core Infrastructure & System
+- `hdbg` - Debugging with assertions, `dassert()`, and conditional logging
+- `hio` - File system operations, path utilities, directory management
+- `hsystem` - System command execution, subprocess management
+- `hserver` - Server/environment detection (local vs container vs cloud)
+- `henv` - Environment variable management and configuration
+
+### Data Processing & Analytics
+- `hpandas` - DataFrame extensions, validation, comparison utilities
+- `hdataframe` - Additional DataFrame processing and manipulation
+- `hnumpy` - NumPy extensions and mathematical utilities
+- `hparquet` - Parquet file read/write operations
+- `hcsv` - CSV file handling with robust parsing
+
+### Development & Testing
+- `hunit_test` - Base TestCase class with golden file testing via `check_string()`
+- `hpytest` - pytest-specific helpers and test discovery
+- `hcoverage` - Code coverage collection and reporting
+- `hplayback` - Playback testing for deterministic test execution
+
+### External Services & APIs
+- `haws` - AWS service interactions and authentication
+- `hs3` - S3 bucket operations, file upload/download
+- `hgit` - Git repository operations and branch management
+- `hdocker` - Docker container management and image operations
+- `hchatgpt`, `hllm` - LLM API integrations for AI-assisted development
+
+### Caching & Performance
+- `hcache` - Advanced caching framework with persistence
+- `hcache_simple` - Simple in-memory caching utilities
+- `hjoblib` - Parallel processing with joblib integration
+- `htimer` - Performance timing and measurement utilities
\ No newline at end of file

From cd44b0508cc2e6d0b63adb8b8834c6fe61d7e01b Mon Sep 17 00:00:00 2001
From: GP Saggese <saggese@gmail.com> Date: Sat, 5 Jul 2025 16:46:04 -0400 Subject: [PATCH 193/193] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hcoverage.py | 22 +++++++++++----------- helpers/hdocker.py | 3 +-- helpers/lib_tasks_bash.py | 2 -- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/helpers/hcoverage.py b/helpers/hcoverage.py index 26b3909d4..f7c5651a4 100644 --- a/helpers/hcoverage.py +++ b/helpers/hcoverage.py @@ -8,8 +8,11 @@ import os import pathlib import site +import subprocess import sysconfig +import helpers.hdbg as hdbg +import helpers.hprint as hprint import helpers.hsystem as hsystem _LOG = logging.getLogger(__name__) @@ -27,8 +30,9 @@ def _detect_site_packages() -> pathlib.Path: if purelib: return pathlib.Path(purelib) except (KeyError, IOError): - # TODO(Maddy): _LOG.debug - pass + _LOG.debug( + "sysconfig.get_path('purelib') failed, falling back to site packages" + ) try: sp_dirs = site.getsitepackages() except AttributeError: @@ -44,9 +48,7 @@ def inject(coveragerc: str = ".coveragerc") -> None: Install the coverage startup hook into this env site-packages. """ rc = pathlib.Path(coveragerc).resolve() - # TODO(Maddy): -> dassert. - if not rc.is_file(): - raise FileNotFoundError(f".coveragerc not found at {rc}") + hdbg.dassert(rc.is_file(), f".coveragerc not found at {rc}") # TODO(Maddy): IMO this doesn't work since the var is created in a bash # that is then killed. It's not persistent. hsystem.system(f"export COVERAGE_PROCESS_START={rc}") @@ -57,10 +59,8 @@ def inject(coveragerc: str = ".coveragerc") -> None: try: hsystem.system(cmd) _LOG.debug("Installed coverage hook to %s via sudo tee", target) - except Exception as e: - # TODO(Maddy): Just assert here, no reason to continue. 
- _LOG.error("Failed to install coverage hook via sudo tee: %s", e) - raise e + except (OSError, subprocess.SubprocessError) as e: + hdbg.dassert(False, f"Failed to install coverage hook via sudo tee: {e}") def remove() -> None: @@ -83,6 +83,7 @@ def remove() -> None: # Remove coverage environment variables. try: if "COVERAGE_PROCESS_START" in os.environ: + # TODO(Maddy): This is not persistent, so it doesn't work. hsystem.system("unset COVERAGE_PROCESS_START") _LOG.info("Removed COVERAGE_PROCESS_START from environment") else: @@ -122,8 +123,7 @@ def generate_coverage_dockerfile() -> str: f.write('import coverage; coverage.process_startup()') PYCODE """ - txt = hprint.dedent(txt) - return txt + return str(hprint.dedent(txt)) def coverage_commands_subprocess() -> None: diff --git a/helpers/hdocker.py b/helpers/hdocker.py index a8061d5ef..89b625982 100644 --- a/helpers/hdocker.py +++ b/helpers/hdocker.py @@ -412,8 +412,7 @@ def build_container_image( # dockerfile = hprint.dedent(dockerfile) # Add install coverage and hook to the Dockerfile. - # TODO(Maddy): Pass a var. - if False: + if os.environ.get("COVERAGE_PROCESS_START"): _LOG.debug("Enabling coverage") dockerfile = ( dockerfile.strip() + "\n" + hcovera.generate_coverage_dockerfile() diff --git a/helpers/lib_tasks_bash.py b/helpers/lib_tasks_bash.py index f929f1453..111fa2815 100644 --- a/helpers/lib_tasks_bash.py +++ b/helpers/lib_tasks_bash.py @@ -9,8 +9,6 @@ from invoke import task -import helpers.hgit as hgit - # We want to minimize the dependencies from non-standard Python packages since # this code needs to run with minimal dependencies and without Docker. import helpers.hdbg as hdbg