From 5369c2dd38eb18322d436c9db64c30cdfa33dcf7 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 18:40:05 -0400 Subject: [PATCH 01/36] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .gitignore | 14 + conftest.py | 13 +- docs/work_tools/all.import_check.reference.md | 250 ++++++++++++++++-- generate_deps.py | 41 +++ helpers/lib_tasks_docker.py | 4 +- import_check/dependency_graph.py | 225 ++++++++++++++++ import_check/test/test_dependency_graph.py | 205 ++++++++++++++ setup.py | 7 + tasks.py | 57 +++- 9 files changed, 788 insertions(+), 28 deletions(-) create mode 100644 .gitignore create mode 100644 generate_deps.py create mode 100644 import_check/dependency_graph.py create mode 100644 import_check/test/test_dependency_graph.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..d5fa30a5e --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +__pycache__/ +*.log +tmp.requirements.txt +helpers.egg-info/PKG-INFO +helpers.egg-info/SOURCES.txt +helpers.egg-info/top_level.txt +.coverage +.DS_Store +dependency_graph.dot +dependency_graph.svg +report.txt +report_cycles.txt +report_max_level.txt +i diff --git a/conftest.py b/conftest.py index 9ecd495b1..a8512611c 100644 --- a/conftest.py +++ b/conftest.py @@ -7,21 +7,22 @@ # Hack to workaround pytest not happy with multiple redundant conftest.py # (bug #34). -if not hasattr(hut, "_CONFTEST_ALREADY_PARSED"): +#if not hasattr(hut, "_CONFTEST_ALREADY_PARSED"): # import helpers.hversion as hversi # hversi.check_version() # pylint: disable=protected-access - hut._CONFTEST_ALREADY_PARSED = True + #hut._CONFTEST_ALREADY_PARSED = True # Store whether we are running unit test through pytest. 
# pylint: disable=line-too-long # From https://docs.pytest.org/en/latest/example/simple.html#detect-if-running-from-within-a-pytest-run - def pytest_configure(config: Any) -> None: - _ = config - # pylint: disable=protected-access - hut._CONFTEST_IN_PYTEST = True +def pytest_configure(config: Any) -> None: + _ = config + # pylint: disable=protected-access + hut._CONFTEST_IN_PYTEST = True + def pytest_unconfigure(config: Any) -> None: _ = config # pylint: disable=protected-access diff --git a/docs/work_tools/all.import_check.reference.md b/docs/work_tools/all.import_check.reference.md index 8ed8651f8..9db542ed8 100644 --- a/docs/work_tools/all.import_check.reference.md +++ b/docs/work_tools/all.import_check.reference.md @@ -1,20 +1,30 @@ - - * [show_imports](#show_imports) - * [Basic usage](#basic-usage) - * [Visualize dependencies at a directory level](#visualize-dependencies-at-a-directory-level) - * [Visualize external dependencies](#visualize-external-dependencies) - * [Visualize level X dependencies](#visualize-level-x-dependencies) - * [Visualize cyclic dependencies](#visualize-cyclic-dependencies) - * [Pydeps-dependent limitations](#pydeps-dependent-limitations) - * [NotModuleError](#notmoduleerror) - * [Modules above the target directory](#modules-above-the-target-directory) - * [Run the tool on our codebase -- pre-docker procedure](#run-the-tool-on-our-codebase----pre-docker-procedure) - * [detect_import_cycles](#detect_import_cycles) - * [Basic usage](#basic-usage-1) - - - - + + +- [show_imports](#show_imports) + * [Basic usage](#basic-usage) + * [Visualize dependencies at a directory level](#visualize-dependencies-at-a-directory-level) + * [Visualize external dependencies](#visualize-external-dependencies) + * [Visualize level X dependencies](#visualize-level-x-dependencies) + * [Visualize cyclic dependencies](#visualize-cyclic-dependencies) + * [Pydeps-dependent limitations](#pydeps-dependent-limitations) + + [NotModuleError](#notmoduleerror) + + 
[Modules above the target directory](#modules-above-the-target-directory) + * [Run the tool on our codebase -- pre-docker procedure](#run-the-tool-on-our-codebase----pre-docker-procedure) +- [detect_import_cycles](#detect_import_cycles) + * [Basic usage](#basic-usage-1) +- [show_deps](#show_deps) + * [Overview](#overview) + * [Command usage](#command-usage) + * [Examples](#examples) + + [Generate a text report](#generate-a-text-report) + + [Generate a DOT file for visualization](#generate-a-dot-file-for-visualization) + + [Limit analysis to a specific directory depth](#limit-analysis-to-a-specific-directory-depth) + + [Focus on cyclic dependencies](#focus-on-cyclic-dependencies) + * [Options](#options) + * [Limitations](#limitations) + * [Running the tool on our codebase](#running-the-tool-on-our-codebase) + + # show_imports @@ -325,3 +335,209 @@ following output, detecting two import cycles: ERROR detect_import_cycles.py _main:73 Cyclic imports detected: (input.subdir2.subdir3.file1, input.subdir2.subdir3.file2) ERROR detect_import_cycles.py _main:73 Cyclic imports detected: (input.subdir4.file1, input.subdir4.file2, input.subdir4.file3) ``` + +# show_deps + +## Overview + +- Analyzes Python files in a directory for intra-directory import dependencies +- Generates: + - A text report, or + - A DOT file for visualization +- Useful for understanding module relationships within a project +- Supports options to: + - Limit analysis depth + - Focus on cyclic dependencies + +## Command usage + +```bash +i show_deps [--directory ] [--format ] [--output_file ] [--max_level ] [--show_cycles] +``` + +- **Default behavior**: Produces a text report, printed to stdout. +- **Options**: + - `--directory`: Specifies the directory to analyze (default: current + directory). + - `--format`: Sets the output format (`text` or `dot`, default: `text`). + - `--output_file`: Saves the report to a file (default: stdout for text, + `dependency_graph.dot` for DOT). 
+ - `--max_level`: Limits the directory depth for analysis (e.g., `2` for two + levels). + - `--show_cycles`: Filters the report to show only cyclic dependencies + (default: false). + +## Examples + +The examples below analyze the `helpers` directory, which contains +subdirectories like `notebooks/`. + +### Generate a text report + +Create a text report of all intra-directory dependencies: + +```bash +>i show_deps --directory helpers --format text > report.txt +``` + +Output in `report.txt`: + +```text +helpers/notebooks/gallery_parquet.py imports helpers/hdbg.py, helpers/hio.py +helpers/hdbg.py has no dependencies +helpers/hio.py has no dependencies +... +``` + +### Generate a DOT file for visualization + +Create a DOT file for visualization: + +```bash +>i show_deps --directory helpers --format dot --output_file dependency_graph.dot +>dot -Tsvg dependency_graph.dot -o dependency_graph.svg +>open dependency_graph.svg +``` + +For large graphs, use the `neato` layout engine: + +```bash +>neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 +>open dependency_graph.svg +``` + +### Limit analysis to a specific directory depth + +Restrict analysis to a certain depth with `--max_level` (e.g., `--max_level 2` +includes `helpers/notebooks/`, excludes deeper subdirectories): + +```bash +>i show_deps --directory helpers --format text --max_level 2 > report_max_level.txt +``` + +Output in `report_max_level.txt`: + +```text +helpers/notebooks/gallery_parquet.py imports helpers/hdbg.py, helpers/hio.py +helpers/hdbg.py has no dependencies +helpers/hio.py has no dependencies +... 
+``` + +Visualize the limited graph: + +```bash +>i show_deps --directory helpers --format dot --output_file dependency_graph.dot --max_level 2 +>neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 +>open dependency_graph.svg +``` + +### Focus on cyclic dependencies + +Show only cyclic dependencies with `--show_cycles`: + +```bash +>i show_deps --directory helpers --format text --show_cycles > report_cycles.txt +``` + +Output in `report_cycles.txt` (if cycles exist): + +```text +helpers/module_d.py imports helpers/module_e.py +helpers/module_e.py imports helpers/module_d.py +... +``` + +Visualize the cyclic dependencies: + +```bash +>i show_deps --directory helpers --format dot --output_file dependency_graph.dot --show_cycles +>neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 +>open dependency_graph.svg +``` + +## Options + +- `--directory `: Directory to analyze (default: `.`). +- `--format `: Output format (default: `text`). +- `--output_file `: File to save the report (default: stdout for text, + `dependency_graph.dot` for DOT). +- `--max_level `: Maximum directory depth to analyze (e.g., `2`). +- `--show_cycles`: Show only cyclic dependencies (e.g., `module_d` importing + `module_e` and vice versa). + +## Limitations + +- Analyzes only intra-directory imports; external imports (e.g., `numpy`) are + ignored. +- Imports must resolve within the directory (e.g., `helpers.hdbg` to + [`/helpers/hdbg.py)`](/helpers/hdbg.py)). +- Directories with Python files must be modules (contain `__init__.py`), or a + `NotModuleError` is raised. 
+ +Example of a valid structure: + +```text +helpers +├── __init__.py +├── notebooks +│ ├── gallery_parquet.py +│ └── __init__.py +└── hdbg.py +``` + +Example causing `NotModuleError`: + +```text +helpers +├── __init__.py +├── notebooks +│ └── gallery_parquet.py +└── hdbg.py +``` + +```bash +NotModuleError: The following dirs have to be modules (add `__init__.py`): ['helpers/notebooks'] +``` + +## Running the tool on our codebase + +1. **Activate the `helpers` environment**: From the `helpers` root directory: + + ```bash + poetry shell; export PYTHONPATH=$PYTHONPATH:$(pwd) + ``` + +2. **Generate a dependency report**: Create a text report: + + ```bash + i show_deps --directory helpers --format text > report.txt + ``` + + Or create a DOT file for visualization: + + ```bash + i show_deps --directory helpers --format dot --output_file dependency_graph.dot + ``` + +3. **Visualize the graph** (optional): Convert the DOT file to SVG and view: + ```bash + neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 + open dependency_graph.svg + ``` + +**Troubleshooting**: If `invoke` fails (e.g., +`No idea what '--output_file' is!`), use the fallback script: + +```bash +python3 ~/src/helpers1/generate_deps.py +neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 +open dependency_graph.svg +``` + +**Tips**: The `generate_deps.py` script applies customizations like filtering +nodes with no dependencies and shortening labels (e.g., removing `helpers/` +prefix). Adjust Graphviz attributes (`ranksep=2.0`, `nodesep=1.0`, +`splines=spline`, `overlap=false`, `fontsize=10`) for better visualization. 
+ +**Last review**: 2025-05-01 Ehaab Basil diff --git a/generate_deps.py b/generate_deps.py new file mode 100644 index 000000000..d0dc629ff --- /dev/null +++ b/generate_deps.py @@ -0,0 +1,41 @@ +import networkx as nx + +from import_check.dependency_graph import DependencyGraph + +# Generate dependency graph for the helpers directory +graph = DependencyGraph("helpers") +graph.build_graph() + +# Print the text report of dependencies - uncomment if needed to see the text report of dependencies (alt to i show deps) +# print("Text Report:") +# print(graph.get_text_report()) + +# Convert to a NetworkX graph for customization +nx_graph = graph.graph + +# Filter out nodes with no dependencies (in-degree and out-degree both 0) +nodes_to_remove = [ + node + for node in nx_graph.nodes + if nx_graph.in_degree(node) == 0 and nx_graph.out_degree(node) == 0 +] +nx_graph.remove_nodes_from(nodes_to_remove) +print(f"Removed {len(nodes_to_remove)} nodes with no dependencies") + +# Shorten node labels by removing the "helpers/" prefix +for node in nx_graph.nodes: + new_label = node.replace("helpers/", "") + nx_graph.nodes[node]["label"] = new_label + +# Add Graphviz attributes for better layout +nx_graph.graph["graph"] = { + "ranksep": "2.0", # Increase spacing between ranks + "nodesep": "1.0", # Increase spacing between nodes + "splines": "spline", # Use smooth curves for edges + "overlap": "false", # Avoid overlapping nodes + "fontsize": "10", # Smaller font size for labels +} + +# Write the DOT file with the customized graph +nx.drawing.nx_pydot.write_dot(nx_graph, "dependency_graph.dot") +print("Dependency graph written to dependency_graph.dot") diff --git a/helpers/lib_tasks_docker.py b/helpers/lib_tasks_docker.py index 0b9d2b4d7..a3355fa63 100644 --- a/helpers/lib_tasks_docker.py +++ b/helpers/lib_tasks_docker.py @@ -523,7 +523,7 @@ def _generate_docker_compose_file( # ``` csfy_host_os_name = os.uname()[0] csfy_host_name = os.uname()[1] - csfy_host_os_version = os.uname()[2] + 
csfy_host_version = os.uname()[2] csfy_host_user_name = getpass.getuser() # We assume that we don't use this code inside a container, since otherwise # we would need to distinguish the container style (see @@ -563,8 +563,8 @@ def _generate_docker_compose_file( f"CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", f"CSFY_HOST_NAME={csfy_host_name}", f"CSFY_HOST_OS_NAME={csfy_host_os_name}", - f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", f"CSFY_HOST_USER_NAME={csfy_host_user_name}", + f"CSFY_HOST_VERSION={csfy_host_version}", "CSFY_REPO_CONFIG_CHECK=True", # Use inferred path for `repo_config.py`. "CSFY_REPO_CONFIG_PATH=", diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py new file mode 100644 index 000000000..3ef48afad --- /dev/null +++ b/import_check/dependency_graph.py @@ -0,0 +1,225 @@ +# Standard library imports +import ast +import logging +from pathlib import Path +from typing import Union + +# Third-party imports +import networkx as nx +from networkx.drawing.nx_pydot import write_dot + +_LOG = logging.getLogger(__name__) + + +# ############################################################################# +# DependencyGraph +# ############################################################################# + + +class DependencyGraph: + """ + Generate a dependency graph for intra-directory imports. + + Args: + directory (str): Path to the directory to analyze. + max_level (int, optional): Max directory depth to analyze (default: None). + show_cycles (bool, optional): Show only cyclic dependencies (default: False). + + Attributes: + directory (Path): Resolved directory path. + graph (nx.DiGraph): Directed graph of dependencies. + max_level (int, optional): Max directory depth to analyze. + show_cycles (bool): Whether to show only cyclic dependencies. 
+ """ + + def __init__( + self, + directory: str, + max_level: Union[int, None] = None, + show_cycles: bool = False, + ): + self.directory = Path(directory).resolve() + self.graph = nx.DiGraph() + self.max_level = max_level + self.show_cycles = show_cycles + + def build_graph(self) -> None: + """ + Build a directed graph of intra-directory dependencies. + + Returns: + None + + Raises: + SyntaxError: Skipped with a warning if a Python file has a syntax error. + """ + _LOG.info(f"Building dependency graph for {self.directory}") + # Calculate the base depth of the directory + base_depth = len(self.directory.parts) + # Find Python files up to max_level + py_files = [ + path + for path in self.directory.rglob("*.py") + if self.max_level is None + or (len(path.parent.parts) - base_depth) <= self.max_level + ] + _LOG.info(f"Found Python files: {py_files}") + for py_file in py_files: + relative_path = py_file.relative_to(self.directory.parent).as_posix() + _LOG.info( + f"Processing file {py_file}, relative path: {relative_path}" + ) + self.graph.add_node(relative_path) + try: + with open(py_file, "r") as f: + tree = ast.parse(f.read(), filename=str(py_file)) + except SyntaxError as e: + _LOG.warning(f"Skipping {py_file} due to syntax error: {e}") + continue + for node in ast.walk(tree): + if isinstance(node, (ast.Import, ast.ImportFrom)): + # Extract import names based on node type + imports = ( + [name.name for name in node.names] + if isinstance(node, ast.Import) + else [node.module] + ) + for imp in imports: + _LOG.info(f"Found import: {imp}") + imp_path = self._resolve_import(imp, py_file) + if imp_path: + _LOG.info( + f"Adding edge: {relative_path} -> {imp_path}" + ) + self.graph.add_edge(relative_path, imp_path) + else: + _LOG.info(f"No edge added for import {imp}") + # Filter for cyclic dependencies if show_cycles is True + if self.show_cycles: + self._filter_cycles() + + def get_text_report(self) -> str: + """ + Generate a text report listing each module's 
dependencies. + + Returns: + str: Text report of dependencies, one per line. + """ + report = [] + for node in self.graph.nodes: + dependencies = list(self.graph.successors(node)) + line = ( + f"{node} imports {', '.join(dependencies)}" + if dependencies + else f"{node} has no dependencies" + ) + report.append(line) + return "\n".join(report) + + def get_dot_file(self, output_file: str) -> None: + """ + Write the dependency graph to a DOT file. + + Args: + output_file (str): Path to the output DOT file. + + Returns: + None + """ + write_dot(self.graph, output_file) + _LOG.info(f"DOT file written to {output_file}") + + def _filter_cycles(self) -> None: + """ + Filter the graph to show only nodes and edges in cyclic dependencies. + + Returns: + None + """ + # Find strongly connected components (cycles) + cycles = list(nx.strongly_connected_components(self.graph)) + # Keep only components with more than one node (i.e., cycles) + cyclic_nodes = set() + for component in cycles: + if len(component) > 1: + cyclic_nodes.update(component) + # Create a new graph with only cyclic nodes and their edges + new_graph = nx.DiGraph() + for node in cyclic_nodes: + new_graph.add_node(node) + for u, v in self.graph.edges(): + if u in cyclic_nodes and v in cyclic_nodes: + new_graph.add_edge(u, v) + self.graph = new_graph + _LOG.info( + f"Graph filtered to {len(self.graph.nodes)} nodes and {len(self.graph.edges)} edges in cycles" + ) + + def _resolve_import(self, imp: str, py_file: Path) -> str: + """ + Resolve an import to a file path within the directory. + + Args: + imp (str): Import statement (e.g., "module.submodule"). + py_file (Path): File path where the import is found. + + Returns: + str: Relative path to the resolved file, or None if unresolved. 
+ """ + _LOG.info(f"Resolving import '{imp}' for file {py_file}") + base_dir = self.directory + _LOG.info(f"Base directory: {base_dir}") + parts = imp.split(".") + current_dir = base_dir + dir_name = self.directory.name # for example, "helpers" + # Handle imports starting with the directory name + if parts[0] == dir_name: + # Skip the first part dir, solve for next + parts = parts[1:] + if not parts: + # Only if the dir name is given (e.g., "helpers"), check for __init__.py + init_path = base_dir / "__init__.py" + if init_path.exists(): + resolved_path = init_path.relative_to( + self.directory.parent + ).as_posix() + _LOG.info(f"Resolved to: {resolved_path}") + return resolved_path + _LOG.info(f"Could not resolve import '{imp}' (directory only)") + return None + for i, module_name in enumerate(parts): + # Check for package with __init__.py + package_path = current_dir / module_name / "__init__.py" + _LOG.info(f"Checking package path: {package_path}") + if package_path.exists(): + # If last part, return the __init__.py path + if i == len(parts) - 1: + resolved_path = package_path.relative_to( + self.directory.parent + ).as_posix() + _LOG.info(f"Resolved to: {resolved_path}") + return resolved_path + # else, continue to the next part + current_dir = current_dir / module_name + continue + # Check for a .py file + module_path = current_dir / f"{module_name}.py" + _LOG.info(f"Checking module path: {module_path}") + if module_path.exists(): + # If last part, return the .py path + if i == len(parts) - 1: + resolved_path = module_path.relative_to( + self.directory.parent + ).as_posix() + _LOG.info(f"Resolved to: {resolved_path}") + return resolved_path + # If notlast part, but is a module, it can't lead further + _LOG.info( + f"Could not resolve full import '{imp}' beyond {module_path}" + ) + return None + # If neither exists, the import cannot be resolved + _LOG.info(f"Could not resolve import '{imp}' at part '{module_name}'") + return None + _LOG.info(f"Could not resolve 
import '{imp}'") + return None diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py new file mode 100644 index 000000000..446b832f3 --- /dev/null +++ b/import_check/test/test_dependency_graph.py @@ -0,0 +1,205 @@ +# Standard library imports +import os +import shutil +from pathlib import Path + +# Third-party imports +import pytest + +# Local imports +from import_check.dependency_graph import DependencyGraph + + +@pytest.fixture +def test_dir(): + """ + Create a temporary directory with test files and clean up after. + + Yields: + Path: Path to the temporary directory. + """ + dir_path = Path("test_tmp") + dir_path.mkdir(exist_ok=True) + # Create test files with specific imports + with open(dir_path / "module_a.py", "w") as f: + f.write("# No imports\n") + with open(dir_path / "module_b.py", "w") as f: + f.write("import module_a\n") + with open(dir_path / "module_c.py", "w") as f: + f.write("import module_b\n") + with open(dir_path / "module_d.py", "w") as f: + f.write("import module_e\n") + with open(dir_path / "module_e.py", "w") as f: + f.write("import module_d\n") + yield dir_path + shutil.rmtree(dir_path, ignore_errors=True) + + +# ############################################################################# +# TestDependencyGraph +# ############################################################################# + + +class TestDependencyGraph: + + def test_no_dependencies(self, test_dir: Path) -> None: + """ + Verify a module with no imports has no dependencies. + """ + graph = DependencyGraph(str(test_dir)) + graph.build_graph() + report = graph.get_text_report() + assert f"{test_dir}/module_a.py has no dependencies" in report + + def test_multiple_dependencies(self, test_dir: Path) -> None: + """ + Verify modules with chained dependencies are reported correctly. 
+ """ + graph = DependencyGraph(str(test_dir)) + graph.build_graph() + report = graph.get_text_report() + assert f"{test_dir}/module_c.py imports {test_dir}/module_b.py" in report + assert f"{test_dir}/module_b.py imports {test_dir}/module_a.py" in report + + def test_circular_dependencies(self, test_dir: Path) -> None: + """ + Verify cyclic dependencies are identified correctly. + """ + graph = DependencyGraph(str(test_dir)) + graph.build_graph() + report = graph.get_text_report() + assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report + assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report + + def test_dot_output(self, test_dir: Path) -> None: + """ + Verify the DOT file is generated with correct format. + """ + graph = DependencyGraph(str(test_dir)) + graph.build_graph() + output_file = "dependency_graph.dot" + graph.get_dot_file(output_file) + assert os.path.exists(output_file) + with open(output_file, "r") as f: + content = f.read() + assert "digraph" in content + + def test_syntax_error_handling(self, test_dir: Path) -> None: + """ + Verify syntax errors in files are handled without crashing. + """ + with open(test_dir / "module_invalid.py", "w") as f: + f.write("def invalid_syntax() # Missing colon\n") + graph = DependencyGraph(str(test_dir)) + graph.build_graph() + report = graph.get_text_report() + assert f"{test_dir}/module_a.py has no dependencies" in report + + def test_import_directory_only(self, test_dir: Path) -> None: + """ + Verify importing only the directory name resolves to __init__.py. 
+ """ + # Create __init__.py in the test directory + with open(test_dir / "__init__.py", "w") as f: + f.write("") + # Create a module that imports the directory name + with open(test_dir / "module_f.py", "w") as f: + f.write(f"import {test_dir.name}\n") + graph = DependencyGraph(str(test_dir)) + graph.build_graph() + report = graph.get_text_report() + assert f"{test_dir}/module_f.py imports {test_dir}/__init__.py" in report + + def test_package_only_import(self) -> None: + """ + Verify importing a package with only __init__.py adds a dependency. + """ + package_dir = Path("package_only_tmp") + package_dir.mkdir(exist_ok=True) + subdir = package_dir / "subpackage" + subdir.mkdir(exist_ok=True) + with open(subdir / "__init__.py", "w") as f: + f.write("") + with open(package_dir / "module_b.py", "w") as f: + f.write("import subpackage\n") + try: + graph = DependencyGraph(str(package_dir)) + graph.build_graph() + report = graph.get_text_report() + assert ( + f"{package_dir}/module_b.py imports {package_dir}/subpackage/__init__.py" + in report + ) + finally: + shutil.rmtree(package_dir) + + def test_package_import(self) -> None: + """ + Verify nested package imports resolve to __init__.py. 
+ """ + package_dir = Path("package_tmp") + package_dir.mkdir(exist_ok=True) + subdir = package_dir / "subpackage" + subdir.mkdir(exist_ok=True) + subsubdir = subdir / "subsubpackage" + subsubdir.mkdir(exist_ok=True) + module_dir = subsubdir / "module_a" + module_dir.mkdir(exist_ok=True) + with open(subdir / "__init__.py", "w") as f: + f.write("") + with open(subsubdir / "__init__.py", "w") as f: + f.write("") + with open(module_dir / "__init__.py", "w") as f: + f.write("") + with open(package_dir / "module_b.py", "w") as f: + f.write("import subpackage.subsubpackage.module_a\n") + try: + graph = DependencyGraph(str(package_dir)) + graph.build_graph() + report = graph.get_text_report() + assert ( + f"{package_dir}/module_b.py imports {package_dir}/subpackage/subsubpackage/module_a/__init__.py" + in report + ) + finally: + shutil.rmtree(package_dir) + + def test_unresolved_nested_import(self) -> None: + """ + Verify unresolved nested imports result in no dependencies. + """ + package_dir = Path("unresolved_tmp") + package_dir.mkdir(exist_ok=True) + subdir = package_dir / "subpackage" + subdir.mkdir(exist_ok=True) + with open(subdir / "__init__.py", "w") as f: + f.write("") + with open(package_dir / "module_b.py", "w") as f: + f.write("import subpackage.subsubpackage.module_a\n") + try: + graph = DependencyGraph(str(package_dir)) + graph.build_graph() + report = graph.get_text_report() + assert f"{package_dir}/module_b.py has no dependencies" in report + finally: + shutil.rmtree(package_dir) + + def test_show_cycles_filters_cyclic_dependencies( + self, test_dir: Path + ) -> None: + """ + Verify show_cycles=True filters the graph to only cyclic dependencies. 
+ """ + # Create a module with no imports to ensure it's filtered out + with open(test_dir / "module_f.py", "w") as f: + f.write("# No imports\n") + # Build the graph with show_cycles=True + graph = DependencyGraph(str(test_dir), show_cycles=True) + graph.build_graph() + # Get the text report + report = graph.get_text_report() + # Expected output: Only cyclic dependencies (module_d and module_e) should be shown + assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report + assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report + # Verify that non-cyclic module_f is not in the report + assert f"{test_dir}/module_f.py" not in report diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..c4cf9e849 --- /dev/null +++ b/setup.py @@ -0,0 +1,7 @@ +from setuptools import find_packages, setup + +setup( + name="helpers", + version="0.1", + packages=find_packages(), +) diff --git a/tasks.py b/tasks.py index 72f35fb6c..ee26e26d0 100644 --- a/tasks.py +++ b/tasks.py @@ -2,6 +2,8 @@ import os from typing import Any +from invoke import task + import helpers.repo_config_utils as hrecouti # Expose the pytest targets. @@ -92,7 +94,6 @@ pytest_repro, run_blank_tests, run_coverage_report, - run_coverage, run_fast_slow_superslow_tests, run_fast_slow_tests, run_fast_tests, @@ -119,7 +120,6 @@ from helpers.lib_tasks import ( # isort: skip # noqa: F401 # pylint: disable=unused-import copy_ecs_task_definition_image_url, docker_release_multi_build_dev_image, - docker_release_multi_arch_prod_image, docker_tag_push_multi_build_local_image_as_dev, release_dags_to_airflow, integrate_file, @@ -133,6 +133,8 @@ ) except ImportError: pass +from import_check.dependency_graph import DependencyGraph + # # TODO(gp): This is due to the coupling between code in linter container and # # the code being linted. # try: @@ -153,7 +155,8 @@ # TODO(gp): Move it to lib_tasks. 
-ECR_BASE_PATH = os.environ["CSFY_ECR_BASE_PATH"] +# ECR_BASE_PATH = os.environ["CSFY_ECR_BASE_PATH"] +ECR_BASE_PATH = os.environ.get("CSFY_ECR_BASE_PATH", "") def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: @@ -174,6 +177,54 @@ def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: return True +@task +def show_deps( + c, + directory=".", + format="text", + output_file=None, + max_level=None, + show_cycles=False, +): + """ + Generate a dependency report for a specified directory. + + Args: + c: Invoke context (required by invoke, unused). + directory (str): Directory to analyze (default: current directory). + format (str): Output format ('text' or 'dot', default: 'text'). + output_file (str, optional): File to write output to (default: None). + max_level (int, optional): Max directory depth to analyze (default: None). + show_cycles (bool, optional): Show only cyclic dependencies (default: False). + + Raises: + ValueError: If the format is neither 'text' nor 'dot'. + """ + # Convert max_level to int if provided + max_level = int(max_level) if max_level is not None else None + # Convert show_cycles to bool + show_cycles = show_cycles in (True, "True", "true", "1") + graph = DependencyGraph( + directory, max_level=max_level, show_cycles=show_cycles + ) + graph.build_graph() + if format == "text": + report = graph.get_text_report() + if output_file: + with open(output_file, "w") as f: + f.write(report) + print(f"Report written to {output_file}") + else: + print(report) + elif format == "dot": + if not output_file: + output_file = "dependency_graph.dot" + graph.get_dot_file(output_file) + print(f"DOT file written to {output_file}") + else: + raise ValueError(f"Unsupported format: {format}") + + default_params = { # TODO(Nikola): Remove prefix after everything is cleaned. # Currently there are a lot dependencies on prefix. 
From ff93cce0cd5f680639e7f1215e2f0d1c263d86b9 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 18:41:42 -0400 Subject: [PATCH 02/36] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .gitignore | 14 -------------- helpers/lib_tasks_docker.py | 4 ++-- setup.py | 7 ------- 3 files changed, 2 insertions(+), 23 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d5fa30a5e..000000000 --- a/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -__pycache__/ -*.log -tmp.requirements.txt -helpers.egg-info/PKG-INFO -helpers.egg-info/SOURCES.txt -helpers.egg-info/top_level.txt -.coverage -.DS_Store -dependency_graph.dot -dependency_graph.svg -report.txt -report_cycles.txt -report_max_level.txt -i diff --git a/helpers/lib_tasks_docker.py b/helpers/lib_tasks_docker.py index a3355fa63..0b9d2b4d7 100644 --- a/helpers/lib_tasks_docker.py +++ b/helpers/lib_tasks_docker.py @@ -523,7 +523,7 @@ def _generate_docker_compose_file( # ``` csfy_host_os_name = os.uname()[0] csfy_host_name = os.uname()[1] - csfy_host_version = os.uname()[2] + csfy_host_os_version = os.uname()[2] csfy_host_user_name = getpass.getuser() # We assume that we don't use this code inside a container, since otherwise # we would need to distinguish the container style (see @@ -563,8 +563,8 @@ def _generate_docker_compose_file( f"CSFY_FORCE_TEST_FAIL=$CSFY_FORCE_TEST_FAIL", f"CSFY_HOST_NAME={csfy_host_name}", f"CSFY_HOST_OS_NAME={csfy_host_os_name}", + f"CSFY_HOST_OS_VERSION={csfy_host_os_version}", f"CSFY_HOST_USER_NAME={csfy_host_user_name}", - f"CSFY_HOST_VERSION={csfy_host_version}", "CSFY_REPO_CONFIG_CHECK=True", # Use inferred path for `repo_config.py`. 
"CSFY_REPO_CONFIG_PATH=", diff --git a/setup.py b/setup.py index c4cf9e849..e69de29bb 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name="helpers", - version="0.1", - packages=find_packages(), -) From c09716adf7242f79c982d82ccc9cf679548c134e Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 18:51:28 -0400 Subject: [PATCH 03/36] Use llm_transform to use our style and convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 116 ++++++++++----------- import_check/test/test_dependency_graph.py | 73 ++++++++++--- 2 files changed, 111 insertions(+), 78 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 3ef48afad..bef1b7185 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -1,12 +1,9 @@ -# Standard library imports import ast import logging from pathlib import Path from typing import Union -# Third-party imports import networkx as nx -from networkx.drawing.nx_pydot import write_dot _LOG = logging.getLogger(__name__) @@ -20,40 +17,32 @@ class DependencyGraph: """ Generate a dependency graph for intra-directory imports. - Args: - directory (str): Path to the directory to analyze. - max_level (int, optional): Max directory depth to analyze (default: None). - show_cycles (bool, optional): Show only cyclic dependencies (default: False). - - Attributes: - directory (Path): Resolved directory path. - graph (nx.DiGraph): Directed graph of dependencies. - max_level (int, optional): Max directory depth to analyze. - show_cycles (bool): Whether to show only cyclic dependencies. + :param directory: Path to the directory to analyze. 
+ :param max_level: Max directory depth to analyze. + :param show_cycles: Show only cyclic dependencies """ def __init__( self, directory: str, + *, + # TODO: Use -1 instead of None to simplify typing. max_level: Union[int, None] = None, show_cycles: bool = False, - ): + ) -> None: self.directory = Path(directory).resolve() - self.graph = nx.DiGraph() + # Directed graph of dependencies. + self.graph: nx.DiGraph = nx.DiGraph() self.max_level = max_level + # Whether to show only cyclic dependencies. self.show_cycles = show_cycles def build_graph(self) -> None: """ Build a directed graph of intra-directory dependencies. - - Returns: - None - - Raises: - SyntaxError: Skipped with a warning if a Python file has a syntax error. + """ - _LOG.info(f"Building dependency graph for {self.directory}") + _LOG.info("Building dependency graph for %s", self.directory) # Calculate the base depth of the directory base_depth = len(self.directory.parts) # Find Python files up to max_level @@ -63,18 +52,18 @@ def build_graph(self) -> None: if self.max_level is None or (len(path.parent.parts) - base_depth) <= self.max_level ] - _LOG.info(f"Found Python files: {py_files}") + _LOG.info("Found Python files: %s", py_files) for py_file in py_files: relative_path = py_file.relative_to(self.directory.parent).as_posix() _LOG.info( - f"Processing file {py_file}, relative path: {relative_path}" + "Processing file %s, relative path: %s", py_file, relative_path ) self.graph.add_node(relative_path) try: with open(py_file, "r") as f: tree = ast.parse(f.read(), filename=str(py_file)) except SyntaxError as e: - _LOG.warning(f"Skipping {py_file} due to syntax error: {e}") + _LOG.warning("Skipping %s due to syntax error: %s", py_file, e) continue for node in ast.walk(tree): if isinstance(node, (ast.Import, ast.ImportFrom)): @@ -85,15 +74,15 @@ def build_graph(self) -> None: else [node.module] ) for imp in imports: - _LOG.info(f"Found import: {imp}") + _LOG.info("Found import: %s", imp) imp_path = 
self._resolve_import(imp, py_file) if imp_path: _LOG.info( - f"Adding edge: {relative_path} -> {imp_path}" + "Adding edge: %s -> %s", relative_path, imp_path ) self.graph.add_edge(relative_path, imp_path) else: - _LOG.info(f"No edge added for import {imp}") + _LOG.info("No edge added for import %s", imp) # Filter for cyclic dependencies if show_cycles is True if self.show_cycles: self._filter_cycles() @@ -102,10 +91,11 @@ def get_text_report(self) -> str: """ Generate a text report listing each module's dependencies. - Returns: - str: Text report of dependencies, one per line. + :return: Text report of dependencies, one per line. """ + # Accumulate report lines. report = [] + # Iterate over all nodes to report their dependencies. for node in self.graph.nodes: dependencies = list(self.graph.successors(node)) line = ( @@ -114,32 +104,28 @@ def get_text_report(self) -> str: else f"{node} has no dependencies" ) report.append(line) + # Join all lines into a single string separated by newline return "\n".join(report) def get_dot_file(self, output_file: str) -> None: """ Write the dependency graph to a DOT file. - Args: - output_file (str): Path to the output DOT file. - - Returns: - None + :param output_file: Path to the output DOT file. """ - write_dot(self.graph, output_file) - _LOG.info(f"DOT file written to {output_file}") + # Write the graph to a DOT file + networkx.drawing.nx_pydot.write_dot(self.graph, output_file) + _LOG.info("DOT file written to %s", output_file) def _filter_cycles(self) -> None: """ Filter the graph to show only nodes and edges in cyclic dependencies. - - Returns: - None """ - # Find strongly connected components (cycles) + # Find all strongly connected components in the graph cycles = list(nx.strongly_connected_components(self.graph)) - # Keep only components with more than one node (i.e., cycles) + # Accumulate cyclic nodes. 
cyclic_nodes = set() + # Keep only components with more than one node (i.e., cycles) for component in cycles: if len(component) > 1: cyclic_nodes.update(component) @@ -150,31 +136,34 @@ def _filter_cycles(self) -> None: for u, v in self.graph.edges(): if u in cyclic_nodes and v in cyclic_nodes: new_graph.add_edge(u, v) + # Replace the original graph with a new graph containing only cyclic edges self.graph = new_graph + # Summary of cyclic graph result _LOG.info( - f"Graph filtered to {len(self.graph.nodes)} nodes and {len(self.graph.edges)} edges in cycles" + "Graph filtered to %d nodes and %d edges in cycles", + len(self.graph.nodes), + len(self.graph.edges), ) def _resolve_import(self, imp: str, py_file: Path) -> str: """ Resolve an import to a file path within the directory. - - Args: - imp (str): Import statement (e.g., "module.submodule"). - py_file (Path): File path where the import is found. - - Returns: - str: Relative path to the resolved file, or None if unresolved. + + :param imp: Import statement (e.g., "module.submodule"). + :param py_file: File path where the import is found. + + :return: Relative path to the resolved file, or None if unresolved. 
""" - _LOG.info(f"Resolving import '{imp}' for file {py_file}") + _LOG.info("Resolving import '%s' for file %s", imp, py_file) + # Define base directory and other parameters for module resolution base_dir = self.directory - _LOG.info(f"Base directory: {base_dir}") + _LOG.info("Base directory: %s", base_dir) parts = imp.split(".") current_dir = base_dir dir_name = self.directory.name # for example, "helpers" # Handle imports starting with the directory name if parts[0] == dir_name: - # Skip the first part dir, solve for next + # Skip the first part dir, solve for next parts = parts[1:] if not parts: # Only if the dir name is given (e.g., "helpers"), check for __init__.py @@ -183,43 +172,44 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: resolved_path = init_path.relative_to( self.directory.parent ).as_posix() - _LOG.info(f"Resolved to: {resolved_path}") + _LOG.info("Resolved to: %s", resolved_path) return resolved_path - _LOG.info(f"Could not resolve import '{imp}' (directory only)") + _LOG.info("Could not resolve import '%s' (directory only)", imp) return None + # Iterate over each module name in resolved path for i, module_name in enumerate(parts): # Check for package with __init__.py package_path = current_dir / module_name / "__init__.py" - _LOG.info(f"Checking package path: {package_path}") + _LOG.info("Checking package path: %s", package_path) if package_path.exists(): # If last part, return the __init__.py path if i == len(parts) - 1: resolved_path = package_path.relative_to( self.directory.parent ).as_posix() - _LOG.info(f"Resolved to: {resolved_path}") + _LOG.info("Resolved to: %s", resolved_path) return resolved_path # else, continue to the next part current_dir = current_dir / module_name continue # Check for a .py file module_path = current_dir / f"{module_name}.py" - _LOG.info(f"Checking module path: {module_path}") + _LOG.info("Checking module path: %s", module_path) if module_path.exists(): # If last part, return the .py path if i == 
len(parts) - 1: resolved_path = module_path.relative_to( self.directory.parent ).as_posix() - _LOG.info(f"Resolved to: {resolved_path}") + _LOG.info("Resolved to: %s", resolved_path) return resolved_path - # If notlast part, but is a module, it can't lead further + # If not last part, but is a module, it can't lead further _LOG.info( - f"Could not resolve full import '{imp}' beyond {module_path}" + "Could not resolve full import '%s' beyond %s", imp, module_path ) return None # If neither exists, the import cannot be resolved - _LOG.info(f"Could not resolve import '{imp}' at part '{module_name}'") + _LOG.info("Could not resolve import '%s' at part '%s'", imp, module_name) return None - _LOG.info(f"Could not resolve import '{imp}'") + _LOG.info("Could not resolve import '%s'", imp) return None diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index 446b832f3..2e65018dc 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -1,13 +1,10 @@ -# Standard library imports import os import shutil from pathlib import Path -# Third-party imports import pytest -# Local imports -from import_check.dependency_graph import DependencyGraph +import import_check.dependency_graph as dependency_graph @pytest.fixture @@ -18,6 +15,7 @@ def test_dir(): Yields: Path: Path to the temporary directory. """ + # Create temporary directory for test files dir_path = Path("test_tmp") dir_path.mkdir(exist_ok=True) # Create test files with specific imports @@ -31,6 +29,7 @@ def test_dir(): f.write("import module_e\n") with open(dir_path / "module_e.py", "w") as f: f.write("import module_d\n") + # Cleanup of the directory after test completion yield dir_path shutil.rmtree(dir_path, ignore_errors=True) @@ -45,40 +44,56 @@ class TestDependencyGraph: def test_no_dependencies(self, test_dir: Path) -> None: """ Verify a module with no imports has no dependencies. 
+ + :param test_dir: Path to the test directory """ - graph = DependencyGraph(str(test_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() + # Verify the module with no imports is reported correctly assert f"{test_dir}/module_a.py has no dependencies" in report def test_multiple_dependencies(self, test_dir: Path) -> None: """ Verify modules with chained dependencies are reported correctly. + + :param test_dir: Path to the test directory """ - graph = DependencyGraph(str(test_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() + # Verify chained dependencies are reported correctly assert f"{test_dir}/module_c.py imports {test_dir}/module_b.py" in report assert f"{test_dir}/module_b.py imports {test_dir}/module_a.py" in report def test_circular_dependencies(self, test_dir: Path) -> None: """ Verify cyclic dependencies are identified correctly. + + :param test_dir: Path to the test directory """ - graph = DependencyGraph(str(test_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() + # Verify cyclic dependencies are identified assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report def test_dot_output(self, test_dir: Path) -> None: """ Verify the DOT file is generated with correct format. 
+ + :param test_dir: Path to the test directory """ - graph = DependencyGraph(str(test_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(test_dir)) graph.build_graph() output_file = "dependency_graph.dot" graph.get_dot_file(output_file) + # Assert that the DOT file exists and has expected content assert os.path.exists(output_file) with open(output_file, "r") as f: content = f.read() @@ -87,17 +102,24 @@ def test_dot_output(self, test_dir: Path) -> None: def test_syntax_error_handling(self, test_dir: Path) -> None: """ Verify syntax errors in files are handled without crashing. + + :param test_dir: Path to the test directory """ + # Create a module with a syntax error with open(test_dir / "module_invalid.py", "w") as f: f.write("def invalid_syntax() # Missing colon\n") - graph = DependencyGraph(str(test_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() + # Verify that the graph is still correct assert f"{test_dir}/module_a.py has no dependencies" in report def test_import_directory_only(self, test_dir: Path) -> None: """ Verify importing only the directory name resolves to __init__.py. 
+ + :param test_dir: Path to the test directory """ # Create __init__.py in the test directory with open(test_dir / "__init__.py", "w") as f: @@ -105,38 +127,47 @@ def test_import_directory_only(self, test_dir: Path) -> None: # Create a module that imports the directory name with open(test_dir / "module_f.py", "w") as f: f.write(f"import {test_dir.name}\n") - graph = DependencyGraph(str(test_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() + # Verify that the directory import is resolved to __init__.py assert f"{test_dir}/module_f.py imports {test_dir}/__init__.py" in report def test_package_only_import(self) -> None: """ Verify importing a package with only __init__.py adds a dependency. """ + # Prepare directory structure for package package_dir = Path("package_only_tmp") package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" subdir.mkdir(exist_ok=True) + # Create __init__.py for the subdir with open(subdir / "__init__.py", "w") as f: f.write("") + # Create module that imports the package with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage\n") try: - graph = DependencyGraph(str(package_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() + # Verify the import of subpackage is resolved as a dependency assert ( f"{package_dir}/module_b.py imports {package_dir}/subpackage/__init__.py" in report ) finally: + # Cleanup package directory shutil.rmtree(package_dir) def test_package_import(self) -> None: """ Verify nested package imports resolve to __init__.py. 
""" + # Prepare nested package directory structure package_dir = Path("package_tmp") package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" @@ -145,43 +176,53 @@ def test_package_import(self) -> None: subsubdir.mkdir(exist_ok=True) module_dir = subsubdir / "module_a" module_dir.mkdir(exist_ok=True) + # Create __init__.py in each directory with open(subdir / "__init__.py", "w") as f: f.write("") with open(subsubdir / "__init__.py", "w") as f: f.write("") with open(module_dir / "__init__.py", "w") as f: f.write("") + # Create module that imports the nested package with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage.subsubpackage.module_a\n") try: - graph = DependencyGraph(str(package_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() + # Verify the nested import is resolved as a dependency assert ( f"{package_dir}/module_b.py imports {package_dir}/subpackage/subsubpackage/module_a/__init__.py" in report ) finally: + # Cleanup package directory shutil.rmtree(package_dir) def test_unresolved_nested_import(self) -> None: """ Verify unresolved nested imports result in no dependencies. 
""" + # Prepare directory structure where nested module is missing package_dir = Path("unresolved_tmp") package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" subdir.mkdir(exist_ok=True) with open(subdir / "__init__.py", "w") as f: f.write("") + # Create a module that imports a non-existent nested package with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage.subsubpackage.module_a\n") try: - graph = DependencyGraph(str(package_dir)) + # Initialize dependency graph and build it + graph = dependency_graph.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() + # Verify no dependencies are reported for unresolved imports assert f"{package_dir}/module_b.py has no dependencies" in report finally: + # Cleanup package directory shutil.rmtree(package_dir) def test_show_cycles_filters_cyclic_dependencies( @@ -189,12 +230,14 @@ def test_show_cycles_filters_cyclic_dependencies( ) -> None: """ Verify show_cycles=True filters the graph to only cyclic dependencies. 
+ + :param test_dir: Path to the test directory """ # Create a module with no imports to ensure it's filtered out with open(test_dir / "module_f.py", "w") as f: f.write("# No imports\n") - # Build the graph with show_cycles=True - graph = DependencyGraph(str(test_dir), show_cycles=True) + # Build the graph with show_cycles=True to filter out everything but cycles + graph = dependency_graph.DependencyGraph(str(test_dir), show_cycles=True) graph.build_graph() # Get the text report report = graph.get_text_report() From b1e322b938b3564d3823ed679ef32f080c880d22 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 18:56:22 -0400 Subject: [PATCH 04/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 15 +++++++++------ import_check/test/test_dependency_graph.py | 22 +++++++++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index bef1b7185..d07a4a99f 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -40,7 +40,6 @@ def __init__( def build_graph(self) -> None: """ Build a directed graph of intra-directory dependencies. - """ _LOG.info("Building dependency graph for %s", self.directory) # Calculate the base depth of the directory @@ -148,11 +147,11 @@ def _filter_cycles(self) -> None: def _resolve_import(self, imp: str, py_file: Path) -> str: """ Resolve an import to a file path within the directory. - + :param imp: Import statement (e.g., "module.submodule"). :param py_file: File path where the import is found. - - :return: Relative path to the resolved file, or None if unresolved. + :return: Relative path to the resolved file, or None if + unresolved. 
""" _LOG.info("Resolving import '%s' for file %s", imp, py_file) # Define base directory and other parameters for module resolution @@ -205,11 +204,15 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: return resolved_path # If not last part, but is a module, it can't lead further _LOG.info( - "Could not resolve full import '%s' beyond %s", imp, module_path + "Could not resolve full import '%s' beyond %s", + imp, + module_path, ) return None # If neither exists, the import cannot be resolved - _LOG.info("Could not resolve import '%s' at part '%s'", imp, module_name) + _LOG.info( + "Could not resolve import '%s' at part '%s'", imp, module_name + ) return None _LOG.info("Could not resolve import '%s'", imp) return None diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index 2e65018dc..1e0629d96 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -4,7 +4,7 @@ import pytest -import import_check.dependency_graph as dependency_graph +import import_check.dependency_graph as ichdegra @pytest.fixture @@ -48,7 +48,7 @@ def test_no_dependencies(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(test_dir)) + graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() # Verify the module with no imports is reported correctly @@ -61,7 +61,7 @@ def test_multiple_dependencies(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(test_dir)) + graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() # Verify chained dependencies are reported correctly @@ -75,7 +75,7 @@ def test_circular_dependencies(self, test_dir: Path) -> None: :param test_dir: Path to 
the test directory """ # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(test_dir)) + graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() # Verify cyclic dependencies are identified @@ -89,7 +89,7 @@ def test_dot_output(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(test_dir)) + graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() output_file = "dependency_graph.dot" graph.get_dot_file(output_file) @@ -109,7 +109,7 @@ def test_syntax_error_handling(self, test_dir: Path) -> None: with open(test_dir / "module_invalid.py", "w") as f: f.write("def invalid_syntax() # Missing colon\n") # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(test_dir)) + graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() # Verify that the graph is still correct @@ -128,7 +128,7 @@ def test_import_directory_only(self, test_dir: Path) -> None: with open(test_dir / "module_f.py", "w") as f: f.write(f"import {test_dir.name}\n") # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(test_dir)) + graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() # Verify that the directory import is resolved to __init__.py @@ -151,7 +151,7 @@ def test_package_only_import(self) -> None: f.write("import subpackage\n") try: # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(package_dir)) + graph = ichdegra.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() # Verify the import of subpackage is resolved as a dependency @@ -188,7 +188,7 @@ def test_package_import(self) -> None: f.write("import subpackage.subsubpackage.module_a\n") try: # Initialize 
dependency graph and build it - graph = dependency_graph.DependencyGraph(str(package_dir)) + graph = ichdegra.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() # Verify the nested import is resolved as a dependency @@ -216,7 +216,7 @@ def test_unresolved_nested_import(self) -> None: f.write("import subpackage.subsubpackage.module_a\n") try: # Initialize dependency graph and build it - graph = dependency_graph.DependencyGraph(str(package_dir)) + graph = ichdegra.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() # Verify no dependencies are reported for unresolved imports @@ -237,7 +237,7 @@ def test_show_cycles_filters_cyclic_dependencies( with open(test_dir / "module_f.py", "w") as f: f.write("# No imports\n") # Build the graph with show_cycles=True to filter out everything but cycles - graph = dependency_graph.DependencyGraph(str(test_dir), show_cycles=True) + graph = ichdegra.DependencyGraph(str(test_dir), show_cycles=True) graph.build_graph() # Get the text report report = graph.get_text_report() From 7f60e147c26a76897c11595c2994faabb8f0f612 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 19:51:11 -0400 Subject: [PATCH 05/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 38 +++++++++++++++- import_check/dependency_graph.py | 59 ++++++++++++------------- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 29b7750e4..e2922753f 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -117,17 +117,53 @@ def test() -> _PROMPT_OUT: # 
############################################################################# -def code_fix_comments() -> _PROMPT_OUT: +def code_fix_existing_comments() -> _PROMPT_OUT: + """ + Fix the already existing comments in the Python code. + """ + system = _CONTEXT + system += r""" + Make sure that comments in the code are: + - in imperative form + - a correct English phrase + - end with a period `.` + - clear + + Comments should be before the code that they refer to + E.g., + ``` + dir_name = self.directory.name # For example, "helpers". + ``` + should become + ``` + # E.g., "helpers". + dir_name = self.directory.name + ``` + + Variables should be enclosed in a back tick, like `bar`. + Functions should be reported as `foo()`. + + Do not change the code. + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + + +def code_fix_improve_comments() -> _PROMPT_OUT: """ Add comments to Python code. """ system = _CONTEXT system += r""" + - Add comments for the parts of the code that are not properly commented - Every a chunk of 4 or 5 lines of code add comment explaining the code - Comments should go before the logical chunk of code they describe - Comments should be in imperative form, a full English phrase, and end with a period `.` - Do not comment every single line of code and especially logging statements + + - Leave the comments already existing """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index d07a4a99f..554e21289 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -19,7 +19,7 @@ class DependencyGraph: :param directory: Path to the directory to analyze. :param max_level: Max directory depth to analyze. - :param show_cycles: Show only cyclic dependencies + :param show_cycles: Show only cyclic dependencies. 
""" def __init__( @@ -31,10 +31,10 @@ def __init__( show_cycles: bool = False, ) -> None: self.directory = Path(directory).resolve() - # Directed graph of dependencies. + # Create a directed graph of dependencies. self.graph: nx.DiGraph = nx.DiGraph() self.max_level = max_level - # Whether to show only cyclic dependencies. + # Determine whether to show only cyclic dependencies. self.show_cycles = show_cycles def build_graph(self) -> None: @@ -42,9 +42,9 @@ def build_graph(self) -> None: Build a directed graph of intra-directory dependencies. """ _LOG.info("Building dependency graph for %s", self.directory) - # Calculate the base depth of the directory + # Calculate the base depth of the directory. base_depth = len(self.directory.parts) - # Find Python files up to max_level + # Find Python files up to max_level. py_files = [ path for path in self.directory.rglob("*.py") @@ -66,7 +66,7 @@ def build_graph(self) -> None: continue for node in ast.walk(tree): if isinstance(node, (ast.Import, ast.ImportFrom)): - # Extract import names based on node type + # Extract import names based on node type. imports = ( [name.name for name in node.names] if isinstance(node, ast.Import) @@ -82,7 +82,7 @@ def build_graph(self) -> None: self.graph.add_edge(relative_path, imp_path) else: _LOG.info("No edge added for import %s", imp) - # Filter for cyclic dependencies if show_cycles is True + # Filter for cyclic dependencies if show_cycles is True. if self.show_cycles: self._filter_cycles() @@ -103,7 +103,7 @@ def get_text_report(self) -> str: else f"{node} has no dependencies" ) report.append(line) - # Join all lines into a single string separated by newline + # Join all lines into a single string separated by newline. return "\n".join(report) def get_dot_file(self, output_file: str) -> None: @@ -112,7 +112,7 @@ def get_dot_file(self, output_file: str) -> None: :param output_file: Path to the output DOT file. """ - # Write the graph to a DOT file + # Write the graph to a DOT file. 
-        networkx.drawing.nx_pydot.write_dot(self.graph, output_file)
+        nx.drawing.nx_pydot.write_dot(self.graph, output_file)
         _LOG.info("DOT file written to %s", output_file)
 
@@ -120,24 +120,24 @@ def _filter_cycles(self) -> None:
         """
         Filter the graph to show only nodes and edges in cyclic dependencies.
         """
-        # Find all strongly connected components in the graph
+        # Find all strongly connected components in the graph.
         cycles = list(nx.strongly_connected_components(self.graph))
         # Accumulate cyclic nodes.
         cyclic_nodes = set()
-        # Keep only components with more than one node (i.e., cycles)
+        # Keep only components with more than one node (i.e., cycles).
         for component in cycles:
             if len(component) > 1:
                 cyclic_nodes.update(component)
-        # Create a new graph with only cyclic nodes and their edges
+        # Create a new graph with only cyclic nodes and their edges.
         new_graph = nx.DiGraph()
         for node in cyclic_nodes:
             new_graph.add_node(node)
         for u, v in self.graph.edges():
             if u in cyclic_nodes and v in cyclic_nodes:
                 new_graph.add_edge(u, v)
-        # Replace the original graph with a new graph containing only cyclic edges
+        # Replace the original graph with a new graph containing only cyclic edges.
         self.graph = new_graph
-        # Summary of cyclic graph result
+        # Log a summary of the cyclic graph result.
         _LOG.info(
             "Graph filtered to %d nodes and %d edges in cycles",
             len(self.graph.nodes),
@@ -150,22 +150,21 @@ def _resolve_import(self, imp: str, py_file: Path) -> str:
 
         :param imp: Import statement (e.g., "module.submodule").
         :param py_file: File path where the import is found.
-        :return: Relative path to the resolved file, or None if
-            unresolved.
+        :return: Relative path to the resolved file, or None if unresolved.
         """
         _LOG.info("Resolving import '%s' for file %s", imp, py_file)
-        # Define base directory and other parameters for module resolution
+        # Define base directory and other parameters for module resolution.
base_dir = self.directory _LOG.info("Base directory: %s", base_dir) parts = imp.split(".") current_dir = base_dir - dir_name = self.directory.name # for example, "helpers" - # Handle imports starting with the directory name + dir_name = self.directory.name # For example, "helpers". + # Handle imports starting with the directory name. if parts[0] == dir_name: - # Skip the first part dir, solve for next + # Skip the first part dir, solve for next. parts = parts[1:] if not parts: - # Only if the dir name is given (e.g., "helpers"), check for __init__.py + # Only if the dir name is given (e.g., "helpers"), check for __init__.py. init_path = base_dir / "__init__.py" if init_path.exists(): resolved_path = init_path.relative_to( @@ -175,44 +174,44 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: return resolved_path _LOG.info("Could not resolve import '%s' (directory only)", imp) return None - # Iterate over each module name in resolved path + # Iterate over each module name in resolved path. for i, module_name in enumerate(parts): - # Check for package with __init__.py + # Check for package with __init__.py. package_path = current_dir / module_name / "__init__.py" _LOG.info("Checking package path: %s", package_path) if package_path.exists(): - # If last part, return the __init__.py path + # If last part, return the __init__.py path. if i == len(parts) - 1: resolved_path = package_path.relative_to( self.directory.parent ).as_posix() _LOG.info("Resolved to: %s", resolved_path) return resolved_path - # else, continue to the next part + # Otherwise, continue to the next part. current_dir = current_dir / module_name continue - # Check for a .py file + # Check for a .py file. module_path = current_dir / f"{module_name}.py" _LOG.info("Checking module path: %s", module_path) if module_path.exists(): - # If last part, return the .py path + # If last part, return the .py path. 
if i == len(parts) - 1: resolved_path = module_path.relative_to( self.directory.parent ).as_posix() _LOG.info("Resolved to: %s", resolved_path) return resolved_path - # If not last part, but is a module, it can't lead further + # If not last part, but is a module, it can't lead further. _LOG.info( "Could not resolve full import '%s' beyond %s", imp, module_path, ) return None - # If neither exists, the import cannot be resolved + # If neither exists, the import cannot be resolved. _LOG.info( "Could not resolve import '%s' at part '%s'", imp, module_name ) return None _LOG.info("Could not resolve import '%s'", imp) - return None + return None \ No newline at end of file From adb7cec2065ad121de6c201961c8cd28c83bed35 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 19:52:00 -0400 Subject: [PATCH 06/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 554e21289..c73ac8eda 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -44,7 +44,7 @@ def build_graph(self) -> None: _LOG.info("Building dependency graph for %s", self.directory) # Calculate the base depth of the directory. base_depth = len(self.directory.parts) - # Find Python files up to max_level. + # Find Python files up to `max_level`. py_files = [ path for path in self.directory.rglob("*.py") @@ -66,7 +66,7 @@ def build_graph(self) -> None: continue for node in ast.walk(tree): if isinstance(node, (ast.Import, ast.ImportFrom)): - # Extract import names based on node type. + # Extract import names based on `node` type. 
imports = ( [name.name for name in node.names] if isinstance(node, ast.Import) @@ -82,7 +82,7 @@ def build_graph(self) -> None: self.graph.add_edge(relative_path, imp_path) else: _LOG.info("No edge added for import %s", imp) - # Filter for cyclic dependencies if show_cycles is True. + # Filter for cyclic dependencies if `show_cycles` is `True`. if self.show_cycles: self._filter_cycles() @@ -158,13 +158,14 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: _LOG.info("Base directory: %s", base_dir) parts = imp.split(".") current_dir = base_dir - dir_name = self.directory.name # For example, "helpers". + # E.g., "helpers". + dir_name = self.directory.name # Handle imports starting with the directory name. if parts[0] == dir_name: - # Skip the first part dir, solve for next. + # Skip the first part `dir`, solve for next. parts = parts[1:] if not parts: - # Only if the dir name is given (e.g., "helpers"), check for __init__.py. + # Only if the `dir` name is given (e.g., "helpers"), check for `__init__.py`. init_path = base_dir / "__init__.py" if init_path.exists(): resolved_path = init_path.relative_to( @@ -176,11 +177,11 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: return None # Iterate over each module name in resolved path. for i, module_name in enumerate(parts): - # Check for package with __init__.py. + # Check for package with `__init__.py`. package_path = current_dir / module_name / "__init__.py" _LOG.info("Checking package path: %s", package_path) if package_path.exists(): - # If last part, return the __init__.py path. + # If last part, return the `__init__.py` path. if i == len(parts) - 1: resolved_path = package_path.relative_to( self.directory.parent @@ -190,11 +191,11 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: # Otherwise, continue to the next part. current_dir = current_dir / module_name continue - # Check for a .py file. + # Check for a `.py` file. 
module_path = current_dir / f"{module_name}.py" _LOG.info("Checking module path: %s", module_path) if module_path.exists(): - # If last part, return the .py path. + # If last part, return the `.py` path. if i == len(parts) - 1: resolved_path = module_path.relative_to( self.directory.parent From a9d108d56a661728e1c5a344b6728b6eeef25ffc Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 19:54:20 -0400 Subject: [PATCH 07/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- conftest.py | 13 ++++++------- dev_scripts_helpers/llms/llm_prompts.py | 5 ++++- import_check/dependency_graph.py | 5 +++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/conftest.py b/conftest.py index a8512611c..9ecd495b1 100644 --- a/conftest.py +++ b/conftest.py @@ -7,22 +7,21 @@ # Hack to workaround pytest not happy with multiple redundant conftest.py # (bug #34). -#if not hasattr(hut, "_CONFTEST_ALREADY_PARSED"): +if not hasattr(hut, "_CONFTEST_ALREADY_PARSED"): # import helpers.hversion as hversi # hversi.check_version() # pylint: disable=protected-access - #hut._CONFTEST_ALREADY_PARSED = True + hut._CONFTEST_ALREADY_PARSED = True # Store whether we are running unit test through pytest. 
# pylint: disable=line-too-long # From https://docs.pytest.org/en/latest/example/simple.html#detect-if-running-from-within-a-pytest-run + def pytest_configure(config: Any) -> None: + _ = config + # pylint: disable=protected-access + hut._CONFTEST_IN_PYTEST = True -def pytest_configure(config: Any) -> None: - _ = config - # pylint: disable=protected-access - hut._CONFTEST_IN_PYTEST = True - def pytest_unconfigure(config: Any) -> None: _ = config # pylint: disable=protected-access diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index e2922753f..1aadb6608 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -83,7 +83,9 @@ def get_prompt_tags() -> List[str]: hdbg.dassert_in(prompt, valid_prompts) -def get_outside_container_post_transforms(transform_name: str) -> Dict[str, List[str]]: +def get_outside_container_post_transforms( + transform_name: str, +) -> Dict[str, List[str]]: hdbg.dassert_in(transform_name, OUTSIDE_CONTAINER_POST_TRANSFORMS.keys()) return OUTSIDE_CONTAINER_POST_TRANSFORMS[transform_name] @@ -626,6 +628,7 @@ def scratch_categorize_topics() -> _PROMPT_OUT: post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms + # ############################################################################# # Transforms. # ############################################################################# diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index c73ac8eda..47a1a8d48 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -150,7 +150,8 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: :param imp: Import statement (e.g., "module.submodule"). :param py_file: File path where the import is found. - :return: Relative path to the resolved file, or None if unresolved. + :return: Relative path to the resolved file, or None if + unresolved. 
""" _LOG.info("Resolving import '%s' for file %s", imp, py_file) # Define base directory and other parameters for module resolution. @@ -215,4 +216,4 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: ) return None _LOG.info("Could not resolve import '%s'", imp) - return None \ No newline at end of file + return None From 718df8e35f616009df608844c27018f16db49b85 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 20:02:05 -0400 Subject: [PATCH 08/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 26 ++++++++++++++++++++++--- import_check/dependency_graph.py | 9 +++++++-- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 1aadb6608..ed5c948f6 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -158,20 +158,40 @@ def code_fix_improve_comments() -> _PROMPT_OUT: """ system = _CONTEXT system += r""" + - Leave the comments already existing - Add comments for the parts of the code that are not properly commented - - Every a chunk of 4 or 5 lines of code add comment explaining the code + - Every chunk of 4 or 5 lines of code add comment explaining the code - Comments should go before the logical chunk of code they describe - Comments should be in imperative form, a full English phrase, and end with a period `.` - Do not comment every single line of code and especially logging statements - - - Leave the comments already existing """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms +def code_fix_logging_statements() -> _PROMPT_OUT: + """ + Add comments to Python code. 
+ """ + system = _CONTEXT + system += r""" + When a variable `foobar` is important for debugging the code in case of + failure, add statements like: + ``` + _LOG.debug(hprint.to_str("foobar")) + ``` + + At the beginning of an important function add code like + ``` + _LOG.debug(hprint.func_signature_to_str()) + ``` + """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms + def code_fix_docstrings() -> _PROMPT_OUT: """ Add or complete a REST docstring to Python code. diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 47a1a8d48..59da6b344 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -58,6 +58,8 @@ def build_graph(self) -> None: "Processing file %s, relative path: %s", py_file, relative_path ) self.graph.add_node(relative_path) + # TODO: Use hio.from_file and to_file to write. + # TODO: Let's add a switch `abort_on_error` to continue or abort. try: with open(py_file, "r") as f: tree = ast.parse(f.read(), filename=str(py_file)) @@ -97,6 +99,7 @@ def get_text_report(self) -> str: # Iterate over all nodes to report their dependencies. for node in self.graph.nodes: dependencies = list(self.graph.successors(node)) + # TODO: Let's use a if-then-else for clarity. line = ( f"{node} imports {', '.join(dependencies)}" if dependencies @@ -144,11 +147,12 @@ def _filter_cycles(self) -> None: len(self.graph.edges), ) + # TODO: -> Optional[str] def _resolve_import(self, imp: str, py_file: Path) -> str: """ Resolve an import to a file path within the directory. - :param imp: Import statement (e.g., "module.submodule"). + :param imp: Import statement (e.g., `module.submodule`). :param py_file: File path where the import is found. :return: Relative path to the resolved file, or None if unresolved. @@ -166,7 +170,8 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: # Skip the first part `dir`, solve for next. 
parts = parts[1:] if not parts: - # Only if the `dir` name is given (e.g., "helpers"), check for `__init__.py`. + # Only if the `dir` name is given (e.g., "helpers"), check for + # `__init__.py`. init_path = base_dir / "__init__.py" if init_path.exists(): resolved_path = init_path.relative_to( From ad39bd7e871f401554cf26abf5fa2efffa17eef9 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 20:06:19 -0400 Subject: [PATCH 09/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 11 +++++++++-- import_check/dependency_graph.py | 9 +++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index ed5c948f6..7587bca80 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -158,13 +158,16 @@ def code_fix_improve_comments() -> _PROMPT_OUT: """ system = _CONTEXT system += r""" - - Leave the comments already existing + - Leave the comments in the code that already exists - Add comments for the parts of the code that are not properly commented - - Every chunk of 4 or 5 lines of code add comment explaining the code + - E.g., every chunk of 4 or 5 lines of code add comment explaining the + code - Comments should go before the logical chunk of code they describe - Comments should be in imperative form, a full English phrase, and end with a period `.` - Do not comment every single line of code and especially logging statements + - Add examples of the values of variables, when you are sure of the types + and values of variables. If you are not sure, do not add any information. 
""" pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -198,6 +201,10 @@ def code_fix_docstrings() -> _PROMPT_OUT: Each function should have a docstring that describes the function, its parameters, and its return value. + + Create examples of the values in input and output of each function, only + when you are sure of the types and values of variables. If you are not + sure, do not add any information. """ system = _CONTEXT system += r''' diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 59da6b344..0eafd6559 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -7,12 +7,10 @@ _LOG = logging.getLogger(__name__) - # ############################################################################# # DependencyGraph # ############################################################################# - class DependencyGraph: """ Generate a dependency graph for intra-directory imports. @@ -30,6 +28,7 @@ def __init__( max_level: Union[int, None] = None, show_cycles: bool = False, ) -> None: + # Initialize basic attributes. self.directory = Path(directory).resolve() # Create a directed graph of dependencies. self.graph: nx.DiGraph = nx.DiGraph() @@ -52,12 +51,14 @@ def build_graph(self) -> None: or (len(path.parent.parts) - base_depth) <= self.max_level ] _LOG.info("Found Python files: %s", py_files) + # Process each Python file to build the dependency graph. for py_file in py_files: relative_path = py_file.relative_to(self.directory.parent).as_posix() _LOG.info( "Processing file %s, relative path: %s", py_file, relative_path ) self.graph.add_node(relative_path) + # Attempt to parse the file as an Abstract Syntax Tree (AST). # TODO: Use hio.from_file and to_file to write. # TODO: Let's add a switch `abort_on_error` to continue or abort. 
try: @@ -66,6 +67,7 @@ def build_graph(self) -> None: except SyntaxError as e: _LOG.warning("Skipping %s due to syntax error: %s", py_file, e) continue + # Walk through the AST and parse import statements. for node in ast.walk(tree): if isinstance(node, (ast.Import, ast.ImportFrom)): # Extract import names based on `node` type. @@ -74,6 +76,7 @@ def build_graph(self) -> None: if isinstance(node, ast.Import) else [node.module] ) + # Add edges for each import found. for imp in imports: _LOG.info("Found import: %s", imp) imp_path = self._resolve_import(imp, py_file) @@ -99,6 +102,7 @@ def get_text_report(self) -> str: # Iterate over all nodes to report their dependencies. for node in self.graph.nodes: dependencies = list(self.graph.successors(node)) + # Conditional report creation depending on dependencies presence. # TODO: Let's use a if-then-else for clarity. line = ( f"{node} imports {', '.join(dependencies)}" @@ -220,5 +224,6 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: "Could not resolve import '%s' at part '%s'", imp, module_name ) return None + # Return None if module resolution was unsuccessful. 
_LOG.info("Could not resolve import '%s'", imp) return None From 895ebf93e173a5b553f4b54bcc3cc26a16f69697 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 20:11:47 -0400 Subject: [PATCH 10/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 27 ++++++++++++++------- import_check/dependency_graph.py | 31 ++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 7587bca80..32be8135b 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -158,7 +158,6 @@ def code_fix_improve_comments() -> _PROMPT_OUT: """ system = _CONTEXT system += r""" - - Leave the comments in the code that already exists - Add comments for the parts of the code that are not properly commented - E.g., every chunk of 4 or 5 lines of code add comment explaining the code @@ -168,6 +167,7 @@ def code_fix_improve_comments() -> _PROMPT_OUT: - Do not comment every single line of code and especially logging statements - Add examples of the values of variables, when you are sure of the types and values of variables. If you are not sure, do not add any information. + - Do not remove any already existing comment. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -179,18 +179,27 @@ def code_fix_logging_statements() -> _PROMPT_OUT: Add comments to Python code. 
""" system = _CONTEXT - system += r""" + system += r''' When a variable `foobar` is important for debugging the code in case of failure, add statements like: ``` _LOG.debug(hprint.to_str("foobar")) ``` - At the beginning of an important function add code like + At the beginning of an important function, after the docstring, add code + like ``` - _LOG.debug(hprint.func_signature_to_str()) + def get_text_report(self) -> str: + """ + Generate a text report listing each module's dependencies. + + :return: Text report of dependencies, one per line. + """ + _LOG.debug(hprint.func_signature_to_str()) ``` - """ + + Do not remove any already existing comment. + ''' pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms @@ -216,13 +225,13 @@ def code_fix_docstrings() -> _PROMPT_OUT: An example of a correct docstring is: ``` - def _format_greeting(name: str, *, greeting: str = DEFAULT_GREETING) -> str: + def _format_greeting(name: str, *, greeting: str = "Hello") -> str: """ Format a greeting message with the given name. - :param name: the name to include in the greeting - :param greeting: the base greeting message to use - :return: formatted greeting + :param name: the name to include in the greeting (e.g., "John") + :param greeting: the base greeting message to use (e.g., "Ciao") + :return: formatted greeting (e.g., "Hello John") """ ``` ''' diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 0eafd6559..7ab2ff9e1 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -30,19 +30,24 @@ def __init__( ) -> None: # Initialize basic attributes. self.directory = Path(directory).resolve() + _LOG.debug(hprint.to_str("self.directory")) # Create a directed graph of dependencies. self.graph: nx.DiGraph = nx.DiGraph() self.max_level = max_level + _LOG.debug(hprint.to_str("self.max_level")) # Determine whether to show only cyclic dependencies. 
self.show_cycles = show_cycles + _LOG.debug(hprint.to_str("self.show_cycles")) def build_graph(self) -> None: """ Build a directed graph of intra-directory dependencies. """ + _LOG.debug(hprint.func_signature_to_str()) _LOG.info("Building dependency graph for %s", self.directory) # Calculate the base depth of the directory. base_depth = len(self.directory.parts) + _LOG.debug(hprint.to_str("base_depth")) # Find Python files up to `max_level`. py_files = [ path @@ -51,12 +56,14 @@ def build_graph(self) -> None: or (len(path.parent.parts) - base_depth) <= self.max_level ] _LOG.info("Found Python files: %s", py_files) + _LOG.debug(hprint.to_str("py_files")) # Process each Python file to build the dependency graph. for py_file in py_files: relative_path = py_file.relative_to(self.directory.parent).as_posix() _LOG.info( "Processing file %s, relative path: %s", py_file, relative_path ) + _LOG.debug(hprint.to_str("relative_path")) self.graph.add_node(relative_path) # Attempt to parse the file as an Abstract Syntax Tree (AST). # TODO: Use hio.from_file and to_file to write. @@ -76,15 +83,18 @@ def build_graph(self) -> None: if isinstance(node, ast.Import) else [node.module] ) + _LOG.debug(hprint.to_str("imports")) # Add edges for each import found. for imp in imports: _LOG.info("Found import: %s", imp) + _LOG.debug(hprint.to_str("imp")) imp_path = self._resolve_import(imp, py_file) if imp_path: _LOG.info( "Adding edge: %s -> %s", relative_path, imp_path ) self.graph.add_edge(relative_path, imp_path) + _LOG.debug(hprint.to_str("self.graph")) else: _LOG.info("No edge added for import %s", imp) # Filter for cyclic dependencies if `show_cycles` is `True`. @@ -97,11 +107,14 @@ def get_text_report(self) -> str: :return: Text report of dependencies, one per line. """ + _LOG.debug(hprint.func_signature_to_str()) # Accumulate report lines. report = [] # Iterate over all nodes to report their dependencies. 
for node in self.graph.nodes: + _LOG.debug(hprint.to_str("node")) dependencies = list(self.graph.successors(node)) + _LOG.debug(hprint.to_str("dependencies")) # Conditional report creation depending on dependencies presence. # TODO: Let's use a if-then-else for clarity. line = ( @@ -109,6 +122,7 @@ def get_text_report(self) -> str: if dependencies else f"{node} has no dependencies" ) + _LOG.debug(hprint.to_str("line")) report.append(line) # Join all lines into a single string separated by newline. return "\n".join(report) @@ -119,6 +133,7 @@ def get_dot_file(self, output_file: str) -> None: :param output_file: Path to the output DOT file. """ + _LOG.debug(hprint.func_signature_to_str()) # Write the graph to a DOT file. networkx.drawing.nx_pydot.write_dot(self.graph, output_file) _LOG.info("DOT file written to %s", output_file) @@ -127,14 +142,17 @@ def _filter_cycles(self) -> None: """ Filter the graph to show only nodes and edges in cyclic dependencies. """ + _LOG.debug(hprint.func_signature_to_str()) # Find all strongly connected components in the graph. cycles = list(nx.strongly_connected_components(self.graph)) + _LOG.debug(hprint.to_str("cycles")) # Accumulate cyclic nodes. cyclic_nodes = set() # Keep only components with more than one node (i.e., cycles). for component in cycles: if len(component) > 1: cyclic_nodes.update(component) + _LOG.debug(hprint.to_str("cyclic_nodes")) # Create a new graph with only cyclic nodes and their edges. new_graph = nx.DiGraph() for node in cyclic_nodes: @@ -161,22 +179,29 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: :return: Relative path to the resolved file, or None if unresolved. """ + _LOG.debug(hprint.func_signature_to_str()) _LOG.info("Resolving import '%s' for file %s", imp, py_file) + _LOG.debug(hprint.to_str("imp, py_file")) # Define base directory and other parameters for module resolution. 
base_dir = self.directory _LOG.info("Base directory: %s", base_dir) parts = imp.split(".") + _LOG.debug(hprint.to_str("parts")) current_dir = base_dir + _LOG.debug(hprint.to_str("current_dir")) # E.g., "helpers". dir_name = self.directory.name + _LOG.debug(hprint.to_str("dir_name")) # Handle imports starting with the directory name. if parts[0] == dir_name: # Skip the first part `dir`, solve for next. parts = parts[1:] + _LOG.debug(hprint.to_str("parts after dir_name handling")) if not parts: # Only if the `dir` name is given (e.g., "helpers"), check for # `__init__.py`. init_path = base_dir / "__init__.py" + _LOG.debug(hprint.to_str("init_path")) if init_path.exists(): resolved_path = init_path.relative_to( self.directory.parent @@ -187,9 +212,11 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: return None # Iterate over each module name in resolved path. for i, module_name in enumerate(parts): + _LOG.debug(hprint.to_str("i, module_name")) # Check for package with `__init__.py`. package_path = current_dir / module_name / "__init__.py" _LOG.info("Checking package path: %s", package_path) + _LOG.debug(hprint.to_str("package_path")) if package_path.exists(): # If last part, return the `__init__.py` path. if i == len(parts) - 1: @@ -200,10 +227,12 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: return resolved_path # Otherwise, continue to the next part. current_dir = current_dir / module_name + _LOG.debug(hprint.to_str("current_dir")) continue # Check for a `.py` file. module_path = current_dir / f"{module_name}.py" _LOG.info("Checking module path: %s", module_path) + _LOG.debug(hprint.to_str("module_path")) if module_path.exists(): # If last part, return the `.py` path. if i == len(parts) - 1: @@ -226,4 +255,4 @@ def _resolve_import(self, imp: str, py_file: Path) -> str: return None # Return None if module resolution was unsuccessful. 
_LOG.info("Could not resolve import '%s'", imp) - return None + return None \ No newline at end of file From 7a0bd461b0a05d0be130b40203ed2fd041ce2469 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 20:48:39 -0400 Subject: [PATCH 11/36] Lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- dev_scripts_helpers/llms/llm_prompts.py | 56 ++++++++++----- import_check/dependency_graph.py | 4 +- import_check/test/test_dependency_graph.py | 80 +++++++++++----------- 3 files changed, 78 insertions(+), 62 deletions(-) diff --git a/dev_scripts_helpers/llms/llm_prompts.py b/dev_scripts_helpers/llms/llm_prompts.py index 32be8135b..7920b61e0 100644 --- a/dev_scripts_helpers/llms/llm_prompts.py +++ b/dev_scripts_helpers/llms/llm_prompts.py @@ -146,6 +146,7 @@ def code_fix_existing_comments() -> _PROMPT_OUT: Functions should be reported as `foo()`. Do not change the code. + Do not add any empty line. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -167,7 +168,10 @@ def code_fix_improve_comments() -> _PROMPT_OUT: - Do not comment every single line of code and especially logging statements - Add examples of the values of variables, when you are sure of the types and values of variables. If you are not sure, do not add any information. - - Do not remove any already existing comment. + + Do not change the code. + Do not remove any already existing comment. + Do not add any empty line. """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -198,12 +202,15 @@ def get_text_report(self) -> str: _LOG.debug(hprint.func_signature_to_str()) ``` + Do not change the code. Do not remove any already existing comment. + Do not add any empty line. 
''' pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} return system, pre_transforms, post_transforms + def code_fix_docstrings() -> _PROMPT_OUT: """ Add or complete a REST docstring to Python code. @@ -256,7 +263,7 @@ def process_data(data, threshold=0.5): ``` to: ``` - def process_data(data: List[float], threshold: float = 0.5) -> List[float]: + def process_data(data: List[float], *, threshold: float = 0.5) -> List[float]: results: List[float] = [] for item in data: if item > threshold: @@ -333,18 +340,17 @@ def code_fix_by_using_f_strings() -> _PROMPT_OUT: def code_fix_by_using_perc_strings() -> _PROMPT_OUT: """ - Use % formatting, like `"Hello, %s. - - You are %d years old." % (name, age)`. + Use % formatting, like `"Hello, %s. You are %d years old." % (name, age)`. """ system = _CONTEXT system += r""" Use % formatting instead of f-strings (formatted string literals). - - Do not print any comment, but just the converted code. + Do not print any comment, just the converted code. For instance, convert: + `f"Hello, {name}. You are {age} years old."` to + `"Hello, %s. You are %d years old." % (name, age)` """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -359,10 +365,6 @@ def code_fix_from_imports() -> _PROMPT_OUT: system += r""" Replace any Python "from import" statement like `from X import Y` with the form `import X` and then replace the uses of `Y` with `X.Y` - - For instance, replace: - with: - Then replace the uses of `OpenAIEmbeddings` with: """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -378,9 +380,18 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: When you find a Python function with optional parameters, add a star after the mandatory parameters and before the optional parameters, and make sure that the function is called with the correct number of arguments. 
+ """ + pre_transforms: Set[str] = set() + post_transforms = {"remove_code_delimiters"} + return system, pre_transforms, post_transforms - For instance, replace: - with the following: + +def code_fix_unit_test() -> _PROMPT_OUT: + """ + Fix code missing the star before optional parameters. + """ + system = _CONTEXT + system += r""" """ pre_transforms: Set[str] = set() post_transforms = {"remove_code_delimiters"} @@ -389,7 +400,8 @@ def code_fix_star_before_optional_parameters() -> _PROMPT_OUT: def code_fix_csfy_style() -> _PROMPT_OUT: """ - Apply the csfy style to the code. + Apply all the transformations required to write code according to the + Causify conventions. """ # > grep "def code_fix" ./dev_scripts_helpers/llms/llm_prompts.py | awk '{print $2 }' function_names = [ @@ -648,15 +660,21 @@ def slide_colorize_points() -> _PROMPT_OUT: def scratch_categorize_topics() -> _PROMPT_OUT: system = r""" - For each of the following title of article, find the best topic among the following ones + For each of the following title of article, find the best topic among the + following ones: - LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, Marketing and Sales, Probabilistic Programming, Code Refactoring, Open Source + LLM Reasoning, Quant Finance, Time Series, Developer Tools, Python + Ecosystem, Git and GitHub, Software Architecture, AI Infrastructure, + Knowledge Graphs, Diffusion Models, Causal Inference, Trading Strategies, + Prompt Engineering, Mathematical Concepts, Dev Productivity, Rust and C++, + Marketing and Sales, Probabilistic Programming, Code Refactoring, Open + Source Only print - the first 3 words of the title - a separator | - the topic - and don't print any explanation + and don't print any explanation. 
if you don't know the topic, print "unknown" """ @@ -805,9 +823,9 @@ def run_prompt( # Add the specific instructions to the system prompt. # E.g., # The instructions are: - # 52: in private function `_parse`:D401: First line should be in imperative mood; try rephrasing (found 'Same') [doc_formatter] + # 52: in private function `_parse`:D401: First line should be in # 174: error: Missing return statement [return] [mypy] - # 192: [W1201(logging-not-lazy), _convert_file_names] Use lazy % formatting in logging functions [pylint] + # 192: [W1201(logging-not-lazy), _convert_file_names] Use lazy % system_prompt = hprint.dedent(system_prompt) hdbg.dassert_ne(instructions, "") system_prompt += "\nThe instructions are:\n" + instructions + "\n\n" diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 7ab2ff9e1..8c892ae32 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -28,16 +28,14 @@ def __init__( max_level: Union[int, None] = None, show_cycles: bool = False, ) -> None: + _LOG.debug(hprint.func_signature_to_str()) # Initialize basic attributes. self.directory = Path(directory).resolve() - _LOG.debug(hprint.to_str("self.directory")) # Create a directed graph of dependencies. self.graph: nx.DiGraph = nx.DiGraph() self.max_level = max_level - _LOG.debug(hprint.to_str("self.max_level")) # Determine whether to show only cyclic dependencies. self.show_cycles = show_cycles - _LOG.debug(hprint.to_str("self.show_cycles")) def build_graph(self) -> None: """ diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index 1e0629d96..c7338150b 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -15,10 +15,10 @@ def test_dir(): Yields: Path: Path to the temporary directory. """ - # Create temporary directory for test files + # Create a temporary directory for test files. 
dir_path = Path("test_tmp") dir_path.mkdir(exist_ok=True) - # Create test files with specific imports + # Create test files with specific imports. with open(dir_path / "module_a.py", "w") as f: f.write("# No imports\n") with open(dir_path / "module_b.py", "w") as f: @@ -29,7 +29,7 @@ def test_dir(): f.write("import module_e\n") with open(dir_path / "module_e.py", "w") as f: f.write("import module_d\n") - # Cleanup of the directory after test completion + # Clean up the directory after test completion. yield dir_path shutil.rmtree(dir_path, ignore_errors=True) @@ -47,11 +47,11 @@ def test_no_dependencies(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() - # Verify the module with no imports is reported correctly + # Verify the module with no imports is reported correctly. assert f"{test_dir}/module_a.py has no dependencies" in report def test_multiple_dependencies(self, test_dir: Path) -> None: @@ -60,11 +60,11 @@ def test_multiple_dependencies(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() - # Verify chained dependencies are reported correctly + # Verify chained dependencies are reported correctly. assert f"{test_dir}/module_c.py imports {test_dir}/module_b.py" in report assert f"{test_dir}/module_b.py imports {test_dir}/module_a.py" in report @@ -74,11 +74,11 @@ def test_circular_dependencies(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it + # Initialize dependency graph and build it. 
graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() - # Verify cyclic dependencies are identified + # Verify cyclic dependencies are identified. assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report @@ -88,12 +88,12 @@ def test_dot_output(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() output_file = "dependency_graph.dot" graph.get_dot_file(output_file) - # Assert that the DOT file exists and has expected content + # Assert that the DOT file exists and has expected content. assert os.path.exists(output_file) with open(output_file, "r") as f: content = f.read() @@ -105,14 +105,14 @@ def test_syntax_error_handling(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ - # Create a module with a syntax error + # Create a module with a syntax error. with open(test_dir / "module_invalid.py", "w") as f: f.write("def invalid_syntax() # Missing colon\n") - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() - # Verify that the graph is still correct + # Verify that the graph is still correct. assert f"{test_dir}/module_a.py has no dependencies" in report def test_import_directory_only(self, test_dir: Path) -> None: @@ -121,53 +121,53 @@ def test_import_directory_only(self, test_dir: Path) -> None: :param test_dir: Path to the test directory """ - # Create __init__.py in the test directory + # Create `__init__.py` in the test directory. 
with open(test_dir / "__init__.py", "w") as f: f.write("") - # Create a module that imports the directory name + # Create a module that imports the directory name. with open(test_dir / "module_f.py", "w") as f: f.write(f"import {test_dir.name}\n") - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() report = graph.get_text_report() - # Verify that the directory import is resolved to __init__.py + # Verify that the directory import is resolved to `__init__.py`. assert f"{test_dir}/module_f.py imports {test_dir}/__init__.py" in report def test_package_only_import(self) -> None: """ Verify importing a package with only __init__.py adds a dependency. """ - # Prepare directory structure for package + # Prepare directory structure for the package. package_dir = Path("package_only_tmp") package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" subdir.mkdir(exist_ok=True) - # Create __init__.py for the subdir + # Create `__init__.py` for the subdir. with open(subdir / "__init__.py", "w") as f: f.write("") - # Create module that imports the package + # Create module that imports the package. with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage\n") try: - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() - # Verify the import of subpackage is resolved as a dependency + # Verify the import of a subpackage is resolved as a dependency. assert ( f"{package_dir}/module_b.py imports {package_dir}/subpackage/__init__.py" in report ) finally: - # Cleanup package directory + # Clean up package directory. shutil.rmtree(package_dir) def test_package_import(self) -> None: """ Verify nested package imports resolve to __init__.py. 
""" - # Prepare nested package directory structure + # Prepare nested package directory structure. package_dir = Path("package_tmp") package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" @@ -176,53 +176,53 @@ def test_package_import(self) -> None: subsubdir.mkdir(exist_ok=True) module_dir = subsubdir / "module_a" module_dir.mkdir(exist_ok=True) - # Create __init__.py in each directory + # Create `__init__.py` in each directory. with open(subdir / "__init__.py", "w") as f: f.write("") with open(subsubdir / "__init__.py", "w") as f: f.write("") with open(module_dir / "__init__.py", "w") as f: f.write("") - # Create module that imports the nested package + # Create a module that imports the nested package. with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage.subsubpackage.module_a\n") try: - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() - # Verify the nested import is resolved as a dependency + # Verify the nested import is resolved as a dependency. assert ( f"{package_dir}/module_b.py imports {package_dir}/subpackage/subsubpackage/module_a/__init__.py" in report ) finally: - # Cleanup package directory + # Clean up package directory. shutil.rmtree(package_dir) def test_unresolved_nested_import(self) -> None: """ Verify unresolved nested imports result in no dependencies. """ - # Prepare directory structure where nested module is missing + # Prepare directory structure where nested module is missing. package_dir = Path("unresolved_tmp") package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" subdir.mkdir(exist_ok=True) with open(subdir / "__init__.py", "w") as f: f.write("") - # Create a module that imports a non-existent nested package + # Create a module that imports a non-existent nested package. 
with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage.subsubpackage.module_a\n") try: - # Initialize dependency graph and build it + # Initialize dependency graph and build it. graph = ichdegra.DependencyGraph(str(package_dir)) graph.build_graph() report = graph.get_text_report() - # Verify no dependencies are reported for unresolved imports + # Verify no dependencies are reported for unresolved imports. assert f"{package_dir}/module_b.py has no dependencies" in report finally: - # Cleanup package directory + # Clean up package directory. shutil.rmtree(package_dir) def test_show_cycles_filters_cyclic_dependencies( @@ -233,16 +233,16 @@ def test_show_cycles_filters_cyclic_dependencies( :param test_dir: Path to the test directory """ - # Create a module with no imports to ensure it's filtered out + # Create a module with no imports to ensure it's filtered out. with open(test_dir / "module_f.py", "w") as f: f.write("# No imports\n") - # Build the graph with show_cycles=True to filter out everything but cycles + # Build the graph with show_cycles=True to filter out everything but cycles. graph = ichdegra.DependencyGraph(str(test_dir), show_cycles=True) graph.build_graph() - # Get the text report + # Get the text report. report = graph.get_text_report() - # Expected output: Only cyclic dependencies (module_d and module_e) should be shown + # Expected output: Only cyclic dependencies (module_d and module_e) should be shown. assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report - # Verify that non-cyclic module_f is not in the report + # Verify that non-cyclic `module_f` is not in the report. 
assert f"{test_dir}/module_f.py" not in report From e12fb119fc9ba319afce1ac26d665d88288184ef Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Sun, 4 May 2025 20:52:55 -0400 Subject: [PATCH 12/36] Add TODOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/test/test_dependency_graph.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index c7338150b..d3ebcb861 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -7,6 +7,8 @@ import import_check.dependency_graph as ichdegra +# TODO: use self.get_scratch_dir() and make this a function that is called +# by the needed test methods. @pytest.fixture def test_dir(): """ @@ -18,6 +20,7 @@ def test_dir(): # Create a temporary directory for test files. dir_path = Path("test_tmp") dir_path.mkdir(exist_ok=True) + # TODO: Let's use hio.to_file # Create test files with specific imports. with open(dir_path / "module_a.py", "w") as f: f.write("# No imports\n") @@ -39,6 +42,7 @@ def test_dir(): # ############################################################################# +# TODO: Derive from hunittest.TestCase class TestDependencyGraph: def test_no_dependencies(self, test_dir: Path) -> None: @@ -52,6 +56,7 @@ def test_no_dependencies(self, test_dir: Path) -> None: graph.build_graph() report = graph.get_text_report() # Verify the module with no imports is reported correctly. 
+ # TODO: Use self.assert_in assert f"{test_dir}/module_a.py has no dependencies" in report def test_multiple_dependencies(self, test_dir: Path) -> None: @@ -94,6 +99,7 @@ def test_dot_output(self, test_dir: Path) -> None: output_file = "dependency_graph.dot" graph.get_dot_file(output_file) # Assert that the DOT file exists and has expected content. + # TODO: use self.check_string assert os.path.exists(output_file) with open(output_file, "r") as f: content = f.read() @@ -140,6 +146,9 @@ def test_package_only_import(self) -> None: """ # Prepare directory structure for the package. package_dir = Path("package_only_tmp") + # TODO: use self.get_scratch_space and hio.to_file + # TODO: use hio.create_dir + # TODO: add a descrition of how the dir and files look like package_dir.mkdir(exist_ok=True) subdir = package_dir / "subpackage" subdir.mkdir(exist_ok=True) @@ -149,6 +158,7 @@ def test_package_only_import(self) -> None: # Create module that imports the package. with open(package_dir / "module_b.py", "w") as f: f.write("import subpackage\n") + # TODO: No need for deleting. try: # Initialize dependency graph and build it. 
graph = ichdegra.DependencyGraph(str(package_dir)) From 1e7c0ea44e8e9755c5979763ee6be5045e6129fc Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Wed, 7 May 2025 11:58:31 -0400 Subject: [PATCH 13/36] Add .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .gitignore | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..f68d0f85a --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +.DS_Store +.coverage +dependency_graph.dot +dependency_graph.svg +dev_scripts_helpers/thin_client/build.py.log +dev_scripts_helpers/thin_client/thin_client_utils.py.log +dev_scripts_helpers/thin_client/tmp.requirements.txt +helpers.egg-info/ +helpers/__pycache__/ +i +import_check/__pycache__/ +import_check/test/__pycache__/ +report.txt +report_cycles.txt +report_max_level.txt +__pycache__/ +dependency_report.txt \ No newline at end of file From 64eea9a563f288cbb67ce4233d36f108c0606928 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Fri, 9 May 2025 20:34:24 -0400 Subject: [PATCH 14/36] Updates: TODOs addressed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 238 ++++++++-------- import_check/test/test_dependency_graph.py | 316 +++++++++------------ 2 files changed, 267 insertions(+), 287 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 8c892ae32..0a50556ca 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -1,10 +1,19 @@ +""" +Import 
as: + +import import_check.dependency_graph as ichdegra +""" + import ast import logging from pathlib import Path -from typing import Union +from typing import Optional import networkx as nx +import helpers.hio as hio +import helpers.hprint as hprint + _LOG = logging.getLogger(__name__) # ############################################################################# @@ -16,86 +25,89 @@ class DependencyGraph: Generate a dependency graph for intra-directory imports. :param directory: Path to the directory to analyze. - :param max_level: Max directory depth to analyze. - :param show_cycles: Show only cyclic dependencies. + :param max_level: Max directory depth to analyze (-1 for no limit). + :param show_cycles: If True, include only cyclic dependencies in the graph. """ def __init__( self, directory: str, *, - # TODO: Use -1 instead of None to simplify typing. - max_level: Union[int, None] = None, + max_level: Optional[int] = -1, # TODO: Use -1 instead of None to simplify typing. show_cycles: bool = False, ) -> None: - _LOG.debug(hprint.func_signature_to_str()) - # Initialize basic attributes. + """ + Initialize the DependencyGraph with directory and analysis parameters. + """ + # _LOG.debug(hprint.func_signature_to_str()) + # Initialize directory path + print(f"Type of Path: {type(Path)}") self.directory = Path(directory).resolve() - # Create a directed graph of dependencies. + # Create a directed graph for dependencies. self.graph: nx.DiGraph = nx.DiGraph() - self.max_level = max_level - # Determine whether to show only cyclic dependencies. + # Set maximum directory depth. + self.max_level = max_level if max_level is not None else -1 # Handle None + # Configure cyclic dependency filtering. self.show_cycles = show_cycles - def build_graph(self) -> None: + def build_graph(self, abort_on_error: bool = False) -> None: """ Build a directed graph of intra-directory dependencies. 
+ + :param abort_on_error: If True, raise SyntaxError on parsing failures; if False, + skip invalid files. """ - _LOG.debug(hprint.func_signature_to_str()) + # _LOG.debug(hprint.func_signature_to_str()) + # Prepare directory analysis. _LOG.info("Building dependency graph for %s", self.directory) - # Calculate the base depth of the directory. base_depth = len(self.directory.parts) _LOG.debug(hprint.to_str("base_depth")) - # Find Python files up to `max_level`. + # Collect Python files within `max_level` depth. py_files = [ path for path in self.directory.rglob("*.py") - if self.max_level is None - or (len(path.parent.parts) - base_depth) <= self.max_level + if self.max_level == -1 or (len(path.parent.parts) - base_depth) <= self.max_level ] _LOG.info("Found Python files: %s", py_files) _LOG.debug(hprint.to_str("py_files")) # Process each Python file to build the dependency graph. for py_file in py_files: relative_path = py_file.relative_to(self.directory.parent).as_posix() - _LOG.info( - "Processing file %s, relative path: %s", py_file, relative_path - ) + _LOG.info("Processing file %s, relative path: %s", py_file, relative_path) _LOG.debug(hprint.to_str("relative_path")) self.graph.add_node(relative_path) - # Attempt to parse the file as an Abstract Syntax Tree (AST). # TODO: Use hio.from_file and to_file to write. # TODO: Let's add a switch `abort_on_error` to continue or abort. + # Parse the file as an Abstract Syntax Tree (AST). try: - with open(py_file, "r") as f: - tree = ast.parse(f.read(), filename=str(py_file)) + file_content = hio.from_file(str(py_file)) + tree = ast.parse(file_content, filename=str(py_file)) except SyntaxError as e: + if abort_on_error: + _LOG.error("Syntax error in %s: %s", py_file, e) + raise e _LOG.warning("Skipping %s due to syntax error: %s", py_file, e) continue - # Walk through the AST and parse import statements. + # Extract imports from AST. 
for node in ast.walk(tree): if isinstance(node, (ast.Import, ast.ImportFrom)): - # Extract import names based on `node` type. imports = ( [name.name for name in node.names] if isinstance(node, ast.Import) else [node.module] ) _LOG.debug(hprint.to_str("imports")) - # Add edges for each import found. for imp in imports: _LOG.info("Found import: %s", imp) _LOG.debug(hprint.to_str("imp")) imp_path = self._resolve_import(imp, py_file) if imp_path: - _LOG.info( - "Adding edge: %s -> %s", relative_path, imp_path - ) + _LOG.info("Adding edge: %s -> %s", relative_path, imp_path) self.graph.add_edge(relative_path, imp_path) _LOG.debug(hprint.to_str("self.graph")) else: _LOG.info("No edge added for import %s", imp) - # Filter for cyclic dependencies if `show_cycles` is `True`. + # Filter for cyclic dependencies if enabled. if self.show_cycles: self._filter_cycles() @@ -105,24 +117,22 @@ def get_text_report(self) -> str: :return: Text report of dependencies, one per line. """ - _LOG.debug(hprint.func_signature_to_str()) - # Accumulate report lines. + # _LOG.debug(hprint.func_signature_to_str()) + # Initialize report. report = [] - # Iterate over all nodes to report their dependencies. + # Generate report lines for each node. for node in self.graph.nodes: _LOG.debug(hprint.to_str("node")) dependencies = list(self.graph.successors(node)) _LOG.debug(hprint.to_str("dependencies")) - # Conditional report creation depending on dependencies presence. + # Create report line based on dependencies. # TODO: Let's use a if-then-else for clarity. - line = ( - f"{node} imports {', '.join(dependencies)}" - if dependencies - else f"{node} has no dependencies" - ) + if dependencies: + line = f"{node} imports {', '.join(dependencies)}" + else: + line = f"{node} has no dependencies" _LOG.debug(hprint.to_str("line")) report.append(line) - # Join all lines into a single string separated by newline. 
return "\n".join(report) def get_dot_file(self, output_file: str) -> None: @@ -131,126 +141,130 @@ def get_dot_file(self, output_file: str) -> None: :param output_file: Path to the output DOT file. """ - _LOG.debug(hprint.func_signature_to_str()) + # _LOG.debug(hprint.func_signature_to_str()) # Write the graph to a DOT file. - networkx.drawing.nx_pydot.write_dot(self.graph, output_file) + nx.drawing.nx_pydot.write_dot(self.graph, output_file) _LOG.info("DOT file written to %s", output_file) def _filter_cycles(self) -> None: """ Filter the graph to show only nodes and edges in cyclic dependencies. """ - _LOG.debug(hprint.func_signature_to_str()) - # Find all strongly connected components in the graph. + # _LOG.debug(hprint.func_signature_to_str()) + # Find strongly connected components. cycles = list(nx.strongly_connected_components(self.graph)) - _LOG.debug(hprint.to_str("cycles")) # Accumulate cyclic nodes. + _LOG.debug(hprint.to_str("cycles")) cyclic_nodes = set() - # Keep only components with more than one node (i.e., cycles). for component in cycles: if len(component) > 1: cyclic_nodes.update(component) _LOG.debug(hprint.to_str("cyclic_nodes")) - # Create a new graph with only cyclic nodes and their edges. + # Create a new graph with cyclic nodes and edges. new_graph = nx.DiGraph() for node in cyclic_nodes: new_graph.add_node(node) for u, v in self.graph.edges(): if u in cyclic_nodes and v in cyclic_nodes: new_graph.add_edge(u, v) - # Replace the original graph with a new graph containing only cyclic edges. + # Update the graph to include only cyclic dependencies. self.graph = new_graph - # Log a summary of the cyclic graph result. _LOG.info( "Graph filtered to %d nodes and %d edges in cycles", len(self.graph.nodes), len(self.graph.edges), ) - # TODO: -> Optional[str] - def _resolve_import(self, imp: str, py_file: Path) -> str: + def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: """ Resolve an import to a file path within the directory. 
:param imp: Import statement (e.g., `module.submodule`). :param py_file: File path where the import is found. - :return: Relative path to the resolved file, or None if - unresolved. + :return: Relative path to the resolved file, or None if unresolved. """ - _LOG.debug(hprint.func_signature_to_str()) + # _LOG.debug(hprint.func_signature_to_str()) _LOG.info("Resolving import '%s' for file %s", imp, py_file) _LOG.debug(hprint.to_str("imp, py_file")) - # Define base directory and other parameters for module resolution. + # Initialize base directory. base_dir = self.directory _LOG.info("Base directory: %s", base_dir) parts = imp.split(".") _LOG.debug(hprint.to_str("parts")) current_dir = base_dir - _LOG.debug(hprint.to_str("current_dir")) - # E.g., "helpers". + # Handle directory name imports. + # Current directory. dir_name = self.directory.name - _LOG.debug(hprint.to_str("dir_name")) - # Handle imports starting with the directory name. - if parts[0] == dir_name: - # Skip the first part `dir`, solve for next. + # Parent directory. + parent_name = self.directory.parent.name + # Grandparent directory, if exists. + parent_parent_name = ( + self.directory.parent.parent.name if len(self.directory.parent.parts) > 1 else "" + ) + # Collect all parent directory names into a list for validation. + valid_names = [dir_name, parent_name, parent_parent_name] + parent = self.directory.parent + while len(parent.parts) > 2: # Stop near root + valid_names.append(parent.parent.name) + parent = parent.parent + _LOG.info( + "Directory name: %s, Parent name: %s, Parent parent name: %s, " + "Valid names: %s, Import first part: %s", + dir_name, parent_name, parent_parent_name, valid_names, + parts[0] if parts else "" + ) + result = None + if parts and parts[0] in valid_names: parts = parts[1:] _LOG.debug(hprint.to_str("parts after dir_name handling")) if not parts: # Only if the `dir` name is given (e.g., "helpers"), check for # `__init__.py`. 
init_path = base_dir / "__init__.py" - _LOG.debug(hprint.to_str("init_path")) + _LOG.info("Checking __init__.py at %s, exists: %s", init_path, init_path.exists()) if init_path.exists(): - resolved_path = init_path.relative_to( - self.directory.parent - ).as_posix() + resolved_path = init_path.relative_to(self.directory.parent).as_posix() _LOG.info("Resolved to: %s", resolved_path) - return resolved_path - _LOG.info("Could not resolve import '%s' (directory only)", imp) - return None - # Iterate over each module name in resolved path. - for i, module_name in enumerate(parts): - _LOG.debug(hprint.to_str("i, module_name")) - # Check for package with `__init__.py`. - package_path = current_dir / module_name / "__init__.py" - _LOG.info("Checking package path: %s", package_path) - _LOG.debug(hprint.to_str("package_path")) - if package_path.exists(): - # If last part, return the `__init__.py` path. - if i == len(parts) - 1: - resolved_path = package_path.relative_to( - self.directory.parent - ).as_posix() - _LOG.info("Resolved to: %s", resolved_path) - return resolved_path - # Otherwise, continue to the next part. - current_dir = current_dir / module_name - _LOG.debug(hprint.to_str("current_dir")) - continue - # Check for a `.py` file. - module_path = current_dir / f"{module_name}.py" - _LOG.info("Checking module path: %s", module_path) - _LOG.debug(hprint.to_str("module_path")) - if module_path.exists(): - # If last part, return the `.py` path. - if i == len(parts) - 1: - resolved_path = module_path.relative_to( - self.directory.parent - ).as_posix() - _LOG.info("Resolved to: %s", resolved_path) - return resolved_path - # If not last part, but is a module, it can't lead further. - _LOG.info( - "Could not resolve full import '%s' beyond %s", - imp, - module_path, - ) - return None - # If neither exists, the import cannot be resolved. 
- _LOG.info( - "Could not resolve import '%s' at part '%s'", imp, module_name - ) - return None + result = resolved_path + else: + _LOG.error("No __init__.py found at %s", init_path) + else: + for i, module_name in enumerate(parts): + _LOG.debug(hprint.to_str("i, module_name")) + package_path = current_dir / module_name / "__init__.py" + _LOG.info("Checking package path: %s", package_path) + _LOG.debug(hprint.to_str("package_path")) + if package_path.exists(): + if i == len(parts) - 1: + resolved_path = package_path.relative_to(self.directory.parent).as_posix() + _LOG.info("Resolved to: %s", resolved_path) + result = resolved_path + break + current_dir = current_dir / module_name + _LOG.debug(hprint.to_str("current_dir")) + continue + # Check for a .py file. + module_path = current_dir / f"{module_name}.py" + _LOG.info("Checking module path: %s", module_path) + _LOG.debug(hprint.to_str("module_path")) + if module_path.exists(): + # If last part, return the `.py` path. + if i == len(parts) - 1: + resolved_path = module_path.relative_to(self.directory.parent).as_posix() + _LOG.info("Resolved to: %s", resolved_path) + result = resolved_path + break + # If not last part, but is a module, it can't lead further. + _LOG.info( + "Could not resolve full import '%s' beyond %s", + imp, + module_path, + ) + break + # If neither exists, the import cannot be resolved. + _LOG.info("Could not resolve import '%s' at part '%s'", imp, module_name) + break # Return None if module resolution was unsuccessful. 
- _LOG.info("Could not resolve import '%s'", imp) - return None \ No newline at end of file + if result is None: + _LOG.info("Could not resolve import '%s'", imp) + return result \ No newline at end of file diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index d3ebcb861..0a361fd04 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -1,258 +1,224 @@ import os -import shutil -from pathlib import Path - -import pytest +import pathlib as path # Updated import style +import helpers.hio as hio +import helpers.hunit_test as hunitest import import_check.dependency_graph as ichdegra - -# TODO: use self.get_scratch_dir() and make this a function that is called -# by the needed test methods. -@pytest.fixture -def test_dir(): - """ - Create a temporary directory with test files and clean up after. - - Yields: - Path: Path to the temporary directory. - """ - # Create a temporary directory for test files. - dir_path = Path("test_tmp") - dir_path.mkdir(exist_ok=True) - # TODO: Let's use hio.to_file - # Create test files with specific imports. - with open(dir_path / "module_a.py", "w") as f: - f.write("# No imports\n") - with open(dir_path / "module_b.py", "w") as f: - f.write("import module_a\n") - with open(dir_path / "module_c.py", "w") as f: - f.write("import module_b\n") - with open(dir_path / "module_d.py", "w") as f: - f.write("import module_e\n") - with open(dir_path / "module_e.py", "w") as f: - f.write("import module_d\n") - # Clean up the directory after test completion. 
- yield dir_path - shutil.rmtree(dir_path, ignore_errors=True) - - # ############################################################################# # TestDependencyGraph # ############################################################################# - -# TODO: Derive from hunittest.TestCase -class TestDependencyGraph: - - def test_no_dependencies(self, test_dir: Path) -> None: +# ############################################################################# +# TestDependencyGraph +# ############################################################################# +# TODO: class TestDependencyGraph(hunitest.TestCase): +class TestDependencyGraph(hunitest.TestCase): + # TODO: use self.get_scratch_dir() and make this a function that is called + def get_test_dir(self) -> path.Path: + """ + Create a temporary directory with test files. + :return: Path to the temporary directory. + """ + # Prepare directory. + dir_path = path.Path(self.get_scratch_space()) + # Create test files. + hio.create_dir(dir_path, incremental=True) + hio.to_file(str(dir_path / "module_a.py"), "# No imports\n") + hio.to_file(str(dir_path / "module_b.py"), "import module_a\n") + hio.to_file(str(dir_path / "module_c.py"), "import module_b\n") + hio.to_file(str(dir_path / "module_d.py"), "import module_e\n") + hio.to_file(str(dir_path / "module_e.py"), "import module_d\n") + return dir_path + + def test_no_dependencies(self) -> None: """ Verify a module with no imports has no dependencies. - - :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it. + # Prepare inputs + test_dir = self.get_test_dir() graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() + # Run. report = graph.get_text_report() - # Verify the module with no imports is reported correctly. # TODO: Use self.assert_in - assert f"{test_dir}/module_a.py has no dependencies" in report + # Check. 
+ self.assertIn("tmp.scratch/module_a.py has no dependencies", report) - def test_multiple_dependencies(self, test_dir: Path) -> None: + def test_multiple_dependencies(self) -> None: """ Verify modules with chained dependencies are reported correctly. - - :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it. + # Prepare inputs. + test_dir = self.get_test_dir() graph = ichdegra.DependencyGraph(str(test_dir)) + # Run. graph.build_graph() report = graph.get_text_report() - # Verify chained dependencies are reported correctly. - assert f"{test_dir}/module_c.py imports {test_dir}/module_b.py" in report - assert f"{test_dir}/module_b.py imports {test_dir}/module_a.py" in report + # Check. + self.assertIn("tmp.scratch/module_c.py imports tmp.scratch/module_b.py", report) + self.assertIn("tmp.scratch/module_b.py imports tmp.scratch/module_a.py", report) - def test_circular_dependencies(self, test_dir: Path) -> None: + def test_circular_dependencies(self) -> None: """ Verify cyclic dependencies are identified correctly. - - :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it. + # Prepare inputs. + test_dir = self.get_test_dir() graph = ichdegra.DependencyGraph(str(test_dir)) + # Run. graph.build_graph() report = graph.get_text_report() - # Verify cyclic dependencies are identified. - assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report - assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report + # Check. + self.assertIn("tmp.scratch/module_d.py imports tmp.scratch/module_e.py", report) + self.assertIn("tmp.scratch/module_e.py imports tmp.scratch/module_d.py", report) - def test_dot_output(self, test_dir: Path) -> None: + def test_dot_output(self) -> None: """ Verify the DOT file is generated with correct format. - - :param test_dir: Path to the test directory """ - # Initialize dependency graph and build it. + # Prepare inputs. 
+ test_dir = self.get_test_dir() graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() + # Run. output_file = "dependency_graph.dot" graph.get_dot_file(output_file) - # Assert that the DOT file exists and has expected content. # TODO: use self.check_string - assert os.path.exists(output_file) - with open(output_file, "r") as f: + # Check. + self.assertTrue(os.path.exists(output_file)) + with open(output_file, "r", encoding="utf-8") as f: content = f.read() - assert "digraph" in content + self.check_string(content) - def test_syntax_error_handling(self, test_dir: Path) -> None: + def test_syntax_error_handling(self) -> None: """ Verify syntax errors in files are handled without crashing. - - :param test_dir: Path to the test directory """ - # Create a module with a syntax error. - with open(test_dir / "module_invalid.py", "w") as f: - f.write("def invalid_syntax() # Missing colon\n") - # Initialize dependency graph and build it. + # Prepare inputs. + test_dir = self.get_test_dir() + hio.to_file(str(test_dir / "module_invalid.py"), "def invalid_syntax() # Missing colon\n") graph = ichdegra.DependencyGraph(str(test_dir)) + # Run. graph.build_graph() report = graph.get_text_report() - # Verify that the graph is still correct. - assert f"{test_dir}/module_a.py has no dependencies" in report + # Check. + self.assertIn("tmp.scratch/module_a.py has no dependencies", report) - def test_import_directory_only(self, test_dir: Path) -> None: + def test_import_directory_only(self) -> None: """ Verify importing only the directory name resolves to __init__.py. - - :param test_dir: Path to the test directory """ - # Create `__init__.py` in the test directory. - with open(test_dir / "__init__.py", "w") as f: - f.write("") - # Create a module that imports the directory name. - with open(test_dir / "module_f.py", "w") as f: - f.write(f"import {test_dir.name}\n") - # Initialize dependency graph and build it. + # Prepare inputs. 
+ test_dir = self.get_test_dir() + init_path = test_dir / "__init__.py" + hio.to_file(str(init_path), "") + hio.to_file(str(test_dir / "module_f.py"), f"import {test_dir.name}") graph = ichdegra.DependencyGraph(str(test_dir)) + # Run. graph.build_graph() report = graph.get_text_report() - # Verify that the directory import is resolved to `__init__.py`. - assert f"{test_dir}/module_f.py imports {test_dir}/__init__.py" in report + # Check. + self.assertIn("tmp.scratch/module_f.py imports tmp.scratch/__init__.py", report) def test_package_only_import(self) -> None: """ Verify importing a package with only __init__.py adds a dependency. """ - # Prepare directory structure for the package. - package_dir = Path("package_only_tmp") # TODO: use self.get_scratch_space and hio.to_file # TODO: use hio.create_dir # TODO: add a descrition of how the dir and files look like - package_dir.mkdir(exist_ok=True) + # Prepare inputs. + package_dir = path.Path(self.get_scratch_space()) subdir = package_dir / "subpackage" - subdir.mkdir(exist_ok=True) - # Create `__init__.py` for the subdir. - with open(subdir / "__init__.py", "w") as f: - f.write("") - # Create module that imports the package. - with open(package_dir / "module_b.py", "w") as f: - f.write("import subpackage\n") - # TODO: No need for deleting. - try: - # Initialize dependency graph and build it. - graph = ichdegra.DependencyGraph(str(package_dir)) - graph.build_graph() - report = graph.get_text_report() - # Verify the import of a subpackage is resolved as a dependency. - assert ( - f"{package_dir}/module_b.py imports {package_dir}/subpackage/__init__.py" - in report - ) - finally: - # Clean up package directory. - shutil.rmtree(package_dir) + hio.create_dir(subdir, incremental=True) + hio.to_file(str(subdir / "__init__.py"), "") + hio.to_file(str(package_dir / "module_b.py"), "import subpackage") + # Directory structure: + # tmp.scratch/ + # subpackage/ + # __init__.py + # module_b.py + # Run. 
+ graph = ichdegra.DependencyGraph(str(package_dir)) + graph.build_graph() + report = graph.get_text_report() + # Check. + self.assertIn( + "tmp.scratch/module_b.py imports tmp.scratch/subpackage/__init__.py", + report + ) def test_package_import(self) -> None: """ Verify nested package imports resolve to __init__.py. """ - # Prepare nested package directory structure. - package_dir = Path("package_tmp") - package_dir.mkdir(exist_ok=True) + # Prepare inputs. + package_dir = path.Path(self.get_scratch_space()) subdir = package_dir / "subpackage" - subdir.mkdir(exist_ok=True) subsubdir = subdir / "subsubpackage" - subsubdir.mkdir(exist_ok=True) module_dir = subsubdir / "module_a" - module_dir.mkdir(exist_ok=True) - # Create `__init__.py` in each directory. - with open(subdir / "__init__.py", "w") as f: - f.write("") - with open(subsubdir / "__init__.py", "w") as f: - f.write("") - with open(module_dir / "__init__.py", "w") as f: - f.write("") - # Create a module that imports the nested package. - with open(package_dir / "module_b.py", "w") as f: - f.write("import subpackage.subsubpackage.module_a\n") - try: - # Initialize dependency graph and build it. - graph = ichdegra.DependencyGraph(str(package_dir)) - graph.build_graph() - report = graph.get_text_report() - # Verify the nested import is resolved as a dependency. - assert ( - f"{package_dir}/module_b.py imports {package_dir}/subpackage/subsubpackage/module_a/__init__.py" - in report - ) - finally: - # Clean up package directory. 
- shutil.rmtree(package_dir) + hio.create_dir(subdir, incremental=True) + hio.create_dir(subsubdir, incremental=True) + hio.create_dir(module_dir, incremental=True) + hio.to_file(str(subdir / "__init__.py"), "") + hio.to_file(str(subsubdir / "__init__.py"), "") + hio.to_file(str(module_dir / "__init__.py"), "") + hio.to_file(str(package_dir / "module_b.py"), "import subpackage.subsubpackage.module_a") + # Directory structure: + # tmp.scratch/ + # subpackage/ + # __init__.py + # subsubpackage/ + # __init__.py + # module_a/ + # __init__.py + # module_b.py + # Run. + graph = ichdegra.DependencyGraph(str(package_dir)) + graph.build_graph() + # Check. + report = graph.get_text_report() + self.assertIn( + "tmp.scratch/module_b.py imports " + "tmp.scratch/subpackage/subsubpackage/module_a/__init__.py", + report + ) def test_unresolved_nested_import(self) -> None: """ Verify unresolved nested imports result in no dependencies. """ - # Prepare directory structure where nested module is missing. - package_dir = Path("unresolved_tmp") - package_dir.mkdir(exist_ok=True) + # Prepare inputs. + package_dir = path.Path(self.get_scratch_space()) subdir = package_dir / "subpackage" - subdir.mkdir(exist_ok=True) - with open(subdir / "__init__.py", "w") as f: - f.write("") - # Create a module that imports a non-existent nested package. - with open(package_dir / "module_b.py", "w") as f: - f.write("import subpackage.subsubpackage.module_a\n") - try: - # Initialize dependency graph and build it. - graph = ichdegra.DependencyGraph(str(package_dir)) - graph.build_graph() - report = graph.get_text_report() - # Verify no dependencies are reported for unresolved imports. - assert f"{package_dir}/module_b.py has no dependencies" in report - finally: - # Clean up package directory. 
- shutil.rmtree(package_dir) + hio.create_dir(subdir, incremental=True) + hio.to_file(str(subdir / "__init__.py"), "") + hio.to_file(str(package_dir / "module_b.py"), "import subpackage.subsubpackage.module_a") + # Directory structure: + # tmp.scratch/ + # subpackage/ + # __init__.py + # module_b.py + # Run. + graph = ichdegra.DependencyGraph(str(package_dir)) + graph.build_graph() + report = graph.get_text_report() + # Check. + self.assertIn("tmp.scratch/module_b.py has no dependencies", report) - def test_show_cycles_filters_cyclic_dependencies( - self, test_dir: Path - ) -> None: + def test_show_cycles_filters_cyclic_dependencies(self) -> None: """ Verify show_cycles=True filters the graph to only cyclic dependencies. - - :param test_dir: Path to the test directory """ - # Create a module with no imports to ensure it's filtered out. - with open(test_dir / "module_f.py", "w") as f: - f.write("# No imports\n") - # Build the graph with show_cycles=True to filter out everything but cycles. + # Prepare inputs. + test_dir = self.get_test_dir() + hio.to_file(str(test_dir / "module_f.py"), "# No imports") graph = ichdegra.DependencyGraph(str(test_dir), show_cycles=True) + # Run. graph.build_graph() - # Get the text report. report = graph.get_text_report() - # Expected output: Only cyclic dependencies (module_d and module_e) should be shown. - assert f"{test_dir}/module_d.py imports {test_dir}/module_e.py" in report - assert f"{test_dir}/module_e.py imports {test_dir}/module_d.py" in report - # Verify that non-cyclic `module_f` is not in the report. - assert f"{test_dir}/module_f.py" not in report + # Check. 
+ self.assertIn("tmp.scratch/module_d.py imports tmp.scratch/module_e.py", report) + self.assertIn("tmp.scratch/module_e.py imports tmp.scratch/module_d.py", report) + self.assertFalse("tmp.scratch/module_f.py" in report) \ No newline at end of file From ce9f0050faae531ac80ee18f4b7c60713ca3a1a9 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Fri, 9 May 2025 21:01:22 -0400 Subject: [PATCH 15/36] Update: handle case for show_deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .gitignore | 8 +++++++- import_check/dependency_graph.py | 8 ++++++-- tasks.py | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index f68d0f85a..1c881b1e9 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,10 @@ report.txt report_cycles.txt report_max_level.txt __pycache__/ -dependency_report.txt \ No newline at end of file +dependency_report.txt +devops/compose/local.docker-compose.yml +tmp.precommit_output.txt +tmp.pytest.log +tmp.amp_normalize_import.txt +source +import_check/test/outcomes/TestDependencyGraph.test_dot_output/output/test.txt diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 0a50556ca..2c6997bd1 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -39,7 +39,8 @@ def __init__( """ Initialize the DependencyGraph with directory and analysis parameters. """ - # _LOG.debug(hprint.func_signature_to_str()) + # Following caused ValueError: Unable to determine caller function. 
+ # _LOG.debug(hprint.func_signature_to_str()) # Initialize directory path print(f"Type of Path: {type(Path)}") self.directory = Path(directory).resolve() @@ -94,10 +95,13 @@ def build_graph(self, abort_on_error: bool = False) -> None: imports = ( [name.name for name in node.names] if isinstance(node, ast.Import) - else [node.module] + else [node.module] if node.module is not None else [] ) _LOG.debug(hprint.to_str("imports")) for imp in imports: + if imp is None: + _LOG.warning("Skipping None import in file %s", py_file) + continue _LOG.info("Found import: %s", imp) _LOG.debug(hprint.to_str("imp")) imp_path = self._resolve_import(imp, py_file) diff --git a/tasks.py b/tasks.py index ee26e26d0..bfa025760 100644 --- a/tasks.py +++ b/tasks.py @@ -183,7 +183,7 @@ def show_deps( directory=".", format="text", output_file=None, - max_level=None, + max_level=-1, show_cycles=False, ): """ From acd3039fb74e401dc6fba09b4309d4e418d75247 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sat, 10 May 2025 12:01:21 -0400 Subject: [PATCH 16/36] Updates: lint fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/test/test_dependency_graph.py | 52 ++++++++++++++++------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index 0a361fd04..aa299dffd 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -5,10 +5,12 @@ import helpers.hunit_test as hunitest import import_check.dependency_graph as ichdegra + # ############################################################################# # TestDependencyGraph # ############################################################################# + # 
############################################################################# # TestDependencyGraph # ############################################################################# @@ -18,6 +20,7 @@ class TestDependencyGraph(hunitest.TestCase): def get_test_dir(self) -> path.Path: """ Create a temporary directory with test files. + :return: Path to the temporary directory. """ # Prepare directory. @@ -56,8 +59,12 @@ def test_multiple_dependencies(self) -> None: graph.build_graph() report = graph.get_text_report() # Check. - self.assertIn("tmp.scratch/module_c.py imports tmp.scratch/module_b.py", report) - self.assertIn("tmp.scratch/module_b.py imports tmp.scratch/module_a.py", report) + self.assertIn( + "tmp.scratch/module_c.py imports tmp.scratch/module_b.py", report + ) + self.assertIn( + "tmp.scratch/module_b.py imports tmp.scratch/module_a.py", report + ) def test_circular_dependencies(self) -> None: """ @@ -70,8 +77,12 @@ def test_circular_dependencies(self) -> None: graph.build_graph() report = graph.get_text_report() # Check. - self.assertIn("tmp.scratch/module_d.py imports tmp.scratch/module_e.py", report) - self.assertIn("tmp.scratch/module_e.py imports tmp.scratch/module_d.py", report) + self.assertIn( + "tmp.scratch/module_d.py imports tmp.scratch/module_e.py", report + ) + self.assertIn( + "tmp.scratch/module_e.py imports tmp.scratch/module_d.py", report + ) def test_dot_output(self) -> None: """ @@ -97,7 +108,10 @@ def test_syntax_error_handling(self) -> None: """ # Prepare inputs. test_dir = self.get_test_dir() - hio.to_file(str(test_dir / "module_invalid.py"), "def invalid_syntax() # Missing colon\n") + hio.to_file( + str(test_dir / "module_invalid.py"), + "def invalid_syntax() # Missing colon\n", + ) graph = ichdegra.DependencyGraph(str(test_dir)) # Run. graph.build_graph() @@ -119,7 +133,9 @@ def test_import_directory_only(self) -> None: graph.build_graph() report = graph.get_text_report() # Check. 
- self.assertIn("tmp.scratch/module_f.py imports tmp.scratch/__init__.py", report) + self.assertIn( + "tmp.scratch/module_f.py imports tmp.scratch/__init__.py", report + ) def test_package_only_import(self) -> None: """ @@ -146,7 +162,7 @@ def test_package_only_import(self) -> None: # Check. self.assertIn( "tmp.scratch/module_b.py imports tmp.scratch/subpackage/__init__.py", - report + report, ) def test_package_import(self) -> None: @@ -164,7 +180,10 @@ def test_package_import(self) -> None: hio.to_file(str(subdir / "__init__.py"), "") hio.to_file(str(subsubdir / "__init__.py"), "") hio.to_file(str(module_dir / "__init__.py"), "") - hio.to_file(str(package_dir / "module_b.py"), "import subpackage.subsubpackage.module_a") + hio.to_file( + str(package_dir / "module_b.py"), + "import subpackage.subsubpackage.module_a", + ) # Directory structure: # tmp.scratch/ # subpackage/ @@ -182,7 +201,7 @@ def test_package_import(self) -> None: self.assertIn( "tmp.scratch/module_b.py imports " "tmp.scratch/subpackage/subsubpackage/module_a/__init__.py", - report + report, ) def test_unresolved_nested_import(self) -> None: @@ -194,7 +213,10 @@ def test_unresolved_nested_import(self) -> None: subdir = package_dir / "subpackage" hio.create_dir(subdir, incremental=True) hio.to_file(str(subdir / "__init__.py"), "") - hio.to_file(str(package_dir / "module_b.py"), "import subpackage.subsubpackage.module_a") + hio.to_file( + str(package_dir / "module_b.py"), + "import subpackage.subsubpackage.module_a", + ) # Directory structure: # tmp.scratch/ # subpackage/ @@ -219,6 +241,10 @@ def test_show_cycles_filters_cyclic_dependencies(self) -> None: graph.build_graph() report = graph.get_text_report() # Check. 
- self.assertIn("tmp.scratch/module_d.py imports tmp.scratch/module_e.py", report) - self.assertIn("tmp.scratch/module_e.py imports tmp.scratch/module_d.py", report) - self.assertFalse("tmp.scratch/module_f.py" in report) \ No newline at end of file + self.assertIn( + "tmp.scratch/module_d.py imports tmp.scratch/module_e.py", report + ) + self.assertIn( + "tmp.scratch/module_e.py imports tmp.scratch/module_d.py", report + ) + self.assertFalse("tmp.scratch/module_f.py" in report) From 6239725b619ea2fe2225817a6afa82d4d7564bcc Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sun, 11 May 2025 16:10:12 -0400 Subject: [PATCH 17/36] Update: fixed path mismatch in test_package_only_import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 2c6997bd1..d174c16e9 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -196,6 +196,17 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: parts = imp.split(".") _LOG.debug(hprint.to_str("parts")) current_dir = base_dir + # Check: direct match for directory package + dir_name_parts = base_dir.name.split(".") + if parts == dir_name_parts: + init_path = base_dir / "__init__.py" + _LOG.info("Checking base directory __init__ at %s", init_path) + if init_path.exists(): + resolved_path = init_path.relative_to(base_dir.parent).as_posix() + _LOG.info("Resolved directory self-import to: %s", resolved_path) + return resolved_path + _LOG.error("Base directory __init__.py missing at %s", init_path) + return None # Handle directory name imports. # Current directory. 
dir_name = self.directory.name From ea045b30a99f04cb218858fe3faa687b3e1536f2 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sun, 11 May 2025 18:03:50 -0400 Subject: [PATCH 18/36] Update: test_dot_output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/test/test_dependency_graph.py | 31 ++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index aa299dffd..a2be74bdc 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -93,15 +93,36 @@ def test_dot_output(self) -> None: graph = ichdegra.DependencyGraph(str(test_dir)) graph.build_graph() # Run. - output_file = "dependency_graph.dot" - graph.get_dot_file(output_file) + scratch_dir = path.Path(self.get_scratch_space()) + output_file = scratch_dir / "dependency_graph.dot" + graph.get_dot_file(str(output_file)) # TODO: use self.check_string # Check. - self.assertTrue(os.path.exists(output_file)) - with open(output_file, "r", encoding="utf-8") as f: - content = f.read() + # Verify the DOT file content matches the expected golden outcome. + content = hio.from_file(str(output_file), encoding="utf-8") self.check_string(content) + + def test_dot_output(self) -> None: + """ + Verify that the DependencyGraph generates a DOT file with the correct format, + representing module dependencies as a directed graph. + """ + # Prepare inputs: Create a temporary directory with test files and initialize graph. + test_dir = self.get_test_dir() + graph = ichdegra.DependencyGraph(str(test_dir)) + graph.build_graph() + + # Run: Generate the DOT file in a temporary scratch space. 
+ scratch_dir = path.Path(self.get_scratch_space()) + output_file = scratch_dir / "dependency_graph.dot" + graph.get_dot_file(str(output_file)) + + # Check: Verify the DOT file content matches the expected golden outcome. + content = hio.from_file(str(output_file), encoding="utf-8") + self.check_string(content) + + def test_syntax_error_handling(self) -> None: """ Verify syntax errors in files are handled without crashing. From d2d72fc9722d7dcb67249e9322104de1221c022a Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sun, 11 May 2025 18:09:21 -0400 Subject: [PATCH 19/36] Updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/test/test_dependency_graph.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index a2be74bdc..65123ee79 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -102,26 +102,6 @@ def test_dot_output(self) -> None: content = hio.from_file(str(output_file), encoding="utf-8") self.check_string(content) - - def test_dot_output(self) -> None: - """ - Verify that the DependencyGraph generates a DOT file with the correct format, - representing module dependencies as a directed graph. - """ - # Prepare inputs: Create a temporary directory with test files and initialize graph. - test_dir = self.get_test_dir() - graph = ichdegra.DependencyGraph(str(test_dir)) - graph.build_graph() - - # Run: Generate the DOT file in a temporary scratch space. - scratch_dir = path.Path(self.get_scratch_space()) - output_file = scratch_dir / "dependency_graph.dot" - graph.get_dot_file(str(output_file)) - - # Check: Verify the DOT file content matches the expected golden outcome. 
- content = hio.from_file(str(output_file), encoding="utf-8") - self.check_string(content) - def test_syntax_error_handling(self) -> None: """ From 47746fb77e9b74f3ce203f17fb9aa7c1d1d54410 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sun, 11 May 2025 19:08:15 -0400 Subject: [PATCH 20/36] Update: Typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/test/test_dependency_graph.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index 65123ee79..f342883e9 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -1,4 +1,3 @@ -import os import pathlib as path # Updated import style import helpers.hio as hio @@ -10,10 +9,6 @@ # TestDependencyGraph # ############################################################################# - -# ############################################################################# -# TestDependencyGraph -# ############################################################################# # TODO: class TestDependencyGraph(hunitest.TestCase): class TestDependencyGraph(hunitest.TestCase): # TODO: use self.get_scratch_dir() and make this a function that is called @@ -102,7 +97,6 @@ def test_dot_output(self) -> None: content = hio.from_file(str(output_file), encoding="utf-8") self.check_string(content) - def test_syntax_error_handling(self) -> None: """ Verify syntax errors in files are handled without crashing. 
From 572fb229f37366c42635acdff7941b940ee8b640 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Mon, 12 May 2025 08:55:22 -0400 Subject: [PATCH 21/36] Update: .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1c881b1e9..9d75ff875 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,4 @@ tmp.precommit_output.txt tmp.pytest.log tmp.amp_normalize_import.txt source -import_check/test/outcomes/TestDependencyGraph.test_dot_output/output/test.txt +devops/compose/tmp.docker-compose.yml From 70652a3fd53c2c6430bd679110c6a5c122e2e38b Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Mon, 12 May 2025 08:59:53 -0400 Subject: [PATCH 22/36] Updates: styling fix and outcome MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 67 +++++++++++++------ .../Test_show_imports.test1/output/output.txt | 6 +- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index d174c16e9..a13bde05a 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -20,27 +20,31 @@ # DependencyGraph # ############################################################################# + class DependencyGraph: """ Generate a dependency graph for intra-directory imports. :param directory: Path to the directory to analyze. :param max_level: Max directory depth to analyze (-1 for no limit). 
- :param show_cycles: If True, include only cyclic dependencies in the graph. + :param show_cycles: If True, include only cyclic dependencies in the + graph. """ def __init__( self, directory: str, *, - max_level: Optional[int] = -1, # TODO: Use -1 instead of None to simplify typing. + max_level: Optional[ + int + ] = -1, # TODO: Use -1 instead of None to simplify typing. show_cycles: bool = False, ) -> None: """ Initialize the DependencyGraph with directory and analysis parameters. """ - # Following caused ValueError: Unable to determine caller function. - # _LOG.debug(hprint.func_signature_to_str()) + # Following caused ValueError: Unable to determine caller function. + # _LOG.debug(hprint.func_signature_to_str()) # Initialize directory path print(f"Type of Path: {type(Path)}") self.directory = Path(directory).resolve() @@ -55,8 +59,8 @@ def build_graph(self, abort_on_error: bool = False) -> None: """ Build a directed graph of intra-directory dependencies. - :param abort_on_error: If True, raise SyntaxError on parsing failures; if False, - skip invalid files. + :param abort_on_error: If True, raise SyntaxError on parsing + failures; if False, skip invalid files. """ # _LOG.debug(hprint.func_signature_to_str()) # Prepare directory analysis. @@ -67,14 +71,17 @@ def build_graph(self, abort_on_error: bool = False) -> None: py_files = [ path for path in self.directory.rglob("*.py") - if self.max_level == -1 or (len(path.parent.parts) - base_depth) <= self.max_level + if self.max_level == -1 + or (len(path.parent.parts) - base_depth) <= self.max_level ] _LOG.info("Found Python files: %s", py_files) _LOG.debug(hprint.to_str("py_files")) # Process each Python file to build the dependency graph. 
for py_file in py_files: relative_path = py_file.relative_to(self.directory.parent).as_posix() - _LOG.info("Processing file %s, relative path: %s", py_file, relative_path) + _LOG.info( + "Processing file %s, relative path: %s", py_file, relative_path + ) _LOG.debug(hprint.to_str("relative_path")) self.graph.add_node(relative_path) # TODO: Use hio.from_file and to_file to write. @@ -100,13 +107,17 @@ def build_graph(self, abort_on_error: bool = False) -> None: _LOG.debug(hprint.to_str("imports")) for imp in imports: if imp is None: - _LOG.warning("Skipping None import in file %s", py_file) + _LOG.warning( + "Skipping None import in file %s", py_file + ) continue _LOG.info("Found import: %s", imp) _LOG.debug(hprint.to_str("imp")) imp_path = self._resolve_import(imp, py_file) if imp_path: - _LOG.info("Adding edge: %s -> %s", relative_path, imp_path) + _LOG.info( + "Adding edge: %s -> %s", relative_path, imp_path + ) self.graph.add_edge(relative_path, imp_path) _LOG.debug(hprint.to_str("self.graph")) else: @@ -178,6 +189,7 @@ def _filter_cycles(self) -> None: len(self.graph.nodes), len(self.graph.edges), ) + # TODO: -> Optional[str] def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: """ @@ -214,7 +226,9 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: parent_name = self.directory.parent.name # Grandparent directory, if exists. parent_parent_name = ( - self.directory.parent.parent.name if len(self.directory.parent.parts) > 1 else "" + self.directory.parent.parent.name + if len(self.directory.parent.parts) > 1 + else "" ) # Collect all parent directory names into a list for validation. 
valid_names = [dir_name, parent_name, parent_parent_name] @@ -225,8 +239,11 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: _LOG.info( "Directory name: %s, Parent name: %s, Parent parent name: %s, " "Valid names: %s, Import first part: %s", - dir_name, parent_name, parent_parent_name, valid_names, - parts[0] if parts else "" + dir_name, + parent_name, + parent_parent_name, + valid_names, + parts[0] if parts else "", ) result = None if parts and parts[0] in valid_names: @@ -236,9 +253,15 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: # Only if the `dir` name is given (e.g., "helpers"), check for # `__init__.py`. init_path = base_dir / "__init__.py" - _LOG.info("Checking __init__.py at %s, exists: %s", init_path, init_path.exists()) + _LOG.info( + "Checking __init__.py at %s, exists: %s", + init_path, + init_path.exists(), + ) if init_path.exists(): - resolved_path = init_path.relative_to(self.directory.parent).as_posix() + resolved_path = init_path.relative_to( + self.directory.parent + ).as_posix() _LOG.info("Resolved to: %s", resolved_path) result = resolved_path else: @@ -251,7 +274,9 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: _LOG.debug(hprint.to_str("package_path")) if package_path.exists(): if i == len(parts) - 1: - resolved_path = package_path.relative_to(self.directory.parent).as_posix() + resolved_path = package_path.relative_to( + self.directory.parent + ).as_posix() _LOG.info("Resolved to: %s", resolved_path) result = resolved_path break @@ -265,7 +290,9 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: if module_path.exists(): # If last part, return the `.py` path. 
if i == len(parts) - 1: - resolved_path = module_path.relative_to(self.directory.parent).as_posix() + resolved_path = module_path.relative_to( + self.directory.parent + ).as_posix() _LOG.info("Resolved to: %s", resolved_path) result = resolved_path break @@ -277,9 +304,11 @@ def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: ) break # If neither exists, the import cannot be resolved. - _LOG.info("Could not resolve import '%s' at part '%s'", imp, module_name) + _LOG.info( + "Could not resolve import '%s' at part '%s'", imp, module_name + ) break # Return None if module resolution was unsuccessful. if result is None: _LOG.info("Could not resolve import '%s'", imp) - return result \ No newline at end of file + return result diff --git a/import_check/test/outcomes/Test_show_imports.test1/output/output.txt b/import_check/test/outcomes/Test_show_imports.test1/output/output.txt index 0977b1d2a..8c2bcfc54 100644 --- a/import_check/test/outcomes/Test_show_imports.test1/output/output.txt +++ b/import_check/test/outcomes/Test_show_imports.test1/output/output.txt @@ -19,7 +19,7 @@ "input.file2" ], "imports": null, - "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/__init__.py", + "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/__init__.py", "truncated": false, "is_external": false, "is_file": false @@ -31,7 +31,7 @@ "input.file2" ], "imports": null, - "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/file1.py", + "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/file1.py", "truncated": false, "is_external": false, "is_file": true @@ -45,7 +45,7 @@ "input", "input.file1" ], - "path": "/app/helpers_root/import_check/test/outcomes/Test_show_imports.test1/input/file2.py", + "path": "/app/import_check/test/outcomes/Test_show_imports.test1/input/file2.py", "truncated": false, "is_external": false, "is_file": true From 
1376ee81e61f344fcdfdc313d47a661b22e1f1de Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Mon, 12 May 2025 09:31:18 -0400 Subject: [PATCH 23/36] Update: outcome test fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- .../output/test.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 import_check/test/outcomes/TestDependencyGraph.test_dot_output/output/test.txt diff --git a/import_check/test/outcomes/TestDependencyGraph.test_dot_output/output/test.txt b/import_check/test/outcomes/TestDependencyGraph.test_dot_output/output/test.txt new file mode 100644 index 000000000..b059464eb --- /dev/null +++ b/import_check/test/outcomes/TestDependencyGraph.test_dot_output/output/test.txt @@ -0,0 +1,11 @@ +strict digraph { +"tmp.scratch/module_e.py"; +"tmp.scratch/module_d.py"; +"tmp.scratch/module_a.py"; +"tmp.scratch/module_c.py"; +"tmp.scratch/module_b.py"; +"tmp.scratch/module_e.py" -> "tmp.scratch/module_d.py"; +"tmp.scratch/module_d.py" -> "tmp.scratch/module_e.py"; +"tmp.scratch/module_c.py" -> "tmp.scratch/module_b.py"; +"tmp.scratch/module_b.py" -> "tmp.scratch/module_a.py"; +} From ccd2df648505f6cc9a246dd7aa5ef23d8928b5b0 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Mon, 12 May 2025 10:01:07 -0400 Subject: [PATCH 24/36] Updates: lint fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 9 +++++---- import_check/test/test_dependency_graph.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 
a13bde05a..ec81edbbf 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -6,7 +6,7 @@ import ast import logging -from pathlib import Path +import pathlib as path from typing import Optional import networkx as nx @@ -16,6 +16,7 @@ _LOG = logging.getLogger(__name__) + # ############################################################################# # DependencyGraph # ############################################################################# @@ -46,8 +47,8 @@ def __init__( # Following caused ValueError: Unable to determine caller function. # _LOG.debug(hprint.func_signature_to_str()) # Initialize directory path - print(f"Type of Path: {type(Path)}") - self.directory = Path(directory).resolve() + print(f"Type of Path: {type(path.Path)}") + self.directory = path.Path(directory).resolve() # Create a directed graph for dependencies. self.graph: nx.DiGraph = nx.DiGraph() # Set maximum directory depth. @@ -191,7 +192,7 @@ def _filter_cycles(self) -> None: ) # TODO: -> Optional[str] - def _resolve_import(self, imp: str, py_file: Path) -> Optional[str]: + def _resolve_import(self, imp: str, py_file: path.Path) -> Optional[str]: """ Resolve an import to a file path within the directory. 
diff --git a/import_check/test/test_dependency_graph.py b/import_check/test/test_dependency_graph.py index f342883e9..c77f6e413 100644 --- a/import_check/test/test_dependency_graph.py +++ b/import_check/test/test_dependency_graph.py @@ -9,6 +9,7 @@ # TestDependencyGraph # ############################################################################# + # TODO: class TestDependencyGraph(hunitest.TestCase): class TestDependencyGraph(hunitest.TestCase): # TODO: use self.get_scratch_dir() and make this a function that is called From 009a71f07a1dea79748752cfab7a4606c5726eae Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Tue, 13 May 2025 10:28:29 -0400 Subject: [PATCH 25/36] Updates: comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: - 'check_master' passed - 'check_author' passed - 'check_file_size' passed - 'check_python_compile' passed - 'check_gitleaks' passed All checks passed ✅ --- import_check/dependency_graph.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index ec81edbbf..3bd740a38 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -1,7 +1,15 @@ """ -Import as: +Build graph for dependencies. -import import_check.dependency_graph as ichdegra +Examples: +# Show file dependencies. +> show_deps.py + +# Show directory-level dependencies. +> show_deps.py --max_level 2 + +# Detect cyclic dependencies. 
+> show_deps.py --show_cycles """ import ast From c2a97650720c599020216b3adeb635ff209ad64d Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 15 May 2025 13:30:23 -0400 Subject: [PATCH 26/36] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- docs/tools/all.import_check.reference.md | 56 ++++++++++++------------ setup.py | 0 tasks.py | 5 ++- 3 files changed, 32 insertions(+), 29 deletions(-) delete mode 100644 setup.py diff --git a/docs/tools/all.import_check.reference.md b/docs/tools/all.import_check.reference.md index 9db542ed8..bb9c5530e 100644 --- a/docs/tools/all.import_check.reference.md +++ b/docs/tools/all.import_check.reference.md @@ -33,7 +33,7 @@ A tool for visualizing dependencies among files and packages. ## Basic usage ```bash ->./show_imports.py [flags] +> show_imports.py [flags] ``` The script will produce by default an output `.png` file named @@ -71,7 +71,7 @@ example Basic usage example: ```bash ->./show_imports.py --out_filename example/output/basic.png example/input +> show_imports.py --out_filename example/output/basic.png example/input ``` Will produce the following output: @@ -85,7 +85,7 @@ To visualize dependencies at a directory level, specify `--dir` option. Example: ```bash ->./show_imports.py --dir --out_filename example/output/directory_deps.png example/input +> show_imports.py --dir --out_filename example/output/directory_deps.png example/input ``` Output: @@ -100,7 +100,7 @@ specifying the `--ext` option. Example: ```bash ->./show_imports.py --ext --out_filename example/output/external_deps.png example/input +> show_imports.py --ext --out_filename example/output/external_deps.png example/input ``` Output: @@ -115,7 +115,7 @@ can set the `--max_level` option. 
Example: ```bash ->./show_imports.py --max_level 2 --out_filename example/output/max_level_deps.png example/input +> show_imports.py --max_level 2 --out_filename example/output/max_level_deps.png example/input ``` Output: @@ -130,7 +130,7 @@ When you want to visualize cyclic dependencies only, you can set the Example: ```bash ->./show_imports.py --show_cycles --out_filename example/output/cyclic_deps.png example/input +> show_imports.py --show_cycles --out_filename example/output/cyclic_deps.png example/input ``` Output: @@ -149,7 +149,7 @@ its limitations: considered (e.g., if a module is missing or not installed, it will not be included regardless of whether it is being imported) - All the imports inside submodules should be absolute -- There are certain requirements related to the presence of _modules_ in and +- There are certain requirements related to the presence of modules in and above the target directory, which are described in detail below - Here, a module is a directory that contains an `__init__.py` file @@ -314,7 +314,7 @@ A tool for detecting circular dependencies among files and packages. ## Basic usage ```bash ->./detect_import_cycles.py +> detect_import_cycles.py ``` The script will either exit with an error, logging the groups of files with @@ -328,7 +328,7 @@ For the `import_check/example/input` directory, the script will produce the following output, detecting two import cycles: ```bash ->./detect_import_cycles.py example/input +> detect_import_cycles.py example/input ``` ```bash @@ -352,7 +352,7 @@ ERROR detect_import_cycles.py _main:73 Cyclic imports detected: (input.subdir ## Command usage ```bash -i show_deps [--directory ] [--format ] [--output_file ] [--max_level ] [--show_cycles] +> i show_deps [--directory ] [--format ] [--output_file ] [--max_level ] [--show_cycles] ``` - **Default behavior**: Produces a text report, printed to stdout. @@ -377,7 +377,7 @@ subdirectories like `notebooks/`. 
Create a text report of all intra-directory dependencies: ```bash ->i show_deps --directory helpers --format text > report.txt +> i show_deps --directory helpers --format text > report.txt ``` Output in `report.txt`: @@ -394,7 +394,7 @@ helpers/hio.py has no dependencies Create a DOT file for visualization: ```bash ->i show_deps --directory helpers --format dot --output_file dependency_graph.dot +> i show_deps --directory helpers --format dot --output_file dependency_graph.dot >dot -Tsvg dependency_graph.dot -o dependency_graph.svg >open dependency_graph.svg ``` @@ -412,7 +412,7 @@ Restrict analysis to a certain depth with `--max_level` (e.g., `--max_level 2` includes `helpers/notebooks/`, excludes deeper subdirectories): ```bash ->i show_deps --directory helpers --format text --max_level 2 > report_max_level.txt +> i show_deps --directory helpers --format text --max_level 2 > report_max_level.txt ``` Output in `report_max_level.txt`: @@ -427,7 +427,7 @@ helpers/hio.py has no dependencies Visualize the limited graph: ```bash ->i show_deps --directory helpers --format dot --output_file dependency_graph.dot --max_level 2 +> i show_deps --directory helpers --format dot --output_file dependency_graph.dot --max_level 2 >neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 >open dependency_graph.svg ``` @@ -437,7 +437,7 @@ Visualize the limited graph: Show only cyclic dependencies with `--show_cycles`: ```bash ->i show_deps --directory helpers --format text --show_cycles > report_cycles.txt +> i show_deps --directory helpers --format text --show_cycles > report_cycles.txt ``` Output in `report_cycles.txt` (if cycles exist): @@ -451,9 +451,9 @@ helpers/module_e.py imports helpers/module_d.py Visualize the cyclic dependencies: ```bash ->i show_deps --directory helpers --format dot --output_file dependency_graph.dot --show_cycles ->neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 
->open dependency_graph.svg +> i show_deps --directory helpers --format dot --output_file dependency_graph.dot --show_cycles +> neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 +> open dependency_graph.svg ``` ## Options @@ -505,34 +505,34 @@ NotModuleError: The following dirs have to be modules (add `__init__.py`): ['hel 1. **Activate the `helpers` environment**: From the `helpers` root directory: ```bash - poetry shell; export PYTHONPATH=$PYTHONPATH:$(pwd) + > poetry shell; export PYTHONPATH=$PYTHONPATH:$(pwd) ``` 2. **Generate a dependency report**: Create a text report: ```bash - i show_deps --directory helpers --format text > report.txt + > i show_deps --directory helpers --format text > report.txt ``` Or create a DOT file for visualization: ```bash - i show_deps --directory helpers --format dot --output_file dependency_graph.dot + > i show_deps --directory helpers --format dot --output_file dependency_graph.dot ``` 3. **Visualize the graph** (optional): Convert the DOT file to SVG and view: ```bash - neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 - open dependency_graph.svg + > neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 + > open dependency_graph.svg ``` **Troubleshooting**: If `invoke` fails (e.g., `No idea what '--output_file' is!`), use the fallback script: ```bash -python3 ~/src/helpers1/generate_deps.py -neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 -open dependency_graph.svg +> generate_deps.py +> neato -Tsvg dependency_graph.dot -o dependency_graph.svg -Goverlap=scale -Gsep=+0.5 -Gepsilon=0.01 +> open dependency_graph.svg ``` **Tips**: The `generate_deps.py` script applies customizations like filtering @@ -540,4 +540,6 @@ nodes with no dependencies and shortening labels (e.g., removing `helpers/` prefix). 
Adjust Graphviz attributes (`ranksep=2.0`, `nodesep=1.0`, `splines=spline`, `overlap=false`, `fontsize=10`) for better visualization. -**Last review**: 2025-05-01 Ehaab Basil +**Last review**: +- 2025-05-14, GP Saggese +- 2025-05-01, Ehaab Basil diff --git a/setup.py b/setup.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tasks.py b/tasks.py index bfa025760..5e165e112 100644 --- a/tasks.py +++ b/tasks.py @@ -94,6 +94,7 @@ pytest_repro, run_blank_tests, run_coverage_report, + run_coverage, run_fast_slow_superslow_tests, run_fast_slow_tests, run_fast_tests, @@ -120,6 +121,7 @@ from helpers.lib_tasks import ( # isort: skip # noqa: F401 # pylint: disable=unused-import copy_ecs_task_definition_image_url, docker_release_multi_build_dev_image, + docker_release_multi_arch_prod_image, docker_tag_push_multi_build_local_image_as_dev, release_dags_to_airflow, integrate_file, @@ -155,8 +157,7 @@ # TODO(gp): Move it to lib_tasks. -# ECR_BASE_PATH = os.environ["CSFY_ECR_BASE_PATH"] -ECR_BASE_PATH = os.environ.get("CSFY_ECR_BASE_PATH", "") +ECR_BASE_PATH = os.environ["CSFY_ECR_BASE_PATH"] def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: From b1b32303f7ec4fb38b9f904161f934cd86178975 Mon Sep 17 00:00:00 2001 From: GP Saggese Date: Thu, 15 May 2025 14:15:34 -0400 Subject: [PATCH 27/36] Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../thin_client/thin_client_utils.sh | 8 ++- helpers/lib_tasks_lint.py | 52 +++++++++++++++++++ import_check/dependency_graph.py | 2 - tasks.py | 48 ----------------- 4 files changed, 59 insertions(+), 51 deletions(-) diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index df3cbd127..54a073cba 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -201,9 +201,15 @@ set_path() { echo "# 
set_path()" local dev_script_dir=$1 dassert_dir_exists $dev_script_dir + dassert_dir_exists $GIT_ROOT_DIR export PATH=$(pwd):$PATH + # Add the top Git dir to PATH. export PATH=$GIT_ROOT_DIR:$PATH - # Add to the PATH all the first level directory under `dev_scripts`. + # Add import_check. + IMPORT_CHECK_DIR=$GIT_ROOT_DIR/import_check + dassert_dir_exists $IMPORT_CHECK_DIR + export PATH=$IMPORT_CHECK_DIR:$PATH + # Add all the first level directory under `dev_scripts` to PATH. export PATH_TMP="$(find $dev_script_dir -maxdepth 1 -type d -not -path "$(pwd)" | tr '\n' ':' | sed 's/:$//')" export PATH=$PATH_TMP:$PATH # Remove duplicates. diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index 153700b9e..118233efe 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -176,6 +176,58 @@ def lint_detect_cycles( # type: ignore hlitauti.run(ctx, cmd) +# TODO(ehaabbasil): c -> ctx +# TODO(ehaabbasil): Use REST for docstrings. +# TODO(ehaabbasil): Make this invoke target like the others. +# TODO +@task +def lint_show_deps( + c, + directory=".", + format="text", + output_file=None, + max_level=-1, + show_cycles=False, +): + """ + Generate a dependency report for a specified directory. + + Args: + c: Invoke context (required by invoke, unused). + directory (str): Directory to analyze (default: current directory). + format (str): Output format ('text' or 'dot', default: 'text'). + output_file (str, optional): File to write output to (default: None). + max_level (int, optional): Max directory depth to analyze (default: None). + show_cycles (bool, optional): Show only cyclic dependencies (default: False). + + Raises: + ValueError: If the format is neither 'text' nor 'dot'. 
+ """ + # Convert max_level to int if provided + max_level = int(max_level) if max_level is not None else None + # Convert show_cycles to bool + show_cycles = show_cycles in (True, "True", "true", "1") + graph = DependencyGraph( + directory, max_level=max_level, show_cycles=show_cycles + ) + graph.build_graph() + if format == "text": + report = graph.get_text_report() + if output_file: + with open(output_file, "w") as f: + f.write(report) + print(f"Report written to {output_file}") + else: + print(report) + elif format == "dot": + if not output_file: + output_file = "dependency_graph.dot" + graph.get_dot_file(output_file) + print(f"DOT file written to {output_file}") + else: + raise ValueError(f"Unsupported format: {format}") + + # pylint: disable=line-too-long @task def lint( # type: ignore diff --git a/import_check/dependency_graph.py b/import_check/dependency_graph.py index 3bd740a38..53bd346d2 100644 --- a/import_check/dependency_graph.py +++ b/import_check/dependency_graph.py @@ -93,8 +93,6 @@ def build_graph(self, abort_on_error: bool = False) -> None: ) _LOG.debug(hprint.to_str("relative_path")) self.graph.add_node(relative_path) - # TODO: Use hio.from_file and to_file to write. - # TODO: Let's add a switch `abort_on_error` to continue or abort. # Parse the file as an Abstract Syntax Tree (AST). try: file_content = hio.from_file(str(py_file)) diff --git a/tasks.py b/tasks.py index 5e165e112..cd048b4e9 100644 --- a/tasks.py +++ b/tasks.py @@ -178,54 +178,6 @@ def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: return True -@task -def show_deps( - c, - directory=".", - format="text", - output_file=None, - max_level=-1, - show_cycles=False, -): - """ - Generate a dependency report for a specified directory. - - Args: - c: Invoke context (required by invoke, unused). - directory (str): Directory to analyze (default: current directory). - format (str): Output format ('text' or 'dot', default: 'text'). 
- output_file (str, optional): File to write output to (default: None). - max_level (int, optional): Max directory depth to analyze (default: None). - show_cycles (bool, optional): Show only cyclic dependencies (default: False). - - Raises: - ValueError: If the format is neither 'text' nor 'dot'. - """ - # Convert max_level to int if provided - max_level = int(max_level) if max_level is not None else None - # Convert show_cycles to bool - show_cycles = show_cycles in (True, "True", "true", "1") - graph = DependencyGraph( - directory, max_level=max_level, show_cycles=show_cycles - ) - graph.build_graph() - if format == "text": - report = graph.get_text_report() - if output_file: - with open(output_file, "w") as f: - f.write(report) - print(f"Report written to {output_file}") - else: - print(report) - elif format == "dot": - if not output_file: - output_file = "dependency_graph.dot" - graph.get_dot_file(output_file) - print(f"DOT file written to {output_file}") - else: - raise ValueError(f"Unsupported format: {format}") - - default_params = { # TODO(Nikola): Remove prefix after everything is cleaned. # Currently there are a lot dependencies on prefix. 
From 1c5cf8ca40f79dcbc53f999bfb5969c32e05b994 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Mon, 19 May 2025 19:56:51 -0400 Subject: [PATCH 28/36] Updates: TODOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/lib_tasks_lint.py | 55 ++++++++++++--------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/helpers/lib_tasks_lint.py b/helpers/lib_tasks_lint.py index 118233efe..570bda207 100644 --- a/helpers/lib_tasks_lint.py +++ b/helpers/lib_tasks_lint.py @@ -182,50 +182,29 @@ def lint_detect_cycles( # type: ignore # TODO @task def lint_show_deps( - c, - directory=".", - format="text", - output_file=None, + ctx, + dir_name=".", + stage="prod", + version="", + out_file_name="dependency_graph.dot", max_level=-1, show_cycles=False, + log_level="INFO", ): """ Generate a dependency report for a specified directory. - - Args: - c: Invoke context (required by invoke, unused). - directory (str): Directory to analyze (default: current directory). - format (str): Output format ('text' or 'dot', default: 'text'). - output_file (str, optional): File to write output to (default: None). - max_level (int, optional): Max directory depth to analyze (default: None). - show_cycles (bool, optional): Show only cyclic dependencies (default: False). - - Raises: - ValueError: If the format is neither 'text' nor 'dot'. + + :param dir_name: The name of dir to generate dependecy report for. + :param out_file_name: Path to the output DOT file. 
+ :param max_level: Maximum directory depth to analyze (-1 for no limit) """ - # Convert max_level to int if provided - max_level = int(max_level) if max_level is not None else None - # Convert show_cycles to bool - show_cycles = show_cycles in (True, "True", "true", "1") - graph = DependencyGraph( - directory, max_level=max_level, show_cycles=show_cycles - ) - graph.build_graph() - if format == "text": - report = graph.get_text_report() - if output_file: - with open(output_file, "w") as f: - f.write(report) - print(f"Report written to {output_file}") - else: - print(report) - elif format == "dot": - if not output_file: - output_file = "dependency_graph.dot" - graph.get_dot_file(output_file) - print(f"DOT file written to {output_file}") - else: - raise ValueError(f"Unsupported format: {format}") + hlitauti.report_task() + cmd = f"python import_check/show_deps.py {dir_name} --out_file {out_file_name} --max_level {max_level}" + if show_cycles: + cmd += " --show_cycles" + cmd += f" --log_level {log_level}" + docker_cmd = _get_lint_docker_cmd("", cmd, stage=stage, version=version) + hlitauti.run(ctx, docker_cmd) # pylint: disable=line-too-long From 7d45eeeaef42df4f5d9fc2dded29b9da22a04729 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sat, 24 May 2025 17:53:02 -0400 Subject: [PATCH 29/36] Updates: show_deps.py invoke show_deps / generate_deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .DS_Store | Bin 0 -> 6148 bytes helpers/lib_tasks_lint.py | 32 +++++++++++ import_check/generate_deps.py | 40 ++++++++++++++ import_check/show_deps.py | 98 ++++++++++++++++++++++++++++++++++ tasks.py | 3 +- 5 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 .DS_Store create mode 100644 import_check/generate_deps.py create mode 100644 import_check/show_deps.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 
0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 show_deps.py . + +# Generate report with max depth 2 +> show_deps.py --max_level 2 . + +# Show only cyclic dependencies +> show_deps.py --show_cycles . +""" + +import argparse +import logging +import sys + +import helpers.hdbg as hdbg +import helpers.hparser as hparser +try: + from import_check.dependency_graph import DependencyGraph +except ImportError as e: + logging.error("Failed to import DependencyGraph: %s", str(e)) + logging.error("Ensure you are running as a module (e.g., python -m import_check.show_deps) or check package structure") + sys.exit(1) + +_LOG = logging.getLogger(__name__) + +# ############################################################################# + +def _parse() -> argparse.ArgumentParser: + """ + Parse command-line arguments. + + :return: ArgumentParser object configured with command-line options. + """ + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "directory", + type=str, + help="Path to the directory to analyze" + ) + parser.add_argument( + "--out_file", + type=str, + default="dependency_graph.dot", + help="Path to the output DOT file" + ) + parser.add_argument( + "--max_level", + type=int, + default=-1, + help="Maximum directory depth to analyze (-1 for no limit)" + ) + parser.add_argument( + "--show_cycles", + action="store_true", + help="Show only cyclic dependencies" + ) + hparser.add_verbosity_arg(parser) + return parser + +# ############################################################################# + +def _main(parser: argparse.ArgumentParser) -> None: + """ + Main function to generate dependency report. 
+ + :param parser: ArgumentParser object to parse command-line arguments. + """ + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hdbg.dassert_dir_exists(args.directory, f"{args.directory} is not a valid directory") + _LOG.info("Starting dependency analysis for %s", args.directory) + try: + graph = DependencyGraph(args.directory, max_level=args.max_level, show_cycles=args.show_cycles) + graph.build_graph() + if not graph.graph.nodes: + _LOG.info("No Python files found or no dependencies to report in %s", args.directory) + else: + report = graph.get_text_report() + print(report) + graph.get_dot_file(args.out_file) + _LOG.info("DOT file written to %s", args.out_file) + except Exception as e: + _LOG.error("Failed to generate dependency report: %s", str(e)) + sys.exit(1) + +# ############################################################################# + +if __name__ == "__main__": + _main(_parse()) \ No newline at end of file diff --git a/tasks.py b/tasks.py index cd048b4e9..d42065ab8 100644 --- a/tasks.py +++ b/tasks.py @@ -79,6 +79,8 @@ lint_check_python_files_in_docker, lint_create_branch, lint_detect_cycles, + lint_generate_deps, + lint_show_deps, print_env, print_setup, print_tasks, @@ -177,7 +179,6 @@ def _run_qa_tests(ctx: Any, stage: str, version: str) -> bool: ctx.run(cmd) return True - default_params = { # TODO(Nikola): Remove prefix after everything is cleaned. # Currently there are a lot dependencies on prefix. 
From 1ed6808227dbf82caa02f70aec0cf059e6f68c91 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sat, 24 May 2025 17:58:07 -0400 Subject: [PATCH 30/36] Update gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9d75ff875..d294a4270 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -.DS_Store .coverage dependency_graph.dot dependency_graph.svg From 4386534d65d810d510337c9d1e979e7bb73160ba Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sat, 24 May 2025 18:09:07 -0400 Subject: [PATCH 31/36] Update: conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../thin_client/thin_client_utils.sh | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index 54a073cba..dfd72d7ac 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -198,19 +198,25 @@ set_csfy_env_vars() { set_path() { - echo "# set_path()" + # Process interface. + dassert_eq_num_args $# 1 "set_path" local dev_script_dir=$1 + # dassert_dir_exists $dev_script_dir - dassert_dir_exists $GIT_ROOT_DIR + dtrace "dev_script_dir=$dev_script_dir" + # export PATH=$(pwd):$PATH - # Add the top Git dir to PATH. + dtrace "GIT_ROOT=$GIT_ROOT" + dassert_var_defined "GIT_ROOT" + # export PATH=$GIT_ROOT_DIR:$PATH - # Add import_check. - IMPORT_CHECK_DIR=$GIT_ROOT_DIR/import_check - dassert_dir_exists $IMPORT_CHECK_DIR - export PATH=$IMPORT_CHECK_DIR:$PATH - # Add all the first level directory under `dev_scripts` to PATH. 
- export PATH_TMP="$(find $dev_script_dir -maxdepth 1 -type d -not -path "$(pwd)" | tr '\n' ':' | sed 's/:$//')" + # Avoid ./.mypy_cache/3.12/app/dev_scripts_helpers + DEV_SCRIPT_HELPER_DIR=$(find . -name dev_scripts_helpers -type d -not -path "*.mypy_cache*") + dassert_dir_exists $DEV_SCRIPT_HELPER_DIR + dtrace "DEV_SCRIPT_HELPER_DIR=$DEV_SCRIPT_HELPER_DIR" + # Add to the PATH all the first level directory under `dev_scripts`. + export PATH_TMP="$(find $DEV_SCRIPT_HELPER_DIR -maxdepth 1 -type d -not -path "$(pwd)" | tr '\n' ':' | sed 's/:$//')" + dtrace "PATH_TMP=$PATH_TMP" export PATH=$PATH_TMP:$PATH # Remove duplicates. export PATH=$(remove_dups $PATH) @@ -219,6 +225,7 @@ set_path() { } + set_pythonpath() { local helpers_root_dir="$1" echo "# set_pythonpath()" From 4e78b18a75b4e08929fd8259f9a164f63843d048 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Sat, 24 May 2025 18:24:54 -0400 Subject: [PATCH 32/36] Update: requirements check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- dev_scripts_helpers/thin_client/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev_scripts_helpers/thin_client/requirements.txt b/dev_scripts_helpers/thin_client/requirements.txt index b3c2273d9..b9028c942 100644 --- a/dev_scripts_helpers/thin_client/requirements.txt +++ b/dev_scripts_helpers/thin_client/requirements.txt @@ -8,6 +8,8 @@ docker < 7 docker-compose >= 1.29.0 invoke >= 1.5.0 poetry +networkx >= 2.6.3 +pydot >= 1.4.1 pytest >= 6.0.0 s3fs # For tools like `publish_notebook.py`. 
tqdm From e66911467078cea7a2ad3d02903920900f080c62 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Tue, 3 Jun 2025 01:18:50 -0400 Subject: [PATCH 33/36] Resolve set_path conflict in thin_client_utils.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- .../thin_client/thin_client_utils.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dev_scripts_helpers/thin_client/thin_client_utils.sh b/dev_scripts_helpers/thin_client/thin_client_utils.sh index dfd72d7ac..7685557e9 100644 --- a/dev_scripts_helpers/thin_client/thin_client_utils.sh +++ b/dev_scripts_helpers/thin_client/thin_client_utils.sh @@ -204,14 +204,21 @@ set_path() { # dassert_dir_exists $dev_script_dir dtrace "dev_script_dir=$dev_script_dir" - # + # TODO(gp): Unify this as part of CmTask12257. + if [[ -n "$GIT_ROOT_DIR" ]]; then + # `GIT_ROOT_DIR` is available outside the container. + GIT_ROOT=$GIT_ROOT_DIR + elif [[ -n "$CSFY_GIT_ROOT_PATH" ]]; then + # `CSFY_GIT_ROOT_PATH` is available inside the container. + GIT_ROOT=$CSFY_GIT_ROOT_PATH + fi export PATH=$(pwd):$PATH dtrace "GIT_ROOT=$GIT_ROOT" dassert_var_defined "GIT_ROOT" - # export PATH=$GIT_ROOT_DIR:$PATH # Avoid ./.mypy_cache/3.12/app/dev_scripts_helpers - DEV_SCRIPT_HELPER_DIR=$(find . -name dev_scripts_helpers -type d -not -path "*.mypy_cache*") + DEV_SCRIPT_HELPER_DIR=$(find ${GIT_ROOT} -name dev_scripts_helpers -type d -not -path "*.mypy_cache*") + echo "DEV_SCRIPT_HELPER_DIR=$DEV_SCRIPT_HELPER_DIR" dassert_dir_exists $DEV_SCRIPT_HELPER_DIR dtrace "DEV_SCRIPT_HELPER_DIR=$DEV_SCRIPT_HELPER_DIR" # Add to the PATH all the first level directory under `dev_scripts`. 
From 3eab85763b395663a3eae81c5c55e05e15ac43c6 Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Tue, 3 Jun 2025 01:25:27 -0400 Subject: [PATCH 34/36] Resolve import conflict tasks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks.py b/tasks.py index d42065ab8..5372410c7 100644 --- a/tasks.py +++ b/tasks.py @@ -81,6 +81,7 @@ lint_detect_cycles, lint_generate_deps, lint_show_deps, + lint_sync_code, print_env, print_setup, print_tasks, From 9d4043f7f280979df655f0c6eaf2fc410e1ec8ed Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Tue, 3 Jun 2025 01:33:31 -0400 Subject: [PATCH 35/36] Resolve conflicts tasks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks.py b/tasks.py index 5372410c7..ed5e2528e 100644 --- a/tasks.py +++ b/tasks.py @@ -81,7 +81,7 @@ lint_detect_cycles, lint_generate_deps, lint_show_deps, - lint_sync_code, + # lint_sync_code, print_env, print_setup, print_tasks, From f89a51f9a716e3bcee2180202045e32ed1b53dae Mon Sep 17 00:00:00 2001 From: Ehaab Basil Date: Tue, 3 Jun 2025 01:35:09 -0400 Subject: [PATCH 36/36] Update:tasks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks.py b/tasks.py index ed5e2528e..5372410c7 100644 --- a/tasks.py +++ b/tasks.py @@ -81,7 +81,7 @@ lint_detect_cycles, lint_generate_deps, lint_show_deps, - # lint_sync_code, + lint_sync_code, print_env, print_setup, print_tasks,