From ded9c0cbf1c15e55ed25594a7e86a45ddbe988e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A5l=20R=C3=B8nning?= Date: Wed, 10 Dec 2025 18:35:03 +0100 Subject: [PATCH 1/5] Savepoint --- .../_cdf_tk/data_classes/modules.py | 66 +++++ .../test_commands/test_build_v2.py | 248 ++++++++++++++++++ .../test_data_classes/test_modules.py | 24 ++ 3 files changed, 338 insertions(+) create mode 100644 cognite_toolkit/_cdf_tk/data_classes/modules.py create mode 100644 tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py create mode 100644 tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py diff --git a/cognite_toolkit/_cdf_tk/data_classes/modules.py b/cognite_toolkit/_cdf_tk/data_classes/modules.py new file mode 100644 index 0000000000..e21ac03f94 --- /dev/null +++ b/cognite_toolkit/_cdf_tk/data_classes/modules.py @@ -0,0 +1,66 @@ +import sys +from functools import cached_property +from pathlib import Path + +from pydantic import BaseModel, ConfigDict, Field + +from cognite_toolkit._cdf_tk.constants import MODULES +from cognite_toolkit._cdf_tk.utils import iterate_modules + +from ._module_toml import ModuleToml + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class Resource(BaseModel): + model_config = ConfigDict( + frozen=True, + validate_assignment=True, + ) + + path: Path + + @classmethod + def load(cls, path: Path) -> Self: + return cls(path=path) + + +class Module(BaseModel): + model_config = ConfigDict( + frozen=True, + validate_assignment=True, + ) + + path: Path + resources: list[Resource] + definition: ModuleToml | None = None + + @classmethod + def load(cls, path: Path, resource_paths: list[Path]) -> Self: + definition = ModuleToml.load(path / ModuleToml.filename) if (path / ModuleToml.filename).exists() else None + resources = [Resource.load(path=resource_path) for resource_path in resource_paths] + return cls(path=path, resources=resources, definition=definition) + + +class ModulesDirectory(BaseModel): + model_config = ConfigDict( + frozen=True, + validate_assignment=True, + ) + + modules: list[Module] = Field(default_factory=list) + + @classmethod + def load(cls, organization_dir: Path) -> Self: + modules = [ + Module.load(path=module_path, resource_paths=resource_paths) + for module_path, resource_paths in iterate_modules(organization_dir / MODULES) + ] + return cls(modules=modules) + + @cached_property + def paths(self) -> list[Path]: + return [module.path for module in self.modules] diff --git a/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py b/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py new file mode 100644 index 0000000000..d28e2b4a7e --- /dev/null +++ b/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py @@ -0,0 +1,248 @@ +import os +from contextlib import suppress +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +import yaml +from _pytest.monkeypatch import MonkeyPatch +from cognite.client.data_classes.data_modeling import DataModelId, Space +from cognite_toolkit._cdf_tk.commands.build_v2.build_cmd import BuildCommand +from cognite_toolkit._cdf_tk.commands.build_v2.build_issues import BuildIssue, BuildIssueList + +from cognite_toolkit._cdf_tk.commands.build_cmd import BuildCommand as OldBuildCommand +from cognite_toolkit._cdf_tk.cruds import TransformationCRUD +from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, Environment, Packages +from cognite_toolkit._cdf_tk.data_classes._module_directories import ModuleDirectories +from cognite_toolkit._cdf_tk.exceptions import ( + ToolkitMissingModuleError, +) +from cognite_toolkit._cdf_tk.feature_flags import Flags +from cognite_toolkit._cdf_tk.hints import ModuleDefinition +from cognite_toolkit._cdf_tk.utils.auth import EnvironmentVariables +from tests import data +from tests.test_unit.approval_client import ApprovalToolkitClient + + +@pytest.fixture(scope="session") +def dummy_environment() -> Environment: + return Environment( + name="dev", + project="my_project", + validation_type="dev", + selected=["none"], + ) + + +# Checks to avoid regressions +class TestBuildV2Command: + def test_module_not_found_error(self, tmp_path: Path) -> None: + with pytest.raises(ToolkitMissingModuleError): + BuildCommand(print_warning=False).execute( + verbose=False, + build_dir=tmp_path, + organization_dir=data.PROJECT_WITH_BAD_MODULES, + selected=None, + build_env_name="no_module", + no_clean=False, + ) + + def test_module_with_non_resource_directories(self, tmp_path: Path) -> None: + cmd = BuildCommand(print_warning=False) + with suppress(NotImplementedError): + cmd.execute( + verbose=False, + build_dir=tmp_path, + organization_dir=data.PROJECT_WITH_BAD_MODULES, + selected=None, + build_env_name="ill_module", + no_clean=False, + ) + + assert len(cmd.issues) >= 1 + assert ( + BuildIssue( + description=f"Module 'ill_made_module' has non-resource directories: ['spaces']. {ModuleDefinition.short()}" + ) + in cmd.issues + ) + + @pytest.mark.skipif(not Flags.GRAPHQL.is_enabled(), reason="GraphQL schema files will give warnings") + def test_custom_project_no_warnings(self, tmp_path: Path, monkeypatch: MonkeyPatch) -> None: + cmd = BuildCommand(print_warning=False) + monkeypatch.setenv("CDF_PROJECT", "some-project") + with suppress(NotImplementedError): + cmd.execute( + verbose=False, + build_dir=tmp_path, + organization_dir=data.PROJECT_NO_COGNITE_MODULES, + selected=None, + build_env_name="dev", + no_clean=False, + ) + + assert not cmd.warning_list, f"No warnings should be raised. Got warnings: {cmd.warning_list}" + # There are two transformations in the project, expect two transformation files + transformation_files = [ + f + for f in (tmp_path / "transformations").iterdir() + if f.is_file() and TransformationCRUD.is_supported_file(f) + ] + assert len(transformation_files) == 2 + + def test_build_complete_org_without_warnings( + self, + tmp_path: Path, + env_vars_with_client: EnvironmentVariables, + ) -> None: + cmd = BuildCommand(silent=True, skip_tracking=True) + with patch.dict( + os.environ, + {"CDF_PROJECT": env_vars_with_client.CDF_PROJECT, "CDF_CLUSTER": env_vars_with_client.CDF_CLUSTER}, + ): + with suppress(NotImplementedError): + cmd.execute( + verbose=False, + build_dir=tmp_path / "build", + organization_dir=data.COMPLETE_ORG, + selected=None, + build_env_name="dev", + no_clean=False, + ) + + assert not cmd.warning_list, ( + f"No warnings should be raised. Got {len(cmd.warning_list)} warnings: {cmd.warning_list}" + ) + + def test_build_no_warnings_when_space_exists_in_cdf( + self, env_vars_with_client: EnvironmentVariables, toolkit_client_approval: ApprovalToolkitClient, tmp_path: Path + ) -> None: + my_group = """name: gp_trigger_issue +sourceId: '1234567890123456789' +capabilities: +- dataModelInstancesAcl: + actions: + - READ + scope: + spaceIdScope: + spaceIds: + - existing-space +""" + filepath = tmp_path / "my_org" / "modules" / "my_module" / "auth" / "my.Group.yaml" + filepath.parent.mkdir(parents=True, exist_ok=True) + filepath.write_text(my_group) + + # Simulate that the space exists in CDF + toolkit_client_approval.append(Space, Space("existing-space", False, 1, 1, None, None)) + cmd = BuildCommand(silent=True, skip_tracking=True) + with patch.dict( + os.environ, + {"CDF_PROJECT": env_vars_with_client.CDF_PROJECT, "CDF_CLUSTER": env_vars_with_client.CDF_CLUSTER}, + ): + with suppress(NotImplementedError): + cmd.execute( + verbose=False, + organization_dir=tmp_path / "my_org", + build_dir=tmp_path / "build", + selected=None, + build_env_name=None, + no_clean=False, + client=toolkit_client_approval.mock_client, + on_error="raise", + ) + assert len(cmd.issues) == 0 + + +class TestCheckYamlSemantics: + def test_build_valid_read_int_version(self) -> None: + cmd = BuildCommand(silent=True) + raw_yaml = """destination: + dataModel: + destinationType: CogniteFile + externalId: MyModel + space: my_space + version: 1_0_0 + instanceSpace: my_space + type: instances +externalId: some_external_id + """ + source_filepath = MagicMock(spec=Path) + source_filepath.read_text.return_value = raw_yaml + source_filepath.suffix = ".yaml" + source_filepath.read_bytes.return_value = raw_yaml.encode("utf-8") + + source_files = cmd._replace_variables( + [source_filepath], BuildVariables([]), TransformationCRUD.folder_name, Path("my_module"), verbose=False + ) + assert len(source_files) == 1 + source_file = source_files[0] + assert isinstance(source_file.loaded, dict) + actual = DataModelId.load(source_file.loaded["destination"]["dataModel"]) + assert actual == DataModelId("my_space", "MyModel", "1_0_0") + + def test_track_module_build(self, tmp_path: Path) -> None: + cmd = BuildCommand(print_warning=True, skip_tracking=True) + cmd.run( + lambda: cmd.build_modules( + modules=ModuleDirectories.load(data.EXTERNAL_PACKAGE), + build_dir=tmp_path, + variables=BuildVariables([]), + verbose=False, + ) + ) + assert cmd._additional_tracking_info.package_ids == {"rmdm"} + assert cmd._additional_tracking_info.module_ids == {"agent", "data_model"} + + def test_track_module_build_with_package_info(self, tmp_path: Path) -> None: + cmd = BuildCommand(print_warning=True, skip_tracking=True) + cmd.build_config( + build_dir=tmp_path, + organization_dir=data.EXTERNAL_PACKAGE, + config=BuildConfigYAML( + filepath=Path("config.dev.yaml"), + environment=Environment( + name="dev", project="my_project", validation_type="dev", selected=["external_module_1"] + ), + ), + packages=Packages.load(data.EXTERNAL_PACKAGE), + clean=False, + verbose=False, + client=None, + progress_bar=False, + on_error="continue", + ) + + with open(tmp_path / "_build_environment.yaml") as file: + _build_file = yaml.safe_load(file) + assert _build_file is not None + assert _build_file["read_modules"][0]["package_id"] == "rmdm" + assert _build_file["read_modules"][0]["module_id"] == "data_model" + + +class TestBuildParity: + def test_build_parity_with_old_build_command(self, tmp_path: Path) -> None: + new_cmd = BuildCommand(silent=True, skip_tracking=True) + new_result = None + old_result = None + + with suppress(NotImplementedError): + new_result = new_cmd.execute( + verbose=False, + build_dir=tmp_path / "new", + organization_dir=data.COMPLETE_ORG, + selected=None, + build_env_name="dev", + no_clean=False, + ) + + old_cmd = OldBuildCommand(print_warning=False, skip_tracking=False) + old_result = old_cmd.execute( + verbose=False, + build_dir=tmp_path / "old", + organization_dir=data.COMPLETE_ORG, + selected=None, + build_env_name="dev", + no_clean=False, + ) + assert new_result == old_result + assert new_cmd.issues == BuildIssueList.from_warning_list(old_cmd.warning_list) diff --git a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py new file mode 100644 index 0000000000..b7a45f5f52 --- /dev/null +++ b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py @@ -0,0 +1,24 @@ +from cognite_toolkit._cdf_tk.constants import MODULES +from cognite_toolkit._cdf_tk.data_classes.modules import ModulesDirectory +from tests.data import COMPLETE_ORG + + +class TestModules: + def test_load_modules(self) -> None: + modules = ModulesDirectory.load(COMPLETE_ORG) + + assert len(modules.modules) == 3 + assert {module.path for module in modules.modules} == { + COMPLETE_ORG / MODULES / "my_example_module", + COMPLETE_ORG / MODULES / "my_file_expand_module", + COMPLETE_ORG / MODULES / "populate_model", + } + + def test_load_selection(self) -> None: + modules = ModulesDirectory.load(COMPLETE_ORG, selection=["my_example_module", "my_file_expand_module"]) + + assert len(modules.modules) == 2 + assert {module.path for module in modules.modules} == { + COMPLETE_ORG / MODULES / "my_example_module", + COMPLETE_ORG / MODULES / "my_file_expand_module", + } From 75faac65ad6b93a35f343ed73c76136b4dfce723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A5l=20R=C3=B8nning?= Date: Thu, 11 Dec 2025 11:48:10 +0100 Subject: [PATCH 2/5] Savepoint --- .../_cdf_tk/commands/build_v2/build_cmd.py | 5 ++-- .../_cdf_tk/commands/build_v2/build_input.py | 27 ++++++++++--------- .../_cdf_tk/data_classes/modules.py | 27 ++++++++++++++----- .../test_data_classes/test_modules.py | 6 ++++- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py index a18cbb2b83..2fe70efbd7 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Any, Literal, TypedDict +from _cdf_tk.data_classes.modules import ModulesDirectory from rich import print from rich.panel import Panel @@ -121,7 +122,7 @@ def _validate_modules(self, input: BuildInput) -> BuildIssueList: # Validate module selection user_selected_modules = input.config.environment.get_selected_modules({}) module_warnings = validate_module_selection( - modules=input.modules, + modules=ModulesDirectory.load(input.organization_dir, input.config.environment.selected), config=input.config, packages={}, selected_modules=user_selected_modules, @@ -146,7 +147,7 @@ def _validate_modules(self, input: BuildInput) -> BuildIssueList: def _build_configuration(self, input: BuildInput) -> tuple[BuiltModuleList, BuildIssueList]: issues = BuildIssueList() # Use input.modules.selected directly (it's already a ModuleDirectories) - if not input.modules.selected: + if not list(input.config.environment.selected): return BuiltModuleList(), issues # first collect variables into practical lookup diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py index 1412eff4c6..1d86dd27cc 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py @@ -2,6 +2,8 @@ from functools import cached_property from pathlib import Path +from cognite_toolkit._cdf_tk.data_classes.modules import ModulesDirectory + if sys.version_info >= (3, 11): from typing import Self else: @@ -14,7 +16,6 @@ from cognite_toolkit._cdf_tk.data_classes import ( BuildConfigYAML, BuildVariables, - ModuleDirectories, ) from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules @@ -31,8 +32,8 @@ class BuildInput(BaseModel): build_env_name: str config: BuildConfigYAML client: ToolkitClient | None = None - selected: list[str | Path] | None = None warnings: WarningList[ToolkitWarning] | None = None + user_selected: list[str | Path] | None = None @classmethod def load( @@ -41,24 +42,24 @@ def load( build_dir: Path, build_env_name: str | None, client: ToolkitClient | None, - selected: list[str | Path] | None = None, + user_selected: list[str | Path] | None = None, ) -> Self: resolved_org_dir = Path.cwd() if organization_dir in {Path("."), Path("./")} else organization_dir resolved_env = build_env_name or DEFAULT_ENV - config, warnings = cls._load_config(resolved_org_dir, resolved_env, selected) + config, warnings = cls._load_config(resolved_org_dir, resolved_env, user_selected) return cls( organization_dir=resolved_org_dir, build_dir=build_dir, build_env_name=resolved_env, config=config, client=client, - selected=selected, warnings=warnings, + user_selected=user_selected, ) @classmethod def _load_config( - cls, organization_dir: Path, build_env_name: str, selected: list[str | Path] | None + cls, organization_dir: Path, build_env_name: str, user_selected: list[str | Path] | None ) -> tuple[BuildConfigYAML, WarningList[ToolkitWarning]]: warnings: WarningList[ToolkitWarning] = WarningList[ToolkitWarning]() if (organization_dir / BuildConfigYAML.get_filename(build_env_name or DEFAULT_ENV)).exists(): @@ -66,20 +67,22 @@ def _load_config( else: # Loads the default environment config = BuildConfigYAML.load_default(organization_dir) - if selected: - config.environment.selected = parse_user_selected_modules(selected, organization_dir) + if user_selected: + config.environment.selected = list(set(parse_user_selected_modules(list(user_selected), organization_dir))) config.set_environment_variables() if environment_warning := config.validate_environment(): warnings.append(environment_warning) return config, warnings @cached_property - def modules(self) -> ModuleDirectories: - user_selected_modules = self.config.environment.get_selected_modules({}) - return ModuleDirectories.load(self.organization_dir, user_selected_modules) + def modules(self) -> ModulesDirectory: + selection = self.user_selected or self.config.environment.selected + return ModulesDirectory.load(self.organization_dir, selection) @cached_property def variables(self) -> BuildVariables: return BuildVariables.load_raw( - self.config.variables, self.modules.available_paths, self.modules.selected.available_paths + self.config.variables, + self.modules.available_paths, + set(Path(sel) for sel in self.config.environment.selected), ) diff --git a/cognite_toolkit/_cdf_tk/data_classes/modules.py b/cognite_toolkit/_cdf_tk/data_classes/modules.py index e21ac03f94..db92c75386 100644 --- a/cognite_toolkit/_cdf_tk/data_classes/modules.py +++ b/cognite_toolkit/_cdf_tk/data_classes/modules.py @@ -6,6 +6,7 @@ from cognite_toolkit._cdf_tk.constants import MODULES from cognite_toolkit._cdf_tk.utils import iterate_modules +from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules from ._module_toml import ModuleToml @@ -54,13 +55,27 @@ class ModulesDirectory(BaseModel): modules: list[Module] = Field(default_factory=list) @classmethod - def load(cls, organization_dir: Path) -> Self: - modules = [ - Module.load(path=module_path, resource_paths=resource_paths) - for module_path, resource_paths in iterate_modules(organization_dir / MODULES) - ] - return cls(modules=modules) + def load(cls, organization_dir: Path, selection: list[str | Path] | None = None) -> Self: + selected = parse_user_selected_modules(selection, organization_dir) if selection else None + return cls( + modules=[ + Module.load(path=module_path, resource_paths=resource_paths) + for module_path, resource_paths in iterate_modules(organization_dir / MODULES) + if cls._is_selected(module_path, organization_dir, selected) + ], + ) + + @staticmethod + def _is_selected(module_path: Path, organization_dir: Path, selection: list[str | Path] | None) -> bool: + if selection is None: + return True + relative = module_path.relative_to(organization_dir) + return module_path.name in selection or relative in selection or any(p in selection for p in relative.parents) @cached_property def paths(self) -> list[Path]: return [module.path for module in self.modules] + + @cached_property + def available_paths(self) -> set[Path]: + return {module.path for module in self.modules} diff --git a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py index b7a45f5f52..2bbe44faca 100644 --- a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py +++ b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py @@ -1,3 +1,5 @@ +from pathlib import Path + from cognite_toolkit._cdf_tk.constants import MODULES from cognite_toolkit._cdf_tk.data_classes.modules import ModulesDirectory from tests.data import COMPLETE_ORG @@ -15,7 +17,9 @@ def test_load_modules(self) -> None: } def test_load_selection(self) -> None: - modules = ModulesDirectory.load(COMPLETE_ORG, selection=["my_example_module", "my_file_expand_module"]) + modules = ModulesDirectory.load( + COMPLETE_ORG, selection=["my_example_module", Path(MODULES) / "my_file_expand_module"] + ) assert len(modules.modules) == 2 assert {module.path for module in modules.modules} == { From 1eda918b26628629f3136cff3404b76f52b07613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A5l=20R=C3=B8nning?= Date: Mon, 5 Jan 2026 12:30:55 +0100 Subject: [PATCH 3/5] savepoint --- .vscode/launch.json | 2 +- .../_cdf_tk/commands/build_v2/build_cmd.py | 130 ++++++++---------- .../_cdf_tk/commands/build_v2/build_issues.py | 27 ---- .../{build_input.py => build_parameters.py} | 8 +- .../_cdf_tk/data_classes/__init__.py | 3 + .../_cdf_tk/data_classes/_issues.py | 86 ++++++++++++ .../_cdf_tk/data_classes/_modules.py | 10 ++ .../_cdf_tk/data_classes/modules.py | 66 ++++++++- cognite_toolkit/_cdf_tk/utils/modules.py | 55 ++++++++ cognite_toolkit/_cdf_tk/validation.py | 3 +- .../test_commands/test_build_v2.py | 26 ++-- .../test_data_classes/test_modules.py | 13 +- 12 files changed, 306 insertions(+), 123 deletions(-) delete mode 100644 cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py rename cognite_toolkit/_cdf_tk/commands/build_v2/{build_input.py => build_parameters.py} (93%) create mode 100644 cognite_toolkit/_cdf_tk/data_classes/_issues.py create mode 100644 cognite_toolkit/_cdf_tk/data_classes/_modules.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 7665adcd25..119c617254 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -313,7 +313,7 @@ "init", ".", "--clean", - "--library-url=https://github.com/cognitedata/library/raw/refs/heads/packages-menu/packages.zip", + "--library-url=https://github.com/cognitedata/library/releases/download/latest/packages.zip", "--library-checksum=sha256:foo" ], diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py index 2fe70efbd7..af1ee6644a 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py @@ -1,46 +1,35 @@ from pathlib import Path -from typing import Any, Literal, TypedDict +from typing import Any, Literal -from _cdf_tk.data_classes.modules import ModulesDirectory from rich import print from rich.panel import Panel from cognite_toolkit._cdf_tk.client import ToolkitClient from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand from cognite_toolkit._cdf_tk.commands.build_cmd import BuildCommand as OldBuildCommand -from cognite_toolkit._cdf_tk.commands.build_v2.build_input import BuildInput -from cognite_toolkit._cdf_tk.commands.build_v2.build_issues import BuildIssue, BuildIssueList -from cognite_toolkit._cdf_tk.data_classes import ( - BuildConfigYAML, - BuildVariables, - BuiltModuleList, - ModuleDirectories, -) +from cognite_toolkit._cdf_tk.commands.build_v2.build_parameters import BuildParameters +from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, BuiltModuleList +from cognite_toolkit._cdf_tk.data_classes._issues import Issue, IssueList +from cognite_toolkit._cdf_tk.data_classes._module_directories import ModuleDirectories from cognite_toolkit._cdf_tk.exceptions import ToolkitError -from cognite_toolkit._cdf_tk.hints import verify_module_directory from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList from cognite_toolkit._cdf_tk.utils.file import safe_rmtree from cognite_toolkit._cdf_tk.validation import validate_module_selection, validate_modules_variables from cognite_toolkit._version import __version__ -class BuildWarnings(TypedDict): - warning: ToolkitWarning - location: list[Path] - - class BuildCommand(ToolkitCommand): def __init__(self, print_warning: bool = True, skip_tracking: bool = False, silent: bool = False) -> None: super().__init__(print_warning, skip_tracking, silent) - self.issues = BuildIssueList() + self.issues = IssueList() def execute( self, verbose: bool, - organization_dir: Path, + base_dir: Path, build_dir: Path, selected: list[str | Path] | None, - build_env_name: str | None, + build_env: str | None, no_clean: bool, client: ToolkitClient | None = None, on_error: Literal["continue", "raise"] = "continue", @@ -52,30 +41,35 @@ def execute( self.verbose = verbose self.on_error = on_error - # Tracking the project and cluster for the build. - if client: - self._additional_tracking_info.project = client.config.project - self._additional_tracking_info.cluster = client.config.cdf_cluster - - # Setting the parameters for the build. - input = BuildInput.load(organization_dir, build_dir, build_env_name, client, selected) + build_input = BuildParameters.load( + organization_dir=base_dir, + build_dir=build_dir, + build_env_name=build_env, + client=client, + user_selected=selected, + ) # Print the build input. if self.verbose: - self._print_build_input(input) + self._print_build_input(build_input) + + # Tracking the project and cluster for the build. + if build_input.client: + self._additional_tracking_info.project = build_input.client.config.project + self._additional_tracking_info.cluster = build_input.client.config.cdf_cluster # Capture warnings from module structure integrity - if module_selection_issues := self._validate_modules(input): + if module_selection_issues := self._validate_modules(build_input): self.issues.extend(module_selection_issues) # Logistics: clean and create build directory - if prepare_issues := self._prepare_target_directory(input, not no_clean): + if prepare_issues := self._prepare_target_directory(build_dir, not no_clean): self.issues.extend(prepare_issues) # Compile the configuration and variables, # check syntax on module and resource level # for any "compilation errors and warnings" - built_modules, build_integrity_issues = self._build_configuration(input) + built_modules, build_integrity_issues = self._build_configuration(build_input) if build_integrity_issues: self.issues.extend(build_integrity_issues) @@ -88,55 +82,53 @@ def execute( return built_modules - def _print_build_input(self, input: BuildInput) -> None: + def _print_build_input(self, build_input: BuildParameters) -> None: print( Panel( - f"Building {input.organization_dir!s}:\n - Toolkit Version '{__version__!s}'\n" - f" - Environment name {input.build_env_name!r}, validation-type {input.config.environment.validation_type!r}.\n" - f" - Config '{input.config.filepath!s}'", + f"Building {build_input.organization_dir!s}:\n - Toolkit Version '{__version__!s}'\n" + f" - Environment name {build_input.build_env_name!r}, validation-type {build_input.config.environment.validation_type!r}.\n" + f" - Config '{build_input.config.filepath!s}'", expand=False, ) ) - def _prepare_target_directory(self, input: BuildInput, clean: bool = False) -> BuildIssueList: + def _prepare_target_directory(self, build_dir: Path, clean: bool = False) -> IssueList: """ Directory logistics """ - issues = BuildIssueList() - if input.build_dir.exists() and any(input.build_dir.iterdir()): + issues = IssueList() + if build_dir.exists() and any(build_dir.iterdir()): if not clean: raise ToolkitError("Build directory is not empty. Run without --no-clean to remove existing files.") if self.verbose: - issues.append(BuildIssue(description=f"Build directory {input.build_dir!s} is not empty. Clearing.")) - safe_rmtree(input.build_dir) - input.build_dir.mkdir(parents=True, exist_ok=True) + issues.append( + Issue(name="BuildDirNotEmpty", message=f"Build directory {build_dir!s} is not empty. Clearing.") + ) + safe_rmtree(build_dir) + build_dir.mkdir(parents=True, exist_ok=True) return issues - def _validate_modules(self, input: BuildInput) -> BuildIssueList: - issues = BuildIssueList() - # Verify that the modules exists, are not duplicates, - # and at least one is selected - verify_module_directory(input.organization_dir, input.build_env_name) + def _validate_modules(self, build_input: BuildParameters) -> IssueList: + issues = IssueList() + # Validate module directory integrity. + issues.extend(build_input.modules.verify_integrity()) # Validate module selection - user_selected_modules = input.config.environment.get_selected_modules({}) + packages: dict[str, list[str]] = {} + user_selected_modules = build_input.config.environment.get_selected_modules(packages) module_warnings = validate_module_selection( - modules=ModulesDirectory.load(input.organization_dir, input.config.environment.selected), - config=input.config, - packages={}, - selected_modules=user_selected_modules, - organization_dir=input.organization_dir, + build_input.modules, build_input.config, packages, user_selected_modules, build_input.organization_dir ) if module_warnings: - issues.extend(BuildIssueList.from_warning_list(module_warnings)) + issues.extend(IssueList.from_warning_list(module_warnings)) # Validate variables. Note: this looks for non-replaced template # variables <.*?> and can be improved in the future. # Keeping for reference. - variables_warnings = validate_modules_variables(input.variables, input.config.filepath) + variables_warnings = validate_modules_variables(build_input.variables.selected, build_input.config.filepath) if variables_warnings: - issues.extend(BuildIssueList.from_warning_list(variables_warnings)) + issues.extend(IssueList.from_warning_list(variables_warnings)) # Track LOC of managed configuration # Note: _track is not implemented yet, so we skip it for now @@ -144,10 +136,10 @@ def _validate_modules(self, input: BuildInput) -> BuildIssueList: return issues - def _build_configuration(self, input: BuildInput) -> tuple[BuiltModuleList, BuildIssueList]: - issues = BuildIssueList() - # Use input.modules.selected directly (it's already a ModuleDirectories) - if not list(input.config.environment.selected): + def _build_configuration(self, build_input: BuildParameters) -> tuple[BuiltModuleList, IssueList]: + issues = IssueList() + # Use build_input.modules directly (it is already filtered by selection) + if not list(build_input.config.environment.selected): return BuiltModuleList(), issues # first collect variables into practical lookup @@ -155,13 +147,13 @@ def _build_configuration(self, input: BuildInput) -> tuple[BuiltModuleList, Buil old_build_command = OldBuildCommand(print_warning=False, skip_tracking=False) built_modules = old_build_command.build_config( - build_dir=input.build_dir, - organization_dir=input.organization_dir, - config=input.config, + build_dir=build_input.build_dir, + organization_dir=build_input.organization_dir, + config=build_input.config, packages={}, clean=False, verbose=self.verbose, - client=input.client, + client=build_input.client, progress_bar=False, on_error=self.on_error, ) @@ -173,23 +165,23 @@ def _build_configuration(self, input: BuildInput) -> tuple[BuiltModuleList, Buil # Always convert warnings to issues, even if the list appears empty # (WarningList might have custom __bool__ behavior) if old_build_command.warning_list: - converted_issues = BuildIssueList.from_warning_list(old_build_command.warning_list) + converted_issues = IssueList.from_warning_list(old_build_command.warning_list) issues.extend(converted_issues) return built_modules, issues - def _verify_build_quality(self, built_modules: BuiltModuleList) -> BuildIssueList: - issues = BuildIssueList() + def _verify_build_quality(self, built_modules: BuiltModuleList) -> IssueList: + issues = IssueList() return issues - def _write(self, input: BuildInput) -> None: + def _write(self, build_input: BuildParameters) -> None: # Write the build to the build directory. # Track lines of code built. raise NotImplementedError() - def _track(self, input: BuildInput) -> None: + def _track(self, build_input: BuildParameters) -> None: raise NotImplementedError() - def _print_or_log_warnings_by_category(self, issues: BuildIssueList) -> None: + def _print_or_log_warnings_by_category(self, issues: IssueList) -> None: pass # Delegate to old BuildCommand for backward compatibility with tests @@ -208,7 +200,7 @@ def build_modules( built_modules = old_cmd.build_modules(modules, build_dir, variables, verbose, progress_bar, on_error) self._additional_tracking_info.package_ids.update(old_cmd._additional_tracking_info.package_ids) self._additional_tracking_info.module_ids.update(old_cmd._additional_tracking_info.module_ids) - self.issues.extend(BuildIssueList.from_warning_list(old_cmd.warning_list or WarningList[ToolkitWarning]())) + self.issues.extend(IssueList.from_warning_list(old_cmd.warning_list or WarningList[ToolkitWarning]())) return built_modules def build_config( diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py deleted file mode 100644 index bfef2f9c2c..0000000000 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py +++ /dev/null @@ -1,27 +0,0 @@ -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - -from collections import UserList - -from pydantic import BaseModel - -from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList - - -class BuildIssue(BaseModel): - """Issue with the build. Can have a recommendation for the user to improve the build.""" - - description: str - - -class BuildIssueList(UserList[BuildIssue]): - """List of build issues.""" - - @classmethod - def from_warning_list(cls, warning_list: WarningList[ToolkitWarning]) -> Self: - """Create a BuildIssueList from a WarningList.""" - return cls([BuildIssue(description=warning.get_message()) for warning in warning_list]) diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py similarity index 93% rename from cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py rename to cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py index 1d86dd27cc..644e661bff 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py @@ -2,7 +2,7 @@ from functools import cached_property from pathlib import Path -from cognite_toolkit._cdf_tk.data_classes.modules import ModulesDirectory +from cognite_toolkit._cdf_tk.data_classes.modules import ModuleRootDirectory if sys.version_info >= (3, 11): from typing import Self @@ -21,7 +21,7 @@ from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules -class BuildInput(BaseModel): +class BuildParameters(BaseModel): """Input to the build process.""" # need this until we turn BuildConfigYaml and ToolkitClient into Pydantic models @@ -75,9 +75,9 @@ def _load_config( return config, warnings @cached_property - def modules(self) -> ModulesDirectory: + def modules(self) -> ModuleRootDirectory: selection = self.user_selected or self.config.environment.selected - return ModulesDirectory.load(self.organization_dir, selection) + return ModuleRootDirectory.load(self.organization_dir, selection) @cached_property def variables(self) -> BuildVariables: diff --git a/cognite_toolkit/_cdf_tk/data_classes/__init__.py b/cognite_toolkit/_cdf_tk/data_classes/__init__.py index 57016f5010..034d43df42 100644 --- a/cognite_toolkit/_cdf_tk/data_classes/__init__.py +++ b/cognite_toolkit/_cdf_tk/data_classes/__init__.py @@ -29,6 +29,7 @@ ResourceDeployResult, UploadDeployResult, ) +from ._issues import Issue, IssueList from ._module_directories import ModuleDirectories, ModuleLocation from ._module_resources import ModuleResources from ._packages import Package, Packages @@ -56,6 +57,8 @@ "DeployResults", "Environment", "InitConfigYAML", + "Issue", + "IssueList", "ModuleDirectories", "ModuleLocation", "ModuleResources", diff --git a/cognite_toolkit/_cdf_tk/data_classes/_issues.py b/cognite_toolkit/_cdf_tk/data_classes/_issues.py new file mode 100644 index 0000000000..45a84cfd1a --- /dev/null +++ b/cognite_toolkit/_cdf_tk/data_classes/_issues.py @@ -0,0 +1,86 @@ +import sys +from collections import UserList +from pathlib import Path +from typing import Any + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from pydantic import BaseModel + +from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList + +MODULE_ISSUE_CODE = "MOD" + + +class Issue(BaseModel): + """Base class for all issues""" + + name: str | None = None + message: str | None = None + code: str | None = None + fix: str | None = None + + @classmethod + def issue_type(cls) -> str: + return cls.__name__ + + +# temporary adapter to manage existing warnings +class IssueList(UserList[Issue]): + """List of build issues.""" + + @classmethod + def from_warning_list(cls, warning_list: WarningList[ToolkitWarning]) -> Self: + """Create a IssueList from a WarningList.""" + return cls([Issue(name=type(warning).__name__, message=warning.get_message()) for warning in warning_list]) + + +class ModuleDirectoryIssue(Issue): + """Issue related to module directory integrity checks.""" + + code: str = "DIR" + + +class ModuleLoadingIssue(Issue): + """Issue with the loading of the module root folder + + ## What it does + Validates that the module root folder exists, contains modules and that the selected modules match the modules in the root folder. + + ## Why is this bad? + If the module root folder does not exist or contains no modules, the build will fail. If the selected modules do not exist, the build will fail. + """ + + code: str = f"{MODULE_ISSUE_CODE}_001" + path: Path + config: Any + + def get_message(self, verbose: bool = False) -> str: + if self.message: + return self.message + default_message = f"Module root folder {self.path.as_posix()!r} does not exist or is not a directory, or " + if not verbose: + default_message += "does not contain the selected modules" + return default_message + default_message += f"does not contain the selected modules: {self.config.environment.selected}" + default_message += "Please check that the selected modules exist in the module root folder." + default_message += f"The Toolkit expects the following structure: {self.path.as_posix()!r}/modules/{self.config.environment.selected}." + return default_message + + +class ModuleSkippedIssue(Issue): + """Issue related to skipped modules.""" + + code: str = f"{MODULE_ISSUE_CODE}_002" + path: Path + + def get_message(self, verbose: bool = False) -> str: + if self.message: + return self.message + default_message = ( + f"Module {self.path.as_posix()!r} was ignored by the Toolkit. It may be excluded by .toolkitignore." + ) + return default_message diff --git a/cognite_toolkit/_cdf_tk/data_classes/_modules.py b/cognite_toolkit/_cdf_tk/data_classes/_modules.py new file mode 100644 index 0000000000..85f3c3493f --- /dev/null +++ b/cognite_toolkit/_cdf_tk/data_classes/_modules.py @@ -0,0 +1,10 @@ +"""Compatibility wrapper for module loading data classes. + +Historically, some parts of the code imported `ModuleRootDirectory` from +`cognite_toolkit._cdf_tk.data_classes._modules`. The canonical implementation +now lives in `cognite_toolkit._cdf_tk.data_classes.modules`. +""" + +from cognite_toolkit._cdf_tk.data_classes.modules import Module, ModuleRootDirectory, Resource + +__all__ = ["Module", "ModuleRootDirectory", "Resource"] diff --git a/cognite_toolkit/_cdf_tk/data_classes/modules.py b/cognite_toolkit/_cdf_tk/data_classes/modules.py index db92c75386..19d5a1de53 100644 --- a/cognite_toolkit/_cdf_tk/data_classes/modules.py +++ b/cognite_toolkit/_cdf_tk/data_classes/modules.py @@ -1,10 +1,12 @@ import sys +from collections.abc import Iterator from functools import cached_property from pathlib import Path from pydantic import BaseModel, ConfigDict, Field from cognite_toolkit._cdf_tk.constants import MODULES +from cognite_toolkit._cdf_tk.data_classes._issues import IssueList, ModuleDirectoryIssue from cognite_toolkit._cdf_tk.utils import iterate_modules from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules @@ -46,18 +48,23 @@ def load(cls, path: Path, resource_paths: list[Path]) -> Self: return cls(path=path, resources=resources, definition=definition) -class ModulesDirectory(BaseModel): +class ModuleRootDirectory(BaseModel): model_config = ConfigDict( frozen=True, validate_assignment=True, ) + organization_dir: Path modules: list[Module] = Field(default_factory=list) @classmethod def load(cls, organization_dir: Path, selection: list[str | Path] | None = None) -> Self: - selected = parse_user_selected_modules(selection, organization_dir) if selection else None + # selection semantics: + # - selection is None -> select all modules + # - selection is [] -> select none (explicit "no modules selected") + selected = None if selection is None else parse_user_selected_modules(selection, organization_dir) return cls( + organization_dir=organization_dir, modules=[ Module.load(path=module_path, resource_paths=resource_paths) for module_path, resource_paths in iterate_modules(organization_dir / MODULES) @@ -65,6 +72,22 @@ def load(cls, organization_dir: Path, selection: list[str | Path] | None = None) ], ) + def verify_integrity(self) -> IssueList: + issues = IssueList() + if not self.organization_dir.exists() or not self.organization_dir.is_dir(): + issues.append( + ModuleDirectoryIssue( + message=f"Organization directory {self.organization_dir.as_posix()!r} is not a directory." + ) + ) + elif not self.organization_dir.joinpath(MODULES).is_dir(): + issues.append( + ModuleDirectoryIssue( + message=f"Modules directory {self.organization_dir.joinpath(MODULES).as_posix()!r} is not a directory." + ) + ) + return issues + @staticmethod def _is_selected(module_path: Path, organization_dir: Path, selection: list[str | Path] | None) -> bool: if selection is None: @@ -72,10 +95,43 @@ def _is_selected(module_path: Path, organization_dir: Path, selection: list[str relative = module_path.relative_to(organization_dir) return module_path.name in selection or relative in selection or any(p in selection for p in relative.parents) + def __iter__(self) -> Iterator[Module]: # type: ignore[override] + return iter(self.modules) + + def __len__(self) -> int: + # Enables correct truthiness checks (e.g. `if not modules.selected:`) + return len(self.modules) + @cached_property - def paths(self) -> list[Path]: - return [module.path for module in self.modules] + def available(self) -> set[str | Path]: + """Ways of selecting the loaded modules (name and relative paths).""" + selections: set[str | Path] = set() + for module in self.modules: + relative = module.path.relative_to(self.organization_dir) + selections.add(module.path.name) + selections.add(relative) + selections.update(relative.parents) + return selections + + @cached_property + def available_names(self) -> set[str]: + return {item for item in self.available if isinstance(item, str)} @cached_property def available_paths(self) -> set[Path]: - return {module.path for module in self.modules} + return {item for item in self.available if isinstance(item, Path)} + + @property + def selected(self) -> "ModuleRootDirectory": + # ModuleRootDirectory currently only loads selected modules + return self + + def as_path_by_name(self) -> dict[str, list[Path]]: + module_path_by_name: dict[str, list[Path]] = {} + for module in self.modules: + module_path_by_name.setdefault(module.path.name, []).append(module.path.relative_to(self.organization_dir)) + return module_path_by_name + + @cached_property + def paths(self) -> list[Path]: + return [module.path for module in self.modules] diff --git a/cognite_toolkit/_cdf_tk/utils/modules.py b/cognite_toolkit/_cdf_tk/utils/modules.py index efe944587e..a6400f6b2d 100644 --- a/cognite_toolkit/_cdf_tk/utils/modules.py +++ b/cognite_toolkit/_cdf_tk/utils/modules.py @@ -7,6 +7,7 @@ MODULE_PATH_SEP, ROOT_MODULES, ) +from cognite_toolkit._cdf_tk.data_classes._issues import Issue, IssueList, ModuleSkippedIssue def iterate_modules(root_dir: Path) -> Iterator[tuple[Path, list[Path]]]: @@ -48,6 +49,60 @@ def _iterate_modules(root_dir: Path) -> Iterator[tuple[Path, list[Path]]]: yield from _iterate_modules(module_dir) +def _iterate_modules_v2(root_dir: Path) -> Iterator[tuple[Path, list[Path]]]: + """V2 module iterator that also returns loading issues. + + Returns: + (modules, issues) where modules is a list of (module_dir, filepaths). + """ + # local imports to avoid circular import + + from cognite_toolkit._cdf_tk.constants import EXCL_FILES + from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME + + def _collect(current_dir: Path, is_root: bool) -> tuple[list[tuple[Path, list[Path]]], IssueList]: + modules: list[tuple[Path, list[Path]]] = [] + issues = IssueList() + + for module_dir in current_dir.iterdir(): + if (module_dir / ".toolkitignore").exists(): + issues.append(ModuleSkippedIssue(path=module_dir)) + continue + + if not module_dir.is_dir(): + continue + sub_directories = [path for path in module_dir.iterdir() if path.is_dir()] + is_any_resource_directories = any(d.name in CRUDS_BY_FOLDER_NAME for d in sub_directories) + + if sub_directories and is_any_resource_directories: + modules.append( + ( + module_dir, + [p for p in module_dir.rglob("*") if p.is_file() and p.name not in EXCL_FILES], + ) + ) + continue + child_modules, child_issues = _collect(module_dir, is_root=False) + modules.extend(child_modules) + issues.extend(child_issues) + + if is_root and not modules: + issues.append( + Issue( + name="NoModulesFound", + code="NO_MODULES_FOUND", + message=f"No modules found under {current_dir.as_posix()!r}.", + ) + ) + return modules, issues + + # Backwards-compat: keep this as an iterator by yielding the modules, + # but allow callers to get issues via attribute on the generator result. + modules, issues = _collect(root_dir, is_root=True) + _iterate_modules_v2.issues = issues # type: ignore[attr-defined] + yield from modules + + @overload def module_from_path(path: Path, return_resource_folder: Literal[True]) -> tuple[str, str]: ... diff --git a/cognite_toolkit/_cdf_tk/validation.py b/cognite_toolkit/_cdf_tk/validation.py index 03e4bc9b26..6c0a6d1c16 100644 --- a/cognite_toolkit/_cdf_tk/validation.py +++ b/cognite_toolkit/_cdf_tk/validation.py @@ -11,6 +11,7 @@ from cognite_toolkit._cdf_tk.cdf_toml import CDFToml from cognite_toolkit._cdf_tk.constants import DEV_ONLY_MODULES from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, ModuleDirectories +from cognite_toolkit._cdf_tk.data_classes._modules import ModuleRootDirectory from cognite_toolkit._cdf_tk.exceptions import ( ToolkitDuplicatedModuleError, ToolkitEnvError, @@ -228,7 +229,7 @@ def as_json_path(loc: tuple[str | int, ...]) -> str: def validate_module_selection( - modules: ModuleDirectories, + modules: ModuleDirectories | ModuleRootDirectory, config: BuildConfigYAML, packages: dict[str, list[str]], selected_modules: set[str | Path], diff --git a/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py b/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py index 2d40f172e7..3f558c40a0 100644 --- a/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py +++ b/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py @@ -7,10 +7,10 @@ import yaml from _pytest.monkeypatch import MonkeyPatch from cognite.client.data_classes.data_modeling import DataModelId, Space +from cognite_toolkit._cdf_tk.commands.build_v2.build_issues import BuildIssue, BuildIssueList from cognite_toolkit._cdf_tk.commands.build_cmd import BuildCommand as OldBuildCommand from cognite_toolkit._cdf_tk.commands.build_v2.build_cmd import BuildCommand -from cognite_toolkit._cdf_tk.commands.build_v2.build_issues import BuildIssue, BuildIssueList from cognite_toolkit._cdf_tk.cruds import TransformationCRUD from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, Environment, Packages from cognite_toolkit._cdf_tk.data_classes._module_directories import ModuleDirectories @@ -41,9 +41,9 @@ def test_module_not_found_error(self, tmp_path: Path) -> None: BuildCommand(print_warning=False).execute( verbose=False, build_dir=tmp_path, - organization_dir=data.PROJECT_WITH_BAD_MODULES, + base_dir=data.PROJECT_WITH_BAD_MODULES, selected=None, - build_env_name="no_module", + build_env="no_module", no_clean=False, ) @@ -53,9 +53,9 @@ def test_module_with_non_resource_directories(self, tmp_path: Path) -> None: cmd.execute( verbose=False, build_dir=tmp_path, - organization_dir=data.PROJECT_WITH_BAD_MODULES, + base_dir=data.PROJECT_WITH_BAD_MODULES, selected=None, - build_env_name="ill_module", + build_env="ill_module", no_clean=False, ) @@ -75,9 +75,9 @@ def test_custom_project_no_warnings(self, tmp_path: Path, monkeypatch: MonkeyPat cmd.execute( verbose=False, build_dir=tmp_path, - organization_dir=data.PROJECT_NO_COGNITE_MODULES, + base_dir=data.PROJECT_NO_COGNITE_MODULES, selected=None, - build_env_name="dev", + build_env="dev", no_clean=False, ) @@ -104,9 +104,9 @@ def test_build_complete_org_without_warnings( cmd.execute( verbose=False, build_dir=tmp_path / "build", - organization_dir=data.COMPLETE_ORG, + base_dir=data.COMPLETE_ORG, selected=None, - build_env_name="dev", + build_env="dev", no_clean=False, ) @@ -147,10 +147,10 @@ def test_build_no_warnings_when_space_exists_in_cdf( with suppress(NotImplementedError): cmd.execute( verbose=False, - organization_dir=tmp_path / "my_org", + base_dir=tmp_path / "my_org", build_dir=tmp_path / "build", selected=None, - build_env_name=None, + build_env=None, no_clean=False, client=toolkit_client_approval.mock_client, on_error="raise", @@ -234,9 +234,9 @@ def test_build_parity_with_old_build_command(self, tmp_path: Path) -> None: new_result = new_cmd.execute( verbose=False, build_dir=tmp_path / "new", - organization_dir=data.COMPLETE_ORG, + base_dir=data.COMPLETE_ORG, selected=None, - build_env_name="dev", + build_env="dev", no_clean=False, ) diff --git a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py index 2bbe44faca..a69d3c6017 100644 --- a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py +++ b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py @@ -1,13 +1,15 @@ from pathlib import Path +import pytest + from cognite_toolkit._cdf_tk.constants import MODULES -from cognite_toolkit._cdf_tk.data_classes.modules import ModulesDirectory +from cognite_toolkit._cdf_tk.data_classes._modules import ModuleRootDirectory, OrganizationDirMissingWarning from tests.data import COMPLETE_ORG class TestModules: def test_load_modules(self) -> None: - modules = ModulesDirectory.load(COMPLETE_ORG) + modules = ModuleRootDirectory.load(COMPLETE_ORG) assert len(modules.modules) == 3 assert {module.path for module in modules.modules} == { @@ -17,7 +19,7 @@ def test_load_modules(self) -> None: } def test_load_selection(self) -> None: - modules = ModulesDirectory.load( + modules = ModuleRootDirectory.load( COMPLETE_ORG, selection=["my_example_module", Path(MODULES) / "my_file_expand_module"] ) @@ -26,3 +28,8 @@ def test_load_selection(self) -> None: COMPLETE_ORG / MODULES / "my_example_module", COMPLETE_ORG / MODULES / "my_file_expand_module", } + + def test_warns_if_organization_dir_missing(self) -> None: + with pytest.warns(OrganizationDirMissingWarning): + modules = ModuleRootDirectory.load(Path("wrong")) + assert modules.modules == [] From 32fbdb18d94de331322d6ef1302c66ee2d7eff06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A5l=20R=C3=B8nning?= Date: Thu, 8 Jan 2026 08:24:59 +0100 Subject: [PATCH 4/5] savepoint --- .../_cdf_tk/commands/build_v2/build_cmd.py | 54 ++--- .../commands/build_v2/build_parameters.py | 17 -- .../build_v2/data_classes/_modules.py | 196 ++++++++++++++++++ .../build_v2/data_classes/_resource.py | 22 ++ cognite_toolkit/_cdf_tk/cruds/__init__.py | 17 +- .../_cdf_tk/data_classes/_issues.py | 169 +++++++++++---- .../data_classes/_module_directories.py | 3 +- .../_cdf_tk/data_classes/_modules.py | 10 - .../_cdf_tk/data_classes/modules.py | 137 ------------ cognite_toolkit/_cdf_tk/utils/modules.py | 56 +---- cognite_toolkit/_cdf_tk/validation.py | 3 +- .../test_commands/test_build_v2.py | 9 +- .../test_data_classes/test_modules.py | 187 ++++++++++++++++- 13 files changed, 578 insertions(+), 302 deletions(-) create mode 100644 cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py create mode 100644 cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_resource.py delete mode 100644 cognite_toolkit/_cdf_tk/data_classes/_modules.py delete mode 100644 cognite_toolkit/_cdf_tk/data_classes/modules.py diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py index af1ee6644a..254e4aa7bf 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py @@ -8,13 +8,12 @@ from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand from cognite_toolkit._cdf_tk.commands.build_cmd import BuildCommand as OldBuildCommand from cognite_toolkit._cdf_tk.commands.build_v2.build_parameters import BuildParameters +from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._modules import Modules from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, BuiltModuleList from cognite_toolkit._cdf_tk.data_classes._issues import Issue, IssueList -from cognite_toolkit._cdf_tk.data_classes._module_directories import ModuleDirectories from cognite_toolkit._cdf_tk.exceptions import ToolkitError from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList from cognite_toolkit._cdf_tk.utils.file import safe_rmtree -from cognite_toolkit._cdf_tk.validation import validate_module_selection, validate_modules_variables from cognite_toolkit._version import __version__ @@ -41,7 +40,7 @@ def execute( self.verbose = verbose self.on_error = on_error - build_input = BuildParameters.load( + build_parameters = BuildParameters.load( organization_dir=base_dir, build_dir=build_dir, build_env_name=build_env, @@ -51,15 +50,18 @@ def execute( # Print the build input. if self.verbose: - self._print_build_input(build_input) + self._print_build_input(build_parameters) + + modules, load_issues = Modules.load(base_dir, selected or build_parameters.config.environment.selected) + self.issues.extend(load_issues) # Tracking the project and cluster for the build. - if build_input.client: - self._additional_tracking_info.project = build_input.client.config.project - self._additional_tracking_info.cluster = build_input.client.config.cdf_cluster + if build_parameters.client: + self._additional_tracking_info.project = build_parameters.client.config.project + self._additional_tracking_info.cluster = build_parameters.client.config.cdf_cluster # Capture warnings from module structure integrity - if module_selection_issues := self._validate_modules(build_input): + if module_selection_issues := self._validate_modules(build_parameters, modules): self.issues.extend(module_selection_issues) # Logistics: clean and create build directory @@ -69,7 +71,7 @@ def execute( # Compile the configuration and variables, # check syntax on module and resource level # for any "compilation errors and warnings" - built_modules, build_integrity_issues = self._build_configuration(build_input) + built_modules, build_integrity_issues = self._build_configuration(build_parameters) if build_integrity_issues: self.issues.extend(build_integrity_issues) @@ -102,33 +104,33 @@ def _prepare_target_directory(self, build_dir: Path, clean: bool = False) -> Iss raise ToolkitError("Build directory is not empty. Run without --no-clean to remove existing files.") if self.verbose: - issues.append( - Issue(name="BuildDirNotEmpty", message=f"Build directory {build_dir!s} is not empty. Clearing.") - ) + issues.append(Issue(code="BUILD_001")) safe_rmtree(build_dir) build_dir.mkdir(parents=True, exist_ok=True) return issues - def _validate_modules(self, build_input: BuildParameters) -> IssueList: + def _validate_modules(self, build_input: BuildParameters, modules: Modules) -> IssueList: issues = IssueList() # Validate module directory integrity. - issues.extend(build_input.modules.verify_integrity()) + # issues.extend(modules.verify_integrity()) # Validate module selection - packages: dict[str, list[str]] = {} - user_selected_modules = build_input.config.environment.get_selected_modules(packages) - module_warnings = validate_module_selection( - build_input.modules, build_input.config, packages, user_selected_modules, build_input.organization_dir - ) - if module_warnings: - issues.extend(IssueList.from_warning_list(module_warnings)) + # Note: validate_module_selection expects ModuleDirectories, but we're using build_v2 Modules for now. + # For now, we'll skip this validation or need to adapt it + # packages: dict[str, list[str]] = {} + # user_selected_modules = build_input.config.environment.get_selected_modules(packages) + # module_warnings = validate_module_selection( + # modules, build_input.config, packages, user_selected_modules, build_input.organization_dir + # ) + # if module_warnings: + # issues.extend(IssueList.from_warning_list(module_warnings)) # Validate variables. Note: this looks for non-replaced template # variables <.*?> and can be improved in the future. # Keeping for reference. - variables_warnings = validate_modules_variables(build_input.variables.selected, build_input.config.filepath) - if variables_warnings: - issues.extend(IssueList.from_warning_list(variables_warnings)) + # variables_warnings = validate_modules_variables(build_input.variables.selected, build_input.config.filepath) + # if variables_warnings: + # issues.extend(IssueList.from_warning_list(variables_warnings)) # Track LOC of managed configuration # Note: _track is not implemented yet, so we skip it for now @@ -187,7 +189,7 @@ def _print_or_log_warnings_by_category(self, issues: IssueList) -> None: # Delegate to old BuildCommand for backward compatibility with tests def build_modules( self, - modules: ModuleDirectories, + modules: Modules, build_dir: Path, variables: BuildVariables, verbose: bool = False, @@ -197,7 +199,7 @@ def build_modules( """Delegate to old BuildCommand for backward compatibility.""" old_cmd = OldBuildCommand() - built_modules = old_cmd.build_modules(modules, build_dir, variables, verbose, progress_bar, on_error) + built_modules = old_cmd.build_modules(modules, build_dir, variables, verbose, progress_bar, on_error) # type: ignore[arg-type] self._additional_tracking_info.package_ids.update(old_cmd._additional_tracking_info.package_ids) self._additional_tracking_info.module_ids.update(old_cmd._additional_tracking_info.module_ids) self.issues.extend(IssueList.from_warning_list(old_cmd.warning_list or WarningList[ToolkitWarning]())) diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py b/cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py index 644e661bff..d4cea88a16 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/build_parameters.py @@ -1,9 +1,6 @@ import sys -from functools import cached_property from pathlib import Path -from cognite_toolkit._cdf_tk.data_classes.modules import ModuleRootDirectory - if sys.version_info >= (3, 11): from typing import Self else: @@ -15,7 +12,6 @@ from cognite_toolkit._cdf_tk.constants import DEFAULT_ENV from cognite_toolkit._cdf_tk.data_classes import ( BuildConfigYAML, - BuildVariables, ) from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules @@ -73,16 +69,3 @@ def _load_config( if environment_warning := config.validate_environment(): warnings.append(environment_warning) return config, warnings - - @cached_property - def modules(self) -> ModuleRootDirectory: - selection = self.user_selected or self.config.environment.selected - return ModuleRootDirectory.load(self.organization_dir, selection) - - @cached_property - def variables(self) -> BuildVariables: - return BuildVariables.load_raw( - self.config.variables, - self.modules.available_paths, - set(Path(sel) for sel in self.config.environment.selected), - ) diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py new file mode 100644 index 0000000000..99843bf3b2 --- /dev/null +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py @@ -0,0 +1,196 @@ +import os +import sys +from collections import defaultdict +from pathlib import Path + +from pydantic import BaseModel, ConfigDict, Field + +from cognite_toolkit._cdf_tk.constants import MODULES +from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME, EXCLUDED_CRUDS +from cognite_toolkit._cdf_tk.data_classes import IssueList +from cognite_toolkit._cdf_tk.data_classes._issues import ( + ModuleLoadingDisabledResourceIssue, + ModuleLoadingIssue, + ModuleLoadingUnrecognizedResourceIssue, +) +from cognite_toolkit._cdf_tk.data_classes._module_toml import ModuleToml +from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class Module(BaseModel): + model_config = ConfigDict( + frozen=True, + validate_assignment=True, + arbitrary_types_allowed=True, + ) + + path: Path + definition: ModuleToml | None = None + + @classmethod + def load(cls, path: Path, resource_paths: list[Path]) -> Self: + definition = ModuleToml.load(path / ModuleToml.filename) if (path / ModuleToml.filename).exists() else None + return cls(path=path, definition=definition) + + +class Modules(BaseModel): + model_config = ConfigDict( + frozen=True, + validate_assignment=True, + arbitrary_types_allowed=True, + ) + + organization_dir: Path + modules: list[Module] = Field(default_factory=list) + + @classmethod + def load(cls, organization_dir: Path, selection: list[str | Path] | None = None) -> tuple[Self, IssueList]: + if selection is None: + # Treat "no selection" as selecting the whole modules tree. + # This makes the implicit default equivalent to selecting `MODULES`. + selected: list[str | Path] | None = [MODULES] + else: + selected = parse_user_selected_modules(selection, organization_dir) + + modules_root = organization_dir / MODULES + issues = IssueList() + + if not modules_root.exists(): + issues.append( + ModuleLoadingIssue( + path=modules_root, + ) + ) + return cls(organization_dir=organization_dir, modules=[]), issues + + # Walk modules_root: all leaf directories are resource presumed to be resource folders + # Their parents are module candidates + excluded_folder_names = {crud.folder_name for crud in EXCLUDED_CRUDS} + + # Map module paths to their resource folders and issues + module_candidates: defaultdict[Path, set[str]] = defaultdict(set) + unrecognized_resource_folders: defaultdict[Path, set[str]] = defaultdict(set) + disabled_resource_folders: defaultdict[Path, set[str]] = defaultdict(set) + + for dirpath, dirnames, filenames in os.walk(modules_root): + current_dir = Path(dirpath) + module_candidate = current_dir.parent + + if not cls._matches_selection(module_candidate, modules_root, selected): + continue + + # if ( + # dirnames # directories with subdirectories + # or not filenames # directories with no files + # or not cls._matches_selection(module_candidate, modules_root, selected) # not selected + # ): + # continue + # if dirnames: + # continue + # if not filenames: + # continue + # if not cls._matches_selection(module_candidate, modules_root, selected): + # continue + + if current_dir.name in excluded_folder_names: + disabled_resource_folders[module_candidate].add(current_dir.name) + elif current_dir.name not in CRUDS_BY_FOLDER_NAME: + unrecognized_resource_folders[module_candidate].add(current_dir.name) + else: + module_candidates[module_candidate].add(current_dir.name) + + loaded_modules = [] + for module_candidate, resource_folders in module_candidates.items(): + loaded_modules.append( + Module.load( + path=module_candidate, resource_paths=[module_candidate / folder for folder in resource_folders] + ) + ) + + for k, v in unrecognized_resource_folders.items(): + issues.append( + ModuleLoadingUnrecognizedResourceIssue( + path=k, + unrecognized_resource_folders=list(v), + ) + ) + for k, v in disabled_resource_folders.items(): + issues.append( + ModuleLoadingDisabledResourceIssue( + path=k, + disabled_resource_folders=list(v), + ) + ) + + return cls( + organization_dir=organization_dir, + modules=loaded_modules, + ), issues + + @staticmethod + def _matches_selection(module_candidate: Path, modules_root: Path, selected: list[str | Path] | None) -> bool: + if not selected: + return True + + rel = module_candidate.relative_to(modules_root) + rel_parts = [p.lower() for p in rel.parts] + if not rel_parts: + # module_candidate is the modules_root itself + return False + name_lower = rel_parts[-1] + modules_lower = MODULES.lower() + + for sel in selected: + sel_path = Path(sel) if isinstance(sel, str) else sel + + sel_parts = [p.lower() for p in sel_path.parts] + if not sel_parts: + continue + + if sel_parts[0] == modules_lower: + sel_parts = sel_parts[1:] + + if not sel_parts: + return True + + if len(sel_parts) == 1 and name_lower == sel_parts[0]: + return True + + if rel_parts[: len(sel_parts)] == sel_parts: + return True + + return False + + @classmethod + def _is_selected(cls, module_path: Path, organization_dir: Path, selected: list[str | Path] | None) -> bool: + if selected is None: + return True + + module_name = module_path.name + try: + rel_to_org = module_path.relative_to(organization_dir) + except ValueError: + rel_to_org = module_path + try: + rel_to_modules = module_path.relative_to(organization_dir / MODULES) + except ValueError: + rel_to_modules = module_path + + for sel in selected: + if isinstance(sel, str): + if sel == module_name: + return True + continue + + # Path selections can be absolute or relative (to org or modules root). + if sel == module_path or sel == rel_to_org or sel == rel_to_modules: + return True + if sel in rel_to_org.parents or sel in rel_to_modules.parents: + return True + + return False diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_resource.py b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_resource.py new file mode 100644 index 0000000000..826323d503 --- /dev/null +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_resource.py @@ -0,0 +1,22 @@ +import sys +from pathlib import Path + +from pydantic import BaseModel, ConfigDict + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class Resource(BaseModel): + model_config = ConfigDict( + frozen=True, + validate_assignment=True, + ) + + path: Path + + @classmethod + def load(cls, path: Path) -> Self: + return cls(path=path) diff --git a/cognite_toolkit/_cdf_tk/cruds/__init__.py b/cognite_toolkit/_cdf_tk/cruds/__init__.py index f2070f71d6..60de981723 100644 --- a/cognite_toolkit/_cdf_tk/cruds/__init__.py +++ b/cognite_toolkit/_cdf_tk/cruds/__init__.py @@ -75,17 +75,17 @@ ) from ._worker import ResourceWorker -_EXCLUDED_CRUDS: set[type[ResourceCRUD]] = set() +EXCLUDED_CRUDS: set[type[ResourceCRUD]] = set() if not FeatureFlag.is_enabled(Flags.GRAPHQL): - _EXCLUDED_CRUDS.add(GraphQLCRUD) + EXCLUDED_CRUDS.add(GraphQLCRUD) if not FeatureFlag.is_enabled(Flags.INFIELD): - _EXCLUDED_CRUDS.add(InfieldV1CRUD) - _EXCLUDED_CRUDS.add(InFieldLocationConfigCRUD) - _EXCLUDED_CRUDS.add(InFieldCDMLocationConfigCRUD) + EXCLUDED_CRUDS.add(InfieldV1CRUD) + EXCLUDED_CRUDS.add(InFieldLocationConfigCRUD) + EXCLUDED_CRUDS.add(InFieldCDMLocationConfigCRUD) if not FeatureFlag.is_enabled(Flags.MIGRATE): - _EXCLUDED_CRUDS.add(ResourceViewMappingCRUD) + EXCLUDED_CRUDS.add(ResourceViewMappingCRUD) if not FeatureFlag.is_enabled(Flags.STREAMS): - _EXCLUDED_CRUDS.add(StreamCRUD) + EXCLUDED_CRUDS.add(StreamCRUD) CRUDS_BY_FOLDER_NAME: dict[str, list[type[Loader]]] = {} @@ -95,7 +95,7 @@ DataCRUD.__subclasses__(), GroupCRUD.__subclasses__(), ): - if _loader in [ResourceCRUD, ResourceContainerCRUD, DataCRUD, GroupCRUD] or _loader in _EXCLUDED_CRUDS: + if _loader in [ResourceCRUD, ResourceContainerCRUD, DataCRUD, GroupCRUD] or _loader in EXCLUDED_CRUDS: # Skipping base classes continue if _loader.folder_name not in CRUDS_BY_FOLDER_NAME: # type: ignore[attr-defined] @@ -158,6 +158,7 @@ def get_crud(resource_dir: str, kind: str) -> type[Loader]: __all__ = [ "CRUDS_BY_FOLDER_NAME", "CRUD_LIST", + "EXCLUDED_CRUDS", "KINDS_BY_FOLDER_NAME", "RESOURCE_CRUD_BY_FOLDER_NAME", "RESOURCE_CRUD_CONTAINER_LIST", diff --git a/cognite_toolkit/_cdf_tk/data_classes/_issues.py b/cognite_toolkit/_cdf_tk/data_classes/_issues.py index 45a84cfd1a..6314b12c77 100644 --- a/cognite_toolkit/_cdf_tk/data_classes/_issues.py +++ b/cognite_toolkit/_cdf_tk/data_classes/_issues.py @@ -3,12 +3,14 @@ from pathlib import Path from typing import Any +from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME + if sys.version_info >= (3, 11): from typing import Self else: from typing_extensions import Self -from pydantic import BaseModel +from pydantic import BaseModel, model_validator from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList @@ -18,15 +20,37 @@ class Issue(BaseModel): """Base class for all issues""" - name: str | None = None - message: str | None = None - code: str | None = None - fix: str | None = None + code: str + _custom_message: str | None = None + + @model_validator(mode="before") + @classmethod + def handle_message_param(cls, data: dict | Any) -> dict | Any: + if isinstance(data, dict): + if "message" in data: + data["_custom_message"] = data.pop("message") + if "name" in data: + # Use name as code if code is not provided + if "code" not in data: + data["code"] = str(data.pop("name")) + else: + _ = data.pop("name") + return data @classmethod def issue_type(cls) -> str: return cls.__name__ + @property + def message(self) -> str: + if self._custom_message: + return self._custom_message + fix_msg = self.fix() + return f"{self.issue_type()} {self.code}: {fix_msg if fix_msg else 'Unknown issue'}" + + def fix(self) -> str | None: + return None + # temporary adapter to manage existing warnings class IssueList(UserList[Issue]): @@ -35,52 +59,127 @@ class IssueList(UserList[Issue]): @classmethod def from_warning_list(cls, warning_list: WarningList[ToolkitWarning]) -> Self: """Create a IssueList from a WarningList.""" - return cls([Issue(name=type(warning).__name__, message=warning.get_message()) for warning in warning_list]) - - -class ModuleDirectoryIssue(Issue): - """Issue related to module directory integrity checks.""" - - code: str = "DIR" + return cls([Issue(name=type(warning).__name__, message=warning.get_message()) for warning in warning_list]) # type: ignore[call-arg] class ModuleLoadingIssue(Issue): - """Issue with the loading of the module root folder + """Issue with the loading of a module folder ## What it does - Validates that the module root folder exists, contains modules and that the selected modules match the modules in the root folder. + Validates that the module folder exists and contains resources. ## Why is this bad? - If the module root folder does not exist or contains no modules, the build will fail. If the selected modules do not exist, the build will fail. + If the module folder does not exist or contains no resources, the build will skip it. """ code: str = f"{MODULE_ISSUE_CODE}_001" path: Path - config: Any + _custom_message: str | None = None + + @model_validator(mode="before") + @classmethod + def handle_message_param(cls, data: dict | Any) -> dict | Any: + if isinstance(data, dict) and "message" in data: + data["_custom_message"] = data.pop("message") + return data + + @property + def message(self) -> str: + if self._custom_message: + return self._custom_message + return f"Module {self.path.as_posix()!r} does not exist or is not a directory, or does not contain the selected modules" - def get_message(self, verbose: bool = False) -> str: - if self.message: - return self.message - default_message = f"Module root folder {self.path.as_posix()!r} does not exist or is not a directory, or " - if not verbose: - default_message += "does not contain the selected modules" - return default_message - default_message += f"does not contain the selected modules: {self.config.environment.selected}" - default_message += "Please check that the selected modules exist in the module root folder." - default_message += f"The Toolkit expects the following structure: {self.path.as_posix()!r}/modules/{self.config.environment.selected}." - return default_message +class ModuleLoadingUnrecognizedResourceIssue(Issue): + """Module contains a resource type that is not recognized -class ModuleSkippedIssue(Issue): - """Issue related to skipped modules.""" + ## What it does + Validates that the resource type is supported by the Toolkit. + + ## What is the problem? + If the resource type is not supported, the build will skip it. + + ## How to fix it? + Check spelling or negate the selection with wildcard to exclude the resource type from the loading. + """ code: str = f"{MODULE_ISSUE_CODE}_002" path: Path + unrecognized_resource_folders: list[str] + + @property + def message(self) -> str: + return f"unrecognized resource folders: {', '.join(self.unrecognized_resource_folders)}" + + @property + def verbose(self) -> str: + return ( + self.message + + f"\nThe Toolkit supports the following resource folders: {', '.join(CRUDS_BY_FOLDER_NAME.keys())}" + ) + + +class ModuleLoadingDisabledResourceIssue(Issue): + """Module contains a resource type that hasn't been enabled + + ## What it does + Validates that the resource type is enabled. - def get_message(self, verbose: bool = False) -> str: - if self.message: - return self.message - default_message = ( - f"Module {self.path.as_posix()!r} was ignored by the Toolkit. It may be excluded by .toolkitignore." + ## What is the problem? + If the resource type is disabled, the build will skip it. + + ## How to fix it? + Enable the resource type in the cdf.toml file. + """ + + code: str = f"{MODULE_ISSUE_CODE}_003" + path: Path + disabled_resource_folders: list[str] + + @property + def message(self) -> str: + return f"Contains resource folders that require enabling a flag in your cdf.toml: {', '.join(self.disabled_resource_folders)}" + + @property + def verbose(self) -> str: + # TODO: show which flags are required to enable the resource folders + return ( + self.message + + f"\nThe Toolkit supports the following resource folders: {', '.join(CRUDS_BY_FOLDER_NAME.keys())}" ) - return default_message + + +class ModuleLoadingNoResourcesIssue(Issue): + """Module contains no resources + + ## What it does + Validates that the module contains resources. + + ## What is the problem? + If the module contains no resources, the build will skip it. + """ + + code: str = f"{MODULE_ISSUE_CODE}_004" + path: Path + + @property + def message(self) -> str: + return f"No resources found in module {self.path.as_posix()!r}" + + +class ModuleLoadingNestedModulesIssue(Issue): + """Module contains nested modules + + ## What it does + Validates that the module is a deepest module. + + ## What is the problem? + If the module contains nested modules, it is not a deepest module and is discarded. + """ + + code: str = f"{MODULE_ISSUE_CODE}_005" + path: Path + + @property + def message(self) -> str: + return f"Module {self.path.as_posix()!r} contains nested modules and was discarded. Only the deepest modules are loaded." diff --git a/cognite_toolkit/_cdf_tk/data_classes/_module_directories.py b/cognite_toolkit/_cdf_tk/data_classes/_module_directories.py index d5b21aa798..44b3de4846 100644 --- a/cognite_toolkit/_cdf_tk/data_classes/_module_directories.py +++ b/cognite_toolkit/_cdf_tk/data_classes/_module_directories.py @@ -8,7 +8,8 @@ from typing import Any, SupportsIndex, overload from cognite_toolkit._cdf_tk.constants import INDEX_PATTERN -from cognite_toolkit._cdf_tk.utils import calculate_directory_hash, iterate_modules, resource_folder_from_path +from cognite_toolkit._cdf_tk.utils.hashing import calculate_directory_hash +from cognite_toolkit._cdf_tk.utils.modules import iterate_modules, resource_folder_from_path from ._module_toml import ModuleToml diff --git a/cognite_toolkit/_cdf_tk/data_classes/_modules.py b/cognite_toolkit/_cdf_tk/data_classes/_modules.py deleted file mode 100644 index 85f3c3493f..0000000000 --- a/cognite_toolkit/_cdf_tk/data_classes/_modules.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Compatibility wrapper for module loading data classes. - -Historically, some parts of the code imported `ModuleRootDirectory` from -`cognite_toolkit._cdf_tk.data_classes._modules`. The canonical implementation -now lives in `cognite_toolkit._cdf_tk.data_classes.modules`. -""" - -from cognite_toolkit._cdf_tk.data_classes.modules import Module, ModuleRootDirectory, Resource - -__all__ = ["Module", "ModuleRootDirectory", "Resource"] diff --git a/cognite_toolkit/_cdf_tk/data_classes/modules.py b/cognite_toolkit/_cdf_tk/data_classes/modules.py deleted file mode 100644 index 19d5a1de53..0000000000 --- a/cognite_toolkit/_cdf_tk/data_classes/modules.py +++ /dev/null @@ -1,137 +0,0 @@ -import sys -from collections.abc import Iterator -from functools import cached_property -from pathlib import Path - -from pydantic import BaseModel, ConfigDict, Field - -from cognite_toolkit._cdf_tk.constants import MODULES -from cognite_toolkit._cdf_tk.data_classes._issues import IssueList, ModuleDirectoryIssue -from cognite_toolkit._cdf_tk.utils import iterate_modules -from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules - -from ._module_toml import ModuleToml - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - - -class Resource(BaseModel): - model_config = ConfigDict( - frozen=True, - validate_assignment=True, - ) - - path: Path - - @classmethod - def load(cls, path: Path) -> Self: - return cls(path=path) - - -class Module(BaseModel): - model_config = ConfigDict( - frozen=True, - validate_assignment=True, - ) - - path: Path - resources: list[Resource] - definition: ModuleToml | None = None - - @classmethod - def load(cls, path: Path, resource_paths: list[Path]) -> Self: - definition = ModuleToml.load(path / ModuleToml.filename) if (path / ModuleToml.filename).exists() else None - resources = [Resource.load(path=resource_path) for resource_path in resource_paths] - return cls(path=path, resources=resources, definition=definition) - - -class ModuleRootDirectory(BaseModel): - model_config = ConfigDict( - frozen=True, - validate_assignment=True, - ) - - organization_dir: Path - modules: list[Module] = Field(default_factory=list) - - @classmethod - def load(cls, organization_dir: Path, selection: list[str | Path] | None = None) -> Self: - # selection semantics: - # - selection is None -> select all modules - # - selection is [] -> select none (explicit "no modules selected") - selected = None if selection is None else parse_user_selected_modules(selection, organization_dir) - return cls( - organization_dir=organization_dir, - modules=[ - Module.load(path=module_path, resource_paths=resource_paths) - for module_path, resource_paths in iterate_modules(organization_dir / MODULES) - if cls._is_selected(module_path, organization_dir, selected) - ], - ) - - def verify_integrity(self) -> IssueList: - issues = IssueList() - if not self.organization_dir.exists() or not self.organization_dir.is_dir(): - issues.append( - ModuleDirectoryIssue( - message=f"Organization directory {self.organization_dir.as_posix()!r} is not a directory." - ) - ) - elif not self.organization_dir.joinpath(MODULES).is_dir(): - issues.append( - ModuleDirectoryIssue( - message=f"Modules directory {self.organization_dir.joinpath(MODULES).as_posix()!r} is not a directory." - ) - ) - return issues - - @staticmethod - def _is_selected(module_path: Path, organization_dir: Path, selection: list[str | Path] | None) -> bool: - if selection is None: - return True - relative = module_path.relative_to(organization_dir) - return module_path.name in selection or relative in selection or any(p in selection for p in relative.parents) - - def __iter__(self) -> Iterator[Module]: # type: ignore[override] - return iter(self.modules) - - def __len__(self) -> int: - # Enables correct truthiness checks (e.g. `if not modules.selected:`) - return len(self.modules) - - @cached_property - def available(self) -> set[str | Path]: - """Ways of selecting the loaded modules (name and relative paths).""" - selections: set[str | Path] = set() - for module in self.modules: - relative = module.path.relative_to(self.organization_dir) - selections.add(module.path.name) - selections.add(relative) - selections.update(relative.parents) - return selections - - @cached_property - def available_names(self) -> set[str]: - return {item for item in self.available if isinstance(item, str)} - - @cached_property - def available_paths(self) -> set[Path]: - return {item for item in self.available if isinstance(item, Path)} - - @property - def selected(self) -> "ModuleRootDirectory": - # ModuleRootDirectory currently only loads selected modules - return self - - def as_path_by_name(self) -> dict[str, list[Path]]: - module_path_by_name: dict[str, list[Path]] = {} - for module in self.modules: - module_path_by_name.setdefault(module.path.name, []).append(module.path.relative_to(self.organization_dir)) - return module_path_by_name - - @cached_property - def paths(self) -> list[Path]: - return [module.path for module in self.modules] diff --git a/cognite_toolkit/_cdf_tk/utils/modules.py b/cognite_toolkit/_cdf_tk/utils/modules.py index a6400f6b2d..2febc00fc5 100644 --- a/cognite_toolkit/_cdf_tk/utils/modules.py +++ b/cognite_toolkit/_cdf_tk/utils/modules.py @@ -7,12 +7,12 @@ MODULE_PATH_SEP, ROOT_MODULES, ) -from cognite_toolkit._cdf_tk.data_classes._issues import Issue, IssueList, ModuleSkippedIssue def iterate_modules(root_dir: Path) -> Iterator[tuple[Path, list[Path]]]: """Iterate over all modules in the project and yield the module directory and all files in the module. + Args: root_dir (Path): The root directory of the project @@ -49,60 +49,6 @@ def _iterate_modules(root_dir: Path) -> Iterator[tuple[Path, list[Path]]]: yield from _iterate_modules(module_dir) -def _iterate_modules_v2(root_dir: Path) -> Iterator[tuple[Path, list[Path]]]: - """V2 module iterator that also returns loading issues. - - Returns: - (modules, issues) where modules is a list of (module_dir, filepaths). - """ - # local imports to avoid circular import - - from cognite_toolkit._cdf_tk.constants import EXCL_FILES - from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME - - def _collect(current_dir: Path, is_root: bool) -> tuple[list[tuple[Path, list[Path]]], IssueList]: - modules: list[tuple[Path, list[Path]]] = [] - issues = IssueList() - - for module_dir in current_dir.iterdir(): - if (module_dir / ".toolkitignore").exists(): - issues.append(ModuleSkippedIssue(path=module_dir)) - continue - - if not module_dir.is_dir(): - continue - sub_directories = [path for path in module_dir.iterdir() if path.is_dir()] - is_any_resource_directories = any(d.name in CRUDS_BY_FOLDER_NAME for d in sub_directories) - - if sub_directories and is_any_resource_directories: - modules.append( - ( - module_dir, - [p for p in module_dir.rglob("*") if p.is_file() and p.name not in EXCL_FILES], - ) - ) - continue - child_modules, child_issues = _collect(module_dir, is_root=False) - modules.extend(child_modules) - issues.extend(child_issues) - - if is_root and not modules: - issues.append( - Issue( - name="NoModulesFound", - code="NO_MODULES_FOUND", - message=f"No modules found under {current_dir.as_posix()!r}.", - ) - ) - return modules, issues - - # Backwards-compat: keep this as an iterator by yielding the modules, - # but allow callers to get issues via attribute on the generator result. - modules, issues = _collect(root_dir, is_root=True) - _iterate_modules_v2.issues = issues # type: ignore[attr-defined] - yield from modules - - @overload def module_from_path(path: Path, return_resource_folder: Literal[True]) -> tuple[str, str]: ... diff --git a/cognite_toolkit/_cdf_tk/validation.py b/cognite_toolkit/_cdf_tk/validation.py index 6c0a6d1c16..03e4bc9b26 100644 --- a/cognite_toolkit/_cdf_tk/validation.py +++ b/cognite_toolkit/_cdf_tk/validation.py @@ -11,7 +11,6 @@ from cognite_toolkit._cdf_tk.cdf_toml import CDFToml from cognite_toolkit._cdf_tk.constants import DEV_ONLY_MODULES from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, ModuleDirectories -from cognite_toolkit._cdf_tk.data_classes._modules import ModuleRootDirectory from cognite_toolkit._cdf_tk.exceptions import ( ToolkitDuplicatedModuleError, ToolkitEnvError, @@ -229,7 +228,7 @@ def as_json_path(loc: tuple[str | int, ...]) -> str: def validate_module_selection( - modules: ModuleDirectories | ModuleRootDirectory, + modules: ModuleDirectories, config: BuildConfigYAML, packages: dict[str, list[str]], selected_modules: set[str | Path], diff --git a/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py b/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py index 3f558c40a0..4b473eec32 100644 --- a/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py +++ b/tests/test_unit/test_cdf_tk/test_commands/test_build_v2.py @@ -7,12 +7,12 @@ import yaml from _pytest.monkeypatch import MonkeyPatch from cognite.client.data_classes.data_modeling import DataModelId, Space -from cognite_toolkit._cdf_tk.commands.build_v2.build_issues import BuildIssue, BuildIssueList from cognite_toolkit._cdf_tk.commands.build_cmd import BuildCommand as OldBuildCommand from cognite_toolkit._cdf_tk.commands.build_v2.build_cmd import BuildCommand from cognite_toolkit._cdf_tk.cruds import TransformationCRUD from cognite_toolkit._cdf_tk.data_classes import BuildConfigYAML, BuildVariables, Environment, Packages +from cognite_toolkit._cdf_tk.data_classes._issues import Issue, IssueList from cognite_toolkit._cdf_tk.data_classes._module_directories import ModuleDirectories from cognite_toolkit._cdf_tk.exceptions import ( ToolkitMissingModuleError, @@ -61,8 +61,9 @@ def test_module_with_non_resource_directories(self, tmp_path: Path) -> None: assert len(cmd.issues) >= 1 assert ( - BuildIssue( - description=f"Module 'ill_made_module' has non-resource directories: ['spaces']. {ModuleDefinition.short()}" + Issue( + name="ModuleWithNonResourceDirectories", + message=f"Module 'ill_made_module' has non-resource directories: ['spaces']. {ModuleDefinition.short()}", ) in cmd.issues ) @@ -250,4 +251,4 @@ def test_build_parity_with_old_build_command(self, tmp_path: Path) -> None: no_clean=False, ) assert new_result == old_result - assert new_cmd.issues == BuildIssueList.from_warning_list(old_cmd.warning_list) + assert new_cmd.issues == IssueList.from_warning_list(old_cmd.warning_list) diff --git a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py index a69d3c6017..2b4bce8b63 100644 --- a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py +++ b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py @@ -1,15 +1,22 @@ from pathlib import Path +from unittest.mock import patch import pytest +from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._modules import Modules from cognite_toolkit._cdf_tk.constants import MODULES -from cognite_toolkit._cdf_tk.data_classes._modules import ModuleRootDirectory, OrganizationDirMissingWarning +from cognite_toolkit._cdf_tk.data_classes._issues import ( + ModuleLoadingDisabledResourceIssue, + ModuleLoadingIssue, + ModuleLoadingNoResourcesIssue, + ModuleLoadingUnrecognizedResourceIssue, +) from tests.data import COMPLETE_ORG class TestModules: def test_load_modules(self) -> None: - modules = ModuleRootDirectory.load(COMPLETE_ORG) + modules, _ = Modules.load(COMPLETE_ORG) assert len(modules.modules) == 3 assert {module.path for module in modules.modules} == { @@ -19,7 +26,7 @@ def test_load_modules(self) -> None: } def test_load_selection(self) -> None: - modules = ModuleRootDirectory.load( + modules, issues = Modules.load( COMPLETE_ORG, selection=["my_example_module", Path(MODULES) / "my_file_expand_module"] ) @@ -28,8 +35,174 @@ def test_load_selection(self) -> None: COMPLETE_ORG / MODULES / "my_example_module", COMPLETE_ORG / MODULES / "my_file_expand_module", } + # No issues should be raised + assert len(issues) == 0 - def test_warns_if_organization_dir_missing(self) -> None: - with pytest.warns(OrganizationDirMissingWarning): - modules = ModuleRootDirectory.load(Path("wrong")) - assert modules.modules == [] + @pytest.fixture + def selection_test_modules_root(self, tmp_path: Path) -> Path: + modules_root = tmp_path / MODULES + + module_a = modules_root / "A" / "sub" / "module1" + module_b = modules_root / "B" / "module2" + + for module_dir in (module_a, module_b): + (module_dir / "transformations").mkdir(parents=True) + (module_dir / "transformations" / "resource.yaml").touch() + + return modules_root + + def test_selection_default_includes_all_modules(self, selection_test_modules_root: Path) -> None: + modules_root = selection_test_modules_root + organization_dir = modules_root.parent + + modules, issues = Modules.load(organization_dir) + + # A is a parent module with nested modules, so it should be discarded + # Only the deepest modules should be loaded + assert {m.path.relative_to(modules_root) for m in modules.modules} == { + Path("A/sub/module1"), + Path("B/module2"), + } + # No issues should be raised + assert len(issues) == 0 + + def test_selection_by_modules_path_is_recursive(self, selection_test_modules_root: Path) -> None: + modules_root = selection_test_modules_root + organization_dir = modules_root.parent + + modules, issues = Modules.load(organization_dir, selection=[Path(MODULES) / "A"]) + + # Only the selected module should be loaded + assert {m.path.relative_to(modules_root) for m in modules.modules} == { + Path("A/sub/module1"), + } + # No issues should be raised + assert len(issues) == 0 + + def test_selection_by_name_is_case_insensitive(self, selection_test_modules_root: Path) -> None: + modules_root = selection_test_modules_root + organization_dir = modules_root.parent + + modules, issues = Modules.load(organization_dir, selection=["A/SUB"]) + + # A is a parent module with nested modules, so it should be discarded + # Only the deepest module should be loaded + assert {m.path.relative_to(modules_root) for m in modules.modules} == { + Path("A/sub/module1"), + } + # No issues should be raised + assert len(issues) == 0 + + def test_selection_by_string_path_is_case_insensitive(self, selection_test_modules_root: Path) -> None: + modules_root = selection_test_modules_root + organization_dir = modules_root.parent + + modules, issues = Modules.load(organization_dir, selection=["MODULES/A"]) + + # A is a parent module with nested modules, so it should be discarded + # Only the deepest module should be loaded + assert {m.path.relative_to(modules_root) for m in modules.modules} == { + Path("A/sub/module1"), + } + # No issues should be raised + assert len(issues) == 0 + + def test_module_root_directory_missing(self, tmp_path: Path) -> None: + modules_root = Path("missing_module_root") + _modules, issues = Modules.load(modules_root) + assert len(issues) == 1 + assert isinstance(issues[0], ModuleLoadingIssue) + + def test_unrecognized_module_gives_warning(self, tmp_path: Path) -> None: + module_path = tmp_path / MODULES / "mixed_module" + (module_path / "transformations").mkdir(parents=True) + (module_path / "transformations" / "transformation.yaml").touch() + (module_path / "docs").mkdir(parents=True) + (module_path / "docs" / "readme.md").touch() + _modules, issues = Modules.load(tmp_path) + + assert len(issues) == 1 + assert issues[0].path == module_path + assert isinstance(issues[0], ModuleLoadingUnrecognizedResourceIssue) + assert issues[0].unrecognized_resource_folders == ["docs"] + + def test_module_with_normal_and_disabled_resources(self, tmp_path: Path) -> None: + """Test that a module with both normal and disabled resource folders shows appropriate warnings.""" + from cognite_toolkit._cdf_tk.cruds._resource_cruds.streams import StreamCRUD + + module_path = tmp_path / MODULES / "mixed_module" + (module_path / "transformations").mkdir(parents=True) + (module_path / "transformations" / "transformation.yaml").touch() + (module_path / "streams").mkdir(parents=True) + (module_path / "streams" / "stream.yaml").touch() + + # Mock EXCLUDED_CRUDS to always include StreamCRUD so streams is always disabled + with patch("cognite_toolkit._cdf_tk.commands.build_v2.data_classes._modules.EXCLUDED_CRUDS", {StreamCRUD}): + modules, issues = Modules.load(tmp_path, selection=["mixed_module"]) + + # The module should be loaded since it has at least one normal resource (transformations) + assert len(modules.modules) == 1 + assert issues[0].path == module_path + assert isinstance(issues[0], ModuleLoadingDisabledResourceIssue) + assert issues[0].disabled_resource_folders == ["streams"] + + def test_module_with_no_resources(self, tmp_path: Path) -> None: + """Test that a module with no resource folders raises ModuleLoadingNoResourcesIssue and is not loaded.""" + module_path = tmp_path / MODULES / "empty_module" + (module_path).mkdir(parents=True) + (module_path / ".gitkeep").touch() + modules, issues = Modules.load(tmp_path, selection=["empty_module"]) + assert len(modules.modules) == 0 + assert len(issues) == 1 + assert issues[0].path == module_path + assert isinstance(issues[0], ModuleLoadingNoResourcesIssue) + + def test_module_container_with_resources_and_nested_module(self, tmp_path: Path) -> None: + global_path = tmp_path / MODULES / "global" + (global_path / "transformations").mkdir(parents=True) + (global_path / "transformations" / "transformation.yaml").touch() + + # Create a nested module inside "global" + nested_module_path = global_path / "nested_module" + (nested_module_path / "transformations").mkdir(parents=True) + (nested_module_path / "transformations" / "transformation.yaml").touch() + + modules, issues = Modules.load(tmp_path) + + module_paths = {module.path for module in modules.modules} + + # Only the deepest module should be loaded + assert len(modules.modules) == 1 + assert nested_module_path in module_paths + assert global_path not in module_paths + + # The parent module should be discarded with a ModuleLoadingIssue + parent_issues = [ + issue for issue in issues if isinstance(issue, ModuleLoadingIssue) and issue.path == global_path + ] + assert len(parent_issues) > 0, ( + f"Expected a ModuleLoadingIssue for parent module {global_path}, but found issues: {issues}" + ) + + def test_functions_resource_folder_with_subfolder(self, tmp_path: Path) -> None: + """Test that a functions resource folder with a subfolder (for function code) is still detected as a resource folder.""" + module_path = tmp_path / MODULES / "my_module" + functions_path = module_path / "functions" + functions_path.mkdir(parents=True) + + # Create a function YAML file in the functions folder + (functions_path / "my_function.yaml").touch() + + # Create a subfolder in functions (for function code) + code_path = functions_path / "my_function_code" + code_path.mkdir() + (code_path / "handler.py").touch() + + modules, issues = Modules.load(tmp_path) + + # The module should be loaded (functions with subfolders should still be detected) + assert len(modules.modules) == 1 + assert modules.modules[0].path == module_path + + # No issues should be raised (functions folder should be recognized even with subfolders) + assert len(issues) == 0, f"Expected no issues, but got: {issues}" From 6ce349f309e32ebf0c230c0d15da318232be7b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A5l=20R=C3=B8nning?= Date: Fri, 9 Jan 2026 15:58:56 +0100 Subject: [PATCH 5/5] A new take on module lookup --- .../build_v2/data_classes/_modules.py | 79 +++++----- .../_cdf_tk/data_classes/_issues.py | 145 +----------------- .../test_data_classes/test_modules.py | 17 +- 3 files changed, 51 insertions(+), 190 deletions(-) diff --git a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py index 99843bf3b2..35b5486e3a 100644 --- a/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py +++ b/cognite_toolkit/_cdf_tk/commands/build_v2/data_classes/_modules.py @@ -1,4 +1,3 @@ -import os import sys from collections import defaultdict from pathlib import Path @@ -9,9 +8,7 @@ from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME, EXCLUDED_CRUDS from cognite_toolkit._cdf_tk.data_classes import IssueList from cognite_toolkit._cdf_tk.data_classes._issues import ( - ModuleLoadingDisabledResourceIssue, ModuleLoadingIssue, - ModuleLoadingUnrecognizedResourceIssue, ) from cognite_toolkit._cdf_tk.data_classes._module_toml import ModuleToml from cognite_toolkit._cdf_tk.utils.modules import parse_user_selected_modules @@ -68,62 +65,70 @@ def load(cls, organization_dir: Path, selection: list[str | Path] | None = None) ) return cls(organization_dir=organization_dir, modules=[]), issues - # Walk modules_root: all leaf directories are resource presumed to be resource folders - # Their parents are module candidates - excluded_folder_names = {crud.folder_name for crud in EXCLUDED_CRUDS} - # Map module paths to their resource folders and issues - module_candidates: defaultdict[Path, set[str]] = defaultdict(set) - unrecognized_resource_folders: defaultdict[Path, set[str]] = defaultdict(set) - disabled_resource_folders: defaultdict[Path, set[str]] = defaultdict(set) + detected_modules: defaultdict[Path, set[str]] = defaultdict(set) + detected_unrecognized_resource_folders: defaultdict[Path, set[str]] = defaultdict(set) + detected_disabled_resource_folders: defaultdict[Path, set[str]] = defaultdict(set) + detected_unselected_modules: set[Path] = set() + + for current_dir in modules_root.glob("**/"): + if current_dir == modules_root: + continue - for dirpath, dirnames, filenames in os.walk(modules_root): - current_dir = Path(dirpath) module_candidate = current_dir.parent + if module_candidate in detected_modules: + continue if not cls._matches_selection(module_candidate, modules_root, selected): + detected_unselected_modules.add(module_candidate) continue - # if ( - # dirnames # directories with subdirectories - # or not filenames # directories with no files - # or not cls._matches_selection(module_candidate, modules_root, selected) # not selected - # ): - # continue - # if dirnames: - # continue - # if not filenames: - # continue - # if not cls._matches_selection(module_candidate, modules_root, selected): - # continue - - if current_dir.name in excluded_folder_names: - disabled_resource_folders[module_candidate].add(current_dir.name) - elif current_dir.name not in CRUDS_BY_FOLDER_NAME: - unrecognized_resource_folders[module_candidate].add(current_dir.name) + module_subfolders = [d for d in module_candidate.iterdir() if d.is_dir()] + excluded_folder_names = {crud.folder_name for crud in EXCLUDED_CRUDS} + resource_folders = {d.name for d in module_subfolders if d.name in CRUDS_BY_FOLDER_NAME.keys()} + disabled_resource_folders = {d.name for d in module_subfolders if d.name in excluded_folder_names} + unrecognized_resource_folders = { + d.name + for d in module_subfolders + if d.name not in CRUDS_BY_FOLDER_NAME and d.name not in excluded_folder_names + } + + if resource_folders: + detected_modules[module_candidate] = resource_folders + + # If the current module is a submodule of another module, remove the parent module from the detected modules. We only keep the deepest module. + for k, v in detected_modules.items(): + if k in module_candidate.parents: + detected_unselected_modules.add(k) + detected_modules.pop(k) + break else: - module_candidates[module_candidate].add(current_dir.name) + continue + if disabled_resource_folders: + detected_disabled_resource_folders[module_candidate] = disabled_resource_folders + if unrecognized_resource_folders: + detected_unrecognized_resource_folders[module_candidate] = unrecognized_resource_folders loaded_modules = [] - for module_candidate, resource_folders in module_candidates.items(): + for module_candidate, resource_folders in detected_modules.items(): loaded_modules.append( Module.load( path=module_candidate, resource_paths=[module_candidate / folder for folder in resource_folders] ) ) - for k, v in unrecognized_resource_folders.items(): + for k, v in detected_unrecognized_resource_folders.items(): issues.append( - ModuleLoadingUnrecognizedResourceIssue( + ModuleLoadingIssue( path=k, - unrecognized_resource_folders=list(v), + message=f"Module {k.as_posix()!r} contains unrecognized resource folders: {', '.join(v)}", ) ) - for k, v in disabled_resource_folders.items(): + for k, v in detected_disabled_resource_folders.items(): issues.append( - ModuleLoadingDisabledResourceIssue( + ModuleLoadingIssue( path=k, - disabled_resource_folders=list(v), + message=f"Module {k.as_posix()!r} contains unsupported resource folders, check flags in cdf.toml: {', '.join(v)}", ) ) diff --git a/cognite_toolkit/_cdf_tk/data_classes/_issues.py b/cognite_toolkit/_cdf_tk/data_classes/_issues.py index 6314b12c77..7abec7bee0 100644 --- a/cognite_toolkit/_cdf_tk/data_classes/_issues.py +++ b/cognite_toolkit/_cdf_tk/data_classes/_issues.py @@ -1,16 +1,13 @@ import sys from collections import UserList from pathlib import Path -from typing import Any - -from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME if sys.version_info >= (3, 11): from typing import Self else: from typing_extensions import Self -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, Field from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList @@ -21,35 +18,7 @@ class Issue(BaseModel): """Base class for all issues""" code: str - _custom_message: str | None = None - - @model_validator(mode="before") - @classmethod - def handle_message_param(cls, data: dict | Any) -> dict | Any: - if isinstance(data, dict): - if "message" in data: - data["_custom_message"] = data.pop("message") - if "name" in data: - # Use name as code if code is not provided - if "code" not in data: - data["code"] = str(data.pop("name")) - else: - _ = data.pop("name") - return data - - @classmethod - def issue_type(cls) -> str: - return cls.__name__ - - @property - def message(self) -> str: - if self._custom_message: - return self._custom_message - fix_msg = self.fix() - return f"{self.issue_type()} {self.code}: {fix_msg if fix_msg else 'Unknown issue'}" - - def fix(self) -> str | None: - return None + message: str | None = Field(default=None) # temporary adapter to manage existing warnings @@ -74,112 +43,4 @@ class ModuleLoadingIssue(Issue): code: str = f"{MODULE_ISSUE_CODE}_001" path: Path - _custom_message: str | None = None - - @model_validator(mode="before") - @classmethod - def handle_message_param(cls, data: dict | Any) -> dict | Any: - if isinstance(data, dict) and "message" in data: - data["_custom_message"] = data.pop("message") - return data - - @property - def message(self) -> str: - if self._custom_message: - return self._custom_message - return f"Module {self.path.as_posix()!r} does not exist or is not a directory, or does not contain the selected modules" - - -class ModuleLoadingUnrecognizedResourceIssue(Issue): - """Module contains a resource type that is not recognized - - ## What it does - Validates that the resource type is supported by the Toolkit. - - ## What is the problem? - If the resource type is not supported, the build will skip it. - - ## How to fix it? - Check spelling or negate the selection with wildcard to exclude the resource type from the loading. - """ - - code: str = f"{MODULE_ISSUE_CODE}_002" - path: Path - unrecognized_resource_folders: list[str] - - @property - def message(self) -> str: - return f"unrecognized resource folders: {', '.join(self.unrecognized_resource_folders)}" - - @property - def verbose(self) -> str: - return ( - self.message - + f"\nThe Toolkit supports the following resource folders: {', '.join(CRUDS_BY_FOLDER_NAME.keys())}" - ) - - -class ModuleLoadingDisabledResourceIssue(Issue): - """Module contains a resource type that hasn't been enabled - - ## What it does - Validates that the resource type is enabled. - - ## What is the problem? - If the resource type is disabled, the build will skip it. - - ## How to fix it? - Enable the resource type in the cdf.toml file. - """ - - code: str = f"{MODULE_ISSUE_CODE}_003" - path: Path - disabled_resource_folders: list[str] - - @property - def message(self) -> str: - return f"Contains resource folders that require enabling a flag in your cdf.toml: {', '.join(self.disabled_resource_folders)}" - - @property - def verbose(self) -> str: - # TODO: show which flags are required to enable the resource folders - return ( - self.message - + f"\nThe Toolkit supports the following resource folders: {', '.join(CRUDS_BY_FOLDER_NAME.keys())}" - ) - - -class ModuleLoadingNoResourcesIssue(Issue): - """Module contains no resources - - ## What it does - Validates that the module contains resources. - - ## What is the problem? - If the module contains no resources, the build will skip it. - """ - - code: str = f"{MODULE_ISSUE_CODE}_004" - path: Path - - @property - def message(self) -> str: - return f"No resources found in module {self.path.as_posix()!r}" - - -class ModuleLoadingNestedModulesIssue(Issue): - """Module contains nested modules - - ## What it does - Validates that the module is a deepest module. - - ## What is the problem? - If the module contains nested modules, it is not a deepest module and is discarded. - """ - - code: str = f"{MODULE_ISSUE_CODE}_005" - path: Path - - @property - def message(self) -> str: - return f"Module {self.path.as_posix()!r} contains nested modules and was discarded. Only the deepest modules are loaded." + message: str | None = Field(default=None) diff --git a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py index 2b4bce8b63..552736fb82 100644 --- a/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py +++ b/tests/test_unit/test_cdf_tk/test_data_classes/test_modules.py @@ -6,10 +6,7 @@ from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._modules import Modules from cognite_toolkit._cdf_tk.constants import MODULES from cognite_toolkit._cdf_tk.data_classes._issues import ( - ModuleLoadingDisabledResourceIssue, ModuleLoadingIssue, - ModuleLoadingNoResourcesIssue, - ModuleLoadingUnrecognizedResourceIssue, ) from tests.data import COMPLETE_ORG @@ -123,8 +120,7 @@ def test_unrecognized_module_gives_warning(self, tmp_path: Path) -> None: assert len(issues) == 1 assert issues[0].path == module_path - assert isinstance(issues[0], ModuleLoadingUnrecognizedResourceIssue) - assert issues[0].unrecognized_resource_folders == ["docs"] + assert issues[0].message == f"Module {module_path.as_posix()!r} contains unrecognized resource folders: docs" def test_module_with_normal_and_disabled_resources(self, tmp_path: Path) -> None: """Test that a module with both normal and disabled resource folders shows appropriate warnings.""" @@ -143,19 +139,18 @@ def test_module_with_normal_and_disabled_resources(self, tmp_path: Path) -> None # The module should be loaded since it has at least one normal resource (transformations) assert len(modules.modules) == 1 assert issues[0].path == module_path - assert isinstance(issues[0], ModuleLoadingDisabledResourceIssue) - assert issues[0].disabled_resource_folders == ["streams"] + assert ( + issues[0].message + == f"Module {module_path.as_posix()!r} contains unsupported resource folders, check flags in cdf.toml: streams" + ) def test_module_with_no_resources(self, tmp_path: Path) -> None: """Test that a module with no resource folders raises ModuleLoadingNoResourcesIssue and is not loaded.""" module_path = tmp_path / MODULES / "empty_module" (module_path).mkdir(parents=True) (module_path / ".gitkeep").touch() - modules, issues = Modules.load(tmp_path, selection=["empty_module"]) + modules, _ = Modules.load(tmp_path, selection=["empty_module"]) assert len(modules.modules) == 0 - assert len(issues) == 1 - assert issues[0].path == module_path - assert isinstance(issues[0], ModuleLoadingNoResourcesIssue) def test_module_container_with_resources_and_nested_module(self, tmp_path: Path) -> None: global_path = tmp_path / MODULES / "global"