Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions cognite/neat/_data_model/importers/_api_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
)
from cognite.neat._issues import ConsistencyError, ModelSyntaxError
from cognite.neat._utils.http_client import FailedRequestMessage
from cognite.neat._utils.text import humanize_collection
from cognite.neat._utils.text import humanize_collection, quote_int_value_by_key_in_yaml
from cognite.neat._utils.validation import ValidationContext, humanize_validation_error


Expand Down Expand Up @@ -122,7 +122,12 @@ def from_yaml(cls, yaml_file: Path, data_model_file: Path | None = None) -> "DMS
"""Create a DMSTableImporter from a YAML file."""
source = cls._display_name(yaml_file)
if yaml_file.suffix.lower() in {".yaml", ".yml", ".json"}:
return cls(yaml.safe_load(yaml_file.read_text(encoding=cls.ENCODING)))
yaml_content = yaml_file.read_text(encoding=cls.ENCODING)
# DataModels and Views have `version` that is often given as an integer by the user.
# This ensures that the version is always read as a string, even if the user forgets to
# quote it in the YAML file.
fixed_content = quote_int_value_by_key_in_yaml(yaml_content, "version")
return cls(yaml.safe_load(fixed_content))
elif yaml_file.is_dir():
return cls(cls._read_yaml_files(yaml_file, data_model_file))
raise FileReadException(source.as_posix(), f"Unsupported file type: {source.suffix}")
Expand Down Expand Up @@ -151,7 +156,13 @@ def _read_yaml_files(cls, directory: Path, data_model_file: Path | None = None)
continue
stem = yaml_file.stem.casefold()

data = yaml.safe_load(yaml_file.read_text(encoding=cls.ENCODING))
yaml_content = yaml_file.read_text(encoding=cls.ENCODING)
if stem.endswith("datamodel") or stem.endswith("view"):
# DataModels and Views have `version` that is often given as an integer by the user.
# This ensures that the version is always read as a string, even if the user forgets to
# quote it in the YAML file.
yaml_content = quote_int_value_by_key_in_yaml(yaml_content, "version")
data = yaml.safe_load(yaml_content)
list_data = data if isinstance(data, list) else [data]

if stem.endswith("datamodel"):
Expand Down
9 changes: 9 additions & 0 deletions cognite/neat/_utils/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,12 @@ def title_case(s: str) -> str:
def split_on_capitals(text: str) -> list[str]:
"""Split a string at capital letters."""
return re.findall(r"[A-Z][a-z]*", text)


def quote_int_value_by_key_in_yaml(content: str, key: str) -> str:
    """Wrap unquoted integer-like values of ``key`` in double quotes.

    A value is rewritten only when it consists solely of digits and
    underscores (e.g. ``1``, ``1_000``, ``3_0_2``) and is followed by nothing
    but optional trailing whitespace or a YAML comment. Every other line is
    returned unchanged, including values that are already quoted, keys with
    no inline value (``version:`` used as a property name) and values that
    merely start with digits such as ``2.1.1``.

    Args:
        content: The YAML document as text.
        key: The mapping key whose value should be quoted. It is matched
            literally, also when it is the first key of a list item
            (``- key: 1``).

    Returns:
        The YAML text with the matching values quoted.
    """
    # Review finding: matching "digits followed by anything" turned
    # `version: 2.1.1` into `version: "2".1.1`, which is no longer valid YAML.
    # The lookahead therefore requires the digits to be the whole scalar:
    # either the line ends (allowing trailing blanks and a CR from CRLF files)
    # or a comment starts. A YAML comment must be preceded by whitespace, so
    # `1#x` is deliberately not treated as "1" plus a comment.
    # `re.escape` keeps keys containing regex metacharacters literal.
    pattern = rf"^(\s*-?\s*{re.escape(key)}:\s*)([\d_]+)(?=[ \t]*\r?$|[ \t]+#)"
    replacement = r'\1"\2"'

    return re.sub(pattern, replacement, content, flags=re.MULTILINE)
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from collections.abc import Iterable
from pathlib import Path
from unittest.mock import MagicMock

import pytest

from cognite.neat._data_model.exporters import DMSAPIYAMLExporter
from cognite.neat._data_model.importers import DMSAPIImporter
from cognite.neat._data_model.models.dms import RequestSchema


def valid_dms_yaml_formats_roundtrip() -> Iterable[tuple]:
    """Yield parametrized round-trip cases for the DMS API YAML format.

    Each case is a ``pytest.param`` holding two dictionaries:

    * the source, keyed by section name (``dataModel``, ``views``,
      ``containers``) with the YAML text of that section as value;
    * the expected output, keyed by the written file name with the expected
      file content as value.
    """
    # NOTE(review): the YAML literals below are shown without nested indentation
    # in this view; confirm their whitespace against the repository.
    yield pytest.param(
        # Source: `version` is given unquoted (1_0_0), which plain YAML loading
        # would read as an integer.
        {
            "dataModel": """ space: my_space
externalId: MyModel
version: 1_0_0
views:
- space: my_space
externalId: MyView
version: 1_0_0""",
            "views": """- space: my_space
externalId: MyView
version: 1_0_0
properties:
name:
container:
space: my_space
externalId: MyContainer
containerPropertyIdentifier: name
""",
            "containers": """- space: my_space
externalId: MyContainer
properties:
name:
type:
type: text
""",
        },
        # Expected: one entry per written file, with `version` as the quoted
        # string '1_0_0'.
        {
            "MyModel.datamodel.yaml": """space: my_space
externalId: MyModel
version: '1_0_0'
views:
- space: my_space
externalId: MyView
version: '1_0_0'
type: view
""",
            "views/MyView.view.yaml": """space: my_space
externalId: MyView
version: '1_0_0'
properties:
name:
container:
space: my_space
externalId: MyContainer
type: container
containerPropertyIdentifier: name
""",
            "containers/MyContainer.container.yaml": """space: my_space
externalId: MyContainer
properties:
name:
type:
type: text
""",
        },
        id="Handle integer in version field",
    )


class TestImportYAMLAPIFormat:
    """Round-trip tests: import DMS API YAML, export it again, compare the files."""

    @pytest.mark.parametrize("source, expected", list(valid_dms_yaml_formats_roundtrip()))
    def test_roundtrip_single_input_file(self, source: dict[str, str], expected: dict[str, str]) -> None:
        """Import all sections from one combined YAML file and check the export."""
        # Each section becomes "<key>:" followed by its YAML body.
        lines: list[str] = [part for key, value in source.items() for part in (f"{key}:", value)]
        single_file = MagicMock(spec=Path)
        single_file.suffix = ".yaml"
        single_file.read_text.return_value = "\n".join(lines)

        importer = DMSAPIImporter.from_yaml(single_file)
        data_model = importer.to_data_model()

        self.assert_written_output(data_model, expected)

    @pytest.mark.parametrize("source, expected", list(valid_dms_yaml_formats_roundtrip()))
    def test_roundtrip_directory_input(self, source: dict[str, str], expected: dict[str, str]) -> None:
        """Import the sections from one mocked file per kind and check the export."""
        mock_files = []
        for kind, content in source.items():
            # File kind is singular.
            mock_files.append(self._make_mock_file(kind.removesuffix("s"), content))

        yaml_dir = MagicMock(spec=Path)
        yaml_dir.is_dir.return_value = True
        yaml_dir.rglob.return_value = mock_files

        importer = DMSAPIImporter.from_yaml(yaml_dir)
        data_model = importer.to_data_model()

        self.assert_written_output(data_model, expected)

    def _make_mock_file(self, kind: str, content: str) -> MagicMock:
        """Return a mocked ``Path`` named ``my.<kind>.yaml`` whose text is ``content``."""
        suffix = ".yaml"
        stem = f"my.{kind}"

        mock_file = MagicMock(spec=Path)
        mock_file.suffix = suffix
        mock_file.read_text.return_value = content
        mock_file.stem = stem
        mock_file.name = f"{stem}{suffix}"
        return mock_file

    def assert_written_output(self, data_model: RequestSchema, expected: dict[str, str]) -> None:
        """Export ``data_model`` to a mocked directory and compare the written files."""
        written_files: dict[str, str] = {}

        def make_mock_path(name: str = "root") -> MagicMock:
            def record_write(content, **_):
                # Capture what would have been written, keyed by the path's name.
                written_files[name] = content

            def join_child(self, other):
                # `path / other` yields a fresh mock named after the right operand.
                return make_mock_path(str(other))

            mock = MagicMock(spec=Path)
            mock.suffix = ""
            mock.write_text = MagicMock(side_effect=record_write)
            mock.__truediv__ = join_child
            return mock

        root_dir = make_mock_path()
        DMSAPIYAMLExporter().export_to_file(data_model, root_dir)
        assert expected == written_files
120 changes: 120 additions & 0 deletions tests/tests_unit/test_utils/test_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from collections.abc import Iterable

import pytest

from cognite.neat._utils.text import quote_int_value_by_key_in_yaml


def quote_key_in_yaml_test_cases() -> Iterable[tuple]:
    """Yield ``pytest.param`` cases of (raw YAML, expected YAML after quoting).

    The cases cover unquoted versions, already quoted versions (single and
    double quotes), a property named ``version`` with no inline value, and
    versions followed by a comment.
    """
    # NOTE(review): the YAML literals below are shown without nested indentation
    # in this view; confirm their whitespace against the repository.

    # Unquoted versions are expected to come back double-quoted.
    yield pytest.param(
        """space: my_space
externalID: myModel
version: 3_0_2""",
        '''space: my_space
externalID: myModel
version: "3_0_2"''',
        id="Single data model",
    )

    yield pytest.param(
        """- space: my_space
externalId: myModel
version: 1_000
- space: my_other_space
externalId: myOtherModel
version: 2_000
""",
        """- space: my_space
externalId: myModel
version: "1_000"
- space: my_other_space
externalId: myOtherModel
version: "2_000"
""",
        id="Two Data Models",
    )

    # Already single-quoted versions are expected to be left unchanged.
    yield pytest.param(
        """space: my_space
externalID: myModel
version: '3_0_2'""",
        """space: my_space
externalID: myModel
version: '3_0_2'""",
        id="Single data model with single quoted version",
    )

    yield pytest.param(
        """- space: my_space
externalId: myModel
version: '1_000'
- space: my_other_space
externalId: myOtherModel
version: '2_000'
""",
        """- space: my_space
externalId: myModel
version: '1_000'
- space: my_other_space
externalId: myOtherModel
version: '2_000'
""",
        id="Two Data Models with single quoted version",
    )

    # Already double-quoted versions are expected to be left unchanged.
    yield pytest.param(
        '''space: my_space
externalID: myModel
version: "3_0_2"''',
        '''space: my_space
externalID: myModel
version: "3_0_2"''',
        id="Single data model with double quoted version",
    )

    yield pytest.param(
        """- space: my_space
externalId: myModel
version: "1_000"
- space: my_other_space
externalId: myOtherModel
version: "2_000"
""",
        """- space: my_space
externalId: myModel
version: "1_000"
- space: my_other_space
externalId: myOtherModel
version: "2_000"
""",
        id="Two Data Models with double quoted version",
    )

    # `version:` used as a property name (no inline value) must pass through as-is.
    version_prop = """
externalId: CogniteSourceSystem
properties:
version:
container:
externalId: CogniteSourceSystem
space: sp_core_model
type: container
"""
    yield pytest.param(
        version_prop,
        version_prop,
        id="Version property untouched",
    )
    # A trailing comment must be kept after the quoted value.
    yield pytest.param(
        """version: 1_0_0 # My comment""", """version: "1_0_0" # My comment""", id="Handle comment after version"
    )
    yield pytest.param(
        """version: 1 # My "quoted" comment""",
        """version: "1" # My "quoted" comment""",
        id="Handle comment with quotes after version",
    )


class TestQuoteKeyInYAML:
    """Checks ``quote_int_value_by_key_in_yaml`` against the parametrized YAML cases."""

    @pytest.mark.parametrize("raw, expected", list(quote_key_in_yaml_test_cases()))
    def test_quote_key_in_yaml(self, raw: str, expected: str) -> None:
        """Quoting the ``version`` key in ``raw`` must produce ``expected``."""
        actual = quote_int_value_by_key_in_yaml(raw, key="version")
        assert actual == expected
Loading