From edd87bdba8215422ee390e34ff99856bcf594701 Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 26 Mar 2026 16:41:41 +0100 Subject: [PATCH 1/4] Added CaseInsensitiveDict to handle case-insensitive VTL regular names rule --- src/vtlengine/API/_InternalApi.py | 13 +- src/vtlengine/AST/DAG/__init__.py | 17 +- src/vtlengine/Interpreter/__init__.py | 70 +++++--- src/vtlengine/Model/__init__.py | 19 ++- src/vtlengine/Model/_case_insensitive_dict.py | 153 ++++++++++++++++++ src/vtlengine/Operators/Join.py | 13 +- 6 files changed, 238 insertions(+), 47 deletions(-) create mode 100644 src/vtlengine/Model/_case_insensitive_dict.py diff --git a/src/vtlengine/API/_InternalApi.py b/src/vtlengine/API/_InternalApi.py index 8b179d196..511e64483 100644 --- a/src/vtlengine/API/_InternalApi.py +++ b/src/vtlengine/API/_InternalApi.py @@ -47,6 +47,7 @@ Scalar, ValueDomain, ) +from vtlengine.Model._case_insensitive_dict import CaseInsensitiveDict # Cache SCALAR_TYPES keys for performance _SCALAR_TYPE_KEYS = SCALAR_TYPES.keys() @@ -94,8 +95,8 @@ def _load_dataset_from_structure( """ Loads a dataset with the structure given. """ - datasets = {} - scalars = {} + datasets: CaseInsensitiveDict[Any] = CaseInsensitiveDict() + scalars: CaseInsensitiveDict[Any] = CaseInsensitiveDict() if "datasets" in structures: for dataset_json in structures["datasets"]: @@ -417,12 +418,12 @@ def load_datasets( if isinstance(data_structure, dict): return _load_datastructure_single(data_structure, sdmx_mappings=sdmx_mappings) if isinstance(data_structure, list): - ds_structures: Dict[str, Dataset] = {} - scalar_structures: Dict[str, Scalar] = {} + ds_structures: CaseInsensitiveDict[Dataset] = CaseInsensitiveDict() + scalar_structures: CaseInsensitiveDict[Scalar] = CaseInsensitiveDict() for x in data_structure: ds, sc = _load_datastructure_single(x, sdmx_mappings=sdmx_mappings) - ds_structures = {**ds_structures, **ds} # Overwrite ds_structures dict. 
- scalar_structures = {**scalar_structures, **sc} # Overwrite scalar_structures dict. + ds_structures.update(ds) + scalar_structures.update(sc) return ds_structures, scalar_structures return _load_datastructure_single(data_structure, sdmx_mappings=sdmx_mappings) diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py index 29d7cb819..2ac64a0bb 100644 --- a/src/vtlengine/AST/DAG/__init__.py +++ b/src/vtlengine/AST/DAG/__init__.py @@ -180,14 +180,15 @@ def load_edges(self) -> None: for key, statement in self.dependencies.items(): reference = statement.outputs + statement.persistent if reference: - ref_to_keys[reference[0]] = key + ref_to_keys[reference[0].casefold()] = key for sub_key, sub_statement in self.dependencies.items(): for input_val in sub_statement.inputs: - if input_val in ref_to_keys: - key = ref_to_keys[input_val] - self.edges[count_edges] = (key, sub_key) - count_edges += 1 + if input_val.casefold() in ref_to_keys: + key = ref_to_keys[input_val.casefold()] + if key != sub_key: # Skip self-edges (e.g. 
a <- A) + self.edges[count_edges] = (key, sub_key) + count_edges += 1 def sort_elements(self, statements: list) -> list: return [statements[x - 1] for x in self.sorting] # type: ignore[union-attr] @@ -195,9 +196,11 @@ def sort_elements(self, statements: list) -> list: def check_overwriting(self, statements: list) -> None: seen: Set[str] = set() for statement in statements: - if statement.left.value in seen: + # Case-insensitive check: regular VTL names are case-insensitive + normalized = statement.left.value.casefold() + if normalized in seen: raise SemanticError("1-2-2", varId_value=statement.left.value) - seen.add(statement.left.value) + seen.add(normalized) def sort_ast(self, ast: AST) -> None: statements_nodes = ast.children diff --git a/src/vtlengine/Interpreter/__init__.py b/src/vtlengine/Interpreter/__init__.py index 1492a5fea..e154c8518 100644 --- a/src/vtlengine/Interpreter/__init__.py +++ b/src/vtlengine/Interpreter/__init__.py @@ -67,6 +67,7 @@ ScalarSet, ValueDomain, ) +from vtlengine.Model._case_insensitive_dict import CaseInsensitiveDict from vtlengine.Operators.Aggregation import extract_grouping_identifiers from vtlengine.Operators.Assignment import Assignment from vtlengine.Operators.CastOperator import Cast @@ -155,6 +156,15 @@ class InterpreterAnalyzer(ASTTemplate): signature_values: Optional[Dict[str, Any]] = None def __post_init__(self) -> None: + # Ensure case-insensitive lookups for datasets, scalars, and value_domains + if not isinstance(self.datasets, CaseInsensitiveDict): + self.datasets = CaseInsensitiveDict(self.datasets) + if self.scalars is not None and not isinstance(self.scalars, CaseInsensitiveDict): + self.scalars = CaseInsensitiveDict(self.scalars) + if self.value_domains is not None and not isinstance( + self.value_domains, CaseInsensitiveDict + ): + self.value_domains = CaseInsensitiveDict(self.value_domains) self.datasets_inputs = set(self.datasets.keys()) self.scalars_inputs = set(self.scalars.keys()) if self.scalars else 
set() @@ -236,7 +246,7 @@ def visit_Start(self, node: AST.Start) -> Any: Operators.only_semantic = True else: Operators.only_semantic = False - results = {} + results: CaseInsensitiveDict[Any] = CaseInsensitiveDict() scalars_to_save = set() invalid_dataset_outputs = [] invalid_scalar_outputs = [] @@ -279,7 +289,7 @@ def visit_Start(self, node: AST.Start) -> Any: if isinstance(result, Scalar): scalars_to_save.add(result.name) if self.scalars is None: - self.scalars = {} + self.scalars = CaseInsensitiveDict() self.scalars[result.name] = copy(result) self._save_datapoints_efficient(statement_num) statement_num += 1 @@ -834,16 +844,17 @@ def visit_VarID(self, node: AST.VarID) -> Any: # noqa: C901 comp_name=node.value, dataset_name=self.aggregation_dataset.name, ) + canon = self.aggregation_dataset.resolve_component_name(node.value) if self.aggregation_dataset.data is None: data = None else: - data = copy(self.aggregation_dataset.data[node.value]) + data = copy(self.aggregation_dataset.data[canon]) return DataComponent( - name=node.value, + name=canon, data=data, - data_type=self.aggregation_dataset.components[node.value].data_type, - role=self.aggregation_dataset.components[node.value].role, - nullable=self.aggregation_dataset.components[node.value].nullable, + data_type=self.aggregation_dataset.components[canon].data_type, + role=self.aggregation_dataset.components[canon].role, + nullable=self.aggregation_dataset.components[canon].nullable, ) if self.is_from_regular_aggregation: if self.is_from_join and node.value in self.datasets: @@ -883,16 +894,17 @@ def visit_VarID(self, node: AST.VarID) -> Any: # noqa: C901 comp_name=node.value, dataset_name=self.regular_aggregation_dataset.name, ) + canon = self.regular_aggregation_dataset.resolve_component_name(node.value) if self.regular_aggregation_dataset.data is not None: - data = copy(self.regular_aggregation_dataset.data[node.value]) + data = copy(self.regular_aggregation_dataset.data[canon]) else: data = None return 
DataComponent( - name=node.value, + name=canon, data=data, - data_type=self.regular_aggregation_dataset.components[node.value].data_type, - role=self.regular_aggregation_dataset.components[node.value].role, - nullable=self.regular_aggregation_dataset.components[node.value].nullable, + data_type=self.regular_aggregation_dataset.components[canon].data_type, + role=self.regular_aggregation_dataset.components[canon].role, + nullable=self.regular_aggregation_dataset.components[canon].nullable, ) if ( self.is_from_rule @@ -982,11 +994,13 @@ def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None: # noq dataset = copy(operands[0]) if self.regular_aggregation_dataset is not None: dataset.name = self.regular_aggregation_dataset.name - dataset.components = { - comp_name: comp - for comp_name, comp in dataset.components.items() - if comp.role != Role.MEASURE - } + dataset.components = CaseInsensitiveDict( + { + comp_name: comp + for comp_name, comp in dataset.components.items() + if comp.role != Role.MEASURE + } + ) if dataset.data is not None: dataset.data = dataset.data[dataset.get_identifiers_names()] aux_operands = [] @@ -1056,10 +1070,12 @@ def visit_RegularAggregation(self, node: AST.RegularAggregation) -> None: # noq columns={col: col[col.find("#") + 1 :] for col in result.data.columns}, inplace=True, ) - result.components = { - comp_name[comp_name.find("#") + 1 :]: comp - for comp_name, comp in result.components.items() - } + result.components = CaseInsensitiveDict( + { + comp_name[comp_name.find("#") + 1 :]: comp + for comp_name, comp in result.components.items() + } + ) for comp in result.components.values(): comp.name = comp.name[comp.name.find("#") + 1 :] if result.data is not None: @@ -1260,11 +1276,13 @@ def visit_ParamOp(self, node: AST.ParamOp) -> None: # noqa: C901 if len(self.aggregation_dataset.get_measures()) != 1: raise ValueError("Only one measure is allowed") # Deepcopy is necessary for components to avoid changing the original dataset 
- self.aggregation_dataset.components = { - comp_name: deepcopy(comp) - for comp_name, comp in self.aggregation_dataset.components.items() - if comp_name in self.aggregation_grouping or comp.role == Role.MEASURE - } + self.aggregation_dataset.components = CaseInsensitiveDict( + { + comp_name: deepcopy(comp) + for comp_name, comp in self.aggregation_dataset.components.items() + if comp_name in self.aggregation_grouping or comp.role == Role.MEASURE + } + ) self.aggregation_dataset.data = ( self.aggregation_dataset.data[ diff --git a/src/vtlengine/Model/__init__.py b/src/vtlengine/Model/__init__.py index 70664027d..02577810b 100644 --- a/src/vtlengine/Model/__init__.py +++ b/src/vtlengine/Model/__init__.py @@ -15,6 +15,7 @@ from vtlengine.DataTypes import SCALAR_TYPES, ScalarType from vtlengine.DataTypes.TimeHandling import TimePeriodHandler from vtlengine.Exceptions import InputValidationException, SemanticError +from vtlengine.Model._case_insensitive_dict import CaseInsensitiveDict @dataclass @@ -205,6 +206,9 @@ class Dataset: persistent: bool = False def __post_init__(self) -> None: + # Ensure components is always a CaseInsensitiveDict + if not isinstance(self.components, CaseInsensitiveDict): + self.components = CaseInsensitiveDict(self.components) if self.data is not None: if len(self.components) != len(self.data.columns): raise ValueError( @@ -331,15 +335,26 @@ def __eq__(self, other: Any) -> bool: def get_component(self, component_name: str) -> Component: return self.components[component_name] + def resolve_component_name(self, name: str) -> str: + """Return the canonical (original-case) component name for a case-insensitive match.""" + if isinstance(self.components, CaseInsensitiveDict): + return self.components.canonical_key(name) + return name + def add_component(self, component: Component) -> None: if component.name in self.components: raise ValueError(f"Component with name {component.name} already exists") self.components[component.name] = component def 
delete_component(self, component_name: str) -> None: - self.components.pop(component_name, None) + # Resolve to canonical name for DataFrame column access + try: + canonical = self.resolve_component_name(component_name) + except KeyError: + return + del self.components[canonical] if self.data is not None: - self.data.drop(columns=[component_name], inplace=True) + self.data.drop(columns=[canonical], inplace=True) def get_components(self) -> List[Component]: return list(self.components.values()) diff --git a/src/vtlengine/Model/_case_insensitive_dict.py b/src/vtlengine/Model/_case_insensitive_dict.py new file mode 100644 index 000000000..fd64fc3f0 --- /dev/null +++ b/src/vtlengine/Model/_case_insensitive_dict.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar + +V = TypeVar("V") + + +class CaseInsensitiveDict(Dict[str, V]): + """A dict subclass that treats string keys as case-insensitive. + + Keys are normalized to lowercase for lookup, but the original key + (from the first insertion) is preserved for iteration and output. + + This is used for VTL regular names which are case-insensitive per the spec. 
+ """ + + def __init__(self, *args: Any, **kwargs: V) -> None: + self._key_map: Dict[str, str] = {} # lowercase -> original key + super().__init__() + if args: + arg = args[0] + if isinstance(arg, dict): + for k, v in arg.items(): + self[k] = v + elif hasattr(arg, "__iter__"): + for k, v in arg: + self[k] = v + for k, v in kwargs.items(): + self[k] = v + + def _normalize(self, key: str) -> str: + return key.casefold() + + def __setitem__(self, key: str, value: V) -> None: + norm = self._normalize(key) + if norm not in self._key_map: + self._key_map[norm] = key + original = self._key_map[norm] + super().__setitem__(original, value) + + def __getitem__(self, key: str) -> V: + norm = self._normalize(key) + if norm not in self._key_map: + raise KeyError(key) + return super().__getitem__(self._key_map[norm]) + + def __contains__(self, key: object) -> bool: + if not isinstance(key, str): + return False + return self._normalize(key) in self._key_map + + def __delitem__(self, key: str) -> None: + norm = self._normalize(key) + if norm not in self._key_map: + raise KeyError(key) + original = self._key_map.pop(norm) + super().__delitem__(original) + + def get(self, key: str, default: Optional[V] = None) -> Optional[V]: # type: ignore[override] + norm = self._normalize(key) + if norm not in self._key_map: + return default + return super().__getitem__(self._key_map[norm]) + + def pop(self, key: str, *args: V) -> V: # type: ignore[override] + norm = self._normalize(key) + if norm not in self._key_map: + if args: + return args[0] + raise KeyError(key) + original = self._key_map.pop(norm) + return super().pop(original) + + def setdefault(self, key: str, default: Optional[V] = None) -> V: + norm = self._normalize(key) + if norm not in self._key_map: + self[key] = default # type: ignore[assignment] + return self[key] + + def update(self, *args: Any, **kwargs: V) -> None: + if args: + other = args[0] + if isinstance(other, dict): + for k, v in other.items(): + self[k] = v + elif 
hasattr(other, "__iter__"): + for k, v in other: + self[k] = v + for k, v in kwargs.items(): + self[k] = v + + def canonical_key(self, key: str) -> str: + """Return the original-case key for a given (possibly different-case) key. + + Raises KeyError if the key doesn't exist. + """ + norm = self._normalize(key) + if norm not in self._key_map: + raise KeyError(key) + return self._key_map[norm] + + def __iter__(self) -> Iterator[str]: + return super().__iter__() + + def copy(self) -> CaseInsensitiveDict[V]: + result: CaseInsensitiveDict[V] = CaseInsensitiveDict() + result._key_map = self._key_map.copy() + for key in dict.keys(self): + dict.__setitem__(result, key, dict.__getitem__(self, key)) + return result + + def __repr__(self) -> str: + return f"CaseInsensitiveDict({dict(self.items())})" + + def __eq__(self, other: object) -> bool: + if isinstance(other, CaseInsensitiveDict): + return dict.__eq__(self, other) + if isinstance(other, dict): + if len(self) != len(other): + return False + return all(k in self and self[k] == v for k, v in other.items()) + return NotImplemented + + def __deepcopy__(self, memo: Dict[int, Any]) -> CaseInsensitiveDict[V]: + from copy import deepcopy + + new: CaseInsensitiveDict[V] = CaseInsensitiveDict.__new__(CaseInsensitiveDict) + memo[id(self)] = new + dict.__init__(new) + new._key_map = deepcopy(self._key_map, memo) + for key in dict.keys(self): + dict.__setitem__(new, key, deepcopy(dict.__getitem__(self, key), memo)) + return new + + def __copy__(self) -> CaseInsensitiveDict[V]: + new: CaseInsensitiveDict[V] = CaseInsensitiveDict.__new__(CaseInsensitiveDict) + dict.__init__(new) + new._key_map = self._key_map.copy() + for key in dict.keys(self): + dict.__setitem__(new, key, dict.__getitem__(self, key)) + return new + + @classmethod + def from_dict(cls, d: Dict[str, V]) -> CaseInsensitiveDict[V]: + """Create a CaseInsensitiveDict from a regular dict.""" + return cls(d) + + def to_dict(self) -> Dict[str, V]: + """Convert back to a 
regular dict with original-cased keys.""" + return dict(self.items()) + + def __reduce__(self) -> Tuple[type, Tuple[Dict[str, V]]]: + return (CaseInsensitiveDict, (dict(self.items()),)) diff --git a/src/vtlengine/Operators/Join.py b/src/vtlengine/Operators/Join.py index fc67df5fc..89a78650f 100644 --- a/src/vtlengine/Operators/Join.py +++ b/src/vtlengine/Operators/Join.py @@ -9,6 +9,7 @@ from vtlengine.DataTypes import SCALAR_TYPES_CLASS_REVERSE, binary_implicit_promotion from vtlengine.Exceptions import SemanticError from vtlengine.Model import Component, Dataset, Role +from vtlengine.Model._case_insensitive_dict import CaseInsensitiveDict from vtlengine.Operators import Operator, _id_type_promotion_join_keys from vtlengine.Utils.__Virtual_Assets import VirtualCounter @@ -478,12 +479,12 @@ def create_dataset(cls, name: str, prefix: str, dataset: Dataset) -> Dataset: @classmethod def get_common_components(cls, left: Dataset, right: Dataset) -> (Dataset, Dataset): # type: ignore[syntax] common = set(left.get_components_names()) & set(right.get_components_names()) - left.components = { - comp.name: comp for comp in left.components.values() if comp.name in common - } - right.components = { - comp.name: comp for comp in right.components.values() if comp.name in common - } + left.components = CaseInsensitiveDict( + {comp.name: comp for comp in left.components.values() if comp.name in common} + ) + right.components = CaseInsensitiveDict( + {comp.name: comp for comp in right.components.values() if comp.name in common} + ) left.data = left.data[list(common)] if left.data is not None else pd.DataFrame() right.data = right.data[list(common)] if right.data is not None else pd.DataFrame() return left, right From 82ec897d4c0a05a54ef020aabc138534b4ff0293 Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 26 Mar 2026 17:54:27 +0100 Subject: [PATCH 2/4] Fixed HR, DPR and UDO signatures being case sensitive --- src/vtlengine/Interpreter/__init__.py | 36 ++++++++++++++++++---- 1 
file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/vtlengine/Interpreter/__init__.py b/src/vtlengine/Interpreter/__init__.py index e154c8518..e90d55391 100644 --- a/src/vtlengine/Interpreter/__init__.py +++ b/src/vtlengine/Interpreter/__init__.py @@ -315,7 +315,7 @@ def visit_Start(self, node: AST.Start) -> Any: def visit_Operator(self, node: AST.Operator) -> None: if self.udos is None: - self.udos = {} + self.udos = CaseInsensitiveDict() elif node.op in self.udos: raise ValueError(f"User Defined Operator {node.op} already exists") @@ -365,7 +365,7 @@ def visit_DPRuleset(self, node: AST.DPRuleset) -> None: ) # Signature has the actual parameters names or aliases if provided - signature_actual_names = {} + signature_actual_names: Dict[str, str] = CaseInsensitiveDict() if not isinstance(node.params, AST.DefIdentifier): for param in node.params: if param.alias is not None: @@ -386,7 +386,7 @@ def visit_DPRuleset(self, node: AST.DPRuleset) -> None: # Adding the ruleset to the dprs dictionary if self.dprs is None: - self.dprs = {} + self.dprs = CaseInsensitiveDict() elif node.name in self.dprs: raise ValueError(f"Datapoint Ruleset {node.name} already exists") @@ -394,7 +394,7 @@ def visit_DPRuleset(self, node: AST.DPRuleset) -> None: def visit_HRuleset(self, node: AST.HRuleset) -> None: if self.hrs is None: - self.hrs = {} + self.hrs = CaseInsensitiveDict() if node.name in self.hrs: raise ValueError(f"Hierarchical Ruleset {node.name} already exists") @@ -584,6 +584,13 @@ def visit_Aggregation(self, node: AST.Aggregation) -> None: for x in node.grouping: groupings.append(self.visit(x)) self.is_from_grouping = False + # Resolve grouping names to canonical (original-case) component names + groupings = [ + operand.resolve_component_name(g) + if isinstance(g, str) and g in operand.components + else g + for g in groupings + ] if grouping_op == "group all" or has_time_agg: groupings = self._apply_time_agg_grouping(operand, groupings, grouping_op) 
self.aggregation_dataset = None @@ -920,6 +927,8 @@ def visit_VarID(self, node: AST.VarID) -> Any: # noqa: C901 comp_name=node.value, dataset_name=self.ruleset_dataset.name, ) + # Resolve to canonical (original-case) name for DataFrame access + comp_name = self.ruleset_dataset.resolve_component_name(comp_name) data = None if self.rule_data is None else self.rule_data[comp_name] return DataComponent( name=comp_name, @@ -1215,6 +1224,15 @@ def visit_RenameNode(self, node: AST.RenameNode) -> Any: ): node.old_name = node.old_name.split("#")[1] + # Resolve old_name to canonical (original-case) component name + if ( + self.regular_aggregation_dataset is not None + and node.old_name in self.regular_aggregation_dataset.components + ): + node.old_name = self.regular_aggregation_dataset.resolve_component_name( + node.old_name + ) + return node def visit_Constant(self, node: AST.Constant) -> Any: @@ -1353,7 +1371,10 @@ def visit_HROperation(self, node: AST.HROperation) -> None: # noqa: C901 if len(cond_components) != len(hr_info["condition"]): raise SemanticError("1-1-10-2", op=node.op) - if hr_info["node"].signature_type == "variable" and hr_info["signature"] != component: + if ( + hr_info["node"].signature_type == "variable" + and hr_info["signature"].casefold() != component.casefold() # type: ignore[union-attr] + ): raise SemanticError( "1-1-10-3", op=node.op, @@ -1411,6 +1432,9 @@ def visit_HROperation(self, node: AST.HROperation) -> None: # noqa: C901 Check_Hierarchy.validate_hr_dataset(dataset, component) + # Resolve to canonical (original-case) component name for DataFrame access + component = dataset.resolve_component_name(component) + # Set up interpreter state for rule processing self.ruleset_dataset = dataset self.ruleset_signature = {**{"RULE_COMPONENT": component}, **cond_info} @@ -1481,7 +1505,7 @@ def visit_DPValidation(self, node: AST.DPValidation) -> None: ) if dpr_info is not None and dpr_info["signature_type"] == "variable": for i, comp_name in 
enumerate(node.components): - if comp_name != dpr_info["params"][i]: + if comp_name.casefold() != dpr_info["params"][i].casefold(): raise SemanticError( "1-1-10-3", op=CHECK_DATAPOINT, From 401ac53d153d3dfa0d9d3334fd79e265568deb14 Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 26 Mar 2026 18:16:53 +0100 Subject: [PATCH 3/4] Added related tests --- tests/CaseInsensitive/__init__.py | 0 .../CaseInsensitive/test_case_insensitive.py | 311 ++++++++++++++++++ 2 files changed, 311 insertions(+) create mode 100644 tests/CaseInsensitive/__init__.py create mode 100644 tests/CaseInsensitive/test_case_insensitive.py diff --git a/tests/CaseInsensitive/__init__.py b/tests/CaseInsensitive/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/CaseInsensitive/test_case_insensitive.py b/tests/CaseInsensitive/test_case_insensitive.py new file mode 100644 index 000000000..0b4089498 --- /dev/null +++ b/tests/CaseInsensitive/test_case_insensitive.py @@ -0,0 +1,311 @@ +"""Tests for case-insensitive regular name resolution (VTL 2.1 spec).""" + +import pandas as pd +import pytest + +from vtlengine import run +from vtlengine.Exceptions import SemanticError + +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + +BASE_STRUCTURES = { + "datasets": [ + { + "name": "DS_1", + "DataStructure": [ + {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False}, + {"name": "Id_2", "type": "String", "role": "Identifier", "nullable": False}, + {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True}, + ], + } + ] +} + +BASE_DATAPOINTS = { + "DS_1": pd.DataFrame({"Id_1": [1, 1, 1], "Id_2": ["A", "B", "C"], "Me_1": [10.0, 20.0, 30.0]}) +} + +HR_DATAPOINTS = { + "DS_1": pd.DataFrame( + {"Id_1": [1, 1, 1, 1], "Id_2": ["A", "B", "C", "D"], "Me_1": [10.0, 20.0, 30.0, None]} + ) +} + +HR_RULE_BODY = """\ + E = A + B errorcode "e1" 
errorlevel 1 +end hierarchical ruleset""" + +DPR_RULE_BODY = """\ + when Id_2 = "A" then Me_1 >= 0 errorcode "err1" +end datapoint ruleset""" + + +def _run(script: str, datapoints: dict = BASE_DATAPOINTS) -> dict: + return run(script=script, data_structures=BASE_STRUCTURES, datapoints=datapoints) + + +# --------------------------------------------------------------------------- +# 1. Dataset name resolution +# --------------------------------------------------------------------------- + +dataset_name_params = [ + pytest.param("ds_1", id="all_lower"), + pytest.param("Ds_1", id="mixed_1"), + pytest.param("DS_1", id="original"), + pytest.param("dS_1", id="mixed_2"), +] + + +@pytest.mark.parametrize("alias", dataset_name_params) +def test_dataset_name_case_variants(alias): + result = _run(f"DS_r <- {alias};") + assert "DS_r" in result + assert list(result["DS_r"].data.columns) == ["Id_1", "Id_2", "Me_1"] + + +def test_dataset_preserves_original_name(): + result = _run("My_Result <- ds_1;") + assert "My_Result" in result + + +def test_dataset_chained_resolution(): + result = _run("DS_r <- DS_1; DS_r2 <- ds_r;") + assert "DS_r2" in result + pd.testing.assert_frame_equal(result["DS_r"].data, result["DS_r2"].data) + + +# --------------------------------------------------------------------------- +# 2. Duplicate assignment detection +# --------------------------------------------------------------------------- + +duplicate_params = [ + pytest.param("DS_r <- DS_1; DS_R <- DS_1;", id="different_case"), + pytest.param("DS_r <- DS_1; DS_r <- DS_1;", id="same_case"), + pytest.param("DS_r <- DS_1; ds_r <- DS_1;", id="all_lower"), +] + + +@pytest.mark.parametrize("script", duplicate_params) +def test_duplicate_assignment_raises(script): + with pytest.raises(SemanticError, match="1-2-2"): + _run(script) + + +# --------------------------------------------------------------------------- +# 3. 
Component name resolution (calc, filter, rename) +# --------------------------------------------------------------------------- + +component_calc_params = [ + pytest.param( + "DS_r <- ds_1[calc me_2 := me_1 * 2];", + ["me_2"], + id="calc_lowercase", + ), + pytest.param( + "DS_r <- ds_1[calc me_2 := Me_1, mE_3 := ME_1 + me_1];", + ["me_2", "mE_3"], + id="calc_mixed_case", + ), +] + + +@pytest.mark.parametrize("script, expected_comps", component_calc_params) +def test_calc_case_insensitive(script, expected_comps): + result = _run(script) + for comp in expected_comps: + assert comp in result["DS_r"].components + + +filter_params = [ + pytest.param("DS_r <- ds_1[filter me_1 > 15];", 2, id="lowercase_measure"), + pytest.param("DS_r <- ds_1[filter ME_1 > 15];", 2, id="uppercase_measure"), + pytest.param("DS_r <- ds_1[filter Me_1 > 25];", 1, id="original_case"), +] + + +@pytest.mark.parametrize("script, expected_rows", filter_params) +def test_filter_case_insensitive(script, expected_rows): + result = _run(script) + assert len(result["DS_r"].data) == expected_rows + + +rename_params = [ + pytest.param("me_1", "Me_New", id="lowercase_old"), + pytest.param("ME_1", "Me_New", id="uppercase_old"), + pytest.param("Me_1", "Me_Renamed", id="original_case_old"), +] + + +@pytest.mark.parametrize("old_name, new_name", rename_params) +def test_rename_case_insensitive(old_name, new_name): + result = _run(f"DS_r <- ds_1[rename {old_name} to {new_name}];") + assert new_name in result["DS_r"].components + assert "Me_1" not in result["DS_r"].components + + +# --------------------------------------------------------------------------- +# 4. 
Hierarchical ruleset name resolution +# --------------------------------------------------------------------------- + +hr_params = [ + pytest.param("hr1", "HR1", "Id_2", "Id_2", id="name_upper"), + pytest.param("hr1", "hr1", "Id_2", "id_2", id="comp_lower"), + pytest.param("My_HR", "MY_HR", "Id_2", "ID_2", id="both_different"), + pytest.param("hr1", "Hr1", "id_2", "ID_2", id="all_mixed"), +] + + +@pytest.mark.parametrize("def_name, call_name, def_comp, call_comp", hr_params) +def test_hr_case_insensitive(def_name, call_name, def_comp, call_comp): + script = f""" + define hierarchical ruleset {def_name} (variable rule {def_comp}) is + {HR_RULE_BODY}; + DS_r <- hierarchy(DS_1, {call_name} rule {call_comp} computed); + """ + result = _run(script, datapoints=HR_DATAPOINTS) + assert "DS_r" in result + assert "Id_2" in result["DS_r"].components + + +# --------------------------------------------------------------------------- +# 5. Datapoint ruleset name resolution +# --------------------------------------------------------------------------- + +dpr_params = [ + pytest.param("dpr1", "DPR1", "Id_2, Me_1", "Id_2, Me_1", id="name_upper"), + pytest.param("dpr1", "dpr1", "ID_2, ME_1", "id_2, me_1", id="comps_swapped"), + pytest.param("My_DPR", "MY_DPR", "ID_2, ME_1", "id_2, me_1", id="both_different"), +] + + +@pytest.mark.parametrize("def_name, call_name, def_comps, call_comps", dpr_params) +def test_dpr_case_insensitive(def_name, call_name, def_comps, call_comps): + script = f""" + define datapoint ruleset {def_name} (variable {def_comps}) is + {DPR_RULE_BODY}; + DS_r := check_datapoint(DS_1, {call_name} components {call_comps} invalid); + """ + result = _run(script) + assert result is not None + + +# --------------------------------------------------------------------------- +# 6. 
UDO name resolution +# --------------------------------------------------------------------------- + +udo_params = [ + pytest.param( + """ + define operator my_op (ds dataset) returns dataset is ds end operator; + DS_r <- MY_OP(DS_1); + """, + "DS_r", + id="simple_upper", + ), + pytest.param( + """ + define operator my_op (ds dataset) returns dataset is ds end operator; + DS_r <- My_Op(ds_1); + """, + "DS_r", + id="simple_mixed", + ), + pytest.param( + """ + define operator suma (ds1 dataset, ds2 dataset) returns dataset is ds1 + ds2 end operator; + define operator drop_id (ds dataset, comp component) + returns dataset is max(ds group except comp) end operator; + DS_r <- DROP_ID(SUMA(ds_1, Ds_1), Id_2); + """, + "DS_r", + id="nested_mixed", + ), +] + + +@pytest.mark.parametrize("script, expected_ds", udo_params) +def test_udo_case_insensitive(script, expected_ds): + result = _run(script) + assert expected_ds in result + + +def test_udo_duplicate_definition_different_case(): + script = """ + define operator my_op (ds dataset) returns dataset is ds end operator; + define operator MY_OP (ds dataset) returns dataset is ds end operator; + DS_r <- my_op(DS_1); + """ + with pytest.raises((ValueError, SemanticError)): + _run(script) + + +# --------------------------------------------------------------------------- +# 7. 
Aggregation with case-insensitive component refs +# --------------------------------------------------------------------------- + +agg_params = [ + pytest.param("sum(ds_1 group by id_1)", ["Id_1", "Me_1"], id="group_by_lower"), + pytest.param("sum(DS_1 group by Id_1)", ["Id_1", "Me_1"], id="group_by_original"), + pytest.param("max(ds_1 group except id_2)", ["Id_1", "Me_1"], id="group_except_lower"), + pytest.param("max(ds_1 group except ID_2)", ["Id_1", "Me_1"], id="group_except_upper"), + pytest.param("sum(ds_1 group by ID_1)", ["Id_1", "Me_1"], id="group_by_upper"), +] + + +@pytest.mark.parametrize("expr, expected_comps", agg_params) +def test_aggregation_case_insensitive(expr, expected_comps): + result = _run(f"DS_r <- {expr};") + assert "DS_r" in result + for comp in expected_comps: + assert comp in result["DS_r"].components + + +# --------------------------------------------------------------------------- +# 8. Scalar name resolution +# --------------------------------------------------------------------------- + +scalar_params = [ + pytest.param("my_sc", "my_sc", 43, id="lower"), + pytest.param("My_Sc", "my_sc", 43, id="mixed"), + pytest.param("MY_SC", "my_sc", 43, id="upper"), +] + + +@pytest.mark.parametrize("def_name, ref_name, expected_value", scalar_params) +def test_scalar_case_insensitive(def_name, ref_name, expected_value): + script = f"{def_name} <- 42; DS_r <- {ref_name} + 1;" + result = run(script=script, data_structures={"datasets": []}, datapoints={}) + assert "DS_r" in result + assert result["DS_r"].value == expected_value + + +# --------------------------------------------------------------------------- +# 9. 
End-to-end mixed operations +# --------------------------------------------------------------------------- + +e2e_params = [ + pytest.param( + "DS_r <- DS_1; DS_r2 <- ds_r;", + ["DS_r", "DS_r2"], + id="chained_datasets", + ), + pytest.param( + "DS_r <- ds_1[calc me_2 := Me_1, mE_3 := ME_1 + me_1];", + ["DS_r"], + id="calc_mixed_refs", + ), + pytest.param( + "DS_r <- DS_1; DS_r2 <- ds_r; DS_r3 <- ds_1[calc me_2 := Me_1];", + ["DS_r", "DS_r2", "DS_r3"], + id="full_pipeline", + ), +] + + +@pytest.mark.parametrize("script, expected_datasets", e2e_params) +def test_end_to_end(script, expected_datasets): + result = _run(script) + for ds in expected_datasets: + assert ds in result From 9e68f103311b52399de39e6cce26a3086ed16c8a Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 26 Mar 2026 18:20:12 +0100 Subject: [PATCH 4/4] Fixed linting errors --- src/vtlengine/Interpreter/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/vtlengine/Interpreter/__init__.py b/src/vtlengine/Interpreter/__init__.py index e90d55391..54a7848b6 100644 --- a/src/vtlengine/Interpreter/__init__.py +++ b/src/vtlengine/Interpreter/__init__.py @@ -1229,9 +1229,7 @@ def visit_RenameNode(self, node: AST.RenameNode) -> Any: self.regular_aggregation_dataset is not None and node.old_name in self.regular_aggregation_dataset.components ): - node.old_name = self.regular_aggregation_dataset.resolve_component_name( - node.old_name - ) + node.old_name = self.regular_aggregation_dataset.resolve_component_name(node.old_name) return node