Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
07dd467
feat(duckdb): Add DuckDB transpiler for VTL execution (#477)
javihern98 Feb 3, 2026
0a9cb84
Merge origin/main into duckdb/main
javihern98 Feb 3, 2026
a1417d0
Duckdb/structure refactoring (#491)
javihern98 Feb 6, 2026
51b0015
Merge branch 'duckdb/main' of github.com:Meaningful-Data/vtlengine in…
javihern98 Feb 9, 2026
e60f1ce
Added env variable VTL_MAX_TEMP_DIRECTORY_SIZE to handle temp directo…
javihern98 Feb 11, 2026
ceb59b1
Merge branch 'main' of github.com:Meaningful-Data/vtlengine into duck…
javihern98 Feb 13, 2026
902ec94
Implemented base AST to SQL Query formatter (#516)
mla2001 Feb 18, 2026
0bcc81c
Merged main into duckdb_main (#536)
mla2001 Feb 25, 2026
0a21074
Minor fix
mla2001 Feb 25, 2026
d44800f
Merge remote-tracking branch 'origin/main' into duckdb/main
mla2001 Feb 25, 2026
fffc4aa
Bump main 1.6.0rc4 into duckdb/main (#566)
mla2001 Mar 5, 2026
50a9904
Fix #568: (Duckdb) Fix all remaining DuckDB errors unrelated to Time …
mla2001 Mar 12, 2026
a5cd914
Implement 476: (Duckdb) Implement hierarchy operators (#601)
mla2001 Mar 17, 2026
25b4330
Fix #603: Custom STRUCT types for TimePeriod and TimeInterval (#604)
javihern98 Mar 18, 2026
fd476c3
Update #476 (#605)
mla2001 Mar 18, 2026
279f3d8
Fix #519: Implement DuckDB time operators (#606)
javihern98 Mar 18, 2026
f372145
Implement #475: (DuckDB) Implement SDMX loading (#608)
javihern98 Mar 18, 2026
73ab4f3
Reconcile duckdb/main with main and remove s3fs dependency (#614)
javihern98 Mar 20, 2026
963c1bf
Merge remote-tracking branch 'origin/main' into merge-main-into-duckdb
javihern98 Mar 20, 2026
6a99cc6
Merge pull request #615 from Meaningful-Data/merge-main-into-duckdb
javihern98 Mar 20, 2026
983b338
Remove S3 URI support from pandas backend
javihern98 Mar 20, 2026
2e969ed
Document S3 URI support via DuckDB backend in run() docstring
javihern98 Mar 20, 2026
8432946
Merge pull request #616 from Meaningful-Data/remove-s3-pandas-path
javihern98 Mar 20, 2026
d5ef70a
Route all test patterns through DuckDB backend when configured
javihern98 Mar 20, 2026
b4da442
Merge pull request #618 from Meaningful-Data/cr-duckdb-test-routing
javihern98 Mar 20, 2026
b3dd930
Route all remaining test patterns through run() API
javihern98 Mar 20, 2026
098fa86
Revert "Route all remaining test patterns through run() API"
javihern98 Mar 20, 2026
791a2bb
Route all remaining test patterns through run() API (#619)
javihern98 Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,115 changes: 33 additions & 1,082 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 2 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,9 @@ dependencies = [
"pyarrow>=14.0,<20.0",
"numpy>=2.0.2,<2.1; python_version < '3.10'",
"numpy>=2.2.0,<2.3; python_version >= '3.10'",
"psutil>=7.2.2,<8.0.0",
]

[project.optional-dependencies]
s3 = ["s3fs>=2022.11.0"]
all = ["s3fs>=2022.11.0"]

[project.urls]
Repository = 'https://github.com/Meaningful-Data/vtlengine'
Documentation = 'https://docs.vtlengine.meaningfuldata.eu'
Expand Down Expand Up @@ -89,7 +86,7 @@ lint.exclude = ["*/Grammar/*", "*/main.py", "*/dev.py"]

[tool.mypy]
files = "src"
exclude = "src/vtlengine/AST/.*|src/dev.py"
exclude = "src/vtlengine/AST/.*|src/dev.py|src/vtlengine/duckdb_transpiler/.*"
disallow_untyped_defs = true
disallow_untyped_calls = true
ignore_errors = false
Expand Down
49 changes: 31 additions & 18 deletions src/vtlengine/API/_InternalApi.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
)

from vtlengine import AST as AST
from vtlengine.__extras_check import __check_s3_extra
from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
from vtlengine.AST.ASTString import ASTString
from vtlengine.DataTypes import SCALAR_TYPES
Expand Down Expand Up @@ -205,25 +204,27 @@ def _load_single_datapoint(
plain CSV, SDMX-CSV, and SDMX-ML file formats.

Args:
datapoint: Path or S3 URI to the datapoint file.
datapoint: Path to the datapoint file.
sdmx_mappings: Optional mapping from SDMX URNs to VTL dataset names.
"""
if not isinstance(datapoint, (str, Path)):
raise InputValidationException(
code="0-1-1-2", input=datapoint, message="Input must be a Path or an S3 URI"
code="0-1-1-2", input=datapoint, message="Input must be a Path"
)
# Handling of str values
if isinstance(datapoint, str):
if "s3://" in datapoint:
__check_s3_extra()
dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
return {dataset_name: datapoint}
# Converting to Path object if it is not an S3 URI
raise InputValidationException(
code="0-1-1-2",
input=datapoint,
message="S3 URIs are only supported with use_duckdb=True.",
)
# Converting to Path object
try:
datapoint = Path(datapoint)
except Exception:
raise InputValidationException(
code="0-1-1-2", input=datapoint, message="Input must refer to a Path or an S3 URI"
code="0-1-1-2", input=datapoint, message="Input must refer to a Path"
)
# Validation of Path object
if not datapoint.exists():
Expand Down Expand Up @@ -268,7 +269,7 @@ def _load_datapoints_path(
happens in load_datapoints() which supports both formats.

Args:
datapoints: Dict, List, or single Path/S3 URI with datapoints.
datapoints: Dict, List, or single Path with datapoints.
sdmx_mappings: Optional mapping from SDMX URNs to VTL dataset names.

Returns:
Expand All @@ -288,11 +289,17 @@ def _load_datapoints_path(
raise InputValidationException(
code="0-1-1-2",
input=datapoint,
message="Datapoints dictionary values must be Paths or S3 URIs.",
message="Datapoints dictionary values must be Paths.",
)

# Convert string to Path if not S3 or URL
if isinstance(datapoint, str) and "s3://" not in datapoint and not _is_url(datapoint):
if isinstance(datapoint, str) and _is_s3_uri(datapoint):
raise InputValidationException(
code="0-1-1-2",
input=datapoint,
message="S3 URIs are only supported with use_duckdb=True.",
)
if isinstance(datapoint, str) and not _is_url(datapoint):
datapoint = Path(datapoint)

# Validate file exists
Expand Down Expand Up @@ -516,14 +523,14 @@ def load_datasets_with_data(
not isinstance(v, (str, Path)) for v in datapoints.values()
):
raise InputValidationException(
"Invalid datapoints. All values in the dictionary must be Paths or S3 URIs, "
"Invalid datapoints. All values in the dictionary must be Paths, "
"or all values must be Pandas Dataframes."
)

# Handling Individual, List or Dict of Paths, S3 URIs, or URLs
# Handling Individual, List or Dict of Paths or URLs
# At this point, datapoints is narrowed to exclude None and Dict[str, DataFrame]
# All file types (CSV, SDMX) are returned as paths for lazy loading
# URLs are preserved as strings (like S3 URIs)
# URLs are preserved as strings
datapoints_paths = _load_datapoints_path(
cast(Union[Dict[str, Union[str, Path]], List[Union[str, Path]], str, Path], datapoints),
sdmx_mappings=sdmx_mappings,
Expand Down Expand Up @@ -735,10 +742,11 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
"""
if isinstance(output_folder, str):
if "s3://" in output_folder:
__check_s3_extra()
if not output_folder.endswith("/"):
raise DataLoadError("0-3-1-2", folder=str(output_folder))
return
raise InputValidationException(
code="0-1-1-2",
input=output_folder,
message="S3 URIs are only supported with use_duckdb=True.",
)
try:
output_folder = Path(output_folder)
except Exception:
Expand Down Expand Up @@ -894,6 +902,11 @@ def ast_to_sdmx(ast: AST.Start, agency_id: str, id: str, version: str) -> Transf
return transformation_scheme


def _is_s3_uri(value: Any) -> bool:
"""Check if a value is an S3 URI."""
return isinstance(value, str) and "s3://" in value


def _is_url(value: Any) -> bool:
"""
Check if a value is an HTTP/HTTPS URL.
Expand Down
Loading
Loading