From 674a1a5f257c3a25e94dee22c241d964f7e02069 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:06:05 +0100
Subject: [PATCH 01/11] feat: add has_dataset_op to StatementDeps and four
 category fields to DatasetSchedule

---
 src/vtlengine/AST/DAG/_models.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/vtlengine/AST/DAG/_models.py b/src/vtlengine/AST/DAG/_models.py
index 359edc49e..92d7d4f9f 100644
--- a/src/vtlengine/AST/DAG/_models.py
+++ b/src/vtlengine/AST/DAG/_models.py
@@ -10,6 +10,7 @@ class StatementDeps:
     outputs: List[str] = field(default_factory=list)
     persistent: List[str] = field(default_factory=list)
     unknown_variables: List[str] = field(default_factory=list)
+    has_dataset_op: bool = False
 
 
 @dataclass
@@ -22,4 +23,8 @@ class DatasetSchedule:
     insertion: Dict[int, List[str]] = field(default_factory=dict)
     deletion: Dict[int, List[str]] = field(default_factory=dict)
     global_inputs: List[str] = field(default_factory=list)
+    global_input_datasets: List[str] = field(default_factory=list)
+    global_input_scalars: List[str] = field(default_factory=list)
+    global_input_dataset_or_scalar: List[str] = field(default_factory=list)
+    global_input_component_or_scalar: List[str] = field(default_factory=list)
     persistent: List[str] = field(default_factory=list)

From 079f31ae9342e217da0a74b978b28efc30db405c Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:11:36 +0100
Subject: [PATCH 02/11] feat: implement four-category global input
 classification with scalar propagation

Track dataset operations per statement (has_dataset_op flag) and use
fixed-point propagation to identify scalar outputs. Classify global
inputs into: global_input_datasets, global_input_scalars,
global_input_dataset_or_scalar, and global_input_component_or_scalar.

Add 6 new DAG test cases (11-16) covering scalar chains, RegularAggregation
ambiguity, top-level ambiguity, and mixed contexts.
---
 src/vtlengine/AST/DAG/__init__.py |  98 ++++++++++++++++++++++++++
 tests/DAG/data/references/1.json  |   4 ++
 tests/DAG/data/references/10.json |  35 ++++++++++
 tests/DAG/data/references/11.json |  20 ++++++
 tests/DAG/data/references/12.json |  30 ++++++++
 tests/DAG/data/references/13.json |  42 ++++++++++++
 tests/DAG/data/references/14.json |  29 ++++++++
 tests/DAG/data/references/15.json |  29 ++++++++
 tests/DAG/data/references/16.json |  32 +++++++++
 tests/DAG/data/references/2.json  |   6 ++
 tests/DAG/data/references/3.json  |  18 +++++
 tests/DAG/data/references/4.json  |   7 ++
 tests/DAG/data/references/5.json  |  18 +++++
 tests/DAG/data/references/6.json  |  24 +++++++
 tests/DAG/data/references/7.json  | 110 ++++++++++++++++++++++++++++++
 tests/DAG/data/references/8.json  |  26 +++++++
 tests/DAG/data/references/9.json  |  46 +++++++++++++
 tests/DAG/data/vtl/11.vtl         |   3 +
 tests/DAG/data/vtl/12.vtl         |   1 +
 tests/DAG/data/vtl/13.vtl         |   2 +
 tests/DAG/data/vtl/14.vtl         |   1 +
 tests/DAG/data/vtl/15.vtl         |   2 +
 tests/DAG/data/vtl/16.vtl         |   2 +
 tests/DAG/test_dag.py             |   8 +++
 24 files changed, 593 insertions(+)
 create mode 100644 tests/DAG/data/references/11.json
 create mode 100644 tests/DAG/data/references/12.json
 create mode 100644 tests/DAG/data/references/13.json
 create mode 100644 tests/DAG/data/references/14.json
 create mode 100644 tests/DAG/data/references/15.json
 create mode 100644 tests/DAG/data/references/16.json
 create mode 100644 tests/DAG/data/vtl/11.vtl
 create mode 100644 tests/DAG/data/vtl/12.vtl
 create mode 100644 tests/DAG/data/vtl/13.vtl
 create mode 100644 tests/DAG/data/vtl/14.vtl
 create mode 100644 tests/DAG/data/vtl/15.vtl
 create mode 100644 tests/DAG/data/vtl/16.vtl

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index aed9d5145..f8f7abda7 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -60,8 +60,11 @@ class DAGAnalyzer(ASTTemplate):
 
     # Per-statement accumulator (reset between statements)
     current_deps: StatementDeps = field(default_factory=StatementDeps)
+    _current_has_dataset_op: bool = False
     # Cross-statement unknown variable tracking
     unknown_variables: Set[str] = field(default_factory=set)
+    # Outputs that were consumed via unknown_variables (RegularAggregation context)
+    _resolved_from_unknown: Set[str] = field(default_factory=set)
 
     @classmethod
     def ds_structure(cls, ast: AST) -> DatasetSchedule:
@@ -105,11 +108,96 @@ def _ds_usage_analysis(self) -> DatasetSchedule:
                     deletion[last_consumer.get(element, key)].append(element)
                     insertion[key].append(element)
 
+        # --- Scalar output propagation ---
+
+        # Seed: outputs of statements with no dataset ops and no inputs (constant assignments)
+        # Also seed outputs resolved from unknown_variables (used in RegularAggregation context)
+        scalar_outputs: Set[str] = set()
+        for statement in self.dependencies.values():
+            reference = statement.outputs + statement.persistent
+            if not reference:
+                continue
+            ds_name = reference[0]
+            if not statement.has_dataset_op and not statement.inputs:
+                scalar_outputs.add(ds_name)
+            elif ds_name in self._resolved_from_unknown:
+                scalar_outputs.add(ds_name)
+
+        # Collect component_or_scalar candidates from unknown_variables
+        component_or_scalar_candidates: Set[str] = set()
+        for statement in self.dependencies.values():
+            for uv in statement.unknown_variables:
+                if uv not in all_outputs:
+                    component_or_scalar_candidates.add(uv)
+
+        # Propagate: no dataset ops and all inputs are scalars or component/scalar candidates
+        changed = True
+        while changed:
+            changed = False
+            for statement in self.dependencies.values():
+                reference = statement.outputs + statement.persistent
+                if not reference:
+                    continue
+                ds_name = reference[0]
+                if ds_name in scalar_outputs or statement.has_dataset_op:
+                    continue
+                if statement.inputs and all(
+                    inp in scalar_outputs or inp in component_or_scalar_candidates
+                    for inp in statement.inputs
+                ):
+                    scalar_outputs.add(ds_name)
+                    changed = True
+
+        # --- Identify definite dataset inputs ---
+        definite_dataset_inputs: Set[str] = set()
+        for statement in self.dependencies.values():
+            if statement.has_dataset_op:
+                for inp in statement.inputs:
+                    definite_dataset_inputs.add(inp)
+
+        # Include component_or_scalar candidates in global_inputs
+        for name in component_or_scalar_candidates:
+            if name not in global_set:
+                global_set.add(name)
+                global_inputs.append(name)
+
+        # --- Classify global inputs into four categories ---
+        global_input_datasets: List[str] = []
+        global_input_scalars: List[str] = []
+        global_input_dataset_or_scalar: List[str] = []
+        global_input_component_or_scalar: List[str] = []
+
+        for name in global_inputs:
+            if name in component_or_scalar_candidates:
+                global_input_component_or_scalar.append(name)
+            elif name in definite_dataset_inputs:
+                global_input_datasets.append(name)
+            else:
+                feeds_only_scalars = all(
+                    (stmt.outputs + stmt.persistent)[0] in scalar_outputs
+                    for stmt in self.dependencies.values()
+                    if name in stmt.inputs and (stmt.outputs + stmt.persistent)
+                )
+                no_dataset_ops = not any(
+                    stmt.has_dataset_op
+                    for stmt in self.dependencies.values()
+                    if name in stmt.inputs
+                )
+                if feeds_only_scalars and no_dataset_ops:
+                    global_input_scalars.append(name)
+                else:
+                    global_input_dataset_or_scalar.append(name)
+
         return DatasetSchedule(
             insertion=dict(insertion),
             deletion=dict(deletion),
             global_inputs=global_inputs,
+            global_input_datasets=global_input_datasets,
+            global_input_scalars=global_input_scalars,
+            global_input_dataset_or_scalar=global_input_dataset_or_scalar,
+            global_input_component_or_scalar=global_input_component_or_scalar,
             persistent=persistent_datasets,
+            all_outputs=sorted(all_outputs),
         )
 
     @classmethod
@@ -212,6 +300,7 @@ def statement_structure(self) -> StatementDeps:
             outputs=list(self.current_deps.outputs),
             persistent=list(self.current_deps.persistent),
             unknown_variables=list(self.current_deps.unknown_variables),
+            has_dataset_op=self._current_has_dataset_op,
         )
         self.unknown_variables.update(self.current_deps.unknown_variables)
         return result
@@ -246,12 +335,14 @@ def visit_Start(self, node: Start) -> None:
                 self.number_of_statements += 1
                 self.alias = set()
                 self.current_deps = StatementDeps()
+                self._current_has_dataset_op = False
 
         aux = copy.copy(self.unknown_variables)
         for variable in aux:
             for _number_of_statement, dependency in self.dependencies.items():
                 if variable in dependency.outputs:
                     self.unknown_variables.discard(variable)
+                    self._resolved_from_unknown.add(variable)
                     for _ns2, dep2 in self.dependencies.items():
                         if variable in dep2.unknown_variables:
                             dep2.unknown_variables.remove(variable)
@@ -272,6 +363,7 @@ def visit_PersistentAssignment(self, node: PersistentAssignment) -> None:
         self.visit(node.right)
 
     def visit_RegularAggregation(self, node: RegularAggregation) -> None:
+        self._current_has_dataset_op = True
         self.visit(node.dataset)
         if node.op in [KEEP, DROP, RENAME]:
             return
@@ -313,6 +405,7 @@ def visit_Identifier(self, node: Identifier) -> None:
             and node.value not in self.alias
             and node.value not in self.current_deps.inputs
         ):
+            self._current_has_dataset_op = True
             self.current_deps.inputs.append(node.value)
 
     def visit_ParamOp(self, node: ParamOp) -> None:
@@ -327,14 +420,17 @@ def visit_ParamOp(self, node: ParamOp) -> None:
             super(DAGAnalyzer, self).visit_ParamOp(node)
 
     def visit_Aggregation(self, node: Aggregation) -> None:
+        self._current_has_dataset_op = True
         if node.operand is not None:
             self.visit(node.operand)
 
     def visit_Analytic(self, node: Analytic) -> None:
+        self._current_has_dataset_op = True
         if node.operand is not None:
             self.visit(node.operand)
 
     def visit_JoinOp(self, node: JoinOp) -> None:
+        self._current_has_dataset_op = True
         for clause in node.clauses:
             self.visit(clause)
 
@@ -350,10 +446,12 @@ def visit_UDOCall(self, node: UDOCall) -> None:
 
     def visit_HROperation(self, node: HROperation) -> None:
         """Visit HROperation node for dependency analysis."""
+        self._current_has_dataset_op = True
         self.visit(node.dataset)
 
     def visit_DPValidation(self, node: DPValidation) -> None:
         """Visit DPValidation node for dependency analysis."""
+        self._current_has_dataset_op = True
         self.visit(node.dataset)
 
 
diff --git a/tests/DAG/data/references/1.json b/tests/DAG/data/references/1.json
index 2621e2997..42a811fce 100644
--- a/tests/DAG/data/references/1.json
+++ b/tests/DAG/data/references/1.json
@@ -8,6 +8,10 @@
         ]
     },
     "global_inputs": [],
+    "global_input_datasets": [],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [],
     "persistent": [
         "c"
     ]
diff --git a/tests/DAG/data/references/10.json b/tests/DAG/data/references/10.json
index 34df46b41..74b3870eb 100644
--- a/tests/DAG/data/references/10.json
+++ b/tests/DAG/data/references/10.json
@@ -65,7 +65,42 @@
         ]
     },
     "global_inputs": [
+        "ACCOUNTING_ENTRY",
+        "ADJUSTMENT",
+        "BOP",
+        "COMP_METHOD",
+        "COUNTERPART_SECTOR",
+        "CURRENCY_DENOM",
+        "FLOW_STOCK_ENTRY",
+        "FREQ",
+        "FUNCTIONAL_CAT",
+        "INSTR_ASSET",
+        "INT_ACC_ITEM",
+        "MATURITY",
+        "REF_SECTOR",
+        "VALUATION",
+        "imbalance"
+    ],
+    "global_input_datasets": [
         "BOP"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "ACCOUNTING_ENTRY",
+        "ADJUSTMENT",
+        "COMP_METHOD",
+        "COUNTERPART_SECTOR",
+        "CURRENCY_DENOM",
+        "FLOW_STOCK_ENTRY",
+        "FREQ",
+        "FUNCTIONAL_CAT",
+        "INSTR_ASSET",
+        "INT_ACC_ITEM",
+        "MATURITY",
+        "REF_SECTOR",
+        "VALUATION",
+        "imbalance"
+    ],
     "persistent": []
 }
\ No newline at end of file
diff --git a/tests/DAG/data/references/11.json b/tests/DAG/data/references/11.json
new file mode 100644
index 000000000..53e6ccfef
--- /dev/null
+++ b/tests/DAG/data/references/11.json
@@ -0,0 +1,20 @@
+{
+    "insertion": {},
+    "deletion": {
+        "2": [
+            "a"
+        ],
+        "3": [
+            "b",
+            "c"
+        ]
+    },
+    "global_inputs": [],
+    "global_input_datasets": [],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [],
+    "persistent": [
+        "c"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/12.json b/tests/DAG/data/references/12.json
new file mode 100644
index 000000000..53d80fe5f
--- /dev/null
+++ b/tests/DAG/data/references/12.json
@@ -0,0 +1,30 @@
+{
+    "insertion": {
+        "1": [
+            "DS_1"
+        ]
+    },
+    "deletion": {
+        "1": [
+            "DS_1",
+            "DS_r"
+        ]
+    },
+    "global_inputs": [
+        "DS_1",
+        "Me_1",
+        "SC_1"
+    ],
+    "global_input_datasets": [
+        "DS_1"
+    ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "Me_1",
+        "SC_1"
+    ],
+    "persistent": [
+        "DS_r"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/13.json b/tests/DAG/data/references/13.json
new file mode 100644
index 000000000..12ee853e3
--- /dev/null
+++ b/tests/DAG/data/references/13.json
@@ -0,0 +1,42 @@
+{
+    "insertion": {
+        "1": [
+            "SC_1",
+            "SC_2"
+        ],
+        "2": [
+            "DS_1"
+        ]
+    },
+    "deletion": {
+        "2": [
+            "DS_1",
+            "DS_r",
+            "SC_r"
+        ],
+        "1": [
+            "SC_1",
+            "SC_2"
+        ]
+    },
+    "global_inputs": [
+        "DS_1",
+        "Me_1",
+        "SC_1",
+        "SC_2"
+    ],
+    "global_input_datasets": [
+        "DS_1"
+    ],
+    "global_input_scalars": [
+        "SC_1",
+        "SC_2"
+    ],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "Me_1"
+    ],
+    "persistent": [
+        "DS_r"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/14.json b/tests/DAG/data/references/14.json
new file mode 100644
index 000000000..34f8edeee
--- /dev/null
+++ b/tests/DAG/data/references/14.json
@@ -0,0 +1,29 @@
+{
+    "insertion": {
+        "1": [
+            "DS_1",
+            "DS_2"
+        ]
+    },
+    "deletion": {
+        "1": [
+            "DS_1",
+            "DS_2",
+            "DS_r"
+        ]
+    },
+    "global_inputs": [
+        "DS_1",
+        "DS_2"
+    ],
+    "global_input_datasets": [],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [
+        "DS_1",
+        "DS_2"
+    ],
+    "global_input_component_or_scalar": [],
+    "persistent": [
+        "DS_r"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/15.json b/tests/DAG/data/references/15.json
new file mode 100644
index 000000000..f7cb442d6
--- /dev/null
+++ b/tests/DAG/data/references/15.json
@@ -0,0 +1,29 @@
+{
+    "insertion": {
+        "2": [
+            "DS_1"
+        ]
+    },
+    "deletion": {
+        "2": [
+            "DS_1",
+            "DS_r",
+            "SC_r"
+        ]
+    },
+    "global_inputs": [
+        "DS_1",
+        "Me_1"
+    ],
+    "global_input_datasets": [
+        "DS_1"
+    ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "Me_1"
+    ],
+    "persistent": [
+        "DS_r"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/16.json b/tests/DAG/data/references/16.json
new file mode 100644
index 000000000..c94e32353
--- /dev/null
+++ b/tests/DAG/data/references/16.json
@@ -0,0 +1,32 @@
+{
+    "insertion": {
+        "1": [
+            "DS_1",
+            "SC_1"
+        ]
+    },
+    "deletion": {
+        "1": [
+            "DS_1",
+            "DS_r"
+        ],
+        "2": [
+            "SC_1",
+            "SC_r"
+        ]
+    },
+    "global_inputs": [
+        "DS_1",
+        "SC_1"
+    ],
+    "global_input_datasets": [],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [
+        "DS_1",
+        "SC_1"
+    ],
+    "global_input_component_or_scalar": [],
+    "persistent": [
+        "DS_r"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/2.json b/tests/DAG/data/references/2.json
index 44dfb5825..0e8dae7c7 100644
--- a/tests/DAG/data/references/2.json
+++ b/tests/DAG/data/references/2.json
@@ -25,6 +25,12 @@
     "global_inputs": [
         "A"
     ],
+    "global_input_datasets": [],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [
+        "A"
+    ],
+    "global_input_component_or_scalar": [],
     "persistent": [
         "a",
         "b",
diff --git a/tests/DAG/data/references/3.json b/tests/DAG/data/references/3.json
index 0e1cfdbd7..63e131dec 100644
--- a/tests/DAG/data/references/3.json
+++ b/tests/DAG/data/references/3.json
@@ -30,9 +30,27 @@
         ]
     },
     "global_inputs": [
+        "A",
+        "A2",
+        "var1",
+        "var3",
+        "varF",
+        "varRel",
+        "varRel2"
+    ],
+    "global_input_datasets": [
         "A",
         "A2"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "var1",
+        "var3",
+        "varF",
+        "varRel",
+        "varRel2"
+    ],
     "persistent": [
         "F"
     ]
diff --git a/tests/DAG/data/references/4.json b/tests/DAG/data/references/4.json
index dadf09596..8c1ba0627 100644
--- a/tests/DAG/data/references/4.json
+++ b/tests/DAG/data/references/4.json
@@ -16,5 +16,12 @@
         "DS_1",
         "DS_2"
     ],
+    "global_input_datasets": [],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [
+        "DS_1",
+        "DS_2"
+    ],
+    "global_input_component_or_scalar": [],
     "persistent": []
 }
\ No newline at end of file
diff --git a/tests/DAG/data/references/5.json b/tests/DAG/data/references/5.json
index 47e0d83cb..0fad1b8ad 100644
--- a/tests/DAG/data/references/5.json
+++ b/tests/DAG/data/references/5.json
@@ -30,9 +30,27 @@
         ]
     },
     "global_inputs": [
+        "AGE",
+        "DSD_AGR",
+        "DSD_POP",
+        "MEASURE",
+        "SEX",
+        "TIME_HORIZ",
+        "UNIT_MEASURE"
+    ],
+    "global_input_datasets": [
         "DSD_AGR",
         "DSD_POP"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "AGE",
+        "MEASURE",
+        "SEX",
+        "TIME_HORIZ",
+        "UNIT_MEASURE"
+    ],
     "persistent": [
         "DS_check_agr",
         "DS_check_countries",
diff --git a/tests/DAG/data/references/6.json b/tests/DAG/data/references/6.json
index cc0c0e644..aac54e973 100644
--- a/tests/DAG/data/references/6.json
+++ b/tests/DAG/data/references/6.json
@@ -55,11 +55,35 @@
         ]
     },
     "global_inputs": [
+        "BIS_LOC_STATS",
+        "CURRENCY",
+        "CURRENCY_DENOM",
+        "DS1",
+        "DS2",
+        "DS3",
+        "EXCHANGE_RATE",
+        "EXR_SUFFIX",
+        "EXR_TYPE",
+        "FREQ",
+        "OBS_VALUE"
+    ],
+    "global_input_datasets": [
         "BIS_LOC_STATS",
         "DS1",
         "DS2",
         "DS3"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "CURRENCY",
+        "CURRENCY_DENOM",
+        "EXCHANGE_RATE",
+        "EXR_SUFFIX",
+        "EXR_TYPE",
+        "FREQ",
+        "OBS_VALUE"
+    ],
     "persistent": [
         "numCouYear",
         "numYearCou",
diff --git a/tests/DAG/data/references/7.json b/tests/DAG/data/references/7.json
index e1456f9c5..e6ccc00ae 100644
--- a/tests/DAG/data/references/7.json
+++ b/tests/DAG/data/references/7.json
@@ -507,8 +507,10 @@
         ]
     },
     "global_inputs": [
+        "ACCMLTD_WRTFFS",
         "ANCRDT_ACCNTNG_C",
         "ANCRDT_ACCNTNG_C_Z",
+        "ANCRDT_DRGTN_QRTR_CR_OA",
         "ANCRDT_ENTTY",
         "ANCRDT_ENTTY_DFLT_C",
         "ANCRDT_ENTTY_DFLT_C_T1",
@@ -521,8 +523,116 @@
         "ANCRDT_INSTRMNT_PRTCTN_RCVD_C",
         "ANCRDT_JNT_LBLTS_C",
         "ANCRDT_PRTCTN_RCVD_C",
+        "ANCRDT_PRTCTN_RCVD_C_T1",
+        "CC0010",
+        "CNTRY",
+        "CRDTR",
+        "CRDTR_CD",
+        "DBTR_CD",
+        "DFLT_STTS",
+        "DT_BRTH",
+        "DT_INCPTN",
+        "DT_RFRNC",
+        "ENTTY_RIAD_CD",
+        "ENTTY_RL",
+        "FRGN_BRNCH",
+        "HD_OFFC_UNDRT_CD",
+        "HD_OFFC_UNDRT_CNTRY",
+        "HD_QRTR_CD_CRDTR",
+        "HD_QRTR_CD_DBTR",
+        "IMMDT_PRNT_UNDRT_CD",
+        "INSTTTNL_SCTR",
+        "INSTTTNL_SCTR_DTL",
+        "IS_PRTCTN_PRVDR",
+        "LGL_FRM",
+        "OBSRVD_AGNT_CD",
+        "OFF_BLNC_SHT_AMNT",
+        "OTHR_TYP_ENTTY",
+        "OTSTNDNG_NMNL_AMNT",
+        "PRTCTN_ALLCTD_VL",
+        "PRTCTN_PRVDR_CD",
+        "RCGNTN_STTS",
+        "RCRS",
+        "SPFUND",
+        "SRVCR",
+        "SSMSIGNIFICANCE",
+        "THRD_PRTY_PRRTY_CLMS",
+        "TRD_RCVBL_NN_RCRS",
+        "TTL_NMBR_DBTRS",
+        "TTL_NMBR_DFLT_DBTRS",
+        "TYP_INSTRMNT",
+        "TYP_PRTCTN",
+        "TYP_SCRTSTN",
+        "ULTMT_PRNT_UNDRT_CD",
+        "bool_var",
+        "int_var"
+    ],
+    "global_input_datasets": [
+        "ANCRDT_ACCNTNG_C",
+        "ANCRDT_ACCNTNG_C_Z",
+        "ANCRDT_ENTTY",
+        "ANCRDT_ENTTY_DFLT_C",
+        "ANCRDT_ENTTY_DFLT_C_T1",
+        "ANCRDT_ENTTY_INSTRMNT_C",
+        "ANCRDT_ENTTY_RSK_C",
+        "ANCRDT_FNNCL_C",
+        "ANCRDT_INSTRMNT_C",
+        "ANCRDT_INSTRMNT_PRTCTN_RCVD_C",
+        "ANCRDT_JNT_LBLTS_C",
+        "ANCRDT_PRTCTN_RCVD_C",
         "ANCRDT_PRTCTN_RCVD_C_T1"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [
+        "ANCRDT_FNNCL_C_T1",
+        "ANCRDT_INSTRMNT_C_T1"
+    ],
+    "global_input_component_or_scalar": [
+        "ACCMLTD_WRTFFS",
+        "ANCRDT_DRGTN_QRTR_CR_OA",
+        "CC0010",
+        "CNTRY",
+        "CRDTR",
+        "CRDTR_CD",
+        "DBTR_CD",
+        "DFLT_STTS",
+        "DT_BRTH",
+        "DT_INCPTN",
+        "DT_RFRNC",
+        "ENTTY_RIAD_CD",
+        "ENTTY_RL",
+        "FRGN_BRNCH",
+        "HD_OFFC_UNDRT_CD",
+        "HD_OFFC_UNDRT_CNTRY",
+        "HD_QRTR_CD_CRDTR",
+        "HD_QRTR_CD_DBTR",
+        "IMMDT_PRNT_UNDRT_CD",
+        "INSTTTNL_SCTR",
+        "INSTTTNL_SCTR_DTL",
+        "IS_PRTCTN_PRVDR",
+        "LGL_FRM",
+        "OBSRVD_AGNT_CD",
+        "OFF_BLNC_SHT_AMNT",
+        "OTHR_TYP_ENTTY",
+        "OTSTNDNG_NMNL_AMNT",
+        "PRTCTN_ALLCTD_VL",
+        "PRTCTN_PRVDR_CD",
+        "RCGNTN_STTS",
+        "RCRS",
+        "SPFUND",
+        "SRVCR",
+        "SSMSIGNIFICANCE",
+        "THRD_PRTY_PRRTY_CLMS",
+        "TRD_RCVBL_NN_RCRS",
+        "TTL_NMBR_DBTRS",
+        "TTL_NMBR_DFLT_DBTRS",
+        "TYP_INSTRMNT",
+        "TYP_PRTCTN",
+        "TYP_SCRTSTN",
+        "ULTMT_PRNT_UNDRT_CD",
+        "bool_var",
+        "int_var"
+    ],
     "persistent": [
         "ACCNTNG_CMPLTNSS",
         "CN0230",
diff --git a/tests/DAG/data/references/8.json b/tests/DAG/data/references/8.json
index 111d84582..dacbdc15f 100644
--- a/tests/DAG/data/references/8.json
+++ b/tests/DAG/data/references/8.json
@@ -99,6 +99,23 @@
         ]
     },
     "global_inputs": [
+        "ANCRDT_ACCNTNG_C",
+        "ANCRDT_ACCNTNG_C_T3",
+        "ANCRDT_ENTTY",
+        "ANCRDT_ENTTY_DFLT_C",
+        "ANCRDT_ENTTY_INSTRMNT_C",
+        "ANCRDT_FNNCL_C",
+        "ANCRDT_INSTRMNT_C",
+        "ANCRDT_INSTRMNT_C_T1",
+        "ANCRDT_INSTRMNT_C_T2",
+        "ANCRDT_INSTRMNT_C_T3",
+        "ENTTY_RIAD_CD",
+        "ENTTY_RL",
+        "HD_OFFC_UNDRT_CD",
+        "LGL_ENTTY_CD",
+        "OBSRVD_AGNT_CD"
+    ],
+    "global_input_datasets": [
         "ANCRDT_ACCNTNG_C",
         "ANCRDT_ACCNTNG_C_T3",
         "ANCRDT_ENTTY",
@@ -110,6 +127,15 @@
         "ANCRDT_INSTRMNT_C_T2",
         "ANCRDT_INSTRMNT_C_T3"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [
+        "ENTTY_RIAD_CD",
+        "ENTTY_RL",
+        "HD_OFFC_UNDRT_CD",
+        "LGL_ENTTY_CD",
+        "OBSRVD_AGNT_CD"
+    ],
     "persistent": [
         "DP_RLST_ACCNTNG_FRMWRK_RSLT",
         "DP_RLST_ACCNTNG_INTRCMPNY",
diff --git a/tests/DAG/data/references/9.json b/tests/DAG/data/references/9.json
index 6381e3a2b..c90da87a3 100644
--- a/tests/DAG/data/references/9.json
+++ b/tests/DAG/data/references/9.json
@@ -132,9 +132,55 @@
         "Income_PT",
         "Inflation_PT",
         "Inflation_divisors_Q",
+        "Oferta_PT_2025_Q1",
+        "Vendas_PT_2025_Q1",
+        "coefficient",
+        "coefficient_cq",
+        "coefficient_inv",
+        "coefficient_l",
+        "coefficient_lc",
+        "coefficient_lcq",
+        "coefficient_lq",
+        "coefficient_q",
+        "county",
+        "divisor",
+        "estado",
+        "income",
+        "period_label",
+        "regiao",
+        "value",
+        "var",
+        "year_str"
+    ],
+    "global_input_datasets": [
+        "Income_PT",
+        "Inflation_PT",
+        "Inflation_divisors_Q"
+    ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [
         "Oferta_PT_2025_Q1",
         "Vendas_PT_2025_Q1"
     ],
+    "global_input_component_or_scalar": [
+        "coefficient",
+        "coefficient_cq",
+        "coefficient_inv",
+        "coefficient_l",
+        "coefficient_lc",
+        "coefficient_lcq",
+        "coefficient_lq",
+        "coefficient_q",
+        "county",
+        "divisor",
+        "estado",
+        "income",
+        "period_label",
+        "regiao",
+        "value",
+        "var",
+        "year_str"
+    ],
     "persistent": [
         "output_generic",
         "output_generic_eda",
diff --git a/tests/DAG/data/vtl/11.vtl b/tests/DAG/data/vtl/11.vtl
new file mode 100644
index 000000000..d666ead62
--- /dev/null
+++ b/tests/DAG/data/vtl/11.vtl
@@ -0,0 +1,3 @@
+a := 1;
+b := a + 2;
+c <- b * 3;
diff --git a/tests/DAG/data/vtl/12.vtl b/tests/DAG/data/vtl/12.vtl
new file mode 100644
index 000000000..f9ee4c4bd
--- /dev/null
+++ b/tests/DAG/data/vtl/12.vtl
@@ -0,0 +1 @@
+DS_r <- DS_1[calc Me_2 := Me_1 * SC_1];
diff --git a/tests/DAG/data/vtl/13.vtl b/tests/DAG/data/vtl/13.vtl
new file mode 100644
index 000000000..298741c6d
--- /dev/null
+++ b/tests/DAG/data/vtl/13.vtl
@@ -0,0 +1,2 @@
+SC_r := SC_1 + SC_2;
+DS_r <- DS_1[calc Me_2 := Me_1 + SC_r];
diff --git a/tests/DAG/data/vtl/14.vtl b/tests/DAG/data/vtl/14.vtl
new file mode 100644
index 000000000..e19c38657
--- /dev/null
+++ b/tests/DAG/data/vtl/14.vtl
@@ -0,0 +1 @@
+DS_r <- DS_1 + DS_2;
diff --git a/tests/DAG/data/vtl/15.vtl b/tests/DAG/data/vtl/15.vtl
new file mode 100644
index 000000000..b17fb76c0
--- /dev/null
+++ b/tests/DAG/data/vtl/15.vtl
@@ -0,0 +1,2 @@
+SC_r := 10;
+DS_r <- DS_1[calc Me_2 := Me_1 + SC_r];
diff --git a/tests/DAG/data/vtl/16.vtl b/tests/DAG/data/vtl/16.vtl
new file mode 100644
index 000000000..97392464a
--- /dev/null
+++ b/tests/DAG/data/vtl/16.vtl
@@ -0,0 +1,2 @@
+DS_r <- DS_1 + SC_1;
+SC_r := SC_1 * 2;
diff --git a/tests/DAG/test_dag.py b/tests/DAG/test_dag.py
index f7be96209..7edbeb54c 100644
--- a/tests/DAG/test_dag.py
+++ b/tests/DAG/test_dag.py
@@ -22,6 +22,14 @@ def _normalize_ds_structure(ds_structure):
                 "insertion": {k: sorted(v) for k, v in ds_structure.insertion.items()},
                 "deletion": {k: sorted(v) for k, v in ds_structure.deletion.items()},
                 "global_inputs": sorted(ds_structure.global_inputs),
+                "global_input_datasets": sorted(ds_structure.global_input_datasets),
+                "global_input_scalars": sorted(ds_structure.global_input_scalars),
+                "global_input_dataset_or_scalar": sorted(
+                    ds_structure.global_input_dataset_or_scalar
+                ),
+                "global_input_component_or_scalar": sorted(
+                    ds_structure.global_input_component_or_scalar
+                ),
                 "persistent": sorted(ds_structure.persistent),
             }
         )

From 23a2fe670ad1d6d1a8bfa9c74e9bd2ffea5f4579 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:13:06 +0100
Subject: [PATCH 03/11] feat: update consumers to use categorized global input
 fields

---
 src/vtlengine/API/__init__.py         | 3 ++-
 src/vtlengine/Interpreter/__init__.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/vtlengine/API/__init__.py b/src/vtlengine/API/__init__.py
index 33dce44f4..3d1a4679d 100644
--- a/src/vtlengine/API/__init__.py
+++ b/src/vtlengine/API/__init__.py
@@ -88,7 +88,8 @@ def _extract_input_datasets(script: Union[str, TransformationScheme, Path]) -> L
         raise TypeError("Unsupported script type.")
 
     ast = create_ast(vtl_script)
-    dag_inputs = DAGAnalyzer.ds_structure(ast).global_inputs
+    ds = DAGAnalyzer.ds_structure(ast)
+    dag_inputs = ds.global_input_datasets + ds.global_input_dataset_or_scalar
 
     return dag_inputs
 
diff --git a/src/vtlengine/Interpreter/__init__.py b/src/vtlengine/Interpreter/__init__.py
index eda8789bc..8cdd19888 100644
--- a/src/vtlengine/Interpreter/__init__.py
+++ b/src/vtlengine/Interpreter/__init__.py
@@ -195,7 +195,7 @@ def _save_datapoints_efficient(self, statement_num: int) -> None:
                 or self.datasets[ds_name].data is None
             ):
                 continue
-            if ds_name in self.ds_analysis.global_inputs:
+            if ds_name in self.ds_analysis.global_input_datasets:
                 # We do not save global input datasets, only results of transformations
                 self.datasets[ds_name].data = None
                 continue

From 5d9405f3c63e48d424e060d51babea785c2d661f Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:16:38 +0100
Subject: [PATCH 04/11] refactor: extract helper methods to reduce complexity
 in DAG analysis

---
 src/vtlengine/AST/DAG/__init__.py | 143 +++++++++++++++++-------------
 1 file changed, 83 insertions(+), 60 deletions(-)

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index f8f7abda7..7f3cd7808 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -108,27 +108,86 @@ def _ds_usage_analysis(self) -> DatasetSchedule:
                     deletion[last_consumer.get(element, key)].append(element)
                     insertion[key].append(element)
 
-        # --- Scalar output propagation ---
+        classification = self._classify_global_inputs(
+            all_outputs, global_inputs, global_set
+        )
+
+        return DatasetSchedule(
+            insertion=dict(insertion),
+            deletion=dict(deletion),
+            global_inputs=classification["global_inputs"],
+            global_input_datasets=classification["global_input_datasets"],
+            global_input_scalars=classification["global_input_scalars"],
+            global_input_dataset_or_scalar=classification["global_input_dataset_or_scalar"],
+            global_input_component_or_scalar=classification[
+                "global_input_component_or_scalar"
+            ],
+            persistent=persistent_datasets,
+            all_outputs=sorted(all_outputs),
+        )
+
+    def _classify_global_inputs(
+        self,
+        all_outputs: Set[str],
+        global_inputs: List[str],
+        global_set: Set[str],
+    ) -> Dict[str, List[str]]:
+        """Classify global inputs into datasets, scalars, and ambiguous categories."""
+        scalar_outputs, comp_or_scalar = self._compute_scalar_outputs(all_outputs)
+
+        # Identify definite dataset inputs
+        definite_dataset_inputs: Set[str] = set()
+        for statement in self.dependencies.values():
+            if statement.has_dataset_op:
+                for inp in statement.inputs:
+                    definite_dataset_inputs.add(inp)
+
+        # Include component_or_scalar candidates in global_inputs
+        for name in comp_or_scalar:
+            if name not in global_set:
+                global_set.add(name)
+                global_inputs.append(name)
+
+        # Classify into four categories
+        result: Dict[str, List[str]] = {
+            "global_inputs": global_inputs,
+            "global_input_datasets": [],
+            "global_input_scalars": [],
+            "global_input_dataset_or_scalar": [],
+            "global_input_component_or_scalar": [],
+        }
+
+        for name in global_inputs:
+            if name in comp_or_scalar:
+                result["global_input_component_or_scalar"].append(name)
+            elif name in definite_dataset_inputs:
+                result["global_input_datasets"].append(name)
+            elif self._feeds_only_scalar_chains(name, scalar_outputs):
+                result["global_input_scalars"].append(name)
+            else:
+                result["global_input_dataset_or_scalar"].append(name)
 
-        # Seed: outputs of statements with no dataset ops and no inputs (constant assignments)
-        # Also seed outputs resolved from unknown_variables (used in RegularAggregation context)
+        return result
+
+    def _compute_scalar_outputs(
+        self, all_outputs: Set[str]
+    ) -> tuple[Set[str], Set[str]]:
+        """Compute scalar outputs via propagation and component/scalar candidates."""
         scalar_outputs: Set[str] = set()
         for statement in self.dependencies.values():
             reference = statement.outputs + statement.persistent
             if not reference:
                 continue
             ds_name = reference[0]
-            if not statement.has_dataset_op and not statement.inputs:
-                scalar_outputs.add(ds_name)
-            elif ds_name in self._resolved_from_unknown:
+            is_constant_assignment = not statement.has_dataset_op and not statement.inputs
+            if is_constant_assignment or ds_name in self._resolved_from_unknown:
                 scalar_outputs.add(ds_name)
 
-        # Collect component_or_scalar candidates from unknown_variables
-        component_or_scalar_candidates: Set[str] = set()
+        comp_or_scalar: Set[str] = set()
         for statement in self.dependencies.values():
             for uv in statement.unknown_variables:
                 if uv not in all_outputs:
-                    component_or_scalar_candidates.add(uv)
+                    comp_or_scalar.add(uv)
 
         # Propagate: statements with no dataset ops where all inputs are known scalars
         changed = True
@@ -142,63 +201,27 @@ def _ds_usage_analysis(self) -> DatasetSchedule:
                 if ds_name in scalar_outputs or statement.has_dataset_op:
                     continue
                 if statement.inputs and all(
-                    inp in scalar_outputs or inp in component_or_scalar_candidates
+                    inp in scalar_outputs or inp in comp_or_scalar
                     for inp in statement.inputs
                 ):
                     scalar_outputs.add(ds_name)
                     changed = True
 
-        # --- Identify definite dataset inputs ---
-        definite_dataset_inputs: Set[str] = set()
-        for statement in self.dependencies.values():
-            if statement.has_dataset_op:
-                for inp in statement.inputs:
-                    definite_dataset_inputs.add(inp)
-
-        # Include component_or_scalar candidates in global_inputs
-        for name in component_or_scalar_candidates:
-            if name not in global_set:
-                global_set.add(name)
-                global_inputs.append(name)
+        return scalar_outputs, comp_or_scalar
 
-        # --- Classify global inputs into four categories ---
-        global_input_datasets: List[str] = []
-        global_input_scalars: List[str] = []
-        global_input_dataset_or_scalar: List[str] = []
-        global_input_component_or_scalar: List[str] = []
-
-        for name in global_inputs:
-            if name in component_or_scalar_candidates:
-                global_input_component_or_scalar.append(name)
-            elif name in definite_dataset_inputs:
-                global_input_datasets.append(name)
-            else:
-                feeds_only_scalars = all(
-                    (stmt.outputs + stmt.persistent)[0] in scalar_outputs
-                    for stmt in self.dependencies.values()
-                    if name in stmt.inputs and (stmt.outputs + stmt.persistent)
-                )
-                no_dataset_ops = not any(
-                    stmt.has_dataset_op
-                    for stmt in self.dependencies.values()
-                    if name in stmt.inputs
-                )
-                if feeds_only_scalars and no_dataset_ops:
-                    global_input_scalars.append(name)
-                else:
-                    global_input_dataset_or_scalar.append(name)
-
-        return DatasetSchedule(
-            insertion=dict(insertion),
-            deletion=dict(deletion),
-            global_inputs=global_inputs,
-            global_input_datasets=global_input_datasets,
-            global_input_scalars=global_input_scalars,
-            global_input_dataset_or_scalar=global_input_dataset_or_scalar,
-            global_input_component_or_scalar=global_input_component_or_scalar,
-            persistent=persistent_datasets,
-            all_outputs=sorted(all_outputs),
-        )
+    def _feeds_only_scalar_chains(self, name: str, scalar_outputs: Set[str]) -> bool:
+        """Check if a global input feeds only into scalar-output statements."""
+        has_consumers = False
+        for stmt in self.dependencies.values():
+            if name not in stmt.inputs:
+                continue
+            has_consumers = True
+            if stmt.has_dataset_op:
+                return False
+            reference = stmt.outputs + stmt.persistent
+            if reference and reference[0] not in scalar_outputs:
+                return False
+        return has_consumers
 
     @classmethod
     def create_dag(cls, ast: Start) -> "DAGAnalyzer":

From 5aca4adf18bdeaf51d02ffa8b8336458ee330e53 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:33:48 +0100
Subject: [PATCH 05/11] feat: fix dataset classification for UDO calls and
 MEMBERSHIP operator

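Set _current_has_dataset_op in visit_ParamOp, visit_UDOCall, and
visit_BinOp (MEMBERSHIP) so UDO dataset parameters and membership
operands are correctly classified as global_input_datasets instead of
global_input_dataset_or_scalar.

Also declare the all_outputs field on DatasetSchedule (it is already
passed by _ds_usage_analysis), collapse a few wrapped expressions onto
single lines, add DAG test case 17 (DS_r := DS_1#Me_1) and update
references 4, 7 and 9 accordingly.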
---
 src/vtlengine/AST/DAG/__init__.py | 28 +++++++++++++---------------
 src/vtlengine/AST/DAG/_models.py  |  1 +
 tests/DAG/data/references/17.json | 23 +++++++++++++++++++++++
 tests/DAG/data/references/4.json  |  8 ++++----
 tests/DAG/data/references/7.json  |  7 +++----
 tests/DAG/data/references/9.json  |  7 +++----
 tests/DAG/data/vtl/17.vtl         |  1 +
 7 files changed, 48 insertions(+), 27 deletions(-)
 create mode 100644 tests/DAG/data/references/17.json
 create mode 100644 tests/DAG/data/vtl/17.vtl

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index 7f3cd7808..9ee185272 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -40,7 +40,7 @@
 from vtlengine.AST.DAG._models import DatasetSchedule, StatementDeps
 from vtlengine.AST.Grammar.tokens import AS, DROP, KEEP, MEMBERSHIP, RENAME, TO
 from vtlengine.Exceptions import SemanticError
-from vtlengine.Model import Component
+from vtlengine.Model import Component, Dataset
 
 
 @dataclass
@@ -108,9 +108,7 @@ def _ds_usage_analysis(self) -> DatasetSchedule:
                     deletion[last_consumer.get(element, key)].append(element)
                     insertion[key].append(element)
 
-        classification = self._classify_global_inputs(
-            all_outputs, global_inputs, global_set
-        )
+        classification = self._classify_global_inputs(all_outputs, global_inputs, global_set)
 
         return DatasetSchedule(
             insertion=dict(insertion),
@@ -119,9 +117,7 @@ def _ds_usage_analysis(self) -> DatasetSchedule:
             global_input_datasets=classification["global_input_datasets"],
             global_input_scalars=classification["global_input_scalars"],
             global_input_dataset_or_scalar=classification["global_input_dataset_or_scalar"],
-            global_input_component_or_scalar=classification[
-                "global_input_component_or_scalar"
-            ],
+            global_input_component_or_scalar=classification["global_input_component_or_scalar"],
             persistent=persistent_datasets,
             all_outputs=sorted(all_outputs),
         )
@@ -169,9 +165,7 @@ def _classify_global_inputs(
 
         return result
 
-    def _compute_scalar_outputs(
-        self, all_outputs: Set[str]
-    ) -> tuple[Set[str], Set[str]]:
+    def _compute_scalar_outputs(self, all_outputs: Set[str]) -> tuple[Set[str], Set[str]]:
         """Compute scalar outputs via propagation and component/scalar candidates."""
         scalar_outputs: Set[str] = set()
         for statement in self.dependencies.values():
@@ -201,8 +195,7 @@ def _compute_scalar_outputs(
                 if ds_name in scalar_outputs or statement.has_dataset_op:
                     continue
                 if statement.inputs and all(
-                    inp in scalar_outputs or inp in comp_or_scalar
-                    for inp in statement.inputs
+                    inp in scalar_outputs or inp in comp_or_scalar for inp in statement.inputs
                 ):
                     scalar_outputs.add(ds_name)
                     changed = True
@@ -397,6 +390,7 @@ def visit_RegularAggregation(self, node: RegularAggregation) -> None:
 
     def visit_BinOp(self, node: BinOp) -> None:
         if node.op == MEMBERSHIP:
+            self._current_has_dataset_op = True
             self.is_dataset = True
             self.visit(node.left)
             self.is_dataset = False
@@ -438,6 +432,7 @@ def visit_ParamOp(self, node: ParamOp) -> None:
             for arg in node.params:
                 index_arg = node.params.index(arg)
                 if do_ast.parameters[index_arg].type_.kind == "DataSet":
+                    self._current_has_dataset_op = True
                     self.visit(arg)
         else:
             super(DAGAnalyzer, self).visit_ParamOp(node)
@@ -462,9 +457,12 @@ def visit_UDOCall(self, node: UDOCall) -> None:
         if not node_args:
             super().visit_UDOCall(node)
         else:
-            node_sig = [type(p.type_) for p in node_args.parameters]
-            for sig, param in zip(node_sig, node.params):
-                if not isinstance(param, Constant) and sig is not Component:
+            for p, param in zip(node_args.parameters, node.params):
+                if isinstance(param, Constant):
+                    continue
+                if type(p.type_) is not Component:
+                    if isinstance(p.type_, Dataset):
+                        self._current_has_dataset_op = True
                     self.visit(param)
 
     def visit_HROperation(self, node: HROperation) -> None:
diff --git a/src/vtlengine/AST/DAG/_models.py b/src/vtlengine/AST/DAG/_models.py
index 92d7d4f9f..1e41b62ad 100644
--- a/src/vtlengine/AST/DAG/_models.py
+++ b/src/vtlengine/AST/DAG/_models.py
@@ -28,3 +28,4 @@ class DatasetSchedule:
     global_input_dataset_or_scalar: List[str] = field(default_factory=list)
     global_input_component_or_scalar: List[str] = field(default_factory=list)
     persistent: List[str] = field(default_factory=list)
+    all_outputs: List[str] = field(default_factory=list)
diff --git a/tests/DAG/data/references/17.json b/tests/DAG/data/references/17.json
new file mode 100644
index 000000000..9696fa802
--- /dev/null
+++ b/tests/DAG/data/references/17.json
@@ -0,0 +1,23 @@
+{
+    "insertion": {
+        "1": [
+            "DS_1"
+        ]
+    },
+    "deletion": {
+        "1": [
+            "DS_1",
+            "DS_r"
+        ]
+    },
+    "global_inputs": [
+        "DS_1"
+    ],
+    "global_input_datasets": [
+        "DS_1"
+    ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
+    "global_input_component_or_scalar": [],
+    "persistent": []
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/4.json b/tests/DAG/data/references/4.json
index 8c1ba0627..e84302eaa 100644
--- a/tests/DAG/data/references/4.json
+++ b/tests/DAG/data/references/4.json
@@ -16,12 +16,12 @@
         "DS_1",
         "DS_2"
     ],
-    "global_input_datasets": [],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [
+    "global_input_datasets": [
         "DS_1",
         "DS_2"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
     "global_input_component_or_scalar": [],
     "persistent": []
-}
\ No newline at end of file
+}
diff --git a/tests/DAG/data/references/7.json b/tests/DAG/data/references/7.json
index e6ccc00ae..757fa6dac 100644
--- a/tests/DAG/data/references/7.json
+++ b/tests/DAG/data/references/7.json
@@ -576,17 +576,16 @@
         "ANCRDT_ENTTY_INSTRMNT_C",
         "ANCRDT_ENTTY_RSK_C",
         "ANCRDT_FNNCL_C",
+        "ANCRDT_FNNCL_C_T1",
         "ANCRDT_INSTRMNT_C",
+        "ANCRDT_INSTRMNT_C_T1",
         "ANCRDT_INSTRMNT_PRTCTN_RCVD_C",
         "ANCRDT_JNT_LBLTS_C",
         "ANCRDT_PRTCTN_RCVD_C",
         "ANCRDT_PRTCTN_RCVD_C_T1"
     ],
     "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [
-        "ANCRDT_FNNCL_C_T1",
-        "ANCRDT_INSTRMNT_C_T1"
-    ],
+    "global_input_dataset_or_scalar": [],
     "global_input_component_or_scalar": [
         "ACCMLTD_WRTFFS",
         "ANCRDT_DRGTN_QRTR_CR_OA",
diff --git a/tests/DAG/data/references/9.json b/tests/DAG/data/references/9.json
index c90da87a3..e92a92fc4 100644
--- a/tests/DAG/data/references/9.json
+++ b/tests/DAG/data/references/9.json
@@ -155,13 +155,12 @@
     "global_input_datasets": [
         "Income_PT",
         "Inflation_PT",
-        "Inflation_divisors_Q"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [
+        "Inflation_divisors_Q",
         "Oferta_PT_2025_Q1",
         "Vendas_PT_2025_Q1"
     ],
+    "global_input_scalars": [],
+    "global_input_dataset_or_scalar": [],
     "global_input_component_or_scalar": [
         "coefficient",
         "coefficient_cq",
diff --git a/tests/DAG/data/vtl/17.vtl b/tests/DAG/data/vtl/17.vtl
new file mode 100644
index 000000000..2e9470e04
--- /dev/null
+++ b/tests/DAG/data/vtl/17.vtl
@@ -0,0 +1 @@
+DS_r := DS_1#Me_1;

From 978f2206f8af1328f2c0f66fd416d950c1672923 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:33:57 +0100
Subject: [PATCH 06/11] chore: add types-networkx to dev dependencies

---
 poetry.lock    | 56 ++++++++++++++++++++++++++++++++------------------
 pyproject.toml |  1 +
 2 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index f695794a1..50f1c7744 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -7,7 +7,7 @@ description = "Async client for aws services using botocore and aiohttp"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "aiobotocore-2.26.0-py3-none-any.whl", hash = "sha256:a793db51c07930513b74ea7a95bd79aaa42f545bdb0f011779646eafa216abec"},
     {file = "aiobotocore-2.26.0.tar.gz", hash = "sha256:50567feaf8dfe2b653570b4491f5bc8c6e7fb9622479d66442462c021db4fadc"},
@@ -34,7 +34,7 @@ description = "Happy Eyeballs for asyncio"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
     {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
@@ -47,7 +47,7 @@ description = "Async http client/server framework (asyncio)"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "aiohttp-3.13.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5a372fd5afd301b3a89582817fdcdb6c34124787c70dbcc616f259013e7eef7"},
     {file = "aiohttp-3.13.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:147e422fd1223005c22b4fe080f5d93ced44460f5f9c105406b753612b587821"},
@@ -191,7 +191,7 @@ description = "itertools and builtins for AsyncIO and mixed iterables"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be"},
     {file = "aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c"},
@@ -207,7 +207,7 @@ description = "aiosignal: a list of registered asynchronous callbacks"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"},
     {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"},
@@ -267,7 +267,7 @@ description = "Timeout context manager for asyncio programs"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "(extra == \"all\" or extra == \"s3\") and python_version < \"3.11\""
+markers = "(extra == \"s3\" or extra == \"all\") and python_version < \"3.11\""
 files = [
     {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
     {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@@ -307,7 +307,7 @@ description = "Low-level, data-driven core of boto 3."
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "botocore-1.41.5-py3-none-any.whl", hash = "sha256:3fef7fcda30c82c27202d232cfdbd6782cb27f20f8e7e21b20606483e66ee73a"},
     {file = "botocore-1.41.5.tar.gz", hash = "sha256:0367622b811597d183bfcaab4a350f0d3ede712031ce792ef183cabdee80d3bf"},
@@ -699,7 +699,7 @@ description = "A list-like structure which implements collections.abc.MutableSeq
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b37f6d31b3dcea7deb5e9696e529a6aa4a898adc33db82da12e4c60a7c4d2011"},
     {file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef2b7b394f208233e471abc541cc6991f907ffd47dc72584acee3147899d6565"},
@@ -840,7 +840,7 @@ description = "File-system specification"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "python_version == \"3.9\" and (extra == \"all\" or extra == \"s3\")"
+markers = "python_version == \"3.9\" and (extra == \"s3\" or extra == \"all\")"
 files = [
     {file = "fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d"},
     {file = "fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59"},
@@ -881,7 +881,7 @@ description = "File-system specification"
 optional = true
 python-versions = ">=3.10"
 groups = ["main"]
-markers = "python_version >= \"3.10\" and (extra == \"all\" or extra == \"s3\")"
+markers = "python_version >= \"3.10\" and (extra == \"s3\" or extra == \"all\")"
 files = [
     {file = "fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b"},
     {file = "fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973"},
@@ -1118,7 +1118,7 @@ description = "JSON Matching Expressions"
 optional = true
 python-versions = ">=3.7"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
     {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
@@ -1668,7 +1668,7 @@ description = "multidict implementation"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "multidict-6.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9f474ad5acda359c8758c8accc22032c6abe6dc87a8be2440d097785e27a9349"},
     {file = "multidict-6.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b7a9db5a870f780220e931d0002bbfd88fb53aceb6293251e2c839415c1b20e"},
@@ -2229,7 +2229,7 @@ description = "Accelerated property cache"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "propcache-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c2d1fa3201efaf55d730400d945b5b3ab6e672e100ba0f9a409d950ab25d7db"},
     {file = "propcache-0.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1eb2994229cc8ce7fe9b3db88f5465f5fd8651672840b2e426b88cdb1a30aac8"},
@@ -2945,7 +2945,7 @@ description = "Convenient Filesystem interface over S3"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "python_version == \"3.9\" and (extra == \"all\" or extra == \"s3\")"
+markers = "python_version == \"3.9\" and (extra == \"s3\" or extra == \"all\")"
 files = [
     {file = "s3fs-2025.10.0-py3-none-any.whl", hash = "sha256:da7ef25efc1541f5fca8e1116361e49ea1081f83f4e8001fbd77347c625da28a"},
     {file = "s3fs-2025.10.0.tar.gz", hash = "sha256:e8be6cddc77aceea1681ece0f472c3a7f8ef71a0d2acddb1cc92bb6afa3e9e4f"},
@@ -2967,7 +2967,7 @@ description = "Convenient Filesystem interface over S3"
 optional = true
 python-versions = ">=3.10"
 groups = ["main"]
-markers = "python_version >= \"3.10\" and (extra == \"all\" or extra == \"s3\")"
+markers = "python_version >= \"3.10\" and (extra == \"s3\" or extra == \"all\")"
 files = [
     {file = "s3fs-2025.12.0-py3-none-any.whl", hash = "sha256:89d51e0744256baad7ae5410304a368ca195affd93a07795bc8ba9c00c9effbb"},
     {file = "s3fs-2025.12.0.tar.gz", hash = "sha256:8612885105ce14d609c5b807553f9f9956b45541576a17ff337d9435ed3eb01f"},
@@ -3298,6 +3298,22 @@ files = [
 [package.dependencies]
 referencing = "*"
 
+[[package]]
+name = "types-networkx"
+version = "3.6.1.20260303"
+description = "Typing stubs for networkx"
+optional = false
+python-versions = ">=3.10"
+groups = ["dev"]
+markers = "python_version >= \"3.10\""
+files = [
+    {file = "types_networkx-3.6.1.20260303-py3-none-any.whl", hash = "sha256:754c7c7bcaab3c317b0b86441240c0a5bd0d2f419aba80a88e9718248a5c89af"},
+    {file = "types_networkx-3.6.1.20260303.tar.gz", hash = "sha256:8248aa6fcadc08bd7992af6e412bfc5cfa043bda5ce7ab407fa591c808ce8557"},
+]
+
+[package.dependencies]
+numpy = ">=1.20"
+
 [[package]]
 name = "types-pytz"
 version = "2025.2.0.20251108"
@@ -3346,7 +3362,7 @@ files = [
     {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"},
     {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"},
 ]
-markers = {main = "python_version == \"3.9\" and (extra == \"all\" or extra == \"s3\")", docs = "python_version == \"3.9\""}
+markers = {main = "python_version == \"3.9\" and (extra == \"s3\" or extra == \"all\")", docs = "python_version == \"3.9\""}
 
 [package.extras]
 brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""]
@@ -3364,7 +3380,7 @@ files = [
     {file = "urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd"},
     {file = "urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797"},
 ]
-markers = {main = "python_version >= \"3.10\" and (extra == \"all\" or extra == \"s3\")"}
+markers = {main = "python_version >= \"3.10\" and (extra == \"s3\" or extra == \"all\")"}
 
 [package.extras]
 brotli = ["brotli (>=1.2.0) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=1.2.0.0) ; platform_python_implementation != \"CPython\""]
@@ -3379,7 +3395,7 @@ description = "Module for decorators, wrappers and monkey patching."
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04"},
     {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2"},
@@ -3486,7 +3502,7 @@ description = "Yet another URL library"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"all\" or extra == \"s3\""
+markers = "extra == \"s3\" or extra == \"all\""
 files = [
     {file = "yarl-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c7bd6683587567e5a49ee6e336e0612bec8329be1b7d4c8af5687dcdeb67ee1e"},
     {file = "yarl-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5cdac20da754f3a723cceea5b3448e1a2074866406adeb4ef35b469d089adb8f"},
@@ -3653,4 +3669,4 @@ s3 = ["s3fs"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9,<4.0"
-content-hash = "44f127ea06ae4ebdcd56b245a4f9886dd3cedd5ffd64a38a110e4acc8fd4cc19"
+content-hash = "ee401a8363109109d158a23c5dc04aad4198208c7287710111f40756e21e33c6"
diff --git a/pyproject.toml b/pyproject.toml
index a75fc10d7..d5c2855f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,7 @@ pandas-stubs = ">=2.2.2,<3.0"
 pyarrow-stubs = ">=19.0,<21.0"
 ruff = ">=0.14,<1.0.0"
 types-jsonschema = ">=4.25.1,<5.0"
+types-networkx = {markers = "python_version >= \"3.10\"", version = "^3.6.1.20260303"}
 
 [tool.poetry.group.docs.dependencies]
 sphinx = ">=7.4.7,<8.0"

From a56f90f7a9f426c3d84584d481cf722e55fef267 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:38:25 +0100
Subject: [PATCH 07/11] docs: add docstrings to DAG models and filter global
 inputs by all_outputs

---
 src/vtlengine/AST/DAG/__init__.py |  2 +-
 src/vtlengine/AST/DAG/_models.py  | 39 +++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index 9ee185272..0905b1954 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -140,7 +140,7 @@ def _classify_global_inputs(
 
         # Include component_or_scalar candidates in global_inputs
         for name in comp_or_scalar:
-            if name not in global_set:
+            if name not in global_set and name not in all_outputs:
                 global_set.add(name)
                 global_inputs.append(name)
 
diff --git a/src/vtlengine/AST/DAG/_models.py b/src/vtlengine/AST/DAG/_models.py
index 1e41b62ad..17a31f180 100644
--- a/src/vtlengine/AST/DAG/_models.py
+++ b/src/vtlengine/AST/DAG/_models.py
@@ -4,7 +4,18 @@
 
 @dataclass
 class StatementDeps:
-    """Per-statement dependency info tracked during AST visiting."""
+    """Per-statement dependency info tracked during AST visiting.
+
+    Attributes:
+        inputs: Variables consumed by this statement (excluding its own outputs).
+        outputs: Variables produced by this statement via ``:=`` assignment.
+        persistent: Variables produced by this statement via ``<-`` assignment.
+        unknown_variables: Variables inside RegularAggregation context that could
+            be either components of the dataset or external scalars.
+        has_dataset_op: Whether this statement involves a dataset operation
+            (RegularAggregation, JoinOp, Aggregation, Analytic, MEMBERSHIP,
+            UDO with dataset params, etc.).
+    """
 
     inputs: List[str] = field(default_factory=list)
     outputs: List[str] = field(default_factory=list)
@@ -17,7 +28,31 @@ class StatementDeps:
 class DatasetSchedule:
     """Typed result of DAG dataset usage analysis.
 
-    Tracks when datasets should be loaded/unloaded for memory-efficient execution.
+    Tracks when datasets should be loaded/unloaded for memory-efficient execution,
+    and classifies global inputs into four categories based on AST context.
+
+    Attributes:
+        insertion: Statement index to list of datasets to load at that point
+            (first use).
+        deletion: Statement index to list of datasets to unload at that point
+            (last use).
+        global_inputs: All external dependencies not produced by the script.
+            Union of the four ``global_input_*`` categories below (no duplicates).
+        global_input_datasets: Definite datasets — used in dataset operations
+            (RegularAggregation operand, Identifier with kind="DatasetID",
+            UDO dataset params, MEMBERSHIP left operand, JoinOp, etc.).
+        global_input_scalars: Definite scalars — feed exclusively into scalar
+            chains propagated from constant assignments with no dataset ops.
+        global_input_dataset_or_scalar: Ambiguous at top level (e.g.,
+            ``DS_r <- X + 2`` where X could be a dataset or scalar).
+            The caller may provide either.
+        global_input_component_or_scalar: Ambiguous inside RegularAggregation
+            (e.g., ``DS_1[calc Me_2 := Me_1 + X]`` where X could be a component
+            of DS_1 or an external scalar). Semantic error 1-1-6-11 is raised
+            at runtime if it collides with a component name.
+        persistent: Outputs written with ``<-`` (persistent assignment).
+        all_outputs: All variables produced by the script (both ``:=`` and
+            ``<-`` assignments).
     """
 
     insertion: Dict[int, List[str]] = field(default_factory=dict)

From d2f67a1ec5da3ddab4ebb6803942eccaf3624668 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 16:40:26 +0100
Subject: [PATCH 08/11] refactor: rename DatasetSchedule to Schedule

---
 src/vtlengine/AST/DAG/__init__.py     | 8 ++++----
 src/vtlengine/AST/DAG/_models.py      | 2 +-
 src/vtlengine/Interpreter/__init__.py | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index 0905b1954..f5a4b3d6a 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -37,7 +37,7 @@
     VarID,
 )
 from vtlengine.AST.ASTTemplate import ASTTemplate
-from vtlengine.AST.DAG._models import DatasetSchedule, StatementDeps
+from vtlengine.AST.DAG._models import Schedule, StatementDeps
 from vtlengine.AST.Grammar.tokens import AS, DROP, KEEP, MEMBERSHIP, RENAME, TO
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset
@@ -67,12 +67,12 @@ class DAGAnalyzer(ASTTemplate):
     _resolved_from_unknown: Set[str] = field(default_factory=set)
 
     @classmethod
-    def ds_structure(cls, ast: AST) -> DatasetSchedule:
+    def ds_structure(cls, ast: AST) -> Schedule:
         dag = cls()
         dag.visit(ast)
         return dag._ds_usage_analysis()
 
-    def _ds_usage_analysis(self) -> DatasetSchedule:
+    def _ds_usage_analysis(self) -> Schedule:
         """Analyze dataset dependencies to build insertion/deletion schedules."""
         deletion: Dict[int, List[str]] = defaultdict(list)
         insertion: Dict[int, List[str]] = defaultdict(list)
@@ -110,7 +110,7 @@ def _ds_usage_analysis(self) -> DatasetSchedule:
 
         classification = self._classify_global_inputs(all_outputs, global_inputs, global_set)
 
-        return DatasetSchedule(
+        return Schedule(
             insertion=dict(insertion),
             deletion=dict(deletion),
             global_inputs=classification["global_inputs"],
diff --git a/src/vtlengine/AST/DAG/_models.py b/src/vtlengine/AST/DAG/_models.py
index 17a31f180..4c196006f 100644
--- a/src/vtlengine/AST/DAG/_models.py
+++ b/src/vtlengine/AST/DAG/_models.py
@@ -25,7 +25,7 @@ class StatementDeps:
 
 
 @dataclass
-class DatasetSchedule:
+class Schedule:
     """Typed result of DAG dataset usage analysis.
 
     Tracks when datasets should be loaded/unloaded for memory-efficient execution,
diff --git a/src/vtlengine/Interpreter/__init__.py b/src/vtlengine/Interpreter/__init__.py
index 8cdd19888..7381d2a98 100644
--- a/src/vtlengine/Interpreter/__init__.py
+++ b/src/vtlengine/Interpreter/__init__.py
@@ -11,7 +11,7 @@
 import vtlengine.Operators as Operators
 from vtlengine.AST.ASTTemplate import ASTTemplate
 from vtlengine.AST.DAG import HRDAGAnalyzer
-from vtlengine.AST.DAG._models import DatasetSchedule
+from vtlengine.AST.DAG._models import Schedule
 from vtlengine.AST.Grammar.tokens import (
     AGGREGATE,
     ALL,
@@ -115,7 +115,7 @@ class InterpreterAnalyzer(ASTTemplate):
     # Analysis mode
     only_semantic: bool = False
     # Memory efficient
-    ds_analysis: Optional[DatasetSchedule] = None
+    ds_analysis: Optional[Schedule] = None
     datapoints_paths: Optional[Dict[str, Path]] = None
     output_path: Optional[Union[str, Path]] = None
     # Time Period Representation

From 1b7f94e6b59e69997e500d28fc665d0c424512aa Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 18:27:51 +0100
Subject: [PATCH 09/11] feat: classify dataset vs scalar inputs in DAG Schedule
 (#571)

Extend DAG analysis to discriminate between datasets, scalars,
dataset-or-scalar, and component-or-scalar global inputs. Each
dataset-only operator now explicitly marks its direct VarID operands
as dataset_inputs, so sub-expression operands (e.g., SC_1 in
count(DS_1 + SC_1)) are correctly classified as ambiguous rather
than forced to datasets.

Split test_dag.py into test_classification.py (154 cases) and
test_scheduling.py (13 non-trivial cases), inline VTL for simple
tests, and delete unused reference/VTL files.
---
 src/vtlengine/AST/DAG/__init__.py             | 141 +++-
 src/vtlengine/AST/DAG/_models.py              |   7 +
 tests/DAG/data/references/1.json              |  18 -
 tests/DAG/data/references/12.json             |  30 -
 tests/DAG/data/references/14.json             |  29 -
 tests/DAG/data/references/15.json             |  29 -
 tests/DAG/data/references/17.json             |  23 -
 tests/DAG/data/references/4.json              |  27 -
 .../data/references/{ => scheduling}/10.json  |  38 -
 .../data/references/{ => scheduling}/11.json  |   5 -
 .../data/references/{ => scheduling}/13.json  |  17 -
 .../data/references/{ => scheduling}/16.json  |  11 -
 .../data/references/{ => scheduling}/2.json   |   9 -
 .../data/references/{ => scheduling}/3.json   |  22 -
 tests/DAG/data/references/scheduling/35.json  |  26 +
 tests/DAG/data/references/scheduling/36.json  |  24 +
 .../data/references/{ => scheduling}/5.json   |  22 -
 .../data/references/{ => scheduling}/6.json   |  30 -
 .../data/references/{ => scheduling}/7.json   | 126 ----
 .../data/references/{ => scheduling}/8.json   |  38 -
 .../data/references/{ => scheduling}/9.json   |  52 --
 tests/DAG/data/vtl/12.vtl                     |   1 -
 tests/DAG/data/vtl/14.vtl                     |   1 -
 tests/DAG/data/vtl/15.vtl                     |   2 -
 tests/DAG/data/vtl/17.vtl                     |   1 -
 tests/DAG/data/vtl/35.vtl                     |   3 +
 tests/DAG/data/vtl/36.vtl                     |   3 +
 tests/DAG/test_classification.py              | 657 ++++++++++++++++++
 tests/DAG/test_dag.py                         |  57 --
 tests/DAG/test_scheduling.py                  |  46 ++
 30 files changed, 893 insertions(+), 602 deletions(-)
 delete mode 100644 tests/DAG/data/references/1.json
 delete mode 100644 tests/DAG/data/references/12.json
 delete mode 100644 tests/DAG/data/references/14.json
 delete mode 100644 tests/DAG/data/references/15.json
 delete mode 100644 tests/DAG/data/references/17.json
 delete mode 100644 tests/DAG/data/references/4.json
 rename tests/DAG/data/references/{ => scheduling}/10.json (63%)
 rename tests/DAG/data/references/{ => scheduling}/11.json (51%)
 rename tests/DAG/data/references/{ => scheduling}/13.json (52%)
 rename tests/DAG/data/references/{ => scheduling}/16.json (53%)
 rename tests/DAG/data/references/{ => scheduling}/2.json (66%)
 rename tests/DAG/data/references/{ => scheduling}/3.json (54%)
 create mode 100644 tests/DAG/data/references/scheduling/35.json
 create mode 100644 tests/DAG/data/references/scheduling/36.json
 rename tests/DAG/data/references/{ => scheduling}/5.json (59%)
 rename tests/DAG/data/references/{ => scheduling}/6.json (63%)
 rename tests/DAG/data/references/{ => scheduling}/7.json (79%)
 rename tests/DAG/data/references/{ => scheduling}/8.json (72%)
 rename tests/DAG/data/references/{ => scheduling}/9.json (71%)
 delete mode 100644 tests/DAG/data/vtl/12.vtl
 delete mode 100644 tests/DAG/data/vtl/14.vtl
 delete mode 100644 tests/DAG/data/vtl/15.vtl
 delete mode 100644 tests/DAG/data/vtl/17.vtl
 create mode 100644 tests/DAG/data/vtl/35.vtl
 create mode 100644 tests/DAG/data/vtl/36.vtl
 create mode 100644 tests/DAG/test_classification.py
 delete mode 100644 tests/DAG/test_dag.py
 create mode 100644 tests/DAG/test_scheduling.py

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index f5a4b3d6a..1d49a4ecf 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -28,20 +28,52 @@
     HRuleset,
     Identifier,
     JoinOp,
+    MulOp,
     Operator,
     ParamOp,
     PersistentAssignment,
     RegularAggregation,
     Start,
+    TimeAggregation,
     UDOCall,
+    UnaryOp,
+    Validation,
     VarID,
 )
 from vtlengine.AST.ASTTemplate import ASTTemplate
 from vtlengine.AST.DAG._models import Schedule, StatementDeps
-from vtlengine.AST.Grammar.tokens import AS, DROP, KEEP, MEMBERSHIP, RENAME, TO
+from vtlengine.AST.Grammar.tokens import (
+    AS,
+    DROP,
+    EXISTS_IN,
+    FILL_TIME_SERIES,
+    FLOW_TO_STOCK,
+    INTERSECT,
+    KEEP,
+    MEMBERSHIP,
+    PERIOD_INDICATOR,
+    RENAME,
+    SETDIFF,
+    STOCK_TO_FLOW,
+    SYMDIFF,
+    TIMESHIFT,
+    TO,
+    UNION,
+)
+from vtlengine.DataTypes import COMP_NAME_MAPPING
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset
 
+# Operators that only accept datasets as input (never scalars).
+# Grouped by AST node type for clarity.
+DATASET_ONLY_MULOP = frozenset({UNION, INTERSECT, SETDIFF, SYMDIFF, EXISTS_IN})
+DATASET_ONLY_UNARYOP = frozenset({FLOW_TO_STOCK, STOCK_TO_FLOW, PERIOD_INDICATOR})
+DATASET_ONLY_BINOP = frozenset({MEMBERSHIP, TIMESHIFT})
+
+# Reserved component names generated by the engine (e.g., count() → int_var,
+# check() → bool_var). These are never external inputs.
+RESERVED_COMPONENT_NAMES = frozenset(COMP_NAME_MAPPING.values())
+
 
 @dataclass
 class DAGAnalyzer(ASTTemplate):
@@ -65,6 +97,8 @@ class DAGAnalyzer(ASTTemplate):
     unknown_variables: Set[str] = field(default_factory=set)
     # Outputs that were consumed via unknown_variables (RegularAggregation context)
     _resolved_from_unknown: Set[str] = field(default_factory=set)
+    # UDO names that have at least one dataset-typed parameter
+    _udos_with_dataset_params: Set[str] = field(default_factory=set)
 
     @classmethod
     def ds_structure(cls, ast: AST) -> Schedule:
@@ -131,11 +165,15 @@ def _classify_global_inputs(
         """Classify global inputs into datasets, scalars, and ambiguous categories."""
         scalar_outputs, comp_or_scalar = self._compute_scalar_outputs(all_outputs)
 
-        # Identify definite dataset inputs
+        # Identify definite dataset and scalar inputs
         definite_dataset_inputs: Set[str] = set()
+        definite_scalar_inputs: Set[str] = set()
         for statement in self.dependencies.values():
+            # Collect explicitly typed scalar inputs (e.g., UDO scalar params)
+            for inp in statement.scalar_inputs:
+                definite_scalar_inputs.add(inp)
             if statement.has_dataset_op:
-                for inp in statement.inputs:
+                for inp in statement.dataset_inputs:
                     definite_dataset_inputs.add(inp)
 
         # Include component_or_scalar candidates in global_inputs
@@ -158,7 +196,9 @@ def _classify_global_inputs(
                 result["global_input_component_or_scalar"].append(name)
             elif name in definite_dataset_inputs:
                 result["global_input_datasets"].append(name)
-            elif self._feeds_only_scalar_chains(name, scalar_outputs):
+            elif name in definite_scalar_inputs or self._feeds_only_scalar_chains(
+                name, scalar_outputs
+            ):
                 result["global_input_scalars"].append(name)
             else:
                 result["global_input_dataset_or_scalar"].append(name)
@@ -317,6 +357,8 @@ def statement_structure(self) -> StatementDeps:
             persistent=list(self.current_deps.persistent),
             unknown_variables=list(self.current_deps.unknown_variables),
             has_dataset_op=self._current_has_dataset_op,
+            dataset_inputs=list(self.current_deps.dataset_inputs),
+            scalar_inputs=list(self.current_deps.scalar_inputs),
         )
         self.unknown_variables.update(self.current_deps.unknown_variables)
         return result
@@ -338,10 +380,16 @@ def visit_Start(self, node: Start) -> None:
                 self.visit(child)
         """
         udos = {}
+        udos_with_ds: Set[str] = set()
         for ast_element in node.children:
             if isinstance(ast_element, Operator):
                 udos[ast_element.op] = ast_element
+                for p in ast_element.parameters:
+                    if isinstance(p.type_, Dataset):
+                        udos_with_ds.add(ast_element.op)
+                        break
         self.udos = udos
+        self._udos_with_dataset_params = udos_with_ds
         for child in node.children:
             if isinstance(child, (Assignment, PersistentAssignment)):
                 self.is_first_assignment = True
@@ -380,6 +428,8 @@ def visit_PersistentAssignment(self, node: PersistentAssignment) -> None:
 
     def visit_RegularAggregation(self, node: RegularAggregation) -> None:
         self._current_has_dataset_op = True
+        if isinstance(node.dataset, VarID) and node.dataset.value not in self.alias:
+            self.current_deps.dataset_inputs.append(node.dataset.value)
         self.visit(node.dataset)
         if node.op in [KEEP, DROP, RENAME]:
             return
@@ -389,8 +439,10 @@ def visit_RegularAggregation(self, node: RegularAggregation) -> None:
             self.is_from_regular_aggregation = False
 
     def visit_BinOp(self, node: BinOp) -> None:
-        if node.op == MEMBERSHIP:
+        if node.op in DATASET_ONLY_BINOP:
             self._current_has_dataset_op = True
+            if isinstance(node.left, VarID) and node.left.value not in self.alias:
+                self.current_deps.dataset_inputs.append(node.left.value)
             self.is_dataset = True
             self.visit(node.left)
             self.is_dataset = False
@@ -402,6 +454,22 @@ def visit_BinOp(self, node: BinOp) -> None:
             self.visit(node.left)
             self.visit(node.right)
 
+    def visit_MulOp(self, node: MulOp) -> None:
+        if node.op in DATASET_ONLY_MULOP:
+            self._current_has_dataset_op = True
+            for child in node.children:
+                if isinstance(child, VarID) and child.value not in self.alias:
+                    self.current_deps.dataset_inputs.append(child.value)
+        for child in node.children:
+            self.visit(child)
+
+    def visit_UnaryOp(self, node: UnaryOp) -> None:
+        if node.op in DATASET_ONLY_UNARYOP:
+            self._current_has_dataset_op = True
+            if isinstance(node.operand, VarID) and node.operand.value not in self.alias:
+                self.current_deps.dataset_inputs.append(node.operand.value)
+        self.visit(node.operand)
+
     def visit_VarID(self, node: VarID) -> None:
         if (
             not self.is_from_regular_aggregation or self.is_dataset
@@ -412,6 +480,7 @@ def visit_VarID(self, node: VarID) -> None:
             self.is_from_regular_aggregation
             and node.value not in self.alias
             and not self.is_dataset
+            and node.value not in RESERVED_COMPONENT_NAMES
             and node.value not in self.current_deps.unknown_variables
         ):
             self.current_deps.unknown_variables.append(node.value)
@@ -424,32 +493,56 @@ def visit_Identifier(self, node: Identifier) -> None:
         ):
             self._current_has_dataset_op = True
             self.current_deps.inputs.append(node.value)
+            self.current_deps.dataset_inputs.append(node.value)
 
     def visit_ParamOp(self, node: ParamOp) -> None:
         if self.udos and node.op in self.udos:
+            if node.op in self._udos_with_dataset_params:
+                self._current_has_dataset_op = True
             do_ast: Operator = self.udos[node.op]
-
-            for arg in node.params:
-                index_arg = node.params.index(arg)
-                if do_ast.parameters[index_arg].type_.kind == "DataSet":
-                    self._current_has_dataset_op = True
-                    self.visit(arg)
+            for i, arg in enumerate(node.params):
+                if isinstance(arg, Constant):
+                    continue
+                param_type = do_ast.parameters[i].type_
+                if type(param_type) is not Component:
+                    is_ds = isinstance(param_type, Dataset)
+                    if is_ds and isinstance(arg, VarID):
+                        self.current_deps.dataset_inputs.append(arg.value)
+                    elif not is_ds and isinstance(arg, VarID):
+                        self.current_deps.scalar_inputs.append(arg.value)
+                self.visit(arg)
         else:
+            if node.op == FILL_TIME_SERIES:
+                self._current_has_dataset_op = True
+                for child in node.children:
+                    if isinstance(child, VarID) and child.value not in self.alias:
+                        self.current_deps.dataset_inputs.append(child.value)
             super(DAGAnalyzer, self).visit_ParamOp(node)
 
     def visit_Aggregation(self, node: Aggregation) -> None:
         self._current_has_dataset_op = True
         if node.operand is not None:
+            if isinstance(node.operand, VarID) and node.operand.value not in self.alias:
+                self.current_deps.dataset_inputs.append(node.operand.value)
             self.visit(node.operand)
 
     def visit_Analytic(self, node: Analytic) -> None:
         self._current_has_dataset_op = True
-        if node.operand is not None:
+        # Inside RegularAggregation (calc/filter/etc.), analytic operands are always
+        # component references — they cannot be external scalars, so skip them.
+        if node.operand is not None and not self.is_from_regular_aggregation:
             self.visit(node.operand)
 
     def visit_JoinOp(self, node: JoinOp) -> None:
         self._current_has_dataset_op = True
+        # Join clauses contain BinOp(AS) nodes; dataset aliases are handled via visit_BinOp.
+        # Direct VarID children in clauses are dataset references.
         for clause in node.clauses:
+            if isinstance(clause, BinOp) and clause.op == AS:
+                if isinstance(clause.left, VarID) and clause.left.value not in self.alias:
+                    self.current_deps.dataset_inputs.append(clause.left.value)
+            elif isinstance(clause, VarID) and clause.value not in self.alias:
+                self.current_deps.dataset_inputs.append(clause.value)
             self.visit(clause)
 
     def visit_UDOCall(self, node: UDOCall) -> None:
@@ -457,24 +550,44 @@ def visit_UDOCall(self, node: UDOCall) -> None:
         if not node_args:
             super().visit_UDOCall(node)
         else:
+            if node.op in self._udos_with_dataset_params:
+                self._current_has_dataset_op = True
             for p, param in zip(node_args.parameters, node.params):
                 if isinstance(param, Constant):
                     continue
                 if type(p.type_) is not Component:
-                    if isinstance(p.type_, Dataset):
-                        self._current_has_dataset_op = True
+                    is_ds = isinstance(p.type_, Dataset)
+                    if is_ds and isinstance(param, VarID):
+                        self.current_deps.dataset_inputs.append(param.value)
+                    elif not is_ds and isinstance(param, VarID):
+                        self.current_deps.scalar_inputs.append(param.value)
                     self.visit(param)
 
     def visit_HROperation(self, node: HROperation) -> None:
         """Visit HROperation node for dependency analysis."""
         self._current_has_dataset_op = True
+        if isinstance(node.dataset, VarID) and node.dataset.value not in self.alias:
+            self.current_deps.dataset_inputs.append(node.dataset.value)
         self.visit(node.dataset)
 
     def visit_DPValidation(self, node: DPValidation) -> None:
         """Visit DPValidation node for dependency analysis."""
         self._current_has_dataset_op = True
+        if isinstance(node.dataset, VarID) and node.dataset.value not in self.alias:
+            self.current_deps.dataset_inputs.append(node.dataset.value)
         self.visit(node.dataset)
 
+    def visit_TimeAggregation(self, node: TimeAggregation) -> None:
+        if node.operand is not None:
+            self.visit(node.operand)
+
+    def visit_Validation(self, node: Validation) -> None:
+        # Don't force has_dataset_op here — let the inner expression determine it.
+        # Dataset-only operators (exists_in, membership, etc.) set it themselves.
+        self.visit(node.validation)
+        if node.imbalance is not None:
+            self.visit(node.imbalance)
+
 
 class HRDAGAnalyzer(DAGAnalyzer):
     def visit_HRuleset(self, node: HRuleset) -> None:
diff --git a/src/vtlengine/AST/DAG/_models.py b/src/vtlengine/AST/DAG/_models.py
index 4c196006f..bd58f00d3 100644
--- a/src/vtlengine/AST/DAG/_models.py
+++ b/src/vtlengine/AST/DAG/_models.py
@@ -15,6 +15,11 @@ class StatementDeps:
         has_dataset_op: Whether this statement involves a dataset operation
             (RegularAggregation, JoinOp, Aggregation, Analytic, MEMBERSHIP,
             UDO with dataset params, etc.).
+        dataset_inputs: Subset of inputs that are definitively datasets
+            (e.g., direct VarID operands of dataset-only operators, or UDO params
+            typed as dataset). Empty means no input is forced to be a dataset.
+        scalar_inputs: Subset of inputs that are definitively scalars
+            (e.g., UDO params typed as a scalar type like number, string, etc.).
     """
 
     inputs: List[str] = field(default_factory=list)
@@ -22,6 +27,8 @@ class StatementDeps:
     persistent: List[str] = field(default_factory=list)
     unknown_variables: List[str] = field(default_factory=list)
     has_dataset_op: bool = False
+    dataset_inputs: List[str] = field(default_factory=list)
+    scalar_inputs: List[str] = field(default_factory=list)
 
 
 @dataclass
diff --git a/tests/DAG/data/references/1.json b/tests/DAG/data/references/1.json
deleted file mode 100644
index 42a811fce..000000000
--- a/tests/DAG/data/references/1.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "insertion": {},
-    "deletion": {
-        "3": [
-            "a",
-            "b",
-            "c"
-        ]
-    },
-    "global_inputs": [],
-    "global_input_datasets": [],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [],
-    "persistent": [
-        "c"
-    ]
-}
\ No newline at end of file
diff --git a/tests/DAG/data/references/12.json b/tests/DAG/data/references/12.json
deleted file mode 100644
index 53d80fe5f..000000000
--- a/tests/DAG/data/references/12.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
-    "insertion": {
-        "1": [
-            "DS_1"
-        ]
-    },
-    "deletion": {
-        "1": [
-            "DS_1",
-            "DS_r"
-        ]
-    },
-    "global_inputs": [
-        "DS_1",
-        "Me_1",
-        "SC_1"
-    ],
-    "global_input_datasets": [
-        "DS_1"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "Me_1",
-        "SC_1"
-    ],
-    "persistent": [
-        "DS_r"
-    ]
-}
\ No newline at end of file
diff --git a/tests/DAG/data/references/14.json b/tests/DAG/data/references/14.json
deleted file mode 100644
index 34f8edeee..000000000
--- a/tests/DAG/data/references/14.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-    "insertion": {
-        "1": [
-            "DS_1",
-            "DS_2"
-        ]
-    },
-    "deletion": {
-        "1": [
-            "DS_1",
-            "DS_2",
-            "DS_r"
-        ]
-    },
-    "global_inputs": [
-        "DS_1",
-        "DS_2"
-    ],
-    "global_input_datasets": [],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [
-        "DS_1",
-        "DS_2"
-    ],
-    "global_input_component_or_scalar": [],
-    "persistent": [
-        "DS_r"
-    ]
-}
\ No newline at end of file
diff --git a/tests/DAG/data/references/15.json b/tests/DAG/data/references/15.json
deleted file mode 100644
index f7cb442d6..000000000
--- a/tests/DAG/data/references/15.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-    "insertion": {
-        "2": [
-            "DS_1"
-        ]
-    },
-    "deletion": {
-        "2": [
-            "DS_1",
-            "DS_r",
-            "SC_r"
-        ]
-    },
-    "global_inputs": [
-        "DS_1",
-        "Me_1"
-    ],
-    "global_input_datasets": [
-        "DS_1"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "Me_1"
-    ],
-    "persistent": [
-        "DS_r"
-    ]
-}
\ No newline at end of file
diff --git a/tests/DAG/data/references/17.json b/tests/DAG/data/references/17.json
deleted file mode 100644
index 9696fa802..000000000
--- a/tests/DAG/data/references/17.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-    "insertion": {
-        "1": [
-            "DS_1"
-        ]
-    },
-    "deletion": {
-        "1": [
-            "DS_1",
-            "DS_r"
-        ]
-    },
-    "global_inputs": [
-        "DS_1"
-    ],
-    "global_input_datasets": [
-        "DS_1"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [],
-    "persistent": []
-}
\ No newline at end of file
diff --git a/tests/DAG/data/references/4.json b/tests/DAG/data/references/4.json
deleted file mode 100644
index e84302eaa..000000000
--- a/tests/DAG/data/references/4.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "insertion": {
-        "1": [
-            "DS_1",
-            "DS_2"
-        ]
-    },
-    "deletion": {
-        "1": [
-            "DS_1",
-            "DS_2",
-            "DS_r"
-        ]
-    },
-    "global_inputs": [
-        "DS_1",
-        "DS_2"
-    ],
-    "global_input_datasets": [
-        "DS_1",
-        "DS_2"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [],
-    "persistent": []
-}
diff --git a/tests/DAG/data/references/10.json b/tests/DAG/data/references/scheduling/10.json
similarity index 63%
rename from tests/DAG/data/references/10.json
rename to tests/DAG/data/references/scheduling/10.json
index 74b3870eb..bf970b66c 100644
--- a/tests/DAG/data/references/10.json
+++ b/tests/DAG/data/references/scheduling/10.json
@@ -64,43 +64,5 @@
             "BOP"
         ]
     },
-    "global_inputs": [
-        "ACCOUNTING_ENTRY",
-        "ADJUSTMENT",
-        "BOP",
-        "COMP_METHOD",
-        "COUNTERPART_SECTOR",
-        "CURRENCY_DENOM",
-        "FLOW_STOCK_ENTRY",
-        "FREQ",
-        "FUNCTIONAL_CAT",
-        "INSTR_ASSET",
-        "INT_ACC_ITEM",
-        "MATURITY",
-        "REF_SECTOR",
-        "VALUATION",
-        "imbalance"
-    ],
-    "global_input_datasets": [
-        "BOP"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "ACCOUNTING_ENTRY",
-        "ADJUSTMENT",
-        "COMP_METHOD",
-        "COUNTERPART_SECTOR",
-        "CURRENCY_DENOM",
-        "FLOW_STOCK_ENTRY",
-        "FREQ",
-        "FUNCTIONAL_CAT",
-        "INSTR_ASSET",
-        "INT_ACC_ITEM",
-        "MATURITY",
-        "REF_SECTOR",
-        "VALUATION",
-        "imbalance"
-    ],
     "persistent": []
 }
\ No newline at end of file
diff --git a/tests/DAG/data/references/11.json b/tests/DAG/data/references/scheduling/11.json
similarity index 51%
rename from tests/DAG/data/references/11.json
rename to tests/DAG/data/references/scheduling/11.json
index 53e6ccfef..76743b78b 100644
--- a/tests/DAG/data/references/11.json
+++ b/tests/DAG/data/references/scheduling/11.json
@@ -9,11 +9,6 @@
             "c"
         ]
     },
-    "global_inputs": [],
-    "global_input_datasets": [],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [],
     "persistent": [
         "c"
     ]
diff --git a/tests/DAG/data/references/13.json b/tests/DAG/data/references/scheduling/13.json
similarity index 52%
rename from tests/DAG/data/references/13.json
rename to tests/DAG/data/references/scheduling/13.json
index 12ee853e3..082d06b0d 100644
--- a/tests/DAG/data/references/13.json
+++ b/tests/DAG/data/references/scheduling/13.json
@@ -19,23 +19,6 @@
             "SC_2"
         ]
     },
-    "global_inputs": [
-        "DS_1",
-        "Me_1",
-        "SC_1",
-        "SC_2"
-    ],
-    "global_input_datasets": [
-        "DS_1"
-    ],
-    "global_input_scalars": [
-        "SC_1",
-        "SC_2"
-    ],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "Me_1"
-    ],
     "persistent": [
         "DS_r"
     ]
diff --git a/tests/DAG/data/references/16.json b/tests/DAG/data/references/scheduling/16.json
similarity index 53%
rename from tests/DAG/data/references/16.json
rename to tests/DAG/data/references/scheduling/16.json
index c94e32353..741f492cb 100644
--- a/tests/DAG/data/references/16.json
+++ b/tests/DAG/data/references/scheduling/16.json
@@ -15,17 +15,6 @@
             "SC_r"
         ]
     },
-    "global_inputs": [
-        "DS_1",
-        "SC_1"
-    ],
-    "global_input_datasets": [],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [
-        "DS_1",
-        "SC_1"
-    ],
-    "global_input_component_or_scalar": [],
     "persistent": [
         "DS_r"
     ]
diff --git a/tests/DAG/data/references/2.json b/tests/DAG/data/references/scheduling/2.json
similarity index 66%
rename from tests/DAG/data/references/2.json
rename to tests/DAG/data/references/scheduling/2.json
index 0e8dae7c7..902fd2827 100644
--- a/tests/DAG/data/references/2.json
+++ b/tests/DAG/data/references/scheduling/2.json
@@ -22,15 +22,6 @@
             "A"
         ]
     },
-    "global_inputs": [
-        "A"
-    ],
-    "global_input_datasets": [],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [
-        "A"
-    ],
-    "global_input_component_or_scalar": [],
     "persistent": [
         "a",
         "b",
diff --git a/tests/DAG/data/references/3.json b/tests/DAG/data/references/scheduling/3.json
similarity index 54%
rename from tests/DAG/data/references/3.json
rename to tests/DAG/data/references/scheduling/3.json
index 63e131dec..2099a26ea 100644
--- a/tests/DAG/data/references/3.json
+++ b/tests/DAG/data/references/scheduling/3.json
@@ -29,28 +29,6 @@
             "A"
         ]
     },
-    "global_inputs": [
-        "A",
-        "A2",
-        "var1",
-        "var3",
-        "varF",
-        "varRel",
-        "varRel2"
-    ],
-    "global_input_datasets": [
-        "A",
-        "A2"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "var1",
-        "var3",
-        "varF",
-        "varRel",
-        "varRel2"
-    ],
     "persistent": [
         "F"
     ]
diff --git a/tests/DAG/data/references/scheduling/35.json b/tests/DAG/data/references/scheduling/35.json
new file mode 100644
index 000000000..931078719
--- /dev/null
+++ b/tests/DAG/data/references/scheduling/35.json
@@ -0,0 +1,26 @@
+{
+    "insertion": {
+        "1": [
+            "DS_1",
+            "DS_2"
+        ],
+        "2": [
+            "DS_3"
+        ]
+    },
+    "deletion": {
+        "3": [
+            "DS_A",
+            "DS_B",
+            "DS_r"
+        ],
+        "2": [
+            "DS_1",
+            "DS_3"
+        ],
+        "1": [
+            "DS_2"
+        ]
+    },
+    "persistent": []
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/scheduling/36.json b/tests/DAG/data/references/scheduling/36.json
new file mode 100644
index 000000000..4df60c70b
--- /dev/null
+++ b/tests/DAG/data/references/scheduling/36.json
@@ -0,0 +1,24 @@
+{
+    "insertion": {
+        "2": [
+            "SC_1"
+        ],
+        "3": [
+            "DS_1"
+        ]
+    },
+    "deletion": {
+        "2": [
+            "SC_1",
+            "SC_a"
+        ],
+        "3": [
+            "DS_1",
+            "DS_r",
+            "SC_b"
+        ]
+    },
+    "persistent": [
+        "DS_r"
+    ]
+}
\ No newline at end of file
diff --git a/tests/DAG/data/references/5.json b/tests/DAG/data/references/scheduling/5.json
similarity index 59%
rename from tests/DAG/data/references/5.json
rename to tests/DAG/data/references/scheduling/5.json
index 0fad1b8ad..6e3d061ee 100644
--- a/tests/DAG/data/references/5.json
+++ b/tests/DAG/data/references/scheduling/5.json
@@ -29,28 +29,6 @@
             "DSD_POP"
         ]
     },
-    "global_inputs": [
-        "AGE",
-        "DSD_AGR",
-        "DSD_POP",
-        "MEASURE",
-        "SEX",
-        "TIME_HORIZ",
-        "UNIT_MEASURE"
-    ],
-    "global_input_datasets": [
-        "DSD_AGR",
-        "DSD_POP"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "AGE",
-        "MEASURE",
-        "SEX",
-        "TIME_HORIZ",
-        "UNIT_MEASURE"
-    ],
     "persistent": [
         "DS_check_agr",
         "DS_check_countries",
diff --git a/tests/DAG/data/references/6.json b/tests/DAG/data/references/scheduling/6.json
similarity index 63%
rename from tests/DAG/data/references/6.json
rename to tests/DAG/data/references/scheduling/6.json
index aac54e973..b8672add0 100644
--- a/tests/DAG/data/references/6.json
+++ b/tests/DAG/data/references/scheduling/6.json
@@ -54,36 +54,6 @@
             "DS2"
         ]
     },
-    "global_inputs": [
-        "BIS_LOC_STATS",
-        "CURRENCY",
-        "CURRENCY_DENOM",
-        "DS1",
-        "DS2",
-        "DS3",
-        "EXCHANGE_RATE",
-        "EXR_SUFFIX",
-        "EXR_TYPE",
-        "FREQ",
-        "OBS_VALUE"
-    ],
-    "global_input_datasets": [
-        "BIS_LOC_STATS",
-        "DS1",
-        "DS2",
-        "DS3"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "CURRENCY",
-        "CURRENCY_DENOM",
-        "EXCHANGE_RATE",
-        "EXR_SUFFIX",
-        "EXR_TYPE",
-        "FREQ",
-        "OBS_VALUE"
-    ],
     "persistent": [
         "numCouYear",
         "numYearCou",
diff --git a/tests/DAG/data/references/7.json b/tests/DAG/data/references/scheduling/7.json
similarity index 79%
rename from tests/DAG/data/references/7.json
rename to tests/DAG/data/references/scheduling/7.json
index 757fa6dac..075c98c33 100644
--- a/tests/DAG/data/references/7.json
+++ b/tests/DAG/data/references/scheduling/7.json
@@ -506,132 +506,6 @@
             "ANCRDT_ACCNTNG_C"
         ]
     },
-    "global_inputs": [
-        "ACCMLTD_WRTFFS",
-        "ANCRDT_ACCNTNG_C",
-        "ANCRDT_ACCNTNG_C_Z",
-        "ANCRDT_DRGTN_QRTR_CR_OA",
-        "ANCRDT_ENTTY",
-        "ANCRDT_ENTTY_DFLT_C",
-        "ANCRDT_ENTTY_DFLT_C_T1",
-        "ANCRDT_ENTTY_INSTRMNT_C",
-        "ANCRDT_ENTTY_RSK_C",
-        "ANCRDT_FNNCL_C",
-        "ANCRDT_FNNCL_C_T1",
-        "ANCRDT_INSTRMNT_C",
-        "ANCRDT_INSTRMNT_C_T1",
-        "ANCRDT_INSTRMNT_PRTCTN_RCVD_C",
-        "ANCRDT_JNT_LBLTS_C",
-        "ANCRDT_PRTCTN_RCVD_C",
-        "ANCRDT_PRTCTN_RCVD_C_T1",
-        "CC0010",
-        "CNTRY",
-        "CRDTR",
-        "CRDTR_CD",
-        "DBTR_CD",
-        "DFLT_STTS",
-        "DT_BRTH",
-        "DT_INCPTN",
-        "DT_RFRNC",
-        "ENTTY_RIAD_CD",
-        "ENTTY_RL",
-        "FRGN_BRNCH",
-        "HD_OFFC_UNDRT_CD",
-        "HD_OFFC_UNDRT_CNTRY",
-        "HD_QRTR_CD_CRDTR",
-        "HD_QRTR_CD_DBTR",
-        "IMMDT_PRNT_UNDRT_CD",
-        "INSTTTNL_SCTR",
-        "INSTTTNL_SCTR_DTL",
-        "IS_PRTCTN_PRVDR",
-        "LGL_FRM",
-        "OBSRVD_AGNT_CD",
-        "OFF_BLNC_SHT_AMNT",
-        "OTHR_TYP_ENTTY",
-        "OTSTNDNG_NMNL_AMNT",
-        "PRTCTN_ALLCTD_VL",
-        "PRTCTN_PRVDR_CD",
-        "RCGNTN_STTS",
-        "RCRS",
-        "SPFUND",
-        "SRVCR",
-        "SSMSIGNIFICANCE",
-        "THRD_PRTY_PRRTY_CLMS",
-        "TRD_RCVBL_NN_RCRS",
-        "TTL_NMBR_DBTRS",
-        "TTL_NMBR_DFLT_DBTRS",
-        "TYP_INSTRMNT",
-        "TYP_PRTCTN",
-        "TYP_SCRTSTN",
-        "ULTMT_PRNT_UNDRT_CD",
-        "bool_var",
-        "int_var"
-    ],
-    "global_input_datasets": [
-        "ANCRDT_ACCNTNG_C",
-        "ANCRDT_ACCNTNG_C_Z",
-        "ANCRDT_ENTTY",
-        "ANCRDT_ENTTY_DFLT_C",
-        "ANCRDT_ENTTY_DFLT_C_T1",
-        "ANCRDT_ENTTY_INSTRMNT_C",
-        "ANCRDT_ENTTY_RSK_C",
-        "ANCRDT_FNNCL_C",
-        "ANCRDT_FNNCL_C_T1",
-        "ANCRDT_INSTRMNT_C",
-        "ANCRDT_INSTRMNT_C_T1",
-        "ANCRDT_INSTRMNT_PRTCTN_RCVD_C",
-        "ANCRDT_JNT_LBLTS_C",
-        "ANCRDT_PRTCTN_RCVD_C",
-        "ANCRDT_PRTCTN_RCVD_C_T1"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "ACCMLTD_WRTFFS",
-        "ANCRDT_DRGTN_QRTR_CR_OA",
-        "CC0010",
-        "CNTRY",
-        "CRDTR",
-        "CRDTR_CD",
-        "DBTR_CD",
-        "DFLT_STTS",
-        "DT_BRTH",
-        "DT_INCPTN",
-        "DT_RFRNC",
-        "ENTTY_RIAD_CD",
-        "ENTTY_RL",
-        "FRGN_BRNCH",
-        "HD_OFFC_UNDRT_CD",
-        "HD_OFFC_UNDRT_CNTRY",
-        "HD_QRTR_CD_CRDTR",
-        "HD_QRTR_CD_DBTR",
-        "IMMDT_PRNT_UNDRT_CD",
-        "INSTTTNL_SCTR",
-        "INSTTTNL_SCTR_DTL",
-        "IS_PRTCTN_PRVDR",
-        "LGL_FRM",
-        "OBSRVD_AGNT_CD",
-        "OFF_BLNC_SHT_AMNT",
-        "OTHR_TYP_ENTTY",
-        "OTSTNDNG_NMNL_AMNT",
-        "PRTCTN_ALLCTD_VL",
-        "PRTCTN_PRVDR_CD",
-        "RCGNTN_STTS",
-        "RCRS",
-        "SPFUND",
-        "SRVCR",
-        "SSMSIGNIFICANCE",
-        "THRD_PRTY_PRRTY_CLMS",
-        "TRD_RCVBL_NN_RCRS",
-        "TTL_NMBR_DBTRS",
-        "TTL_NMBR_DFLT_DBTRS",
-        "TYP_INSTRMNT",
-        "TYP_PRTCTN",
-        "TYP_SCRTSTN",
-        "ULTMT_PRNT_UNDRT_CD",
-        "bool_var",
-        "int_var"
-    ],
     "persistent": [
         "ACCNTNG_CMPLTNSS",
         "CN0230",
diff --git a/tests/DAG/data/references/8.json b/tests/DAG/data/references/scheduling/8.json
similarity index 72%
rename from tests/DAG/data/references/8.json
rename to tests/DAG/data/references/scheduling/8.json
index dacbdc15f..5f728af27 100644
--- a/tests/DAG/data/references/8.json
+++ b/tests/DAG/data/references/scheduling/8.json
@@ -98,44 +98,6 @@
             "ANCRDT_INSTRMNT_C_T3"
         ]
     },
-    "global_inputs": [
-        "ANCRDT_ACCNTNG_C",
-        "ANCRDT_ACCNTNG_C_T3",
-        "ANCRDT_ENTTY",
-        "ANCRDT_ENTTY_DFLT_C",
-        "ANCRDT_ENTTY_INSTRMNT_C",
-        "ANCRDT_FNNCL_C",
-        "ANCRDT_INSTRMNT_C",
-        "ANCRDT_INSTRMNT_C_T1",
-        "ANCRDT_INSTRMNT_C_T2",
-        "ANCRDT_INSTRMNT_C_T3",
-        "ENTTY_RIAD_CD",
-        "ENTTY_RL",
-        "HD_OFFC_UNDRT_CD",
-        "LGL_ENTTY_CD",
-        "OBSRVD_AGNT_CD"
-    ],
-    "global_input_datasets": [
-        "ANCRDT_ACCNTNG_C",
-        "ANCRDT_ACCNTNG_C_T3",
-        "ANCRDT_ENTTY",
-        "ANCRDT_ENTTY_DFLT_C",
-        "ANCRDT_ENTTY_INSTRMNT_C",
-        "ANCRDT_FNNCL_C",
-        "ANCRDT_INSTRMNT_C",
-        "ANCRDT_INSTRMNT_C_T1",
-        "ANCRDT_INSTRMNT_C_T2",
-        "ANCRDT_INSTRMNT_C_T3"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "ENTTY_RIAD_CD",
-        "ENTTY_RL",
-        "HD_OFFC_UNDRT_CD",
-        "LGL_ENTTY_CD",
-        "OBSRVD_AGNT_CD"
-    ],
     "persistent": [
         "DP_RLST_ACCNTNG_FRMWRK_RSLT",
         "DP_RLST_ACCNTNG_INTRCMPNY",
diff --git a/tests/DAG/data/references/9.json b/tests/DAG/data/references/scheduling/9.json
similarity index 71%
rename from tests/DAG/data/references/9.json
rename to tests/DAG/data/references/scheduling/9.json
index e92a92fc4..b487827b6 100644
--- a/tests/DAG/data/references/9.json
+++ b/tests/DAG/data/references/scheduling/9.json
@@ -128,58 +128,6 @@
             "Oferta_PT_2025_Q1"
         ]
     },
-    "global_inputs": [
-        "Income_PT",
-        "Inflation_PT",
-        "Inflation_divisors_Q",
-        "Oferta_PT_2025_Q1",
-        "Vendas_PT_2025_Q1",
-        "coefficient",
-        "coefficient_cq",
-        "coefficient_inv",
-        "coefficient_l",
-        "coefficient_lc",
-        "coefficient_lcq",
-        "coefficient_lq",
-        "coefficient_q",
-        "county",
-        "divisor",
-        "estado",
-        "income",
-        "period_label",
-        "regiao",
-        "value",
-        "var",
-        "year_str"
-    ],
-    "global_input_datasets": [
-        "Income_PT",
-        "Inflation_PT",
-        "Inflation_divisors_Q",
-        "Oferta_PT_2025_Q1",
-        "Vendas_PT_2025_Q1"
-    ],
-    "global_input_scalars": [],
-    "global_input_dataset_or_scalar": [],
-    "global_input_component_or_scalar": [
-        "coefficient",
-        "coefficient_cq",
-        "coefficient_inv",
-        "coefficient_l",
-        "coefficient_lc",
-        "coefficient_lcq",
-        "coefficient_lq",
-        "coefficient_q",
-        "county",
-        "divisor",
-        "estado",
-        "income",
-        "period_label",
-        "regiao",
-        "value",
-        "var",
-        "year_str"
-    ],
     "persistent": [
         "output_generic",
         "output_generic_eda",
diff --git a/tests/DAG/data/vtl/12.vtl b/tests/DAG/data/vtl/12.vtl
deleted file mode 100644
index f9ee4c4bd..000000000
--- a/tests/DAG/data/vtl/12.vtl
+++ /dev/null
@@ -1 +0,0 @@
-DS_r <- DS_1[calc Me_2 := Me_1 * SC_1];
diff --git a/tests/DAG/data/vtl/14.vtl b/tests/DAG/data/vtl/14.vtl
deleted file mode 100644
index e19c38657..000000000
--- a/tests/DAG/data/vtl/14.vtl
+++ /dev/null
@@ -1 +0,0 @@
-DS_r <- DS_1 + DS_2;
diff --git a/tests/DAG/data/vtl/15.vtl b/tests/DAG/data/vtl/15.vtl
deleted file mode 100644
index b17fb76c0..000000000
--- a/tests/DAG/data/vtl/15.vtl
+++ /dev/null
@@ -1,2 +0,0 @@
-SC_r := 10;
-DS_r <- DS_1[calc Me_2 := Me_1 + SC_r];
diff --git a/tests/DAG/data/vtl/17.vtl b/tests/DAG/data/vtl/17.vtl
deleted file mode 100644
index 2e9470e04..000000000
--- a/tests/DAG/data/vtl/17.vtl
+++ /dev/null
@@ -1 +0,0 @@
-DS_r := DS_1#Me_1;
diff --git a/tests/DAG/data/vtl/35.vtl b/tests/DAG/data/vtl/35.vtl
new file mode 100644
index 000000000..d5a7f8116
--- /dev/null
+++ b/tests/DAG/data/vtl/35.vtl
@@ -0,0 +1,3 @@
+DS_A := DS_1 + DS_2;
+DS_B := DS_1 * DS_3;
+DS_r := DS_A + DS_B;
diff --git a/tests/DAG/data/vtl/36.vtl b/tests/DAG/data/vtl/36.vtl
new file mode 100644
index 000000000..2d6cb0181
--- /dev/null
+++ b/tests/DAG/data/vtl/36.vtl
@@ -0,0 +1,3 @@
+SC_a := 10;
+SC_b := SC_a + SC_1;
+DS_r <- DS_1[calc Me_2 := Me_1 + SC_b];
diff --git a/tests/DAG/test_classification.py b/tests/DAG/test_classification.py
new file mode 100644
index 000000000..a31477db8
--- /dev/null
+++ b/tests/DAG/test_classification.py
@@ -0,0 +1,657 @@
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import List, Optional
+
+import pytest
+
+from vtlengine.API import create_ast
+from vtlengine.AST.DAG import DAGAnalyzer
+
+data_path = Path(__file__).parent / "data"
+
+
+@dataclass
+class Classification:  # expected global-input classification for one test case in CASES
+    datasets: List[str] = field(default_factory=list)  # inputs expected to be datasets
+    scalars: List[str] = field(default_factory=list)  # inputs expected to be scalars
+    dataset_or_scalar: List[str] = field(default_factory=list)  # either a dataset or a scalar
+    component_or_scalar: List[str] = field(default_factory=list)  # either a component or a scalar
+    vtl: Optional[str] = None  # inline VTL script; None means the script is read from a VTL file
+
+
+# Tests 1-11: vtl=None → read from VTL files (complex multi-statement scripts).
+# Tests 12+: vtl is inline.
+CASES: dict[str, Classification] = {
+    # --- File-based tests (complex multi-statement scripts) ---
+    "1": Classification(),
+    "2": Classification(dataset_or_scalar=["A"]),
+    "3": Classification(
+        datasets=["A", "A2"],
+        component_or_scalar=["var1", "var3", "varF", "varRel", "varRel2"],
+    ),
+    "4": Classification(datasets=["DS_1", "DS_2"]),
+    "5": Classification(
+        datasets=["DSD_AGR", "DSD_POP"],
+        component_or_scalar=["AGE", "MEASURE", "SEX", "TIME_HORIZ", "UNIT_MEASURE"],
+    ),
+    "6": Classification(
+        datasets=["BIS_LOC_STATS", "DS1", "DS2", "DS3"],
+        component_or_scalar=[
+            "CURRENCY",
+            "CURRENCY_DENOM",
+            "EXCHANGE_RATE",
+            "EXR_SUFFIX",
+            "EXR_TYPE",
+            "FREQ",
+            "OBS_VALUE",
+        ],
+    ),
+    "7": Classification(
+        datasets=[
+            "ANCRDT_ACCNTNG_C",
+            "ANCRDT_ACCNTNG_C_Z",
+            "ANCRDT_ENTTY",
+            "ANCRDT_ENTTY_DFLT_C",
+            "ANCRDT_ENTTY_DFLT_C_T1",
+            "ANCRDT_ENTTY_INSTRMNT_C",
+            "ANCRDT_ENTTY_RSK_C",
+            "ANCRDT_FNNCL_C",
+            "ANCRDT_FNNCL_C_T1",
+            "ANCRDT_INSTRMNT_C",
+            "ANCRDT_INSTRMNT_C_T1",
+            "ANCRDT_INSTRMNT_PRTCTN_RCVD_C",
+            "ANCRDT_JNT_LBLTS_C",
+            "ANCRDT_PRTCTN_RCVD_C",
+            "ANCRDT_PRTCTN_RCVD_C_T1",
+        ],
+        component_or_scalar=[
+            "ACCMLTD_WRTFFS",
+            "ANCRDT_DRGTN_QRTR_CR_OA",
+            "CC0010",
+            "CNTRY",
+            "CRDTR",
+            "CRDTR_CD",
+            "DBTR_CD",
+            "DFLT_STTS",
+            "DT_BRTH",
+            "DT_INCPTN",
+            "DT_RFRNC",
+            "ENTTY_RIAD_CD",
+            "ENTTY_RL",
+            "FRGN_BRNCH",
+            "HD_OFFC_UNDRT_CD",
+            "HD_OFFC_UNDRT_CNTRY",
+            "HD_QRTR_CD_CRDTR",
+            "HD_QRTR_CD_DBTR",
+            "IMMDT_PRNT_UNDRT_CD",
+            "INSTTTNL_SCTR",
+            "INSTTTNL_SCTR_DTL",
+            "IS_PRTCTN_PRVDR",
+            "LGL_FRM",
+            "OBSRVD_AGNT_CD",
+            "OFF_BLNC_SHT_AMNT",
+            "OTHR_TYP_ENTTY",
+            "OTSTNDNG_NMNL_AMNT",
+            "PRTCTN_ALLCTD_VL",
+            "PRTCTN_PRVDR_CD",
+            "RCGNTN_STTS",
+            "RCRS",
+            "SPFUND",
+            "SRVCR",
+            "SSMSIGNIFICANCE",
+            "THRD_PRTY_PRRTY_CLMS",
+            "TRD_RCVBL_NN_RCRS",
+            "TTL_NMBR_DBTRS",
+            "TTL_NMBR_DFLT_DBTRS",
+            "TYP_INSTRMNT",
+            "TYP_PRTCTN",
+            "TYP_SCRTSTN",
+            "ULTMT_PRNT_UNDRT_CD",
+        ],
+    ),
+    "8": Classification(
+        datasets=[
+            "ANCRDT_ACCNTNG_C",
+            "ANCRDT_ACCNTNG_C_T3",
+            "ANCRDT_ENTTY",
+            "ANCRDT_ENTTY_DFLT_C",
+            "ANCRDT_ENTTY_INSTRMNT_C",
+            "ANCRDT_FNNCL_C",
+            "ANCRDT_INSTRMNT_C",
+            "ANCRDT_INSTRMNT_C_T1",
+            "ANCRDT_INSTRMNT_C_T2",
+            "ANCRDT_INSTRMNT_C_T3",
+        ],
+        component_or_scalar=[
+            "ENTTY_RIAD_CD",
+            "ENTTY_RL",
+            "HD_OFFC_UNDRT_CD",
+            "LGL_ENTTY_CD",
+            "OBSRVD_AGNT_CD",
+        ],
+    ),
+    "9": Classification(
+        datasets=[
+            "Income_PT",
+            "Inflation_PT",
+            "Inflation_divisors_Q",
+            "Oferta_PT_2025_Q1",
+            "Vendas_PT_2025_Q1",
+        ],
+        component_or_scalar=[
+            "coefficient",
+            "coefficient_cq",
+            "coefficient_inv",
+            "coefficient_lc",
+            "coefficient_lcq",
+            "coefficient_q",
+            "county",
+            "divisor",
+            "estado",
+            "income",
+            "period_label",
+            "regiao",
+            "value",
+            "var",
+            "year_str",
+        ],
+    ),
+    "10": Classification(
+        datasets=["BOP"],
+        component_or_scalar=[
+            "ACCOUNTING_ENTRY",
+            "ADJUSTMENT",
+            "COMP_METHOD",
+            "COUNTERPART_SECTOR",
+            "CURRENCY_DENOM",
+            "FLOW_STOCK_ENTRY",
+            "FREQ",
+            "FUNCTIONAL_CAT",
+            "INSTR_ASSET",
+            "INT_ACC_ITEM",
+            "MATURITY",
+            "REF_SECTOR",
+            "VALUATION",
+            "imbalance",
+        ],
+    ),
+    "11": Classification(),
+    # --- Inline tests ---
+    # Calc with external component/scalar
+    "12": Classification(
+        vtl="DS_r <- DS_1[calc Me_2 := Me_1 * SC_1];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1", "SC_1"],
+    ),
+    # Scalar chain with UDO
+    "13": Classification(
+        vtl="SC_a := SC_1 + SC_2;\nDS_r <- DS_1[calc Me_2 := Me_1 + SC_a];",
+        datasets=["DS_1"],
+        scalars=["SC_1", "SC_2"],
+        component_or_scalar=["Me_1"],
+    ),
+    # Dual binary op
+    "14": Classification(
+        vtl="DS_r <- DS_1 + DS_2;",
+        dataset_or_scalar=["DS_1", "DS_2"],
+    ),
+    # Scalar chain feeding calc
+    "15": Classification(
+        vtl="SC_r := 10;\nDS_r <- DS_1[calc Me_2 := Me_1 + SC_r];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1"],
+    ),
+    # Mixed dataset_or_scalar + scalar chain
+    "16": Classification(
+        vtl="DS_r <- DS_1 + SC_1;\nSC_r := SC_1 * 2;",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # Membership
+    "17": Classification(vtl="DS_r := DS_1#Me_1;", datasets=["DS_1"]),
+    # Set operators (dataset-only)
+    "18": Classification(vtl="DS_r <- union(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    "19": Classification(vtl="DS_r <- intersect(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    "20": Classification(vtl="DS_r <- setdiff(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    # If-then-else (dual)
+    "21": Classification(
+        vtl="DS_r := if DS_1 then DS_2 else DS_3;",
+        dataset_or_scalar=["DS_1", "DS_2", "DS_3"],
+    ),
+    "22": Classification(
+        vtl="SC_r := if true then SC_1 else SC_2;",
+        dataset_or_scalar=["SC_1", "SC_2"],
+    ),
+    # Comparison / logical (dual)
+    "23": Classification(vtl="DS_r := DS_1 > DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "24": Classification(vtl="DS_r := DS_1 and DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "25": Classification(vtl="DS_r := not DS_1;", dataset_or_scalar=["DS_1"]),
+    # Numeric (dual)
+    "26": Classification(vtl="DS_r := abs(DS_1);", dataset_or_scalar=["DS_1"]),
+    "27": Classification(vtl="SC_r := abs(SC_1);", dataset_or_scalar=["SC_1"]),
+    # Aggregation (dataset-only)
+    "28": Classification(vtl="DS_r := sum(DS_1);", datasets=["DS_1"]),
+    # Clause operators
+    "29": Classification(
+        vtl="DS_r <- DS_1[filter Me_1 > 10];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1"],
+    ),
+    "30": Classification(vtl="DS_r <- DS_1[keep Me_1];", datasets=["DS_1"]),
+    "31": Classification(vtl="DS_r <- DS_1[drop Me_1];", datasets=["DS_1"]),
+    "32": Classification(vtl="DS_r <- DS_1[rename Me_1 to Me_2];", datasets=["DS_1"]),
+    "33": Classification(
+        vtl="DS_r := DS_1[sub Id_1 = 1];",
+        datasets=["DS_1"],
+        component_or_scalar=["Id_1"],
+    ),
+    # Parameterized (dual)
+    "34": Classification(vtl="DS_r := round(DS_1, 2);", dataset_or_scalar=["DS_1"]),
+    # Multi-statement with intermediates
+    "35": Classification(
+        vtl="DS_A := DS_1 + DS_2;\nDS_B := DS_1 * DS_3;\nDS_r := DS_A + DS_B;",
+        dataset_or_scalar=["DS_1", "DS_2", "DS_3"],
+    ),
+    # Scalar chain propagation
+    "36": Classification(
+        vtl="SC_a := 10;\nSC_b := SC_a + SC_1;\nDS_r <- DS_1[calc Me_2 := Me_1 + SC_b];",
+        datasets=["DS_1"],
+        scalars=["SC_1"],
+        component_or_scalar=["Me_1"],
+    ),
+    # Join (dataset-only)
+    "37": Classification(vtl="DS_r := inner_join(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    # Dual unary
+    "38": Classification(vtl="DS_r := isnull(DS_1);", dataset_or_scalar=["DS_1"]),
+    "39": Classification(vtl="DS_r := -DS_1;", dataset_or_scalar=["DS_1"]),
+    # Calc with multiple external refs
+    "40": Classification(
+        vtl="DS_r <- DS_1[calc Me_2 := Me_1 + SC_1, Me_3 := Me_1 * SC_2];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1", "SC_1", "SC_2"],
+    ),
+    # UDO with typed parameters
+    "41": Classification(
+        vtl=(
+            "define operator my_op (ds dataset, sc number)\n"
+            "  returns dataset is\n"
+            "    ds * sc\n"
+            "end operator;\n\n"
+            "DS_r := my_op(DS_1, SC_1);"
+        ),
+        datasets=["DS_1"],
+        scalars=["SC_1"],
+    ),
+    # Time operators and exists_in (dataset-only)
+    "42": Classification(vtl="DS_r := flow_to_stock(DS_1);", datasets=["DS_1"]),
+    "43": Classification(vtl="DS_r := stock_to_flow(DS_1);", datasets=["DS_1"]),
+    "44": Classification(vtl="DS_r := exists_in(DS_1, DS_2, all);", datasets=["DS_1", "DS_2"]),
+    "45": Classification(vtl="DS_r := timeshift(DS_1, 1);", datasets=["DS_1"]),
+    # --- Dual BinOp operators ---
+    "46": Classification(vtl="DS_r := DS_1 / DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "47": Classification(vtl="DS_r := mod(DS_1, DS_2);", dataset_or_scalar=["DS_1", "DS_2"]),
+    "48": Classification(vtl="DS_r := DS_1 || DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "49": Classification(vtl="DS_r := DS_1 or DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "50": Classification(vtl="DS_r := DS_1 xor DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "51": Classification(vtl="DS_r := DS_1 = DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "52": Classification(vtl="DS_r := DS_1 <> DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "53": Classification(vtl="DS_r := DS_1 >= DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "54": Classification(vtl="DS_r := DS_1 < DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "55": Classification(vtl="DS_r := DS_1 <= DS_2;", dataset_or_scalar=["DS_1", "DS_2"]),
+    "56": Classification(vtl="DS_r := DS_1 in {1, 2, 3};", dataset_or_scalar=["DS_1"]),
+    "57": Classification(vtl="DS_r := DS_1 not_in {1, 2, 3};", dataset_or_scalar=["DS_1"]),
+    "58": Classification(
+        vtl='DS_r := match_characters(DS_1, "[a-z]+");',
+        dataset_or_scalar=["DS_1"],
+    ),
+    "59": Classification(vtl="DS_r := nvl(DS_1, 0);", dataset_or_scalar=["DS_1"]),
+    # --- Dual UnaryOp operators ---
+    "60": Classification(vtl="DS_r := exp(DS_1);", dataset_or_scalar=["DS_1"]),
+    "61": Classification(vtl="DS_r := ln(DS_1);", dataset_or_scalar=["DS_1"]),
+    "62": Classification(vtl="DS_r := sqrt(DS_1);", dataset_or_scalar=["DS_1"]),
+    "63": Classification(vtl="DS_r := ceil(DS_1);", dataset_or_scalar=["DS_1"]),
+    "64": Classification(vtl="DS_r := floor(DS_1);", dataset_or_scalar=["DS_1"]),
+    "65": Classification(vtl="DS_r := +DS_1;", dataset_or_scalar=["DS_1"]),
+    "66": Classification(vtl="DS_r := length(DS_1);", dataset_or_scalar=["DS_1"]),
+    "67": Classification(vtl="DS_r := upper(DS_1);", dataset_or_scalar=["DS_1"]),
+    "68": Classification(vtl="DS_r := lower(DS_1);", dataset_or_scalar=["DS_1"]),
+    "69": Classification(vtl="DS_r := trim(DS_1);", dataset_or_scalar=["DS_1"]),
+    "70": Classification(vtl="DS_r := ltrim(DS_1);", dataset_or_scalar=["DS_1"]),
+    "71": Classification(vtl="DS_r := rtrim(DS_1);", dataset_or_scalar=["DS_1"]),
+    # --- Dual ParamOp operators ---
+    "72": Classification(vtl="DS_r := trunc(DS_1, 2);", dataset_or_scalar=["DS_1"]),
+    "73": Classification(vtl="DS_r := power(DS_1, 2);", dataset_or_scalar=["DS_1"]),
+    "74": Classification(vtl="DS_r := log(DS_1, 10);", dataset_or_scalar=["DS_1"]),
+    "75": Classification(vtl="DS_r := substr(DS_1, 1, 3);", dataset_or_scalar=["DS_1"]),
+    "76": Classification(vtl='DS_r := replace(DS_1, "a", "b");', dataset_or_scalar=["DS_1"]),
+    "77": Classification(vtl='DS_r := instr(DS_1, "a");', dataset_or_scalar=["DS_1"]),
+    "78": Classification(vtl="DS_r := cast(DS_1, integer);", dataset_or_scalar=["DS_1"]),
+    # --- Dual MulOp / conditional ---
+    "79": Classification(vtl="DS_r := between(DS_1, 1, 10);", dataset_or_scalar=["DS_1"]),
+    "80": Classification(
+        vtl="DS_r := case when DS_1 > 0 then DS_2 else DS_3;",
+        dataset_or_scalar=["DS_1", "DS_2", "DS_3"],
+    ),
+    # --- Dual time operators ---
+    "81": Classification(
+        vtl="DS_r := datediff(DS_1, DS_2);",
+        dataset_or_scalar=["DS_1", "DS_2"],
+    ),
+    "82": Classification(vtl='DS_r := dateadd(DS_1, 1, "M");', dataset_or_scalar=["DS_1"]),
+    "83": Classification(vtl="DS_r := getyear(DS_1);", dataset_or_scalar=["DS_1"]),
+    "84": Classification(vtl="DS_r := getmonth(DS_1);", dataset_or_scalar=["DS_1"]),
+    "85": Classification(vtl="DS_r := dayofmonth(DS_1);", dataset_or_scalar=["DS_1"]),
+    "86": Classification(vtl="DS_r := dayofyear(DS_1);", dataset_or_scalar=["DS_1"]),
+    # --- Dataset-only operators ---
+    "87": Classification(vtl="DS_r := symdiff(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    "88": Classification(vtl="DS_r := left_join(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    "89": Classification(vtl="DS_r := full_join(DS_1, DS_2);", datasets=["DS_1", "DS_2"]),
+    "90": Classification(
+        vtl="DS_r := cross_join(DS_1 as d1, DS_2 as d2);",
+        datasets=["DS_1", "DS_2"],
+    ),
+    "91": Classification(vtl="DS_r := count(DS_1);", datasets=["DS_1"]),
+    "92": Classification(vtl="DS_r := min(DS_1);", datasets=["DS_1"]),
+    "93": Classification(vtl="DS_r := max(DS_1);", datasets=["DS_1"]),
+    "94": Classification(vtl="DS_r := avg(DS_1);", datasets=["DS_1"]),
+    "95": Classification(vtl="DS_r := median(DS_1);", datasets=["DS_1"]),
+    "96": Classification(vtl="DS_r := stddev_pop(DS_1);", datasets=["DS_1"]),
+    "97": Classification(vtl="DS_r := stddev_samp(DS_1);", datasets=["DS_1"]),
+    "98": Classification(vtl="DS_r := var_pop(DS_1);", datasets=["DS_1"]),
+    "99": Classification(vtl="DS_r := var_samp(DS_1);", datasets=["DS_1"]),
+    "100": Classification(vtl="DS_r := fill_time_series(DS_1, all);", datasets=["DS_1"]),
+    "101": Classification(vtl="DS_r := period_indicator(DS_1);", datasets=["DS_1"]),
+    # --- Clause operators ---
+    "102": Classification(
+        vtl="DS_r := DS_1[aggr Me_1 := sum(Me_2) group by Id_1];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_2"],
+    ),
+    "103": Classification(vtl="DS_r := DS_1[pivot Id_1, Me_1];", datasets=["DS_1"]),
+    "104": Classification(vtl="DS_r := DS_1[unpivot Id_1, Me_1];", datasets=["DS_1"]),
+    # --- Analytic operators (operands are always components, never scalars) ---
+    "105": Classification(
+        vtl="DS_r := DS_1[calc Me_2 := first_value(Me_1 over (order by Id_1))];",
+        datasets=["DS_1"],
+    ),
+    "106": Classification(
+        vtl="DS_r := DS_1[calc Me_2 := last_value(Me_1 over (order by Id_1))];",
+        datasets=["DS_1"],
+    ),
+    "107": Classification(
+        vtl="DS_r := DS_1[calc Me_2 := lag(Me_1, 1 over (order by Id_1))];",
+        datasets=["DS_1"],
+    ),
+    "108": Classification(
+        vtl="DS_r := DS_1[calc Me_2 := lead(Me_1, 1 over (order by Id_1))];",
+        datasets=["DS_1"],
+    ),
+    "109": Classification(
+        vtl="DS_r := DS_1[calc Me_2 := rank(over (order by Id_1))];",
+        datasets=["DS_1"],
+    ),
+    "110": Classification(
+        vtl="DS_r := DS_1[calc Me_2 := ratio_to_report(Me_1 over (partition by Id_1))];",
+        datasets=["DS_1"],
+    ),
+    # --- Join clause operators ---
+    "111": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 calc Me_2 := Me_1 + SC_1);",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_1", "SC_1"],
+    ),
+    "112": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 filter Me_1 > 10);",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_1"],
+    ),
+    "113": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 keep Me_1);",
+        datasets=["DS_1", "DS_2"],
+    ),
+    "114": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 rename Me_1 to Me_2);",
+        datasets=["DS_1", "DS_2"],
+    ),
+    "115": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 using Id_1);",
+        datasets=["DS_1", "DS_2"],
+    ),
+    "116": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2, DS_3);",
+        datasets=["DS_1", "DS_2", "DS_3"],
+    ),
+    "117": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 using Id_1 calc Me_2 := Me_1 + SC_1);",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_1", "SC_1"],
+    ),
+    "118": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 aggr Me_1 := sum(Me_2) group by Id_1);",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_2"],
+    ),
+    "119": Classification(
+        vtl="DS_r := left_join(DS_1, DS_2 calc Me_2 := Me_1 * 2);",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_1"],
+    ),
+    "120": Classification(
+        vtl="DS_r := full_join(DS_1, DS_2 filter Me_1 > 0);",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_1"],
+    ),
+    # --- Validation operators ---
+    "121": Classification(
+        vtl='DS_r := check(DS_1#Me_1 > 0 errorcode "E001" errorlevel 1);',
+        datasets=["DS_1"],
+    ),
+    "122": Classification(
+        vtl='DS_r := check(not isnull(DS_1#Me_1) errorcode "E002" errorlevel 2 invalid);',
+        datasets=["DS_1"],
+    ),
+    "123": Classification(
+        vtl='DS_r := check(exists_in(DS_1, DS_2, true) errorcode "E003" errorlevel 3 all);',
+        datasets=["DS_1", "DS_2"],
+    ),
+    "124": Classification(
+        vtl=(
+            "define datapoint ruleset dpr1 (variable Me_1 as Number) is\n"
+            '  rule1: Me_1 > 0 errorcode "E001" errorlevel 1\n'
+            "end datapoint ruleset;\n\n"
+            "DS_r := check_datapoint(DS_1, dpr1);"
+        ),
+        datasets=["DS_1"],
+    ),
+    "125": Classification(
+        vtl=(
+            "define datapoint ruleset dpr1 (variable Me_1 as Number) is\n"
+            '  rule1: Me_1 > 0 errorcode "E001" errorlevel 1\n'
+            "end datapoint ruleset;\n\n"
+            "DS_r := check_datapoint(DS_1, dpr1 all);"
+        ),
+        datasets=["DS_1"],
+    ),
+    "128": Classification(
+        vtl='DS_r := check(DS_1 + SC_1 > 0 errorcode "E001" errorlevel 1);',
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # --- Hierarchical operators ---
+    "126": Classification(
+        vtl=(
+            "define hierarchical ruleset hr1 (variable rule Me_1) is\n"
+            "  A = B + C\n"
+            "end hierarchical ruleset;\n\n"
+            "DS_r := hierarchy(DS_1, hr1 rule Me_1 non_null all);"
+        ),
+        datasets=["DS_1"],
+    ),
+    "127": Classification(
+        vtl=(
+            "define hierarchical ruleset hr1 (variable rule Me_1) is\n"
+            "  A = B + C\n"
+            "end hierarchical ruleset;\n\n"
+            "DS_r := check_hierarchy(DS_1, hr1 rule Me_1 non_null all);"
+        ),
+        datasets=["DS_1"],
+    ),
+    # --- Dataset-only operators with mixed sub-expressions ---
+    # When a dataset-only operator wraps a complex expression (e.g., DS_1 + SC_1),
+    # the sub-expression operands should NOT all be forced to datasets.
+    "129": Classification(
+        vtl="DS_r := count(DS_1 + SC_1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "130": Classification(
+        vtl="DS_r := sum(DS_1 * SC_1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "131": Classification(
+        vtl="DS_r := avg(DS_1 + SC_1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "132": Classification(
+        vtl="DS_r := flow_to_stock(DS_1 + SC_1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "133": Classification(
+        vtl="DS_r := stock_to_flow(DS_1 - SC_1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "134": Classification(
+        vtl="DS_r := union(DS_1 + SC_1, DS_2);",
+        datasets=["DS_2"],
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "135": Classification(
+        vtl="DS_r := intersect(DS_1 + SC_1, DS_2);",
+        datasets=["DS_2"],
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "136": Classification(
+        vtl="DS_r := union(DS_1 + SC_1, DS_2 + SC_2);",
+        dataset_or_scalar=["DS_1", "DS_2", "SC_1", "SC_2"],
+    ),
+    "137": Classification(
+        vtl="DS_r := exists_in(DS_1 + SC_1, DS_2, all);",
+        datasets=["DS_2"],
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "138": Classification(
+        vtl="DS_r := timeshift(DS_1 + SC_1, 1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "139": Classification(
+        vtl="DS_r := (DS_1 + SC_1)#Me_1;",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # Calc clause with dataset-only sub-expression: dataset is direct VarID,
+    # but calc body contains sub-expression with external refs
+    "140": Classification(
+        vtl="DS_r <- DS_1[calc Me_2 := sum(Me_1 + SC_1)];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1", "SC_1"],
+    ),
+    # --- Multi-statement ambiguity propagation ---
+    # Intermediate from mixed expr fed to aggregation: ambiguity propagates
+    "141": Classification(
+        vtl="DS_A := DS_1 + SC_1;\nDS_r := sum(DS_A);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # Intermediate from union fed to flow_to_stock with scalar
+    "142": Classification(
+        vtl="DS_A := union(DS_1, DS_2);\nDS_r := flow_to_stock(DS_A + SC_1);",
+        datasets=["DS_1", "DS_2"],
+        dataset_or_scalar=["SC_1"],
+    ),
+    # Intermediate from mixed expr fed to timeshift
+    "143": Classification(
+        vtl="DS_A := DS_1 + SC_1;\nDS_r := timeshift(DS_A, 1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # --- Nested dataset-only operators ---
+    # count(union(...)): inner union marks datasets, count wraps it
+    "144": Classification(
+        vtl="DS_r := count(union(DS_1, DS_2));",
+        datasets=["DS_1", "DS_2"],
+    ),
+    # union result combined with scalar in outer expression
+    "145": Classification(
+        vtl="DS_r := union(DS_1, DS_2) + SC_1;",
+        datasets=["DS_1", "DS_2"],
+        dataset_or_scalar=["SC_1"],
+    ),
+    # --- fill_time_series / period_indicator with expression ---
+    "146": Classification(
+        vtl="DS_r := fill_time_series(DS_1 + SC_1, all);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "147": Classification(
+        vtl="DS_r := period_indicator(DS_1 + SC_1);",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # --- setdiff / symdiff with expressions ---
+    "148": Classification(
+        vtl="DS_r := setdiff(DS_1 + SC_1, DS_2);",
+        datasets=["DS_2"],
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    "149": Classification(
+        vtl="DS_r := symdiff(DS_1 + SC_1, DS_2 + SC_2);",
+        dataset_or_scalar=["DS_1", "DS_2", "SC_1", "SC_2"],
+    ),
+    # --- check / validation with mixed sub-expressions ---
+    # check wrapping aggregation: sum forces DS_1 to dataset
+    "150": Classification(
+        vtl='DS_r := check(sum(DS_1) > 0 errorcode "E001" errorlevel 1);',
+        datasets=["DS_1"],
+    ),
+    # check with membership + scalar: DS_1 is dataset (membership), SC_1 ambiguous
+    "151": Classification(
+        vtl='DS_r := check(DS_1#Me_1 + SC_1 > 0 errorcode "E001" errorlevel 1);',
+        datasets=["DS_1"],
+        dataset_or_scalar=["SC_1"],
+    ),
+    # --- Clause operators with external scalar in expression ---
+    # filter with external scalar ref
+    "152": Classification(
+        vtl="DS_r <- DS_1[filter Me_1 + SC_1 > 0];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1", "SC_1"],
+    ),
+    # aggr with external scalar in aggregation body
+    "153": Classification(
+        vtl="DS_r := DS_1[aggr Me_1 := sum(Me_2 + SC_1) group by Id_1];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_2", "SC_1"],
+    ),
+    # --- Membership on expression result ---
+    "154": Classification(
+        vtl="DS_r := (DS_1 * SC_1)#Me_1;",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+}
+
+
+_SORTED_CODES = sorted(CASES, key=int)  # numeric order, so "10" follows "9"
+
+
+@pytest.mark.parametrize("test_code", _SORTED_CODES)
+def test_classification(test_code: str) -> None:
+    """Check the four-way global input classification computed for one case."""
+    case = CASES[test_code]
+    script = case.vtl
+    if script is None:  # no inline VTL: fall back to the script file for this code
+        with open(data_path / "vtl" / f"{test_code}.vtl", encoding="utf-8") as f:
+            script = f.read()
+
+    schedule = DAGAnalyzer.ds_structure(create_ast(script))
+
+    assert sorted(schedule.global_input_datasets) == case.datasets
+    assert sorted(schedule.global_input_scalars) == case.scalars
+    assert sorted(schedule.global_input_dataset_or_scalar) == case.dataset_or_scalar
+    assert sorted(schedule.global_input_component_or_scalar) == case.component_or_scalar
+    # global_inputs is the union of all four categories
+    all_classified = sorted(
+        case.datasets + case.scalars + case.dataset_or_scalar + case.component_or_scalar
+    )
+    assert sorted(schedule.global_inputs) == all_classified
diff --git a/tests/DAG/test_dag.py b/tests/DAG/test_dag.py
deleted file mode 100644
index 7edbeb54c..000000000
--- a/tests/DAG/test_dag.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import json
-from pathlib import Path
-from typing import List
-
-import pytest
-
-from vtlengine.API import create_ast
-from vtlengine.AST.DAG import DAGAnalyzer
-
-override = False
-data_path = Path(__file__).parent / "data"
-
-
-def _discover_tests(data_root: Path) -> List[str]:
-    return sorted(p.stem for p in (data_root / "vtl").iterdir() if p.is_file())
-
-
-def _normalize_ds_structure(ds_structure):
-    return json.loads(
-        json.dumps(
-            {
-                "insertion": {k: sorted(v) for k, v in ds_structure.insertion.items()},
-                "deletion": {k: sorted(v) for k, v in ds_structure.deletion.items()},
-                "global_inputs": sorted(ds_structure.global_inputs),
-                "global_input_datasets": sorted(ds_structure.global_input_datasets),
-                "global_input_scalars": sorted(ds_structure.global_input_scalars),
-                "global_input_dataset_or_scalar": sorted(
-                    ds_structure.global_input_dataset_or_scalar
-                ),
-                "global_input_component_or_scalar": sorted(
-                    ds_structure.global_input_component_or_scalar
-                ),
-                "persistent": sorted(ds_structure.persistent),
-            }
-        )
-    )
-
-
-tests = _discover_tests(data_path)
-
-
-@pytest.mark.parametrize("test_code", tests)
-def test_ds_structure(test_code):
-    with open(data_path / "vtl" / f"{test_code}.vtl") as f:
-        script = f.read()
-
-    ds_structures = DAGAnalyzer.ds_structure(create_ast(script))
-
-    if override:
-        with open(data_path / "references" / f"{test_code}.json", "w") as f:
-            json.dump(_normalize_ds_structure(ds_structures), f, indent=4)
-
-    with open(data_path / "references" / f"{test_code}.json") as f:
-        reference = json.load(f)
-
-    normalized_ds_structures = _normalize_ds_structure(ds_structures)
-    assert normalized_ds_structures == reference
diff --git a/tests/DAG/test_scheduling.py b/tests/DAG/test_scheduling.py
new file mode 100644
index 000000000..ba5552464
--- /dev/null
+++ b/tests/DAG/test_scheduling.py
@@ -0,0 +1,54 @@
+import json
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+from vtlengine.API import create_ast
+from vtlengine.AST.DAG import DAGAnalyzer
+
+# Flip to True to regenerate the reference JSON files from the current output.
+override = False
+data_path = Path(__file__).parent / "data"
+
+
+def _normalize_scheduling(schedule: Any) -> Dict[str, Any]:
+    """Return the scheduling fields of ``schedule`` in JSON-comparable form.
+
+    Name lists are sorted and the mapping is round-tripped through JSON so it
+    compares equal to a reference that was itself loaded from a JSON file.
+    """
+    return json.loads(
+        json.dumps(
+            {
+                "insertion": {k: sorted(v) for k, v in schedule.insertion.items()},
+                "deletion": {k: sorted(v) for k, v in schedule.deletion.items()},
+                "persistent": sorted(schedule.persistent),
+            }
+        )
+    )
+
+
+# Only keep tests with non-trivial scheduling (multiple insertion/deletion points).
+NONTRIVIAL_TESTS = ["2", "3", "5", "6", "7", "8", "9", "10", "11", "13", "16", "35", "36"]
+
+
+@pytest.mark.parametrize("test_code", NONTRIVIAL_TESTS)
+def test_scheduling(test_code: str) -> None:
+    """Compare the computed schedule of a VTL script with its stored reference."""
+    # Explicit UTF-8 so results do not depend on the platform's default encoding.
+    with open(data_path / "vtl" / f"{test_code}.vtl", encoding="utf-8") as f:
+        script = f.read()
+
+    schedule = DAGAnalyzer.ds_structure(create_ast(script))
+    ref_path = data_path / "references" / "scheduling" / f"{test_code}.json"
+
+    if override:
+        ref_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(ref_path, "w", encoding="utf-8") as f:
+            json.dump(_normalize_scheduling(schedule), f, indent=4)
+
+    with open(ref_path, encoding="utf-8") as f:
+        reference = json.load(f)
+
+    assert _normalize_scheduling(schedule) == reference

From d07869a84216c8e3fcb16cc0ac2b2db658bcd45b Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 18:46:31 +0100
Subject: [PATCH 10/11] feat: add eval visitor and scalar-constrained parameter
 classification

- Add visit_EvalOp to mark eval operands as dataset_inputs
- Classify scalar-only parameters: random index, round/trunc/substr/
  replace/instr params, between from/to bounds
- Add tests 155-171 covering duration conversions, time_agg, current_date,
  eval, random, and scalar-constrained params
---
 src/vtlengine/AST/DAG/__init__.py | 39 ++++++++++++++++
 tests/DAG/test_classification.py  | 74 +++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index 1d49a4ecf..8f2138cb1 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -24,6 +24,7 @@
     DefIdentifier,
     DPRuleset,
     DPValidation,
+    EvalOp,
     HROperation,
     HRuleset,
     Identifier,
@@ -44,20 +45,27 @@
 from vtlengine.AST.DAG._models import Schedule, StatementDeps
 from vtlengine.AST.Grammar.tokens import (
     AS,
+    BETWEEN,
     DROP,
     EXISTS_IN,
     FILL_TIME_SERIES,
     FLOW_TO_STOCK,
+    INSTR,
     INTERSECT,
     KEEP,
     MEMBERSHIP,
     PERIOD_INDICATOR,
+    RANDOM,
     RENAME,
+    REPLACE,
+    ROUND,
     SETDIFF,
     STOCK_TO_FLOW,
+    SUBSTR,
     SYMDIFF,
     TIMESHIFT,
     TO,
+    TRUNC,
     UNION,
 )
 from vtlengine.DataTypes import COMP_NAME_MAPPING
@@ -70,6 +78,15 @@
 DATASET_ONLY_UNARYOP = frozenset({FLOW_TO_STOCK, STOCK_TO_FLOW, PERIOD_INDICATOR})
 DATASET_ONLY_BINOP = frozenset({MEMBERSHIP, TIMESHIFT})
 
+# BinOp operators where the right operand must always be a scalar.
+SCALAR_RIGHT_BINOP = frozenset({RANDOM})
+
+# ParamOp operators where all params (not children) must be scalars.
+SCALAR_PARAMS_PARAMOP = frozenset({ROUND, TRUNC, SUBSTR, REPLACE, INSTR})
+
+# MulOp operators where children[1:] must be scalars (operand is children[0]).
+SCALAR_TAIL_MULOP = frozenset({BETWEEN})
+
 # Reserved component names generated by the engine (e.g., count() → int_var,
 # check() → bool_var). These are never external inputs.
 RESERVED_COMPONENT_NAMES = frozenset(COMP_NAME_MAPPING.values())
@@ -447,6 +464,11 @@ def visit_BinOp(self, node: BinOp) -> None:
             self.visit(node.left)
             self.is_dataset = False
             self.visit(node.right)
+        elif node.op in SCALAR_RIGHT_BINOP:
+            self.visit(node.left)
+            if isinstance(node.right, VarID) and node.right.value not in self.alias:
+                self.current_deps.scalar_inputs.append(node.right.value)
+            self.visit(node.right)
         elif node.op == AS or node.op == TO:
             self.visit(node.left)
             self.alias.add(node.right.value)
@@ -460,6 +482,11 @@ def visit_MulOp(self, node: MulOp) -> None:
             for child in node.children:
                 if isinstance(child, VarID) and child.value not in self.alias:
                     self.current_deps.dataset_inputs.append(child.value)
+        elif node.op in SCALAR_TAIL_MULOP:
+            # First child is the operand (dual), rest are scalar-only params.
+            for child in node.children[1:]:
+                if isinstance(child, VarID) and child.value not in self.alias:
+                    self.current_deps.scalar_inputs.append(child.value)
         for child in node.children:
             self.visit(child)
 
@@ -517,6 +544,10 @@ def visit_ParamOp(self, node: ParamOp) -> None:
                 for child in node.children:
                     if isinstance(child, VarID) and child.value not in self.alias:
                         self.current_deps.dataset_inputs.append(child.value)
+            elif node.op in SCALAR_PARAMS_PARAMOP:
+                for param in node.params:
+                    if isinstance(param, VarID) and param.value not in self.alias:
+                        self.current_deps.scalar_inputs.append(param.value)
             super(DAGAnalyzer, self).visit_ParamOp(node)
 
     def visit_Aggregation(self, node: Aggregation) -> None:
@@ -588,6 +619,14 @@ def visit_Validation(self, node: Validation) -> None:
         if node.imbalance is not None:
             self.visit(node.imbalance)
 
+    def visit_EvalOp(self, node: EvalOp) -> None:
+        """Eval operands are always datasets (external SQL routine inputs)."""
+        self._current_has_dataset_op = True  # flag the statement as a dataset operation
+        for operand in node.operands:
+            if isinstance(operand, VarID) and operand.value not in self.alias:  # skip aliases
+                self.current_deps.dataset_inputs.append(operand.value)
+            self.visit(operand)  # always traverse, also when the operand is not a bare VarID
+
 
 class HRDAGAnalyzer(DAGAnalyzer):
     def visit_HRuleset(self, node: HRuleset) -> None:
diff --git a/tests/DAG/test_classification.py b/tests/DAG/test_classification.py
index a31477db8..9f5c159ee 100644
--- a/tests/DAG/test_classification.py
+++ b/tests/DAG/test_classification.py
@@ -629,6 +629,80 @@ class Classification:
         vtl="DS_r := (DS_1 * SC_1)#Me_1;",
         dataset_or_scalar=["DS_1", "SC_1"],
     ),
+    # --- Duration conversion operators (dual) ---
+    "155": Classification(vtl="DS_r := daytoyear(DS_1);", dataset_or_scalar=["DS_1"]),
+    "156": Classification(vtl="DS_r := daytomonth(DS_1);", dataset_or_scalar=["DS_1"]),
+    "157": Classification(vtl="DS_r := yeartoday(DS_1);", dataset_or_scalar=["DS_1"]),
+    "158": Classification(vtl="DS_r := monthtoday(DS_1);", dataset_or_scalar=["DS_1"]),
+    # --- time_agg (dual) ---
+    "159": Classification(
+        vtl='DS_r := time_agg("A", DS_1);',
+        dataset_or_scalar=["DS_1"],
+    ),
+    # --- current_date (no inputs) ---
+    "160": Classification(vtl="SC_r := current_date();"),
+    # --- random (seed=dual, index=scalar) ---
+    "161": Classification(vtl="SC_r := random(42, 1);"),
+    "162": Classification(vtl="DS_r := random(DS_1, 1);", dataset_or_scalar=["DS_1"]),
+    "165": Classification(
+        vtl="DS_r := random(DS_1, SC_1);",
+        scalars=["SC_1"],
+        dataset_or_scalar=["DS_1"],
+    ),
+    # --- eval (dataset-only: external SQL routine) ---
+    "163": Classification(
+        vtl=(
+            "DS_r := eval(my_routine(DS_1)"
+            ' language "SQL"'
+            " returns dataset { identifier Id_1, measure Me_1 });"
+        ),
+        datasets=["DS_1"],
+    ),
+    "164": Classification(
+        vtl=(
+            "DS_r := eval(my_routine(DS_1, DS_2)"
+            ' language "SQL"'
+            " returns dataset { identifier Id_1, measure Me_1 });"
+        ),
+        datasets=["DS_1", "DS_2"],
+    ),
+    # --- Operators with scalar-constrained parameters ---
+    # round: param is always scalar
+    "166": Classification(
+        vtl="DS_r := round(DS_1, SC_1);",
+        scalars=["SC_1"],
+        dataset_or_scalar=["DS_1"],
+    ),
+    # trunc: param is always scalar
+    "167": Classification(
+        vtl="DS_r := trunc(DS_1, SC_1);",
+        scalars=["SC_1"],
+        dataset_or_scalar=["DS_1"],
+    ),
+    # substr: start and length are always scalars
+    "168": Classification(
+        vtl="DS_r := substr(DS_1, SC_1, SC_2);",
+        scalars=["SC_1", "SC_2"],
+        dataset_or_scalar=["DS_1"],
+    ),
+    # replace: pattern and replacement are always scalars
+    "169": Classification(
+        vtl="DS_r := replace(DS_1, SC_1, SC_2);",
+        scalars=["SC_1", "SC_2"],
+        dataset_or_scalar=["DS_1"],
+    ),
+    # instr: pattern is always scalar
+    "170": Classification(
+        vtl="DS_r := instr(DS_1, SC_1);",
+        scalars=["SC_1"],
+        dataset_or_scalar=["DS_1"],
+    ),
+    # between: from and to are always scalars
+    "171": Classification(
+        vtl="DS_r := between(DS_1, SC_1, SC_2);",
+        scalars=["SC_1", "SC_2"],
+        dataset_or_scalar=["DS_1"],
+    ),
 }
 
 

From 3d1c5a3c4c5667ad0a8fd74e6367f4ff0b95f335 Mon Sep 17 00:00:00 2001
From: Javier Hernandez <javier.hernandez@meaningfuldata.eu>
Date: Thu, 5 Mar 2026 19:01:47 +0100
Subject: [PATCH 11/11] fix: handle dual-context variables and add
 classification edge case tests

Fix bug where a variable appearing in both dataset_inputs and
scalar_inputs across statements was classified as dataset instead of
dataset_or_scalar. Add 15 edge case tests (172-186) covering scalar
chain propagation, unknown variable resolution, nested operators,
deeply nested expressions, and if-then-else with dataset-only branches.
---
 src/vtlengine/AST/DAG/__init__.py |  3 ++
 tests/DAG/test_classification.py  | 88 +++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/src/vtlengine/AST/DAG/__init__.py b/src/vtlengine/AST/DAG/__init__.py
index 8f2138cb1..37c7f5e56 100644
--- a/src/vtlengine/AST/DAG/__init__.py
+++ b/src/vtlengine/AST/DAG/__init__.py
@@ -211,6 +211,9 @@ def _classify_global_inputs(
         for name in global_inputs:
             if name in comp_or_scalar:
                 result["global_input_component_or_scalar"].append(name)
+            elif name in definite_dataset_inputs and name in definite_scalar_inputs:
+                # Used as dataset in one context and scalar in another → ambiguous
+                result["global_input_dataset_or_scalar"].append(name)
             elif name in definite_dataset_inputs:
                 result["global_input_datasets"].append(name)
             elif name in definite_scalar_inputs or self._feeds_only_scalar_chains(
diff --git a/tests/DAG/test_classification.py b/tests/DAG/test_classification.py
index 9f5c159ee..65c711843 100644
--- a/tests/DAG/test_classification.py
+++ b/tests/DAG/test_classification.py
@@ -703,6 +703,94 @@ class Classification:
         scalars=["SC_1", "SC_2"],
         dataset_or_scalar=["DS_1"],
     ),
+    # --- Group A: Classification logic edge cases ---
+    # A1: Variable in both dataset_inputs (union) and scalar_inputs (round param)
+    "172": Classification(
+        vtl="DS_r := union(DS_1, DS_2);\nDS_r2 := round(DS_3, DS_1);",
+        datasets=["DS_2"],
+        dataset_or_scalar=["DS_1", "DS_3"],
+    ),
+    # A2: Scalar chain from resolved-from-unknown variable
+    "173": Classification(
+        vtl="DS_r <- DS_1[calc Me_2 := Me_1 + SC_b];\nSC_b := SC_1 + 10;",
+        datasets=["DS_1"],
+        scalars=["SC_1"],
+        component_or_scalar=["Me_1"],
+    ),
+    # A3: Scalar chain broken by dataset-only operator
+    "174": Classification(
+        vtl="SC_a := SC_1 + SC_2;\nDS_r := union(SC_a, DS_1);",
+        datasets=["DS_1"],
+        dataset_or_scalar=["SC_1", "SC_2"],
+    ),
+    # A4: Same variable in ambiguous AND scalar chain contexts
+    "175": Classification(
+        vtl="DS_A := DS_1 + SC_1;\nSC_r := SC_1 * 2;",
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # A5: Multiple persistent assignments with shared variable
+    "176": Classification(
+        vtl="DS_r1 <- DS_1 + SC_1;\nDS_r2 <- DS_2 * SC_1;",
+        dataset_or_scalar=["DS_1", "DS_2", "SC_1"],
+    ),
+    # --- Group B: Component/scalar edge cases ---
+    # B1: Calc with multiple assignments referencing each other's components
+    "177": Classification(
+        vtl="DS_r <- DS_1[calc Me_2 := Me_1 + SC_1, Me_3 := sum(Me_2)];",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1", "Me_2", "SC_1"],
+    ),
+    # B2: Unknown variable not resolved (stays as component_or_scalar)
+    "178": Classification(
+        vtl="DS_A <- DS_1[calc Me_2 := Me_1 + X];\nDS_B := DS_A + 1;",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1", "X"],
+    ),
+    # B3: Unknown variable resolved by later output (not a global input)
+    "179": Classification(
+        vtl="DS_r <- DS_1[calc Me_2 := Me_1 + X];\nX := 10;",
+        datasets=["DS_1"],
+        component_or_scalar=["Me_1"],
+    ),
+    # --- Group C: Operator combination edge cases ---
+    # C1: Membership on union result
+    "180": Classification(
+        vtl="DS_r := (union(DS_1, DS_2))#Me_1;",
+        datasets=["DS_1", "DS_2"],
+    ),
+    # C2: Scalar-constrained param with expression operand
+    "181": Classification(
+        vtl="DS_r := round(DS_1 + SC_1, SC_2);",
+        scalars=["SC_2"],
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # C3: time_agg with expression operand
+    "182": Classification(
+        vtl='DS_r := time_agg("A", DS_1 + SC_1);',
+        dataset_or_scalar=["DS_1", "SC_1"],
+    ),
+    # C4: Check with two membership refs on same dataset
+    "183": Classification(
+        vtl='DS_r := check(DS_1#Me_1 + DS_1#Me_2 > 0 errorcode "E001" errorlevel 1 all);',
+        datasets=["DS_1"],
+    ),
+    # C5: Join with scalar-constrained param in calc
+    "184": Classification(
+        vtl="DS_r := inner_join(DS_1, DS_2 calc Me_2 := round(Me_1, SC_1));",
+        datasets=["DS_1", "DS_2"],
+        component_or_scalar=["Me_1", "SC_1"],
+    ),
+    # C6: Deeply nested dual operators (6 levels)
+    "185": Classification(
+        vtl="DS_r := abs(ceil(floor(exp(ln(sqrt(DS_1))))));",
+        dataset_or_scalar=["DS_1"],
+    ),
+    # C7: If-then-else with dataset-only operator in branch
+    "186": Classification(
+        vtl="DS_r := if DS_1 then union(DS_2, DS_3) else DS_4;",
+        datasets=["DS_2", "DS_3"],
+        dataset_or_scalar=["DS_1", "DS_4"],
+    ),
 }