From 269f3c972b72b057c99ecf383b64647893859ea7 Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Tue, 13 Jan 2026 14:46:31 +0100 Subject: [PATCH 1/3] Calc changes --- .../vtl/engine/visitors/ClauseVisitor.java | 99 +++++++++++++++---- .../engine/visitors/ClauseVisitorTest.java | 73 +++++++++++--- 2 files changed, 139 insertions(+), 33 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index d8853822a..c38853080 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -179,45 +179,106 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext @Override public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) { - var expressions = new LinkedHashMap(); - var expressionStrings = new LinkedHashMap(); - var roles = new LinkedHashMap(); - var currentDatasetExpression = datasetExpression; - // TODO: Refactor so we call the executeCalc for each CalcClauseItemContext the same way we call - // the - // analytics functions. + // Dataset structure (ordered) and quick lookups + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Accumulators for non-analytic calc items + final LinkedHashMap expressions = new LinkedHashMap<>(); + final LinkedHashMap expressionStrings = new LinkedHashMap<>(); + final LinkedHashMap roles = new LinkedHashMap<>(); + + // Tracks duplicates in the same clause (target names) + final Set targetsSeen = new LinkedHashSet<>(); + + // We need a rolling dataset expression to chain analytics items + DatasetExpression currentDatasetExpression = datasetExpression; + + // TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do). for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) { - var columnName = getName(calcCtx.componentID()); - var columnRole = - calcCtx.componentRole() == null + + // ---- Resolve target name and desired role ---- + final String columnName = getName(calcCtx.componentID()); + final Dataset.Role columnRole = + (calcCtx.componentRole() == null) ? Dataset.Role.MEASURE : Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase()); - if ((calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) - && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() - instanceof VtlParser.AnalyticFunctionsContext) { - AnalyticsVisitor analyticsVisitor = + // If the target already exists in the dataset, check its role + final Dataset.Component existing = byName.get(columnName); + if (existing != null) { + // Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER). + if (existing.getRole() == Dataset.Role.IDENTIFIER) { + final String meta = + String.format( + "(role=%s, type=%s)", + existing.getRole(), existing.getType() != null ? existing.getType() : "n/a"); + throw new VtlRuntimeException( + new InvalidArgumentException( + // TODO: see if other cases are the same error (already defined in assignment for + // example). + String.format("CALC cannot overwrite IDENTIFIER '%s' %s.", columnName, meta), + fromContext(ctx))); + } + } + + // ---- Dispatch: analytics vs. regular calc ---- + final boolean isAnalytic = + (calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext) + && ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions() + instanceof VtlParser.AnalyticFunctionsContext; + + if (isAnalytic) { + // Analytics are executed immediately and update the rolling dataset expression + final AnalyticsVisitor analyticsVisitor = new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName); - VtlParser.FunctionsExpressionContext functionExprCtx = + final VtlParser.FunctionsExpressionContext functionExprCtx = (VtlParser.FunctionsExpressionContext) calcCtx.expr(); - VtlParser.AnalyticFunctionsContext anFuncCtx = + final VtlParser.AnalyticFunctionsContext anFuncCtx = (VtlParser.AnalyticFunctionsContext) functionExprCtx.functions(); + currentDatasetExpression = analyticsVisitor.visit(anFuncCtx); } else { - ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + // Regular calc expression – build resolvable expression and capture its source text + final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx); + + final String exprSource = getSource(calcCtx.expr()); + if (exprSource == null || exprSource.isEmpty()) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "empty or unavailable source expression for '%s' in CALC.", columnName), + fromContext(ctx))); + } + // Store in insertion order (deterministic column creation) expressions.put(columnName, calc); - expressionStrings.put(columnName, getSource(calcCtx.expr())); + expressionStrings.put(columnName, exprSource); roles.put(columnName, columnRole); } } + // ---- Consistency checks before execution ---- + if (!(expressions.keySet().equals(expressionStrings.keySet()) + && expressions.keySet().equals(roles.keySet()))) { + throw new VtlRuntimeException( + new InvalidArgumentException( + "internal CALC maps out of sync (expressions/expressionStrings/roles)", + fromContext(ctx))); + } + + // ---- Execute the batch calc if any non-analytic expressions were collected ---- if (!expressionStrings.isEmpty()) { currentDatasetExpression = processingEngine.executeCalc( currentDatasetExpression, expressions, roles, expressionStrings); } - return currentDatasetExpression; } diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index 4c06bb405..bd3be457e 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -125,8 +125,9 @@ public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptExceptio assertThat(unitComponent.getRole()).isEqualTo(Role.ATTRIBUTE); } + /** RENAME: duplicate "to" name inside the clause must raise a detailed script error. */ @Test - public void testRenameClause() throws ScriptException { + public void testRenameClause_duplicateToNameShouldFail() { InMemoryDataset dataset = new InMemoryDataset( List.of( @@ -136,23 +137,67 @@ public void testRenameClause() throws ScriptException { Map.of("name", String.class, "age", Long.class, "weight", Long.class), Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - ScriptContext context = engine.getContext(); - context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE); + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); - engine.eval("ds1 := ds[rename age to weight, weight to age, name to pseudo];"); + assertThatThrownBy( + () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("duplicate output column name in RENAME clause"); + } - assertThat(engine.getContext().getAttribute("ds1")).isInstanceOf(Dataset.class); - assertThat(((Dataset) engine.getContext().getAttribute("ds1")).getDataAsMap()) - .containsExactlyInAnyOrder( - Map.of("pseudo", "Hadrien", "weight", 10L, "age", 11L), - Map.of("pseudo", "Nico", "weight", 11L, "age", 10L), - Map.of("pseudo", "Franck", "weight", 12L, "age", 9L)); + /** RENAME: duplicate "from" name inside the clause must raise a detailed script error. */ + @Test + public void testRenameClause_duplicateFromNameShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); - assertThatThrownBy( - () -> engine.eval("ds2 := ds[rename age to weight, weight to age, name to age];")) + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to weight, age to weight2];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("duplicate source name in RENAME clause"); + } + + /** RENAME: "from" column must exist in dataset. */ + @Test + public void testRenameClause_fromColumnNotFoundShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename unknown to something];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("source column to rename not found: 'unknown'"); + } + + /** + * RENAME: target collides with an untouched existing column -> must error with details + * (role/type). + */ + @Test + public void testRenameClause_targetCollidesWithUntouchedShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy(() -> engine.eval("ds := ds1[rename name to age];")) .isInstanceOf(VtlScriptException.class) - .hasMessage("duplicate column: age") - .is(atPosition(0, 47, 58)); + .hasMessageContaining("target name 'age'") // main message + .hasMessageContaining("already exists in dataset and is not being renamed") + .hasMessageContaining("(role=MEASURE, type=class java.lang.Long)"); } @Test From 011dc74ac72a324ca058a6a89f6278556056e4d6 Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Tue, 13 Jan 2026 15:05:44 +0100 Subject: [PATCH 2/3] Rename Changes --- .../vtl/engine/visitors/ClauseVisitor.java | 90 +++++++++++++++++-- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index c38853080..3676e40a7 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -290,18 +290,98 @@ public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) { @Override public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { + + // Dataset structure in order + lookup maps + final List componentsInOrder = + new ArrayList<>(datasetExpression.getDataStructure().values()); + final Set availableColumns = + componentsInOrder.stream() + .map(Dataset.Component::getName) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + // Map for detailed error reporting (includes role/type if available) + final Map byName = + componentsInOrder.stream() + .collect( + Collectors.toMap( + Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new)); + + // Parse the RENAME clause and validate Map fromTo = new LinkedHashMap<>(); - Set renamed = new HashSet<>(); + Set toSeen = new LinkedHashSet<>(); + Set fromSeen = new LinkedHashSet<>(); + for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) { - var toNameString = getName(renameCtx.toName); - var fromNameString = getName(renameCtx.fromName); - if (!renamed.add(toNameString)) { + final String toNameString = getName(renameCtx.toName); + final String fromNameString = getName(renameCtx.fromName); + + // Validate: no duplicate "from" names inside the clause + if (!fromSeen.add(fromNameString)) { throw new VtlRuntimeException( new InvalidArgumentException( - "duplicate column: %s".formatted(toNameString), fromContext(renameCtx))); + String.format("Error: duplicate source name in RENAME clause: '%s", fromNameString), + fromContext(ctx))); } + + // Validate: "from" must exist in dataset + if (!availableColumns.contains(fromNameString)) { + Dataset.Component comp = byName.get(fromNameString); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "Error: source column to rename not found: '%s'%s", fromNameString, meta), + fromContext(ctx))); + } + + // Validate: no duplicate "to" names inside the clause + if (!toSeen.add(toNameString)) { + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "Error: duplicate output column name in RENAME clause: '%s.", fromNameString), + fromContext(ctx))); + } + fromTo.put(fromNameString, toNameString); } + + // Validate collisions with untouched dataset columns ("Untouched" = columns that are not + // being renamed) + final Set untouched = + availableColumns.stream() + .filter(c -> !fromTo.containsKey(c)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + for (Map.Entry e : fromTo.entrySet()) { + final String from = e.getKey(); + final String to = e.getValue(); + + // If target already exists as untouched, it would cause a collision + if (untouched.contains(to)) { + Dataset.Component comp = byName.get(to); + String meta = + (comp != null) + ? String.format( + " (role=%s, type=%s)", + comp.getRole(), comp.getType() != null ? comp.getType() : "n/a") + : ""; + + throw new VtlRuntimeException( + new InvalidArgumentException( + String.format( + "Error: target name '%s'%s already exists in dataset and is not being renamed.", + to, meta), + fromContext(ctx))); + } + } + + // Execute rename in processing engine return processingEngine.executeRename(datasetExpression, fromTo); } From 13d349354380b9927be6960fe815bc3164473bc5 Mon Sep 17 00:00:00 2001 From: Hadrien Kohl Date: Fri, 16 Jan 2026 12:46:47 +0100 Subject: [PATCH 3/3] Add more tests --- .../vtl/engine/visitors/ClauseVisitor.java | 4 +- .../engine/visitors/ClauseVisitorTest.java | 77 +++++++++++++++++-- 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java index 913801b25..b48bd736c 100644 --- a/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java +++ b/vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java @@ -306,7 +306,7 @@ public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { if (!fromSeen.add(fromNameString)) { throw new VtlRuntimeException( new InvalidArgumentException( - String.format("Error: duplicate source name in RENAME clause: '%s", fromNameString), + String.format("Error: duplicate source name in RENAME clause: '%s'", fromNameString), fromContext(ctx))); } @@ -331,7 +331,7 @@ public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) { throw new VtlRuntimeException( new InvalidArgumentException( String.format( - "Error: duplicate output column name in RENAME clause: '%s.", fromNameString), + "Error: duplicate output column name in RENAME clause: '%s'", fromNameString), fromContext(ctx))); } diff --git a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java index bfdba6985..35f93f81d 100644 --- a/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java +++ b/vtl-engine/src/test/java/fr/insee/vtl/engine/visitors/ClauseVisitorTest.java @@ -125,7 +125,69 @@ public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptExceptio assertThat(unitComponent.getRole()).isEqualTo(Role.ATTRIBUTE); } - /** RENAME: duplicate "to" name inside the clause must raise a detailed script error. */ + @Test + public void testRenameClause_unknownVariable() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L) + ), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy( + () -> engine.eval("ds := ds1[rename missing to foo];")) + .isInstanceOf(VtlScriptException.class) + .is(atPosition(0, 47, 58)) + .hasMessageContaining("Error: source column to rename not found: 'missing'"); + } + + @Test + public void testRenameClause_duplicateToNamesShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L) + ), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy( + () -> engine.eval("ds := ds1[rename age to dup, weight to dup];")) + .isInstanceOf(VtlScriptException.class) + .is(atPosition(0, 47, 58)) + .hasMessageContaining("Error: source column to rename not found: 'missing'"); + } + + @Test + public void testRenameClause_duplicateFromNamesShouldFail() { + InMemoryDataset dataset = + new InMemoryDataset( + List.of( + Map.of("name", "Hadrien", "age", 10L, "weight", 11L), + Map.of("name", "Nico", "age", 11L, "weight", 10L), + Map.of("name", "Franck", "age", 12L, "weight", 9L) + ), + Map.of("name", String.class, "age", Long.class, "weight", Long.class), + Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); + + engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE); + + assertThatThrownBy( + () -> engine.eval("ds := ds1[rename age to foo, age to bar];")) + .isInstanceOf(VtlScriptException.class) + .hasMessageContaining("Error: duplicate source name in RENAME clause: 'age'") + .is(atPosition(0, 47, 58)); + } + @Test public void testRenameClause_duplicateToNameShouldFail() { InMemoryDataset dataset = @@ -133,7 +195,8 @@ public void testRenameClause_duplicateToNameShouldFail() { List.of( Map.of("name", "Hadrien", "age", 10L, "weight", 11L), Map.of("name", "Nico", "age", 11L, "weight", 10L), - Map.of("name", "Franck", "age", 12L, "weight", 9L)), + Map.of("name", "Franck", "age", 12L, "weight", 9L) + ), Map.of("name", String.class, "age", Long.class, "weight", Long.class), Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE)); @@ -142,7 +205,8 @@ public void testRenameClause_duplicateToNameShouldFail() { assertThatThrownBy( () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];")) .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("duplicate output column name in RENAME clause"); + .is(atPosition(0, 47, 58)) + .hasMessageContaining("TODO: Improve: Error: duplicate output column name in RENAME clause: 'name'"); } /** RENAME: duplicate "from" name inside the clause must raise a detailed script error. */ @@ -160,7 +224,8 @@ public void testRenameClause_duplicateFromNameShouldFail() { assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to weight, age to weight2];")) .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("duplicate source name in RENAME clause"); + .is(atPosition(0, 0, 0, 0)) + .hasMessage("TODO: Improve: duplicate source name in RENAME clause"); } /** RENAME: "from" column must exist in dataset. */ @@ -176,7 +241,8 @@ public void testRenameClause_fromColumnNotFoundShouldFail() { assertThatThrownBy(() -> engine.eval("ds := ds1[rename unknown to something];")) .isInstanceOf(VtlScriptException.class) - .hasMessageContaining("source column to rename not found: 'unknown'"); + .is(atPosition(0, 0, 0, 0)) + .hasMessageContaining("TODO: Improve: source column to rename not found: 'unknown'"); } /** @@ -195,6 +261,7 @@ public void testRenameClause_targetCollidesWithUntouchedShouldFail() { assertThatThrownBy(() -> engine.eval("ds := ds1[rename name to age];")) .isInstanceOf(VtlScriptException.class) + .is(atPosition(0, 0, 0, 0)) .hasMessageContaining("target name 'age'") // main message .hasMessageContaining("already exists in dataset and is not being renamed") .hasMessageContaining("(role=MEASURE, type=class java.lang.Long)");