Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 165 additions & 24 deletions vtl-engine/src/main/java/fr/insee/vtl/engine/visitors/ClauseVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,45 +166,106 @@ public DatasetExpression visitKeepOrDropClause(VtlParser.KeepOrDropClauseContext
@Override
public DatasetExpression visitCalcClause(VtlParser.CalcClauseContext ctx) {

var expressions = new LinkedHashMap<String, ResolvableExpression>();
var expressionStrings = new LinkedHashMap<String, String>();
var roles = new LinkedHashMap<String, Dataset.Role>();
var currentDatasetExpression = datasetExpression;
// TODO: Refactor so we call the executeCalc for each CalcClauseItemContext the same way we call
// the
// analytics functions.
// Dataset structure (ordered) and quick lookups
final List<Dataset.Component> componentsInOrder =
new ArrayList<>(datasetExpression.getDataStructure().values());

final Map<String, Dataset.Component> byName =
componentsInOrder.stream()
.collect(
Collectors.toMap(
Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new));

// Accumulators for non-analytic calc items
final LinkedHashMap<String, ResolvableExpression> expressions = new LinkedHashMap<>();
final LinkedHashMap<String, String> expressionStrings = new LinkedHashMap<>();
final LinkedHashMap<String, Dataset.Role> roles = new LinkedHashMap<>();

// Tracks duplicates in the same clause (target names)
final Set<String> targetsSeen = new LinkedHashSet<>();

// We need a rolling dataset expression to chain analytics items
DatasetExpression currentDatasetExpression = datasetExpression;

// TODO: Refactor so we call executeCalc per CalcClauseItemContext (as analytics do).
for (VtlParser.CalcClauseItemContext calcCtx : ctx.calcClauseItem()) {
var columnName = getName(calcCtx.componentID());
var columnRole =
calcCtx.componentRole() == null

// ---- Resolve target name and desired role ----
final String columnName = getName(calcCtx.componentID());
final Dataset.Role columnRole =
(calcCtx.componentRole() == null)
? Dataset.Role.MEASURE
: Dataset.Role.valueOf(calcCtx.componentRole().getText().toUpperCase());

if ((calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext)
&& ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions()
instanceof VtlParser.AnalyticFunctionsContext) {
AnalyticsVisitor analyticsVisitor =
// If the target already exists in the dataset, check its role
final Dataset.Component existing = byName.get(columnName);
if (existing != null) {
// Explicitly block overwriting identifiers (already handled above if role==IDENTIFIER).
if (existing.getRole() == Dataset.Role.IDENTIFIER) {
final String meta =
String.format(
"(role=%s, type=%s)",
existing.getRole(), existing.getType() != null ? existing.getType() : "n/a");
throw new VtlRuntimeException(
new InvalidArgumentException(
// TODO: see if other cases are the same error (already defined in assignment for
// example).
String.format("CALC cannot overwrite IDENTIFIER '%s' %s.", columnName, meta),
fromContext(ctx)));
}
}

// ---- Dispatch: analytics vs. regular calc ----
final boolean isAnalytic =
(calcCtx.expr() instanceof VtlParser.FunctionsExpressionContext)
&& ((VtlParser.FunctionsExpressionContext) calcCtx.expr()).functions()
instanceof VtlParser.AnalyticFunctionsContext;

if (isAnalytic) {
// Analytics are executed immediately and update the rolling dataset expression
final AnalyticsVisitor analyticsVisitor =
new AnalyticsVisitor(processingEngine, currentDatasetExpression, columnName);
VtlParser.FunctionsExpressionContext functionExprCtx =
final VtlParser.FunctionsExpressionContext functionExprCtx =
(VtlParser.FunctionsExpressionContext) calcCtx.expr();
VtlParser.AnalyticFunctionsContext anFuncCtx =
final VtlParser.AnalyticFunctionsContext anFuncCtx =
(VtlParser.AnalyticFunctionsContext) functionExprCtx.functions();

currentDatasetExpression = analyticsVisitor.visit(anFuncCtx);
} else {
ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx);
// Regular calc expression – build resolvable expression and capture its source text
final ResolvableExpression calc = componentExpressionVisitor.visit(calcCtx);

final String exprSource = getSource(calcCtx.expr());
if (exprSource == null || exprSource.isEmpty()) {
throw new VtlRuntimeException(
new InvalidArgumentException(
String.format(
"empty or unavailable source expression for '%s' in CALC.", columnName),
fromContext(ctx)));
}

// Store in insertion order (deterministic column creation)
expressions.put(columnName, calc);
expressionStrings.put(columnName, getSource(calcCtx.expr()));
expressionStrings.put(columnName, exprSource);
roles.put(columnName, columnRole);
}
}

// ---- Consistency checks before execution ----
if (!(expressions.keySet().equals(expressionStrings.keySet())
&& expressions.keySet().equals(roles.keySet()))) {
throw new VtlRuntimeException(
new InvalidArgumentException(
"internal CALC maps out of sync (expressions/expressionStrings/roles)",
fromContext(ctx)));
}

// ---- Execute the batch calc if any non-analytic expressions were collected ----
if (!expressionStrings.isEmpty()) {
currentDatasetExpression =
processingEngine.executeCalc(
currentDatasetExpression, expressions, roles, expressionStrings);
}

return currentDatasetExpression;
}

Expand All @@ -216,18 +277,98 @@ public DatasetExpression visitFilterClause(VtlParser.FilterClauseContext ctx) {

@Override
public DatasetExpression visitRenameClause(VtlParser.RenameClauseContext ctx) {

// Dataset structure in order + lookup maps
final List<Dataset.Component> componentsInOrder =
new ArrayList<>(datasetExpression.getDataStructure().values());
final Set<String> availableColumns =
componentsInOrder.stream()
.map(Dataset.Component::getName)
.collect(Collectors.toCollection(LinkedHashSet::new));

// Map for detailed error reporting (includes role/type if available)
final Map<String, Dataset.Component> byName =
componentsInOrder.stream()
.collect(
Collectors.toMap(
Dataset.Component::getName, c -> c, (a, b) -> a, LinkedHashMap::new));

// Parse the RENAME clause and validate
Map<String, String> fromTo = new LinkedHashMap<>();
Set<String> renamed = new HashSet<>();
Set<String> toSeen = new LinkedHashSet<>();
Set<String> fromSeen = new LinkedHashSet<>();

for (VtlParser.RenameClauseItemContext renameCtx : ctx.renameClauseItem()) {
var toNameString = getName(renameCtx.toName);
var fromNameString = getName(renameCtx.fromName);
if (!renamed.add(toNameString)) {
final String toNameString = getName(renameCtx.toName);
final String fromNameString = getName(renameCtx.fromName);

// Validate: no duplicate "from" names inside the clause
if (!fromSeen.add(fromNameString)) {
throw new VtlRuntimeException(
new InvalidArgumentException(
String.format("Error: duplicate source name in RENAME clause: '%s'", fromNameString),
fromContext(ctx)));
}

// Validate: "from" must exist in dataset
if (!availableColumns.contains(fromNameString)) {
Dataset.Component comp = byName.get(fromNameString);
String meta =
(comp != null)
? String.format(
" (role=%s, type=%s)",
comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")
: "";
throw new VtlRuntimeException(
new InvalidArgumentException(
String.format(
"Error: source column to rename not found: '%s'%s", fromNameString, meta),
fromContext(ctx)));
}

// Validate: no duplicate "to" names inside the clause
if (!toSeen.add(toNameString)) {
throw new VtlRuntimeException(
new InvalidArgumentException(
"duplicate column: %s".formatted(toNameString), fromContext(renameCtx)));
String.format(
"Error: duplicate output column name in RENAME clause: '%s'", fromNameString),
fromContext(ctx)));
}

fromTo.put(fromNameString, toNameString);
}

// Validate collisions with untouched dataset columns ("Untouched" = columns that are not
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{a b c}[a -> z, b -> z] // Z already exist

{a b c}[b -> z, b -> y] // b already renamed (variable not found?)

{a b c}[a -> b] // ?

{a b c}[a -> z, b -> a] // Allowed?

// being renamed)
final Set<String> untouched =
availableColumns.stream()
.filter(c -> !fromTo.containsKey(c))
.collect(Collectors.toCollection(LinkedHashSet::new));

for (Map.Entry<String, String> e : fromTo.entrySet()) {
final String from = e.getKey();
final String to = e.getValue();

// If target already exists as untouched, it would cause a collision
if (untouched.contains(to)) {
Dataset.Component comp = byName.get(to);
String meta =
(comp != null)
? String.format(
" (role=%s, type=%s)",
comp.getRole(), comp.getType() != null ? comp.getType() : "n/a")
: "";

throw new VtlRuntimeException(
new InvalidArgumentException(
String.format(
"Error: target name '%s'%s already exists in dataset and is not being renamed.",
to, meta),
fromContext(ctx)));
}
}

// Execute rename in processing engine
return processingEngine.executeRename(datasetExpression, fromTo);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,35 +126,147 @@ public void testCalcRoleModifier_measuresAndAttributesOk() throws ScriptExceptio
}

@Test
public void testRenameClause() throws ScriptException {
public void testRenameClause_unknownVariable() {
InMemoryDataset dataset =
new InMemoryDataset(
List.of(
Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
Map.of("name", "Nico", "age", 11L, "weight", 10L),
Map.of("name", "Franck", "age", 12L, "weight", 9L)),
Map.of("name", "Franck", "age", 12L, "weight", 9L)
),
Map.of("name", String.class, "age", Long.class, "weight", Long.class),
Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

ScriptContext context = engine.getContext();
context.setAttribute("ds", dataset, ScriptContext.ENGINE_SCOPE);
engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

engine.eval("ds1 := ds[rename age to weight, weight to age, name to pseudo];");
assertThatThrownBy(
() -> engine.eval("ds := ds1[rename missing to foo];"))
.isInstanceOf(VtlScriptException.class)
.is(atPosition(0, 47, 58))
.hasMessageContaining("Error: source column to rename not found: 'missing'");
}

assertThat(engine.getContext().getAttribute("ds1")).isInstanceOf(Dataset.class);
assertThat(((Dataset) engine.getContext().getAttribute("ds1")).getDataAsMap())
.containsExactlyInAnyOrder(
Map.of("pseudo", "Hadrien", "weight", 10L, "age", 11L),
Map.of("pseudo", "Nico", "weight", 11L, "age", 10L),
Map.of("pseudo", "Franck", "weight", 12L, "age", 9L));
/** RENAME: two items targeting the same output name must raise a script error. */
@Test
public void testRenameClause_duplicateToNamesShouldFail() {
  InMemoryDataset dataset =
      new InMemoryDataset(
          List.of(
              Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
              Map.of("name", "Nico", "age", 11L, "weight", 10L),
              Map.of("name", "Franck", "age", 12L, "weight", 9L)),
          Map.of("name", String.class, "age", Long.class, "weight", Long.class),
          Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

  engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

  // The message must name the duplicated output column ('dup'), not a source column.
  // NOTE(review): position assumes the error is reported on the whole rename clause
  // ("rename age to dup, weight to dup" = columns 10..42) - confirm against fromContext.
  assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to dup, weight to dup];"))
      .isInstanceOf(VtlScriptException.class)
      .is(atPosition(0, 10, 42))
      .hasMessageContaining("Error: duplicate output column name in RENAME clause: 'dup'");
}

@Test
public void testRenameClause_duplicateFromNamesShouldFail() {
InMemoryDataset dataset =
new InMemoryDataset(
List.of(
Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
Map.of("name", "Nico", "age", 11L, "weight", 10L),
Map.of("name", "Franck", "age", 12L, "weight", 9L)
),
Map.of("name", String.class, "age", Long.class, "weight", Long.class),
Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

assertThatThrownBy(
() -> engine.eval("ds2 := ds[rename age to weight, weight to age, name to age];"))
() -> engine.eval("ds := ds1[rename age to foo, age to bar];"))
.isInstanceOf(VtlScriptException.class)
.hasMessage("duplicate column: age")
.hasMessageContaining("Error: duplicate source name in RENAME clause: 'age'")
.is(atPosition(0, 47, 58));
}

/**
 * RENAME: an output name already produced by an earlier item of the same clause must raise a
 * script error, even when that name is also a source column being renamed away.
 */
@Test
public void testRenameClause_duplicateToNameShouldFail() {
  InMemoryDataset dataset =
      new InMemoryDataset(
          List.of(
              Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
              Map.of("name", "Nico", "age", 11L, "weight", 10L),
              Map.of("name", "Franck", "age", 12L, "weight", 9L)),
          Map.of("name", String.class, "age", Long.class, "weight", Long.class),
          Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

  engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

  // 'age' is targeted by both "weight to age" and "name to age"; the message must name the
  // duplicated output column ('age'), not the source of the offending item ('name').
  // NOTE(review): position assumes the error is reported on the whole rename clause
  // (columns 10..58 of the script) - confirm against fromContext.
  assertThatThrownBy(
          () -> engine.eval("ds := ds1[rename age to weight, weight to age, name to age];"))
      .isInstanceOf(VtlScriptException.class)
      .is(atPosition(0, 10, 58))
      .hasMessageContaining("Error: duplicate output column name in RENAME clause: 'age'");
}

/** RENAME: duplicate "from" name inside the clause must raise a detailed script error. */
@Test
public void testRenameClause_duplicateFromNameShouldFail() {
  InMemoryDataset dataset =
      new InMemoryDataset(
          List.of(
              Map.of("name", "Hadrien", "age", 10L, "weight", 11L),
              Map.of("name", "Nico", "age", 11L, "weight", 10L)),
          Map.of("name", String.class, "age", Long.class, "weight", Long.class),
          Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

  engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

  // The duplicate-source check runs per item, before the collision check on "weight",
  // so the second "age" is what triggers the error.
  // NOTE(review): position assumes the error is reported on the whole rename clause
  // ("rename age to weight, age to weight2" = columns 10..46) - confirm against fromContext.
  assertThatThrownBy(() -> engine.eval("ds := ds1[rename age to weight, age to weight2];"))
      .isInstanceOf(VtlScriptException.class)
      .is(atPosition(0, 10, 46))
      .hasMessageContaining("Error: duplicate source name in RENAME clause: 'age'");
}

/** RENAME: "from" column must exist in dataset. */
@Test
public void testRenameClause_fromColumnNotFoundShouldFail() {
  InMemoryDataset dataset =
      new InMemoryDataset(
          List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)),
          Map.of("name", String.class, "age", Long.class, "weight", Long.class),
          Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

  engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

  // NOTE(review): position assumes the error is reported on the whole rename clause
  // ("rename unknown to something" = columns 10..37) - confirm against fromContext.
  assertThatThrownBy(() -> engine.eval("ds := ds1[rename unknown to something];"))
      .isInstanceOf(VtlScriptException.class)
      .is(atPosition(0, 10, 37))
      .hasMessageContaining("Error: source column to rename not found: 'unknown'");
}

/**
 * RENAME: target collides with an untouched existing column -> must error with details
 * (role/type).
 */
@Test
public void testRenameClause_targetCollidesWithUntouchedShouldFail() {
  InMemoryDataset dataset =
      new InMemoryDataset(
          List.of(Map.of("name", "Hadrien", "age", 10L, "weight", 11L)),
          Map.of("name", String.class, "age", Long.class, "weight", Long.class),
          Map.of("name", Role.IDENTIFIER, "age", Role.MEASURE, "weight", Role.MEASURE));

  engine.getContext().setAttribute("ds1", dataset, ScriptContext.ENGINE_SCOPE);

  // NOTE(review): position assumes the error is reported on the whole rename clause
  // ("rename name to age" = columns 10..28) - confirm against fromContext.
  assertThatThrownBy(() -> engine.eval("ds := ds1[rename name to age];"))
      .isInstanceOf(VtlScriptException.class)
      .is(atPosition(0, 10, 28))
      .hasMessageContaining("target name 'age'") // main message
      .hasMessageContaining("already exists in dataset and is not being renamed")
      .hasMessageContaining("(role=MEASURE, type=class java.lang.Long)");
}

@Test
public void testCalcClause() throws ScriptException {

Expand Down
Loading