From 6b9692165a79b2f58ebda394d035656e70c511a0 Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 19 Mar 2026 18:09:09 +0100 Subject: [PATCH 1/3] Fixed SetDiff operator taking rows with pre-existing null values as results --- src/vtlengine/Operators/Set.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/vtlengine/Operators/Set.py b/src/vtlengine/Operators/Set.py index 96007ea54..d9826ad88 100644 --- a/src/vtlengine/Operators/Set.py +++ b/src/vtlengine/Operators/Set.py @@ -149,18 +149,15 @@ def evaluate(cls, operands: List[Dataset]) -> Dataset: else: if data is None: data = pd.DataFrame(columns=result.get_identifiers_names()) - result.data = result.data.merge(data, how="left", on=result.get_identifiers_names()) - if len(result.data) > 0: - result.data = result.data[result.data.isnull().any(axis=1)] - - not_identifiers = result.get_measures_names() + result.get_attributes_names() - for col in not_identifiers: - if col + "_x" in result.data: - result.data[col] = result.data[col + "_x"] - del result.data[col + "_x"] - if col + "_y" in result.data: - del result.data[col + "_y"] - result.data = result.data[result.get_identifiers_names() + not_identifiers] + id_names = result.get_identifiers_names() + result.data = result.data.merge( + data[id_names].drop_duplicates(), + how="left", + on=id_names, + indicator=True, + ) + result.data = result.data[result.data["_merge"] == "left_only"] + result.data = result.data.drop(columns=["_merge"]) if result.data is not None: result.data.reset_index(drop=True, inplace=True) return result From 12b2e4a81a86b4fa449180e0fa3354df776f5e2e Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 19 Mar 2026 18:16:13 +0100 Subject: [PATCH 2/3] Fixed related test references --- tests/Attributes/data/DataSet/output/8-4-1-8-1.csv | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv b/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv index f2023220c..2d651ba16 
100644 --- a/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv +++ b/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv @@ -1,5 +1,2 @@ Id_1,Id_2,Id_3,Id_4,Me_1,Me_2 2021,Belgium,Total,Total,10.0,10.0 -2021,Denmark,Total,Total,4.0,20.0 -2021,France,Total,Total,6.0,24.0 -2021,Spain,Total,Total,8.0,40.0 \ No newline at end of file From 77dc616b2a74c8ecfe3f86a6e73d7d126882f37a Mon Sep 17 00:00:00 2001 From: Mateo Date: Thu, 19 Mar 2026 18:16:33 +0100 Subject: [PATCH 3/3] Added related test --- .../data/DataSet/input/GH_611-1.csv | 4 +++ .../data/DataSet/input/GH_611-2.csv | 4 +++ .../data/DataSet/output/GH_611-1.csv | 3 +++ .../data/DataStructure/input/GH_611-1.json | 27 +++++++++++++++++++ .../data/DataStructure/input/GH_611-2.json | 27 +++++++++++++++++++ .../data/DataStructure/output/GH_611-1.json | 27 +++++++++++++++++++ tests/Attributes/data/vtl/GH_611.vtl | 1 + tests/Attributes/test_attributes.py | 15 +++++++++++ 8 files changed, 108 insertions(+) create mode 100644 tests/Attributes/data/DataSet/input/GH_611-1.csv create mode 100644 tests/Attributes/data/DataSet/input/GH_611-2.csv create mode 100644 tests/Attributes/data/DataSet/output/GH_611-1.csv create mode 100644 tests/Attributes/data/DataStructure/input/GH_611-1.json create mode 100644 tests/Attributes/data/DataStructure/input/GH_611-2.json create mode 100644 tests/Attributes/data/DataStructure/output/GH_611-1.json create mode 100644 tests/Attributes/data/vtl/GH_611.vtl diff --git a/tests/Attributes/data/DataSet/input/GH_611-1.csv b/tests/Attributes/data/DataSet/input/GH_611-1.csv new file mode 100644 index 000000000..8711f4ce6 --- /dev/null +++ b/tests/Attributes/data/DataSet/input/GH_611-1.csv @@ -0,0 +1,4 @@ +Id_1,Me_1,At_1 +1,1,1 +2,2,2 +3,3,3 \ No newline at end of file diff --git a/tests/Attributes/data/DataSet/input/GH_611-2.csv b/tests/Attributes/data/DataSet/input/GH_611-2.csv new file mode 100644 index 000000000..08cbc4a29 --- /dev/null +++ b/tests/Attributes/data/DataSet/input/GH_611-2.csv @@ 
-0,0 +1,4 @@ +Id_1,Me_1 +3,3 +4,4 +5,5 \ No newline at end of file diff --git a/tests/Attributes/data/DataSet/output/GH_611-1.csv b/tests/Attributes/data/DataSet/output/GH_611-1.csv new file mode 100644 index 000000000..f83efbe8c --- /dev/null +++ b/tests/Attributes/data/DataSet/output/GH_611-1.csv @@ -0,0 +1,3 @@ +Id_1,Me_1,At_1 +1,1,1 +2,2,2 \ No newline at end of file diff --git a/tests/Attributes/data/DataStructure/input/GH_611-1.json b/tests/Attributes/data/DataStructure/input/GH_611-1.json new file mode 100644 index 000000000..b0658051a --- /dev/null +++ b/tests/Attributes/data/DataStructure/input/GH_611-1.json @@ -0,0 +1,27 @@ +{ + "datasets": [ + { + "name": "DS_1", + "DataStructure": [ + { + "name": "Id_1", + "type": "Integer", + "role": "Identifier", + "nullable": false + }, + { + "name": "Me_1", + "type": "Number", + "role": "Measure", + "nullable": true + }, + { + "name": "At_1", + "type": "Number", + "role": "Measure", + "nullable": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/Attributes/data/DataStructure/input/GH_611-2.json b/tests/Attributes/data/DataStructure/input/GH_611-2.json new file mode 100644 index 000000000..0d729e96a --- /dev/null +++ b/tests/Attributes/data/DataStructure/input/GH_611-2.json @@ -0,0 +1,27 @@ +{ + "datasets": [ + { + "name": "DS_2", + "DataStructure": [ + { + "name": "Id_1", + "type": "Integer", + "role": "Identifier", + "nullable": false + }, + { + "name": "Me_1", + "type": "Number", + "role": "Measure", + "nullable": true + }, + { + "name": "At_1", + "type": "Number", + "role": "Measure", + "nullable": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/Attributes/data/DataStructure/output/GH_611-1.json b/tests/Attributes/data/DataStructure/output/GH_611-1.json new file mode 100644 index 000000000..2a13788f5 --- /dev/null +++ b/tests/Attributes/data/DataStructure/output/GH_611-1.json @@ -0,0 +1,27 @@ +{ + "datasets": [ + { + "name": "DS_r", + "DataStructure": [ + { + "name": 
"Id_1", + "type": "Integer", + "role": "Identifier", + "nullable": false + }, + { + "name": "Me_1", + "type": "Number", + "role": "Measure", + "nullable": true + }, + { + "name": "At_1", + "type": "Number", + "role": "Measure", + "nullable": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/Attributes/data/vtl/GH_611.vtl b/tests/Attributes/data/vtl/GH_611.vtl new file mode 100644 index 000000000..78325106d --- /dev/null +++ b/tests/Attributes/data/vtl/GH_611.vtl @@ -0,0 +1 @@ +DS_r <- setdiff(DS_1, DS_2); \ No newline at end of file diff --git a/tests/Attributes/test_attributes.py b/tests/Attributes/test_attributes.py index 8946affc5..a4f947979 100644 --- a/tests/Attributes/test_attributes.py +++ b/tests/Attributes/test_attributes.py @@ -4205,6 +4205,21 @@ def test_10(self): self.BaseTest(code=code, number_inputs=number_inputs, references_names=references_names) + def test_GH_611(self): + """ + SET DIFFERENCE: setdiff + Dataset --> Dataset + Status: OK + Expression: DS_r := setdiff(DS_1,DS_2) + + Description: Check that the operator is not returning rows with pre-existing null values as results + """ + code = "GH_611" + number_inputs = 2 + references_names = ["1"] + + self.BaseTest(code=code, number_inputs=number_inputs, references_names=references_names) + class ConditionalOperatorsTest(TestAttributesHelper): """