diff --git a/src/vtlengine/Operators/Set.py b/src/vtlengine/Operators/Set.py index 96007ea54..d9826ad88 100644 --- a/src/vtlengine/Operators/Set.py +++ b/src/vtlengine/Operators/Set.py @@ -149,18 +149,15 @@ def evaluate(cls, operands: List[Dataset]) -> Dataset: else: if data is None: data = pd.DataFrame(columns=result.get_identifiers_names()) - result.data = result.data.merge(data, how="left", on=result.get_identifiers_names()) - if len(result.data) > 0: - result.data = result.data[result.data.isnull().any(axis=1)] - - not_identifiers = result.get_measures_names() + result.get_attributes_names() - for col in not_identifiers: - if col + "_x" in result.data: - result.data[col] = result.data[col + "_x"] - del result.data[col + "_x"] - if col + "_y" in result.data: - del result.data[col + "_y"] - result.data = result.data[result.get_identifiers_names() + not_identifiers] + id_names = result.get_identifiers_names() + result.data = result.data.merge( + data[id_names].drop_duplicates(), + how="left", + on=id_names, + indicator=True, + ) + result.data = result.data[result.data["_merge"] == "left_only"] + result.data = result.data.drop(columns=["_merge"]) if result.data is not None: result.data.reset_index(drop=True, inplace=True) return result diff --git a/tests/Attributes/data/DataSet/input/GH_611-1.csv b/tests/Attributes/data/DataSet/input/GH_611-1.csv new file mode 100644 index 000000000..8711f4ce6 --- /dev/null +++ b/tests/Attributes/data/DataSet/input/GH_611-1.csv @@ -0,0 +1,4 @@ +Id_1,Me_1,At_1 +1,1,1 +2,2,2 +3,3,3 \ No newline at end of file diff --git a/tests/Attributes/data/DataSet/input/GH_611-2.csv b/tests/Attributes/data/DataSet/input/GH_611-2.csv new file mode 100644 index 000000000..08cbc4a29 --- /dev/null +++ b/tests/Attributes/data/DataSet/input/GH_611-2.csv @@ -0,0 +1,4 @@ +Id_1,Me_1 +3,3 +4,4 +5,5 \ No newline at end of file diff --git a/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv b/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv index 
f2023220c..2d651ba16 100644 --- a/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv +++ b/tests/Attributes/data/DataSet/output/8-4-1-8-1.csv @@ -1,5 +1,2 @@ Id_1,Id_2,Id_3,Id_4,Me_1,Me_2 2021,Belgium,Total,Total,10.0,10.0 -2021,Denmark,Total,Total,4.0,20.0 -2021,France,Total,Total,6.0,24.0 -2021,Spain,Total,Total,8.0,40.0 \ No newline at end of file diff --git a/tests/Attributes/data/DataSet/output/GH_611-1.csv b/tests/Attributes/data/DataSet/output/GH_611-1.csv new file mode 100644 index 000000000..f83efbe8c --- /dev/null +++ b/tests/Attributes/data/DataSet/output/GH_611-1.csv @@ -0,0 +1,3 @@ +Id_1,Me_1,At_1 +1,1,1 +2,2,2 \ No newline at end of file diff --git a/tests/Attributes/data/DataStructure/input/GH_611-1.json b/tests/Attributes/data/DataStructure/input/GH_611-1.json new file mode 100644 index 000000000..b0658051a --- /dev/null +++ b/tests/Attributes/data/DataStructure/input/GH_611-1.json @@ -0,0 +1,27 @@ +{ + "datasets": [ + { + "name": "DS_1", + "DataStructure": [ + { + "name": "Id_1", + "type": "Integer", + "role": "Identifier", + "nullable": false + }, + { + "name": "Me_1", + "type": "Number", + "role": "Measure", + "nullable": true + }, + { + "name": "At_1", + "type": "Number", + "role": "Measure", + "nullable": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/Attributes/data/DataStructure/input/GH_611-2.json b/tests/Attributes/data/DataStructure/input/GH_611-2.json new file mode 100644 index 000000000..0d729e96a --- /dev/null +++ b/tests/Attributes/data/DataStructure/input/GH_611-2.json @@ -0,0 +1,27 @@ +{ + "datasets": [ + { + "name": "DS_2", + "DataStructure": [ + { + "name": "Id_1", + "type": "Integer", + "role": "Identifier", + "nullable": false + }, + { + "name": "Me_1", + "type": "Number", + "role": "Measure", + "nullable": true + }, + { + "name": "At_1", + "type": "Number", + "role": "Measure", + "nullable": true + } + ] + } + ] +} \ No newline at end of file diff --git 
a/tests/Attributes/data/DataStructure/output/GH_611-1.json b/tests/Attributes/data/DataStructure/output/GH_611-1.json new file mode 100644 index 000000000..2a13788f5 --- /dev/null +++ b/tests/Attributes/data/DataStructure/output/GH_611-1.json @@ -0,0 +1,27 @@ +{ + "datasets": [ + { + "name": "DS_r", + "DataStructure": [ + { + "name": "Id_1", + "type": "Integer", + "role": "Identifier", + "nullable": false + }, + { + "name": "Me_1", + "type": "Number", + "role": "Measure", + "nullable": true + }, + { + "name": "At_1", + "type": "Number", + "role": "Measure", + "nullable": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/Attributes/data/vtl/GH_611.vtl b/tests/Attributes/data/vtl/GH_611.vtl new file mode 100644 index 000000000..78325106d --- /dev/null +++ b/tests/Attributes/data/vtl/GH_611.vtl @@ -0,0 +1 @@ +DS_r <- setdiff(DS_1, DS_2); \ No newline at end of file diff --git a/tests/Attributes/test_attributes.py b/tests/Attributes/test_attributes.py index 8946affc5..a4f947979 100644 --- a/tests/Attributes/test_attributes.py +++ b/tests/Attributes/test_attributes.py @@ -4205,6 +4205,21 @@ def test_10(self): self.BaseTest(code=code, number_inputs=number_inputs, references_names=references_names) + def test_GH_611(self): + """ + SET DIFFERENCE: setdiff + Dataset --> Dataset + Status: OK + Expression: DS_r := setdiff(DS_1,DS_2) + + Description: Check the operator is not returning rows with pre-existing null values as results + """ + code = "GH_611" + number_inputs = 2 + references_names = ["1"] + + self.BaseTest(code=code, number_inputs=number_inputs, references_names=references_names) + class ConditionalOperatorsTest(TestAttributesHelper): """