From fc2efa40c817bca24e4e497caf2594e5d670914c Mon Sep 17 00:00:00 2001
From: Felipe Alex Hofmann
Date: Tue, 17 Feb 2026 10:00:43 -0800
Subject: [PATCH 1/2] Update detection

---
 sdv/metadata/multi_table.py             |  29 ++++---
 tests/unit/metadata/test_multi_table.py | 101 ++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 9 deletions(-)

diff --git a/sdv/metadata/multi_table.py b/sdv/metadata/multi_table.py
index fdc7eb9fa..097dabe94 100644
--- a/sdv/metadata/multi_table.py
+++ b/sdv/metadata/multi_table.py
@@ -543,25 +543,36 @@ def _detect_foreign_keys_by_column_name(self, data):
         """
         for parent_candidate in self.tables.keys():
             primary_key = self.tables[parent_candidate].primary_key
+            if primary_key is None:
+                continue
+
+            pk_sdtype = self.tables[parent_candidate].columns[primary_key]['sdtype']
             for child_candidate in self.tables.keys() - {parent_candidate}:
                 child_meta = self.tables[child_candidate]
                 if primary_key in child_meta.columns.keys():
+                    original_fk_meta = deepcopy(child_meta.columns[primary_key])
+                    original_fk_sdtype = original_fk_meta['sdtype']
+                    if pk_sdtype != 'id' and original_fk_sdtype != pk_sdtype:
+                        continue
+
                     try:
-                        original_foreign_key_sdtype = child_meta.columns[primary_key]['sdtype']
-                        if original_foreign_key_sdtype != 'id':
+                        if pk_sdtype == 'id' and original_fk_sdtype != 'id':
                             self.update_column(
-                                table_name=child_candidate, column_name=primary_key, sdtype='id'
+                                table_name=child_candidate,
+                                column_name=primary_key,
+                                sdtype='id',
                             )
-
                         self.add_relationship(
                             parent_candidate, child_candidate, primary_key, primary_key
                         )
+
                     except InvalidMetadataError:
-                        self.update_column(
-                            table_name=child_candidate,
-                            column_name=primary_key,
-                            sdtype=original_foreign_key_sdtype,
-                        )
+                        if pk_sdtype == 'id' and original_fk_sdtype != 'id':
+                            self.update_column(
+                                table_name=child_candidate,
+                                column_name=primary_key,
+                                **original_fk_meta,
+                            )
                         continue
 
     def _detect_relationships(self, data=None, foreign_key_inference_algorithm='column_name_match'):
diff --git a/tests/unit/metadata/test_multi_table.py b/tests/unit/metadata/test_multi_table.py
index 22aa78473..78066587c 100644
--- a/tests/unit/metadata/test_multi_table.py
+++ b/tests/unit/metadata/test_multi_table.py
@@ -12,6 +12,7 @@
 
 from sdv.errors import InvalidDataError
 from sdv.metadata.errors import InvalidMetadataError
+from sdv.metadata.metadata import Metadata
 from sdv.metadata.multi_table import MultiTableMetadata, SingleTableMetadata
 from tests.utils import catch_sdv_logs, get_multi_table_data, get_multi_table_metadata
 
@@ -2624,6 +2625,106 @@ def test__detect_relationships(self):
         assert instance.relationships == expected_relationships
         assert instance.tables['sessions'].columns['user_id']['sdtype'] == 'id'
 
+    def test__detect_relationships_semantic_foreign_key(self):
+        """Test semantic foreign keys are automatically detected without changing the sdtype."""
+        # Setup
+        instance = Metadata.load_from_dict({
+            'tables': {
+                'parent': {
+                    'primary_key': 'email',
+                    'columns': {
+                        'email': {'sdtype': 'email'},
+                        'user_name': {'sdtype': 'categorical'},
+                    },
+                },
+                'child': {
+                    'primary_key': 'child_id',
+                    'columns': {
+                        'child_id': {'sdtype': 'id'},
+                        'email': {'sdtype': 'email', 'pii': True},
+                    },
+                },
+            },
+            'relationships': [],
+        })
+
+        # Run
+        instance._detect_relationships()
+
+        # Assert
+        expected_relationships = [
+            {
+                'parent_table_name': 'parent',
+                'child_table_name': 'child',
+                'parent_primary_key': 'email',
+                'child_foreign_key': 'email',
+            }
+        ]
+        assert instance.relationships == expected_relationships
+        assert instance.tables['child'].columns['email'] == {'sdtype': 'email', 'pii': True}
+
+    def test__detect_relationships_semantic_foreign_key_does_not_overwrite_mismatch(self):
+        """Test semantic foreign key mismatches do not coerce the child sdtype."""
+        # Setup
+        instance = Metadata.load_from_dict({
+            'tables': {
+                'parent': {
+                    'primary_key': 'email',
+                    'columns': {
+                        'email': {'sdtype': 'email'},
+                        'user_name': {'sdtype': 'categorical'},
+                    },
+                },
+                'child': {
+                    'primary_key': 'child_id',
+                    'columns': {
+                        'child_id': {'sdtype': 'id'},
+                        'email': {'sdtype': 'categorical'},
+                    },
+                },
+            },
+            'relationships': [],
+        })
+
+        # Run
+        instance._detect_relationships()
+
+        # Assert
+        assert instance.relationships == []
+        assert instance.tables['child'].columns['email']['sdtype'] == 'categorical'
+
+    def test__detect_relationships_restores_foreign_key_metadata_after_failure(self):
+        """Test failed detection restores all original metadata values in the child foreign key."""
+        # Setup
+        original_foreign_key_metadata = {'sdtype': 'email', 'pii': True}
+        instance = Metadata.load_from_dict({
+            'tables': {
+                'users': {
+                    'primary_key': 'user_id',
+                    'columns': {
+                        'user_id': {'sdtype': 'id'},
+                        'user_name': {'sdtype': 'categorical'},
+                    },
+                },
+                'sessions': {
+                    'primary_key': 'session_id',
+                    'columns': {
+                        'user_id': original_foreign_key_metadata.copy(),
+                        'session_id': {'sdtype': 'id'},
+                    },
+                },
+            },
+            'relationships': [],
+        })
+        instance.add_relationship = Mock(side_effect=InvalidMetadataError('bad relationship'))
+
+        # Run
+        instance._detect_relationships()
+
+        # Assert
+        instance.add_relationship.assert_called_once_with('users', 'sessions', 'user_id', 'user_id')
+        assert instance.tables['sessions'].columns['user_id'] == original_foreign_key_metadata
+
     def test__detect_relationships_circular(self):
         """Test that relationships that invalidate the metadata are not added."""
         # Setup

From db0b9e97948e6a4d6196eb6f305c06edfdeaf82c Mon Sep 17 00:00:00 2001
From: Felipe Alex Hofmann
Date: Tue, 17 Feb 2026 12:50:12 -0800
Subject: [PATCH 2/2] Update test

---
 tests/unit/metadata/test_multi_table.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/unit/metadata/test_multi_table.py b/tests/unit/metadata/test_multi_table.py
index 78066587c..943b5c547 100644
--- a/tests/unit/metadata/test_multi_table.py
+++ b/tests/unit/metadata/test_multi_table.py
@@ -2662,6 +2662,8 @@ def test__detect_relationships_semantic_foreign_key(self):
         ]
         assert instance.relationships == expected_relationships
         assert instance.tables['child'].columns['email'] == {'sdtype': 'email', 'pii': True}
+        assert instance.tables['parent'].columns['email'] == {'sdtype': 'email'}
+        assert instance.tables['parent'].primary_key == 'email'
 
     def test__detect_relationships_semantic_foreign_key_does_not_overwrite_mismatch(self):
         """Test semantic foreign key mismatches do not coerce the child sdtype."""
@@ -2691,7 +2693,9 @@ def test__detect_relationships_semantic_foreign_key_does_not_overwrite_mismatch(
 
         # Assert
         assert instance.relationships == []
-        assert instance.tables['child'].columns['email']['sdtype'] == 'categorical'
+        assert instance.tables['child'].columns['email'] == {'sdtype': 'categorical'}
+        assert instance.tables['parent'].columns['email'] == {'sdtype': 'email'}
+        assert instance.tables['parent'].primary_key == 'email'
 
     def test__detect_relationships_restores_foreign_key_metadata_after_failure(self):
         """Test failed detection restores all original metadata values in the child foreign key."""