diff --git a/src/pgslice/dumper/sql_generator.py b/src/pgslice/dumper/sql_generator.py index e222d60..30a1942 100644 --- a/src/pgslice/dumper/sql_generator.py +++ b/src/pgslice/dumper/sql_generator.py @@ -845,19 +845,46 @@ def _build_on_conflict_clause( auto_gen_pks: list[str], ) -> str: """ - Build ON CONFLICT clause for unique constraints. + Build ON CONFLICT clause for primary keys or unique constraints. Uses DO UPDATE with a no-op update to always get RETURNING values. This makes the SQL idempotent - reusing existing records instead of failing. + Priority order: + 1. Non-auto-generated primary keys (string PKs, UUIDs, manual IDs) + 2. Unique constraints (existing behavior) + 3. Empty string if all PKs are auto-generated and no unique constraints + Args: - table_meta: Table metadata with unique constraints + table_meta: Table metadata with primary keys and unique constraints insert_columns: Columns being inserted auto_gen_pks: Auto-generated PK columns (excluded from ON CONFLICT) Returns: - ON CONFLICT clause string, or empty string if no unique constraints + ON CONFLICT clause string, or empty string if no conflict target available """ + # PRIORITY 1: Check for non-auto-generated primary keys + # These are string PKs, UUIDs, or manually-set integer PKs + if table_meta.primary_keys: + # Get PKs that are NOT auto-generated + non_auto_gen_pks = [ + pk for pk in table_meta.primary_keys if pk not in auto_gen_pks + ] + + # Verify all PK columns are being inserted + if non_auto_gen_pks and all( + pk in insert_columns for pk in non_auto_gen_pks + ): + conflict_cols = ", ".join(f'"{pk}"' for pk in non_auto_gen_pks) + # Use first PK for no-op update + update_col = non_auto_gen_pks[0] + on_conflict = ( + f"ON CONFLICT ({conflict_cols}) " + f'DO UPDATE SET "{update_col}" = EXCLUDED."{update_col}"' + ) + return on_conflict + + # PRIORITY 2: Check for unique constraints (existing logic) unique_constraints = table_meta.unique_constraints # Filter out unique constraints that only contain auto-generated PKs @@ -1041,14 +1068,24 @@ def _generate_insert_with_fk_remapping( values_sql = ", ".join(values) values_rows.append(f" ({values_sql})") values_clause = ",\n".join(values_rows) - return "\n".join( - [ - f" INSERT INTO {full_table_name} ({columns_sql})", - " VALUES", - f"{values_clause};", - ] + + # Build ON CONFLICT clause for idempotency + table_meta = self.introspector.get_table_metadata(schema, table) + on_conflict = self._build_on_conflict_clause( + table_meta, columns, auto_gen_pks ) + sql_parts = [ + f" INSERT INTO {full_table_name} ({columns_sql})", + " VALUES", + f"{values_clause}", + ] + if on_conflict: + sql_parts.append(f" {on_conflict}") + sql_parts.append(";") + + return "\n".join(sql_parts) + # Build VALUES clause with old FK values as strings values_rows = [] old_pk_values = [] # Track old PK values for mapping (if has_auto_gen_pks) @@ -1215,8 +1252,19 @@ def _generate_insert_with_fk_remapping( ) return "\n".join(sql_lines) else: - # No auto-gen PKs: return simple INSERT-SELECT + # No auto-gen PKs: return simple INSERT-SELECT with ON CONFLICT + table_meta = self.introspector.get_table_metadata(schema, table) + + # Build ON CONFLICT clause for idempotency + on_conflict = self._build_on_conflict_clause( + table_meta, columns, auto_gen_pks + ) + sql_lines = base_sql_lines.copy() - # Add semicolon to the last line - sql_lines[-1] = sql_lines[-1] + ";" + # Remove trailing semicolon if present + sql_lines[-1] = sql_lines[-1].rstrip(";") + if on_conflict: + sql_lines.append(f" {on_conflict}") + # Add semicolon at the end + sql_lines.append(";") return "\n".join(sql_lines) diff --git a/tests/unit/dumper/test_sql_generator.py b/tests/unit/dumper/test_sql_generator.py index 323930b..775f36c 100644 --- a/tests/unit/dumper/test_sql_generator.py +++ b/tests/unit/dumper/test_sql_generator.py @@ -984,7 +984,7 @@ def test_build_on_conflict_with_unique_constraints( def test_build_on_conflict_without_constraints( self, mock_introspector: MagicMock ) -> None: - """Should return empty string when no unique constraints.""" + """Should build ON CONFLICT for non-auto-generated PK even without unique constraints.""" table = Table( schema_name="public", table_name="logs", @@ -995,6 +995,7 @@ def test_build_on_conflict_without_constraints( udt_name="int4", nullable=False, is_primary_key=True, + is_auto_generated=False, # Explicitly not auto-generated ), ], primary_keys=["id"], @@ -1008,8 +1009,313 @@ def test_build_on_conflict_without_constraints( result = generator._build_on_conflict_clause(table, ["id"], []) - # Should return empty string or None - assert result == "" or result is None + # Should build ON CONFLICT for non-auto-generated PK + assert "ON CONFLICT" in result + assert '("id")' in result + assert "DO UPDATE SET" in result + assert '"id" = EXCLUDED."id"' in result + + def test_build_on_conflict_with_string_pk( + self, mock_introspector: MagicMock + ) -> None: + """Should build ON CONFLICT clause for non-auto-generated string PK.""" + table = Table( + schema_name="public", + table_name="shipments_shipmentstate", + columns=[ + Column( + name="id", + data_type="character varying", + udt_name="varchar", + nullable=False, + is_primary_key=True, + is_auto_generated=False, # NOT auto-generated + ), + Column( + name="name", + data_type="text", + udt_name="text", + nullable=False, + ), + ], + primary_keys=["id"], + foreign_keys_outgoing=[], + foreign_keys_incoming=[], + unique_constraints={}, # No unique constraints + ) + + mock_introspector.get_table_metadata.return_value = table + generator = SQLGenerator(mock_introspector) + + # Test the helper method directly + result = generator._build_on_conflict_clause( + table, + ["id", "name"], + [], # No auto-gen PKs + ) + + # Should generate ON CONFLICT for the string PK + assert "ON CONFLICT" in result + assert '("id")' in result + assert "DO UPDATE SET" in result + assert '"id" = EXCLUDED."id"' in result + + def test_build_on_conflict_with_composite_string_pks( + self, mock_introspector: MagicMock + ) -> None: + """Should build ON CONFLICT clause for composite non-auto-generated PKs.""" + table = Table( + schema_name="public", + table_name="junction_table", + columns=[ + Column( + name="entity_a_id", + data_type="uuid", + udt_name="uuid", + nullable=False, + is_primary_key=True, + is_auto_generated=False, + ), + Column( + name="entity_b_id", + data_type="uuid", + udt_name="uuid", + nullable=False, + is_primary_key=True, + is_auto_generated=False, + ), + Column( + name="created_at", + data_type="timestamp", + udt_name="timestamp", + nullable=False, + ), + ], + primary_keys=["entity_a_id", "entity_b_id"], + foreign_keys_outgoing=[], + foreign_keys_incoming=[], + unique_constraints={}, + ) + + mock_introspector.get_table_metadata.return_value = table + generator = SQLGenerator(mock_introspector) + + result = generator._build_on_conflict_clause( + table, ["entity_a_id", "entity_b_id", "created_at"], [] + ) + + # Should generate ON CONFLICT with both PKs + assert "ON CONFLICT" in result + assert '("entity_a_id", "entity_b_id")' in result + assert "DO UPDATE SET" in result + # Should use first PK for no-op update + assert '"entity_a_id" = EXCLUDED."entity_a_id"' in result + + def test_build_on_conflict_with_mixed_pks( + self, mock_introspector: MagicMock + ) -> None: + """Should build ON CONFLICT for non-auto-generated PKs in composite key.""" + table = Table( + schema_name="public", + table_name="versioned_data", + columns=[ + Column( + name="id", + data_type="integer", + udt_name="int4", + nullable=False, + is_primary_key=True, + is_auto_generated=True, # Auto-generated + ), + Column( + name="version", + data_type="integer", + udt_name="int4", + nullable=False, + is_primary_key=True, + is_auto_generated=False, # Manually set + ), + Column( + name="data", + data_type="text", + udt_name="text", + nullable=False, + ), + ], + primary_keys=["id", "version"], + foreign_keys_outgoing=[], + foreign_keys_incoming=[], + unique_constraints={}, + ) + + mock_introspector.get_table_metadata.return_value = table + generator = SQLGenerator(mock_introspector) + + # Only "version" is being inserted (id is excluded as auto-gen) + result = generator._build_on_conflict_clause( + table, + ["version", "data"], + ["id"], # id is auto-gen + ) + + # Should generate ON CONFLICT for non-auto-gen PK only + assert "ON CONFLICT" in result + assert '("version")' in result + assert "DO UPDATE SET" in result + assert '"version" = EXCLUDED."version"' in result + + def test_build_on_conflict_all_auto_gen_pks_no_unique( + self, mock_introspector: MagicMock + ) -> None: + """Should return empty string when all PKs are auto-generated and no unique constraints.""" + table = Table( + schema_name="public", + table_name="products", + columns=[ + Column( + name="id", + data_type="integer", + udt_name="int4", + nullable=False, + is_primary_key=True, + is_auto_generated=True, + ), + Column( + name="name", + data_type="text", + udt_name="text", + nullable=False, + ), + ], + primary_keys=["id"], + foreign_keys_outgoing=[], + foreign_keys_incoming=[], + unique_constraints={}, + ) + + mock_introspector.get_table_metadata.return_value = table + generator = SQLGenerator(mock_introspector) + + result = generator._build_on_conflict_clause( + table, + ["name"], + ["id"], # id is auto-gen and excluded + ) + + # Should return empty string (no ON CONFLICT needed) + assert result == "" + + def test_build_on_conflict_string_pk_takes_priority_over_unique( + self, mock_introspector: MagicMock + ) -> None: + """Should use string PK for ON CONFLICT even when unique constraint exists.""" + table = Table( + schema_name="public", + table_name="users", + columns=[ + Column( + name="username", + data_type="text", + udt_name="text", + nullable=False, + is_primary_key=True, + is_auto_generated=False, + ), + Column( + name="email", + data_type="text", + udt_name="text", + nullable=False, + ), + ], + primary_keys=["username"], + foreign_keys_outgoing=[], + foreign_keys_incoming=[], + unique_constraints={"uq_email": ["email"]}, # Has unique constraint + ) + + mock_introspector.get_table_metadata.return_value = table + generator = SQLGenerator(mock_introspector) + + result = generator._build_on_conflict_clause(table, ["username", "email"], []) + + # Should use PK, not unique constraint + assert "ON CONFLICT" in result + assert '("username")' in result # PK, not email + assert "DO UPDATE SET" in result + assert '"username" = EXCLUDED."username"' in result + + def test_generate_batch_plpgsql_string_pk_idempotent( + self, mock_introspector: MagicMock + ) -> None: + """Should generate idempotent PL/pgSQL for string PK tables.""" + table = Table( + schema_name="public", + table_name="shipments_shipmentstate", + columns=[ + Column( + name="id", + data_type="character varying", + udt_name="varchar", + nullable=False, + is_primary_key=True, + is_auto_generated=False, + ), + Column( + name="name", + data_type="text", + udt_name="text", + nullable=False, + ), + ], + primary_keys=["id"], + foreign_keys_outgoing=[], + foreign_keys_incoming=[], + unique_constraints={}, + ) + + mock_introspector.get_table_metadata.return_value = table + + # Mock column types + mock_introspector.get_column_types.return_value = { + "id": "character varying", + "name": "text", + } + + generator = SQLGenerator(mock_introspector) + + records = [ + RecordData( + identifier=RecordIdentifier( + table_name="shipments_shipmentstate", + schema_name="public", + pk_values=("barge_departed",), + ), + data={"id": "barge_departed", "name": "Barge Departed"}, + dependencies=[], + ), + RecordData( + identifier=RecordIdentifier( + table_name="shipments_shipmentstate", + schema_name="public", + pk_values=("empty_outgated",), + ), + data={"id": "empty_outgated", "name": "Empty Outgated"}, + dependencies=[], + ), + ] + + # Generate with keep_pks=False (PL/pgSQL mode) + sql = generator.generate_batch(records, keep_pks=False) + + # Should have ON CONFLICT clause for idempotency + assert "ON CONFLICT" in sql + assert '("id")' in sql + assert "DO UPDATE SET" in sql + + # Verify PL/pgSQL structure is intact + assert "DO $$" in sql + assert "CREATE TEMP TABLE IF NOT EXISTS _pgslice_id_map" in sql class TestHelperMethods(TestSQLGenerator):