From 068a797e789950f96902ab57e83207f81a7cd2cc Mon Sep 17 00:00:00 2001 From: BernardWez Date: Wed, 11 Feb 2026 16:27:02 +0100 Subject: [PATCH 1/3] Sync schema.selected with metadata in select() and deselect() --- elx/catalog.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/elx/catalog.py b/elx/catalog.py index 67fb1db..beb64f1 100644 --- a/elx/catalog.py +++ b/elx/catalog.py @@ -133,6 +133,13 @@ def deselect( }, ) + # If deselecting an entire stream (not a property), also update + # schema.selected to stay consistent with metadata. This is needed + # because singer-python's is_selected() short-circuits on + # schema.selected before checking metadata. + if not breadcrumb: + stream.stream_schema["selected"] = False + return catalog def select(self, streams: Optional[List[str]]) -> "Catalog": @@ -158,6 +165,11 @@ def select(self, streams: Optional[List[str]]) -> "Catalog": }, ) + # Also update schema.selected to stay consistent with metadata. + # This is needed because singer-python's is_selected() short-circuits + # on schema.selected before checking metadata. + stream.stream_schema["selected"] = is_selected + return catalog def set_replication_keys(self, replication_keys: Optional[dict]) -> "Catalog": From 143a5d50bd7aa2574dcad1a86c29f78c5eddd8ee Mon Sep 17 00:00:00 2001 From: BernardWez Date: Wed, 11 Feb 2026 16:27:11 +0100 Subject: [PATCH 2/3] Add tests for select() and deselect() schema updates in Catalog --- tests/test_elx/test_catalog.py | 112 +++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/tests/test_elx/test_catalog.py b/tests/test_elx/test_catalog.py index 9682635..43a3881 100644 --- a/tests/test_elx/test_catalog.py +++ b/tests/test_elx/test_catalog.py @@ -212,3 +212,115 @@ def test_catalog_add_nested_custom_property(tap: Tap): # Verify that the custom property is in the schema of the catalog assert "items" in tap.catalog.streams[1].stream_schema["properties"] + + +def test_select_updates_schema_selected(): + """ + When select() marks a stream as not selected, it should also set + schema.selected = False so that singer-python's is_selected() + (which short-circuits on schema.selected) respects the selection. + """ + catalog = Catalog( + **{ + "streams": [ + { + "tap_stream_id": "stream_a", + "key_properties": [], + "schema": {"type": "object", "properties": {}, "selected": True}, + "metadata": [], + }, + { + "tap_stream_id": "stream_b", + "key_properties": [], + "schema": {"type": "object", "properties": {}, "selected": True}, + "metadata": [], + }, + ] + } + ) + + # Select only stream_a + result = catalog.select(["stream_a"]) + + # stream_a should remain selected in both metadata and schema + assert result.streams[0].is_selected == True + assert result.streams[0].stream_schema.get("selected") == True + + # stream_b should be deselected in both metadata and schema + assert result.streams[1].is_selected == False + assert result.streams[1].stream_schema.get("selected") == False + + +def test_deselect_updates_schema_selected(): + """ + When deselect() marks an entire stream as not selected, it should also + set schema.selected = False in the schema dict. + """ + catalog = Catalog( + **{ + "streams": [ + { + "tap_stream_id": "my_stream", + "key_properties": [], + "schema": {"type": "object", "properties": {"col": {"type": "string"}}, "selected": True}, + "metadata": [], + }, + ] + } + ) + + result = catalog.deselect(["my_stream"]) + + # The stream should be deselected in both metadata and schema + assert result.streams[0].is_selected == False + assert result.streams[0].stream_schema.get("selected") == False + + +def test_deselect_property_does_not_change_schema_selected(): + """ + When deselect() targets a property (not the whole stream), it should NOT + change schema.selected on the stream itself. + """ + catalog = Catalog( + **{ + "streams": [ + { + "tap_stream_id": "my_stream", + "key_properties": [], + "schema": {"type": "object", "properties": {"col": {"type": "string"}}, "selected": True}, + "metadata": [], + }, + ] + } + ) + + result = catalog.deselect(["my_stream.col"]) + + # The stream-level schema.selected should remain True + assert result.streams[0].stream_schema.get("selected") == True + + # But the property metadata should be deselected + prop_meta = result.streams[0].find_metadata_by_breadcrumb(["properties", "col"]) + assert prop_meta["selected"] == False + + +def test_select_none_returns_unchanged_catalog(): + """ + select(None) should return the catalog unchanged, without touching + schema.selected. + """ + catalog = Catalog( + **{ + "streams": [ + { + "tap_stream_id": "stream_a", + "key_properties": [], + "schema": {"type": "object", "properties": {}, "selected": True}, + "metadata": [], + }, + ] + } + ) + + result = catalog.select(None) + assert result.streams[0].stream_schema.get("selected") == True From f46bcd42cfa8564f480297a11b3fa1747f8931ac Mon Sep 17 00:00:00 2001 From: BernardWez Date: Wed, 11 Feb 2026 17:19:00 +0100 Subject: [PATCH 3/3] Add JSON Schema version to DEFAULT_CATALOG in test_catalog.py --- tests/test_elx/test_catalog.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_elx/test_catalog.py b/tests/test_elx/test_catalog.py index 43a3881..1048281 100644 --- a/tests/test_elx/test_catalog.py +++ b/tests/test_elx/test_catalog.py @@ -12,6 +12,7 @@ "is_view": False, "table_name": None, "schema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", "properties": { "id": {"type": ["integer", "null"]}, "animal_name": {"type": ["string", "null"]}, @@ -51,6 +52,7 @@ "table_name": None, "key_properties": ["id"], "schema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", "properties": { "id": {"type": ["integer", "null"]}, "name": {"type": ["string", "null"]},