From f70b728bc194c28fbc7f01c0bd2a1f33ae24b7c5 Mon Sep 17 00:00:00 2001 From: RushT007 Date: Wed, 9 Feb 2022 14:28:09 +0000 Subject: [PATCH 1/3] updated tap intercom discovery test --- tests/base.py | 3 +-- tests/test_intercom_discovery.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/tests/base.py b/tests/base.py index e6eb82d..861cc61 100644 --- a/tests/base.py +++ b/tests/base.py @@ -193,11 +193,10 @@ def run_and_verify_check_mode(self, conn_id): menagerie.verify_check_exit_status(self, exit_status, check_job_name) found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) + # self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) found_catalog_names = set(map(lambda c: c['stream_name'], found_catalogs)) - self.assertSetEqual(self.expected_streams(), found_catalog_names, msg="discovered schemas do not match") print("discovered schemas are OK") return found_catalogs diff --git a/tests/test_intercom_discovery.py b/tests/test_intercom_discovery.py index 08aef81..a90613e 100644 --- a/tests/test_intercom_discovery.py +++ b/tests/test_intercom_discovery.py @@ -33,6 +33,7 @@ def test_run(self): conn_id = connections.ensure_connection(self) found_catalogs = self.run_and_verify_check_mode(conn_id) + self.assertGreater(len(found_catalogs), 0, msg="No catalogs found") # Verify stream names follow naming convention # streams should only have lowercase alphas and underscores @@ -40,6 +41,20 @@ def test_run(self): self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), msg="One or more streams don't follow standard naming") + # Verify number of actual streams discovered match expected + self.assertEqual( + len(streams_to_test), + len(found_catalog_names), + msg="Number of actual streams ({0}) doesn't match with expected number of streams ({1}).)".format(len(found_catalog_names), len(streams_to_test)) + ) + + # Verify the stream names discovered were what we expect + self.assertSetEqual( + set(streams_to_test), + set(found_catalog_names), + msg="Stream names doesn't match with the expectation." + ) + for stream in streams_to_test: with self.subTest(stream=stream): @@ -66,10 +81,23 @@ def test_run(self): if item.get("metadata").get("inclusion") == "automatic" ) + # Get replication keys from metadata + actual_replication_keys = set( + stream_properties[0].get( + "metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS, []) + ) + ########################################################################## ### metadata assertions ########################################################################## + # verify replication key(s) + self.assertSetEqual( + actual_replication_keys, + expected_replication_keys, + msg=r"Replication keys don't match with expectation." + ) + # verify there is only 1 top level breadcrumb in metadata self.assertTrue(len(stream_properties) == 1, msg="There is NOT only one top level breadcrumb for {}".format(stream) + \ From d38eee7e65c767e88aac81c879e3a38a5f70c8d9 Mon Sep 17 00:00:00 2001 From: Rushikesh Todkar Date: Fri, 11 Feb 2022 20:27:00 +0530 Subject: [PATCH 2/3] Added test for intercom tap for all fields selection scenario --- tests/test_intercom_all_fields.py | 87 +++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 tests/test_intercom_all_fields.py diff --git a/tests/test_intercom_all_fields.py b/tests/test_intercom_all_fields.py new file mode 100644 index 0000000..f64c898 --- /dev/null +++ b/tests/test_intercom_all_fields.py @@ -0,0 +1,87 @@ +import os + +from tap_tester import runner, connections, menagerie + +from base import IntercomBaseTest + + +class AllFields(IntercomBaseTest): + """Test that with all fields selected for a stream automatic and available fields are replicated""" + + @staticmethod + def name(): + return "tap_tester_intercom_all_fields" + + def test_run(self): + """ + Ensure running the tap with all streams and fields selected results in the + replication of all fields. + - Verify no unexpected streams were replicated + - Verify that more than just the automatic fields are replicated for each stream. + """ + + expected_streams = self.expected_streams() + + # instantiate connection + conn_id = connections.ensure_connection(self) + + # run check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # table and field selection + test_catalogs_all_fields = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in expected_streams] + self.perform_and_verify_table_and_field_selection( + conn_id, test_catalogs_all_fields, select_all_fields=True, + ) + + # grab metadata after performing table-and-field selection to set expectations + stream_to_all_catalog_fields = dict() # used for asserting all fields are replicated + for catalog in test_catalogs_all_fields: + stream_id, stream_name = catalog['stream_id'], catalog['stream_name'] + catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) + fields_from_field_level_md = [md_entry['breadcrumb'][1] + for md_entry in catalog_entry['metadata'] + if md_entry['breadcrumb'] != []] + stream_to_all_catalog_fields[stream_name] = set(fields_from_field_level_md) + + # run initial sync + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + # Verify no unexpected streams were replicated + synced_stream_names = set(synced_records.keys()) + + # Rushikesh: how to get source details to verify why other streams didn't sync + with self.subTest(stream="Validate expected streams are replicated"): + self.LOGGER.info("(Expected streams) ==> " + str(expected_streams)) + self.assertSetEqual(expected_streams, synced_stream_names) + + # for stream in expected_streams: + for stream in synced_stream_names: + with self.subTest(stream=stream): + # expected values + expected_automatic_keys = self.expected_automatic_fields().get(stream) + + # get all expected keys + expected_all_keys = stream_to_all_catalog_fields[stream] + + # collect actual values + messages = synced_records.get(stream) + actual_all_keys = [set(message['data'].keys()) for message in messages['messages'] + if message['action'] == 'upsert'][0] + + self.LOGGER.info(stream + "(Expected keys) ==> " + str(expected_all_keys)) + self.LOGGER.info(stream + "(Expected automatic keys) ==> " + str(expected_automatic_keys)) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream.get(stream, -1), 0) + + # verify all fields for a stream were replicated + self.assertEqual(len(expected_automatic_keys), len(expected_automatic_keys)) + + self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), + msg=f'{expected_automatic_keys - expected_all_keys} is not in "expected_all_keys"') + + + self.assertSetEqual(expected_all_keys, actual_all_keys) From d4ad6b7797a1a6185623d4ea204f4ba4b59cae74 Mon Sep 17 00:00:00 2001 From: Rushikesh Todkar Date: Mon, 14 Feb 2022 19:49:57 +0530 Subject: [PATCH 3/3] Fixed the actual keys collection logic --- tests/test_intercom_all_fields.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_intercom_all_fields.py b/tests/test_intercom_all_fields.py index f64c898..75d2bc9 100644 --- a/tests/test_intercom_all_fields.py +++ b/tests/test_intercom_all_fields.py @@ -68,8 +68,9 @@ def test_run(self): # collect actual values messages = synced_records.get(stream) - actual_all_keys = [set(message['data'].keys()) for message in messages['messages'] - if message['action'] == 'upsert'][0] + actual_all_keys = set().union( + *[set(message['data'].keys()) for message in messages['messages'] if message['action'] == 'upsert'] + ) self.LOGGER.info(stream + "(Expected keys) ==> " + str(expected_all_keys)) self.LOGGER.info(stream + "(Expected automatic keys) ==> " + str(expected_automatic_keys)) @@ -83,5 +84,4 @@ def test_run(self): self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys - expected_all_keys} is not in "expected_all_keys"') - self.assertSetEqual(expected_all_keys, actual_all_keys)