From 300531cbf676702b250c645ad9431af4008d30bc Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 13 Feb 2026 02:19:41 +0000 Subject: [PATCH 1/4] first draft of changes --- sdks/python/apache_beam/yaml/json_utils.py | 17 +++--- .../apache_beam/yaml/json_utils_test.py | 56 +++++++++++++++++++ 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 832651a477dd..0dc1bef65348 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -63,7 +63,7 @@ def maybe_nullable(beam_type, nullable): json_type = json_schema.get('type', None) if json_type != 'object': - raise ValueError('Expected object type, got {json_type}.') + raise ValueError(f'Expected object type, got {json_type}.') if 'properties' not in json_schema: # Technically this is a valid (vacuous) schema, but as it's not generally # meaningful, throw an informative error instead. @@ -314,24 +314,23 @@ def _validate_compatible(weak_schema, strong_schema): return if weak_schema['type'] != strong_schema['type']: raise ValueError( - 'Incompatible types: %r vs %r' % - (weak_schema['type'] != strong_schema['type'])) + f"Incompatible types: {weak_schema['type']} vs {strong_schema['type']}") if weak_schema['type'] == 'array': _validate_compatible(weak_schema['items'], strong_schema['items']) - elif weak_schema == 'object': + elif weak_schema['type'] == 'object': for required in strong_schema.get('required', []): if required not in weak_schema['properties']: - raise ValueError('Missing or unkown property %r' % required) - for name, spec in weak_schema.get('properties', {}): + raise ValueError(f"Missing or unknown property '{required}'") + for name, spec in weak_schema.get('properties', {}).items(): if name in strong_schema['properties']: try: _validate_compatible(spec, strong_schema['properties'][name]) except Exception as exn: - raise ValueError('Incompatible schema for %r' % name) from exn + raise ValueError(f"Incompatible schema for '{name}'") from exn elif not strong_schema.get('additionalProperties'): raise ValueError( - 'Prohibited property: {property}; ' - 'perhaps additionalProperties: False is missing?') + f"Prohibited property: '{name}'; " + "perhaps additionalProperties: False is missing?") def row_validator(beam_schema: schema_pb2.Schema, diff --git a/sdks/python/apache_beam/yaml/json_utils_test.py b/sdks/python/apache_beam/yaml/json_utils_test.py index 3d4d5ea3dd41..a7f66c5a2666 100644 --- a/sdks/python/apache_beam/yaml/json_utils_test.py +++ b/sdks/python/apache_beam/yaml/json_utils_test.py @@ -152,6 +152,62 @@ def test_json_to_row_with_missing_required_field(self): with self.assertRaises(KeyError): converter(json_data) + def test_validate_compatible(self): + from apache_beam.yaml.json_utils import _validate_compatible + + # Compatible cases + _validate_compatible({'type': 'string'}, {'type': 'string'}) + _validate_compatible( + { + 'type': 'object', 'properties': { + 'f': { + 'type': 'string' + } + } + }, { + 'type': 'object', 'properties': { + 'f': { + 'type': 'string' + } + } + }) + + # Incompatible types + with self.assertRaisesRegex(ValueError, 'Incompatible types'): + _validate_compatible({'type': 'string'}, {'type': 'integer'}) + + # Missing property + with self.assertRaisesRegex(ValueError, 'Missing or unknown property'): + _validate_compatible({ + 'type': 'object', 'properties': {} + }, + { + 'type': 'object', + 'properties': { + 'f': { + 'type': 'string' + } + }, + 'required': ['f'] + }) + + # Incompatible property type + with self.assertRaisesRegex(ValueError, 'Incompatible schema for \'f\''): + _validate_compatible( + { + 'type': 'object', 'properties': { + 'f': { + 'type': 'integer' + } + } + }, { + 'type': 'object', 'properties': { + 'f': { + 'type': 'string' + } + } + }) + if __name__ == '__main__': unittest.main() From 66a60c2f4e60f61b6e55e1819fabae9874e90ca0 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 13 Feb 2026 02:34:13 +0000 Subject: [PATCH 2/4] additionalProperties --- sdks/python/apache_beam/yaml/json_utils.py | 6 +++ .../apache_beam/yaml/json_utils_test.py | 40 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 0dc1bef65348..e57256753ac6 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -318,6 +318,12 @@ def _validate_compatible(weak_schema, strong_schema): if weak_schema['type'] == 'array': _validate_compatible(weak_schema['items'], strong_schema['items']) elif weak_schema['type'] == 'object': + if 'additionalProperties' in weak_schema: + if not strong_schema.get('additionalProperties'): + raise ValueError('Incompatible types: map vs object') + _validate_compatible( + weak_schema['additionalProperties'], + strong_schema['additionalProperties']) for required in strong_schema.get('required', []): if required not in weak_schema['properties']: raise ValueError(f"Missing or unknown property '{required}'") diff --git a/sdks/python/apache_beam/yaml/json_utils_test.py b/sdks/python/apache_beam/yaml/json_utils_test.py index a7f66c5a2666..267b4478f17e 100644 --- a/sdks/python/apache_beam/yaml/json_utils_test.py +++ b/sdks/python/apache_beam/yaml/json_utils_test.py @@ -208,6 +208,46 @@ def test_validate_compatible(self): } }) + def test_validate_compatible_map(self): + from apache_beam.yaml.json_utils import _validate_compatible + + # Compatible maps + _validate_compatible( + { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }, { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }) + + # Incompatible map values + with self.assertRaisesRegex(ValueError, 'Incompatible types'): + _validate_compatible( + { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }, { + 'type': 'object', 'additionalProperties': { + 'type': 'integer' + } + }) + + # Map vs Object + with self.assertRaisesRegex(ValueError, + 'Incompatible types: map vs object'): + _validate_compatible( + { + 'type': 'object', 'additionalProperties': { + 'type': 'string' + } + }, { + 'type': 'object', 'properties': {}, 'additionalProperties': False + }) + if __name__ == '__main__': unittest.main() From efb8514cb61c8341727df43203304c8872784fdc Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 13 Feb 2026 14:15:45 +0000 Subject: [PATCH 3/4] fix weak schema logic --- sdks/python/apache_beam/yaml/json_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index e57256753ac6..8f21a0c56c76 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -318,7 +318,7 @@ def _validate_compatible(weak_schema, strong_schema): if weak_schema['type'] == 'array': _validate_compatible(weak_schema['items'], strong_schema['items']) elif weak_schema['type'] == 'object': - if 'additionalProperties' in weak_schema: + if weak_schema.get('additionalProperties'): if not strong_schema.get('additionalProperties'): raise ValueError('Incompatible types: map vs object') _validate_compatible( From 5aa4380e8087b2ef6c465a2debdba703f0fb1906 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 13 Feb 2026 18:15:40 +0000 Subject: [PATCH 4/4] correct some logic --- sdks/python/apache_beam/yaml/json_utils.py | 9 ++++-- .../apache_beam/yaml/json_utils_test.py | 28 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 8f21a0c56c76..3967645c2019 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -318,8 +318,10 @@ def _validate_compatible(weak_schema, strong_schema): if weak_schema['type'] == 'array': _validate_compatible(weak_schema['items'], strong_schema['items']) elif weak_schema['type'] == 'object': + # If the weak schema allows for arbitrary keys (is a map), + # the strong schema must also allow for arbitrary keys. if weak_schema.get('additionalProperties'): - if not strong_schema.get('additionalProperties'): + if not strong_schema.get('additionalProperties', True): raise ValueError('Incompatible types: map vs object') _validate_compatible( weak_schema['additionalProperties'], @@ -328,12 +330,15 @@ def _validate_compatible(weak_schema, strong_schema): if required not in weak_schema['properties']: raise ValueError(f"Missing or unknown property '{required}'") for name, spec in weak_schema.get('properties', {}).items(): + if name in strong_schema['properties']: try: _validate_compatible(spec, strong_schema['properties'][name]) except Exception as exn: raise ValueError(f"Incompatible schema for '{name}'") from exn - elif not strong_schema.get('additionalProperties'): + elif not strong_schema.get('additionalProperties', True): + # The property is not explicitly in the strong schema, and the strong + # schema does not allow for extra properties. raise ValueError( f"Prohibited property: '{name}'; " "perhaps additionalProperties: False is missing?") diff --git a/sdks/python/apache_beam/yaml/json_utils_test.py b/sdks/python/apache_beam/yaml/json_utils_test.py index 267b4478f17e..e930577ec1e6 100644 --- a/sdks/python/apache_beam/yaml/json_utils_test.py +++ b/sdks/python/apache_beam/yaml/json_utils_test.py @@ -248,6 +248,34 @@ def test_validate_compatible_map(self): 'type': 'object', 'properties': {}, 'additionalProperties': False }) + def test_validate_compatible_extra_properties(self): + from apache_beam.yaml.json_utils import _validate_compatible + + # Extra properties in weak_schema should be allowed if strong_schema + # doesn't explicitly forbid them (default additionalProperties=True). + _validate_compatible({ + 'type': 'object', 'properties': { + 'extra': { + 'type': 'string' + } + } + }, { + 'type': 'object', 'properties': {} + }) + + # But if strong_schema says additionalProperties: False, it should raise. + with self.assertRaisesRegex(ValueError, 'Prohibited property'): + _validate_compatible( + { + 'type': 'object', 'properties': { + 'extra': { + 'type': 'string' + } + } + }, { + 'type': 'object', 'properties': {}, 'additionalProperties': False + }) + if __name__ == '__main__': unittest.main()