From b2470851c36ea4dd02c176435b3564acb0ef8a20 Mon Sep 17 00:00:00 2001 From: David Mora Date: Fri, 20 Jan 2023 15:22:10 +0000 Subject: [PATCH 1/4] Keep all errors for all union types and attach the schema name ..So not only the errors from the first failed union type are kept --- lang/ruby/lib/avro/schema_validator.rb | 20 +++++++++++++++----- lang/ruby/test/test_schema_validator.rb | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lang/ruby/lib/avro/schema_validator.rb b/lang/ruby/lib/avro/schema_validator.rb index 8ebb65f8d59..53fd2f1c254 100644 --- a/lang/ruby/lib/avro/schema_validator.rb +++ b/lang/ruby/lib/avro/schema_validator.rb @@ -198,9 +198,14 @@ def validate_union(expected_schema, datum, path, result, options) compatible_type = first_compatible_type(datum, expected_schema, path, failures, options) return unless compatible_type.nil? - complex_type_failed = failures.detect { |r| COMPLEX_TYPES.include?(r[:type]) } - if complex_type_failed - complex_type_failed[:result].errors.each { |error| result << error } + complex_type_failed = failures.select { |r| COMPLEX_TYPES.include?(r[:type]) } + if complex_type_failed.any? + complex_type_failed.each do |type_failures| + type_failures[:result].errors.each do |error| + error_msg = type_failures[:schema] ? "#{type_failures[:schema]} #{error}" : error + result << error_msg + end + end else types = expected_schema.schemas.map { |s| "'#{s.type_sym}'" }.join(', ') result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}") @@ -210,11 +215,16 @@ def validate_union(expected_schema, datum, path, result, options) def first_compatible_type(datum, expected_schema, path, failures, options = {}) expected_schema.schemas.find do |schema| # Avoid expensive validation if we're just validating a nil - next datum.nil? if schema.type_sym == :null + if schema.type_sym == :null + next datum.nil? + end result = Result.new validate_recursive(schema, datum, path, result, options) - failures << { type: schema.type_sym, result: result } if result.failure? + + failure = { type: schema.type_sym, result: result } + failure[:schema] = schema.name if schema.is_a?(Avro::Schema::RecordSchema) + failures << failure if result.failure? !result.failure? end end diff --git a/lang/ruby/test/test_schema_validator.rb b/lang/ruby/test/test_schema_validator.rb index 8b120927929..6e10d9b7074 100644 --- a/lang/ruby/test/test_schema_validator.rb +++ b/lang/ruby/test/test_schema_validator.rb @@ -556,7 +556,7 @@ def test_validate_union_extra_fields validate!(schema, { 'name' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true) end assert_equal(1, exception.result.errors.size) - assert_equal("at . extra field 'color' - not in schema", exception.to_s) + assert_equal("fruit at . extra field 'color' - not in schema", exception.to_s) end def test_validate_bytes_decimal From fc1576b055f12691dc6b0f9925a62cd172560521 Mon Sep 17 00:00:00 2001 From: David Mora Date: Thu, 20 Feb 2025 14:33:15 +0100 Subject: [PATCH 2/4] Wrap failure addition into block --- lang/ruby/lib/avro/schema_validator.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lang/ruby/lib/avro/schema_validator.rb b/lang/ruby/lib/avro/schema_validator.rb index 53fd2f1c254..1a638a9ea0e 100644 --- a/lang/ruby/lib/avro/schema_validator.rb +++ b/lang/ruby/lib/avro/schema_validator.rb @@ -221,10 +221,11 @@ def first_compatible_type(datum, expected_schema, path, failures, options = {}) result = Result.new validate_recursive(schema, datum, path, result, options) - - failure = { type: schema.type_sym, result: result } - failure[:schema] = schema.name if schema.is_a?(Avro::Schema::RecordSchema) - failures << failure if result.failure? + if result.failure? + failure = { type: schema.type_sym, result: result } + failure[:schema] = schema.name if schema.is_a?(Avro::Schema::RecordSchema) + failures << failure + end !result.failure? end end From 5246f0db07dc1051b5761dbba500fc7ab53beb41 Mon Sep 17 00:00:00 2001 From: David Mora Date: Thu, 20 Feb 2025 17:46:51 +0100 Subject: [PATCH 3/4] Consider enum or other named types --- lang/ruby/lib/avro/schema_validator.rb | 29 ++++++++------ lang/ruby/test/test_schema_validator.rb | 51 ++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/lang/ruby/lib/avro/schema_validator.rb b/lang/ruby/lib/avro/schema_validator.rb index 1a638a9ea0e..e6d2c1b7e66 100644 --- a/lang/ruby/lib/avro/schema_validator.rb +++ b/lang/ruby/lib/avro/schema_validator.rb @@ -24,6 +24,7 @@ class SchemaValidator COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze BOOLEAN_VALUES = [true, false].freeze DEFAULT_VALIDATION_OPTIONS = { recursive: true, encoded: false, fail_on_extra_fields: false }.freeze + NAMED_SCHEMAS = [Avro::Schema::RecordSchema, Avro::Schema::EnumSchema, Avro::Schema::FixedSchema].freeze RECURSIVE_SIMPLE_VALIDATION_OPTIONS = { encoded: true }.freeze RUBY_CLASS_TO_AVRO_TYPE = { NilClass => 'null', @@ -198,18 +199,22 @@ def validate_union(expected_schema, datum, path, result, options) compatible_type = first_compatible_type(datum, expected_schema, path, failures, options) return unless compatible_type.nil? - complex_type_failed = failures.select { |r| COMPLEX_TYPES.include?(r[:type]) } - if complex_type_failed.any? - complex_type_failed.each do |type_failures| - type_failures[:result].errors.each do |error| - error_msg = type_failures[:schema] ? "#{type_failures[:schema]} #{error}" : error - result << error_msg - end - end - else - types = expected_schema.schemas.map { |s| "'#{s.type_sym}'" }.join(', ') - result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}") + + failed_complex_types = failures.select { |r| COMPLEX_TYPES.include?(r[:type]) } + complex_type_errors = [] + failed_complex_types.each do |failed_complex_type| + error_msg = failed_complex_type[:result].errors.map do |error| + error + end.join("; ") + schema_name_prefix = "#{failed_complex_type[:schema_name]}: " if failed_complex_type[:schema_name] + complex_type_errors << "#{schema_name_prefix}#{error_msg}" end + + types = expected_schema.schemas.map do |s| + s.respond_to?(:name) ? "#{s.name} ('#{s.type}')" : "'#{s.type}'" + end.join(', ') + type_mismatches = %Q{\nUnion type specific errors:\n#{complex_type_errors.join("\n")}} if complex_type_errors.any? + result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}#{type_mismatches}") end def first_compatible_type(datum, expected_schema, path, failures, options = {}) @@ -223,7 +228,7 @@ def first_compatible_type(datum, expected_schema, path, failures, options = {}) validate_recursive(schema, datum, path, result, options) if result.failure? failure = { type: schema.type_sym, result: result } - failure[:schema] = schema.name if schema.is_a?(Avro::Schema::RecordSchema) + failure[:schema_name] = schema.name if schema.respond_to?(:name) failures << failure end !result.failure? diff --git a/lang/ruby/test/test_schema_validator.rb b/lang/ruby/test/test_schema_validator.rb index 6e10d9b7074..6d7a5dcd5c6 100644 --- a/lang/ruby/test/test_schema_validator.rb +++ b/lang/ruby/test/test_schema_validator.rb @@ -331,7 +331,12 @@ def test_validate_union_of_nil_and_record_inside_array assert_nothing_raised { validate_simple!(schema, 'person' => { houses: [] }) } assert_nothing_raised { validate_simple!(schema, 'person' => { 'houses' => [{ 'number_of_rooms' => 1 }] }) } - message = 'at .person.houses[1].number_of_rooms expected type long, got string with value "not valid at all"' + message = <<~EXPECTED_ERROR.chomp + at .person.houses expected union of ['null', 'array'], got Array with value [{"number_of_rooms"=>2}, {"number_of_rooms"=>"not valid at all"}] + Union type specific errors: + at .person.houses[1].number_of_rooms expected type long, got string with value "not valid at all" + EXPECTED_ERROR + datum = { 'person' => { 'houses' => [ @@ -556,9 +561,51 @@ def test_validate_union_extra_fields validate!(schema, { 'name' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true) end assert_equal(1, exception.result.errors.size) - assert_equal("fruit at . extra field 'color' - not in schema", exception.to_s) + expected_error = <<~EXPECTED_ERROR.chomp + at . expected union of ['null', fruit ('record')], got record with value {"name"=>"apple", "color"=>"green"} + Union type specific errors: + fruit: at . extra field 'color' - not in schema + EXPECTED_ERROR + assert_equal(expected_error, exception.to_s) end + def test_validate_union_complex_and_simple_types + schema = hash_to_schema([ + 'null', + { + type: 'record', + name: 'fruit', + fields: [{ name: 'name', type: 'string' }] + }, + { + type: 'record', + name: 'animal', + fields: [ + { name: 'name', type: 'string' }, + { name: 'species', type: 'string' } + ] + }, + { + type: 'enum', + name: 'person', + symbols: %w(one two three) + } + ]) + exception = assert_raise(Avro::SchemaValidator::ValidationError) do + validate!(schema, { 'namo' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true) + end + + assert_equal(1, exception.result.errors.size) + expected_error = <<~EXPECTED_ERROR.chomp + at . expected union of ['null', fruit ('record'), animal ('record'), person ('enum')], got record with value {"namo"=>"apple", "color"=>"green"} + Union type specific errors: + fruit: at .name expected type string, got null; at . extra field 'namo' - not in schema; at . extra field 'color' - not in schema + animal: at .name expected type string, got null; at .species expected type string, got null; at . extra field 'namo' - not in schema; at . extra field 'color' - not in schema + EXPECTED_ERROR + assert_equal(expected_error, exception.to_s) + end + + def test_validate_bytes_decimal schema = hash_to_schema(type: 'bytes', logicalType: 'decimal', precision: 4, scale: 2) assert_valid_schema(schema, [BigDecimal('1.23'), 4.2, 1], ['4.2', BigDecimal('233.2')], true) From 63796919c00bd2ac9fc3b82d7139a2432c09b3d6 Mon Sep 17 00:00:00 2001 From: David Mora Date: Thu, 20 Feb 2025 17:51:23 +0100 Subject: [PATCH 4/4] Remove unnecessary change --- lang/ruby/lib/avro/schema_validator.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lang/ruby/lib/avro/schema_validator.rb b/lang/ruby/lib/avro/schema_validator.rb index e6d2c1b7e66..678b615c393 100644 --- a/lang/ruby/lib/avro/schema_validator.rb +++ b/lang/ruby/lib/avro/schema_validator.rb @@ -24,7 +24,6 @@ class SchemaValidator COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze BOOLEAN_VALUES = [true, false].freeze DEFAULT_VALIDATION_OPTIONS = { recursive: true, encoded: false, fail_on_extra_fields: false }.freeze - NAMED_SCHEMAS = [Avro::Schema::RecordSchema, Avro::Schema::EnumSchema, Avro::Schema::FixedSchema].freeze RECURSIVE_SIMPLE_VALIDATION_OPTIONS = { encoded: true }.freeze RUBY_CLASS_TO_AVRO_TYPE = { NilClass => 'null',