Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions lang/ruby/lib/avro/schema_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,23 +198,38 @@ def validate_union(expected_schema, datum, path, result, options)
compatible_type = first_compatible_type(datum, expected_schema, path, failures, options)
return unless compatible_type.nil?

complex_type_failed = failures.detect { |r| COMPLEX_TYPES.include?(r[:type]) }
if complex_type_failed
complex_type_failed[:result].errors.each { |error| result << error }
else
types = expected_schema.schemas.map { |s| "'#{s.type_sym}'" }.join(', ')
result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}")

failed_complex_types = failures.select { |r| COMPLEX_TYPES.include?(r[:type]) }
complex_type_errors = []
failed_complex_types.each do |failed_complex_type|
error_msg = failed_complex_type[:result].errors.map do |error|
error
end.join("; ")
schema_name_prefix = "#{failed_complex_type[:schema_name]}: " if failed_complex_type[:schema_name]
complex_type_errors << "#{schema_name_prefix}#{error_msg}"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

schema_name_prefix is assigned only when schema_name is present; on iterations without a name, the variable can retain the previous iteration’s value and incorrectly prefix errors. Consider resetting the prefix each loop iteration to avoid leaking a prior schema name.

🤖 React with 👍 or 👎 to let us know if the comment was useful.

Copy link
Copy Markdown
Owner Author

@martin-augment martin-augment Oct 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:annoying; category:bug; feedback:the model wrongly assumes that the local variable would leak to the next iteration. This could happen with "for loop" in Ruby but the code here uses an iterator.

Here is a small test showing that temp does not survive to the next loop:

#!/usr/bin/env ruby

i = 0
collection = ['a', 'b', 'c']
collection.each do |x|
  temp = "#{x}" if i % 2 == 0
  print "#{i} #{x}: #{temp}\n"
  i = i + 1
end

It prints:

0 a: a
1 b:
2 c: c

end

types = expected_schema.schemas.map do |s|
s.respond_to?(:name) ? "#{s.name} ('#{s.type}')" : "'#{s.type}'"
end.join(', ')
type_mismatches = %Q{\nUnion type specific errors:\n#{complex_type_errors.join("\n")}} if complex_type_errors.any?
result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}#{type_mismatches}")
end

def first_compatible_type(datum, expected_schema, path, failures, options = {})
expected_schema.schemas.find do |schema|
# Avoid expensive validation if we're just validating a nil
next datum.nil? if schema.type_sym == :null
if schema.type_sym == :null
next datum.nil?
end

result = Result.new
validate_recursive(schema, datum, path, result, options)
failures << { type: schema.type_sym, result: result } if result.failure?
if result.failure?
failure = { type: schema.type_sym, result: result }
failure[:schema_name] = schema.name if schema.respond_to?(:name)
failures << failure
end
!result.failure?
end
end
Expand Down
51 changes: 49 additions & 2 deletions lang/ruby/test/test_schema_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,12 @@ def test_validate_union_of_nil_and_record_inside_array
assert_nothing_raised { validate_simple!(schema, 'person' => { houses: [] }) }
assert_nothing_raised { validate_simple!(schema, 'person' => { 'houses' => [{ 'number_of_rooms' => 1 }] }) }

message = 'at .person.houses[1].number_of_rooms expected type long, got string with value "not valid at all"'
message = <<~EXPECTED_ERROR.chomp
at .person.houses expected union of ['null', 'array'], got Array with value [{"number_of_rooms"=>2}, {"number_of_rooms"=>"not valid at all"}]
Union type specific errors:
at .person.houses[1].number_of_rooms expected type long, got string with value "not valid at all"
EXPECTED_ERROR

datum = {
'person' => {
'houses' => [
Expand Down Expand Up @@ -556,9 +561,51 @@ def test_validate_union_extra_fields
validate!(schema, { 'name' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true)
end
assert_equal(1, exception.result.errors.size)
assert_equal("at . extra field 'color' - not in schema", exception.to_s)
expected_error = <<~EXPECTED_ERROR.chomp
at . expected union of ['null', fruit ('record')], got record with value {"name"=>"apple", "color"=>"green"}
Union type specific errors:
fruit: at . extra field 'color' - not in schema
EXPECTED_ERROR
assert_equal(expected_error, exception.to_s)
end

def test_validate_union_complex_and_simple_types
schema = hash_to_schema([
'null',
{
type: 'record',
name: 'fruit',
fields: [{ name: 'name', type: 'string' }]
},
{
type: 'record',
name: 'animal',
fields: [
{ name: 'name', type: 'string' },
{ name: 'species', type: 'string' }
]
},
{
type: 'enum',
name: 'person',
symbols: %w(one two three)
}
])
exception = assert_raise(Avro::SchemaValidator::ValidationError) do
validate!(schema, { 'namo' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true)
end

assert_equal(1, exception.result.errors.size)
expected_error = <<~EXPECTED_ERROR.chomp
at . expected union of ['null', fruit ('record'), animal ('record'), person ('enum')], got record with value {"namo"=>"apple", "color"=>"green"}
Union type specific errors:
fruit: at .name expected type string, got null; at . extra field 'namo' - not in schema; at . extra field 'color' - not in schema
animal: at .name expected type string, got null; at .species expected type string, got null; at . extra field 'namo' - not in schema; at . extra field 'color' - not in schema
EXPECTED_ERROR
assert_equal(expected_error, exception.to_s)
end


def test_validate_bytes_decimal
schema = hash_to_schema(type: 'bytes', logicalType: 'decimal', precision: 4, scale: 2)
assert_valid_schema(schema, [BigDecimal('1.23'), 4.2, 1], ['4.2', BigDecimal('233.2')], true)
Expand Down