Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test-lang-php.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
tools: composer:2.8.12
tools: composer:2.9.3

- name: Get Composer Cache Directory
id: composer-cache
Expand Down Expand Up @@ -99,7 +99,7 @@ jobs:
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
tools: composer:2.8.12
tools: composer:2.9.3

- name: Cache Local Maven Repository
uses: actions/cache@v5
Expand Down
16 changes: 12 additions & 4 deletions lang/php/lib/Schema/AvroField.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class AvroField extends AvroSchema implements AvroAliasedSchema
*/
private ?string $order;
private ?array $aliases;
private ?string $doc;

/**
* @throws AvroSchemaParseException
Expand All @@ -94,7 +95,8 @@ public function __construct(
bool $hasDefault,
mixed $default,
?string $order = null,
mixed $aliases = null
mixed $aliases = null,
?string $doc = null
) {
if (!AvroName::isWellFormedName($name)) {
throw new AvroSchemaParseException('Field requires a "name" attribute');
Expand All @@ -111,6 +113,8 @@ public function __construct(
$this->order = $order;
self::hasValidAliases($aliases);
$this->aliases = $aliases;

$this->doc = $doc;
}

public function toAvro(): string|array
Expand All @@ -131,13 +135,17 @@ public function toAvro(): string|array
$avro[self::ORDER_ATTR] = $this->order;
}

if (!is_null($this->doc)) {
$avro[AvroSchema::DOC_ATTR] = $this->doc;
}

Comment on lines +138 to +141
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# First, let's check the full toAvro() method in AvroField.php
echo "=== Full toAvro() method ===" 
rg -n -A30 'function toAvro' lang/php/lib/Schema/AvroField.php

echo -e "\n=== Check for aliases field definition ===" 
rg -n -B5 -A5 'aliases' lang/php/lib/Schema/AvroField.php | head -50

echo -e "\n=== Search for aliases in tests ===" 
find lang/php -name "*test*.php" -o -name "*Test*.php" | xargs rg -l 'aliases.*field|field.*aliases' 2>/dev/null || echo "No test files found with aliases references"

echo -e "\n=== Check Avro Schema documentation or references ===" 
rg -n 'aliases' lang/php/lib/Schema/ | head -20

Repository: martin-augment/avro

Length of output: 4360


🏁 Script executed:

#!/bin/bash
# Also check if there's a base class or interface that defines what should be serialized
echo "=== AvroField class definition ===" 
rg -n -A5 'class AvroField' lang/php/lib/Schema/AvroField.php

echo -e "\n=== Check constructor for aliases ===" 
rg -n -A20 'public function __construct' lang/php/lib/Schema/AvroField.php

Repository: martin-augment/avro

Length of output: 1066


🏁 Script executed:

#!/bin/bash
# Check tests for how fields with aliases should be serialized
echo "=== Search SchemaTest.php for field aliases ===" 
rg -n -B3 -A3 'aliases.*field|field.*aliases' lang/php/test/SchemaTest.php | head -60

echo -e "\n=== Look for field toAvro tests ===" 
rg -n -B5 -A5 'toAvro.*field|field.*toAvro' lang/php/test/SchemaTest.php

echo -e "\n=== Search for AvroField parsing ===" 
rg -n -B3 -A10 'AvroField' lang/php/test/SchemaTest.php | head -80

Repository: martin-augment/avro

Length of output: 515


🏁 Script executed:

#!/bin/bash
# Check if aliases are parsed when reading field definitions
echo "=== Check field parsing from JSON schema ===" 
rg -n -B5 -A5 'FIELD_NAME_ATTR|field.*=' lang/php/lib/Schema/AvroRecordSchema.php | head -80

echo -e "\n=== Check Avro spec or comments about field serialization ===" 
rg -n 'field|Field' lang/php/lib/Schema/AvroField.php | grep -i 'doc\|comment\|spec\|serial'

Repository: martin-augment/avro

Length of output: 3250


🏁 Script executed:

#!/bin/bash
# Check if there are any tests that verify field serialization round-trip
echo "=== Search for test data with field aliases ===" 
find lang/php/test -name "*.json" -o -name "*.php" | xargs grep -l 'aliases.*:' 2>/dev/null | head -5

echo -e "\n=== Check the test that compares toAvro() output ===" 
rg -n -B10 -A10 'toAvro.*==|assertEquals.*toAvro' lang/php/test/SchemaTest.php | head -80

echo -e "\n=== Look for JSON test schemas with field aliases ===" 
find lang/php/test -type f -name "*.json" | xargs grep -l 'aliases' 2>/dev/null

Repository: martin-augment/avro

Length of output: 2760


🏁 Script executed:

#!/bin/bash
# Check if there's a specific test for AvroRecordSchema that includes field aliases
echo "=== Search for record schema test data ===" 
rg -n 'test.*record\|record.*test' lang/php/test/SchemaTest.php -i

echo -e "\n=== Check if there are any fixture files ===" 
find lang/php -type d -name fixtures -o -name data -o -name schema

echo -e "\n=== Check the test_schemas.php to see what test data exists ===" 
find lang/php -name "*test*.php" -type f | xargs ls -la

Repository: martin-augment/avro

Length of output: 295


🏁 Script executed:

#!/bin/bash
# Check if there are test cases with actual field-level aliases
echo "=== Look for field-level aliases in test schemas ===" 
rg -n '"name".*"aliases"|"aliases".*"name"' lang/php/test/SchemaTest.php

echo -e "\n=== Get more context around the round-trip test ===" 
rg -n -B25 'assertEquals.*toAvro' lang/php/test/SchemaTest.php | head -100

echo -e "\n=== Check Avro specification comments or documentation ===" 
find lang/php -name "*.php" -type f | xargs grep -i 'field.*alias\|specification\|according to' | grep -i 'avro\|spec' | head -20

Repository: martin-augment/avro

Length of output: 5043


🏁 Script executed:

#!/bin/bash
# Search for actual field definitions with aliases in test data
echo "=== Search for field schemas with aliases property ===" 
rg -n '\{\s*"name"' lang/php/test/SchemaTest.php -A5 | grep -B5 -A5 'aliases'

echo -e "\n=== Check Apache Avro specification references ===" 
find lang/php -type f \( -name "*.md" -o -name "*.txt" -o -name "*.php" \) | xargs grep -i 'apache avro\|spec\|specification' 2>/dev/null | head -20

Repository: martin-augment/avro

Length of output: 3173


🏁 Script executed:

#!/bin/bash
# Let's look at the exact test that has field aliases
echo "=== Get the full test_validate_field_aliases and surrounding context ===" 
rg -n -B5 -A25 'test_validate_field_aliases|{"name": "banana".*aliases' lang/php/test/SchemaTest.php

echo -e "\n=== Check if the test with field aliases actually passes ===" 
rg -n -B20 '"banana".*"yellow"' lang/php/test/SchemaTest.php

Repository: martin-augment/avro

Length of output: 1228


🏁 Script executed:

#!/bin/bash
# Check if there's a test method that specifically tests field alias serialization
echo "=== Look for test methods related to field aliases ===" 
rg -n 'public function test.*alias|def.*test.*alias' lang/php/test/SchemaTest.php -i

echo -e "\n=== Get full context of the test around line 205 ===" 
sed -n '195,215p' lang/php/test/SchemaTest.php

Repository: martin-augment/avro

Length of output: 1321


🏁 Script executed:

#!/bin/bash
# Get the full test_to_avro_includes_aliases test to see what it's actually testing
echo "=== Full test_to_avro_includes_aliases ===" 
sed -n '112,127p' lang/php/test/SchemaTest.php

echo -e "\n=== Also check test_validate_same_alias_multiple_fields more carefully ===" 
sed -n '196,211p' lang/php/test/SchemaTest.php

echo -e "\n=== Check if there's a test that expects field aliases in toAvro output ===" 
rg -n -A30 'test_to_avro' lang/php/test/SchemaTest.php | head -80

Repository: martin-augment/avro

Length of output: 2879


🏁 Script executed:

#!/bin/bash
# The critical observation: test_to_avro_includes_aliases has NO field-level aliases
# Let's verify: does the test parse field-level aliases without error?
echo "=== Parse and check field aliases behavior ===" 
cat > /tmp/test_field_aliases.php << 'EOF'
<?php
// Test if field aliases are parsed correctly
$schema_with_field_aliases = <<<JSON
{
    "type": "record",
    "name": "test",
    "fields": [
        {"name": "f1", "type": "string", "aliases": ["field_one"]},
        {"name": "f2", "type": "int"}
    ]
}
JSON;

// Show what we expect: the original schema
echo "Original schema:\n";
echo $schema_with_field_aliases . "\n\n";

// Show what the test compares
echo "Expected behavior from test_to_avro_includes_aliases:\n";
echo "schema->toAvro() should equal json_decode(original, true)\n";
EOF
cat /tmp/test_field_aliases.php

Repository: martin-augment/avro

Length of output: 656


🏁 Script executed:

#!/bin/bash
# Let's check what Apache Avro spec says about field aliases
echo "=== Check README or documentation ===" 
find lang/php -name "README*" -o -name "*.md" | xargs cat 2>/dev/null | head -50

echo -e "\n=== Check if there are any comments about field serialization ===" 
rg -n 'field.*serial|serialize.*field' lang/php/lib/Schema/AvroField.php

Repository: martin-augment/avro

Length of output: 1744


🏁 Script executed:

#!/bin/bash
# Critical investigation: Let's check what the Avro 1.4.0 spec says
# Search for any references to what should be serialized
echo "=== Check AvroRecordSchema toAvro to see how it handles field serialization ===" 
rg -n -A50 'public function toAvro' lang/php/lib/Schema/AvroRecordSchema.php | head -80

echo -e "\n=== Check if there's a comment about which attributes are included in toAvro ===" 
rg -n -B5 'ALIASES_ATTR|aliases' lang/php/lib/Schema/AvroField.php | head -40

Repository: martin-augment/avro

Length of output: 2612


🏁 Script executed:

#!/bin/bash
# Let's trace through: when AvroRecordSchema calls toAvro on fields, what does it do?
echo "=== Find where fields toAvro is called ===" 
rg -n 'field.*toAvro|toAvro.*field' lang/php/lib/Schema/AvroRecordSchema.php

echo -e "\n=== Get the full toAvro implementation of AvroRecordSchema ===" 
rg -n -A100 'public function toAvro' lang/php/lib/Schema/AvroRecordSchema.php | head -150

Repository: martin-augment/avro

Length of output: 1800


🏁 Script executed:

#!/bin/bash
# Critical finding: test_to_avro_includes_aliases does NOT have field-level aliases
# Let's verify what the test actually contains
echo "=== Verify the test schema has NO field-level aliases ===" 
sed -n '112,127p' lang/php/test/SchemaTest.php | grep -E '"aliases"|"fields"' -A2

echo -e "\n=== Check Apache Avro specification references in code ===" 
rg -n 'avro.apache.org\|specification\|spec' lang/php/lib/ | head -10

echo -e "\n=== The question: does AvroIODatumReader actually use field aliases in practice? ===" 
rg -n -B5 -A5 'fieldsByAlias' lang/php/lib/Datum/AvroIODatumReader.php

Repository: martin-augment/avro

Length of output: 1433


🏁 Script executed:

#!/bin/bash
# Let's determine: if a schema is parsed with field aliases, does toAvro lose that info?
# This would be a round-trip bug
echo "=== Check if there are any comments in AvroField about what toAvro should include ===" 
sed -n '70,145p' lang/php/lib/Schema/AvroField.php | head -80

echo -e "\n=== Check the git history or comments about field aliases ===" 
rg -n -B10 'aliases.*field|field.*alias' lang/php/lib/Schema/AvroField.php | head -50

Repository: martin-augment/avro

Length of output: 2237


🏁 Script executed:

#!/bin/bash
# Let me check: is there a round-trip test that includes field-level aliases?
echo "=== Search all tests for round-trip tests with field aliases ===" 
rg -n 'test.*round|round.*trip' lang/php/test/SchemaTest.php -i

echo -e "\n=== Check if test_validate_same_alias_multiple_fields schema would pass a round-trip test ===" 
echo "If field aliases ARE included in toAvro, then a schema with field aliases"
echo "should satisfy: schema->toAvro() == json_decode(original, true)"

echo -e "\n=== Create a test to demonstrate the issue ===" 
cat > /tmp/test_round_trip.php << 'EOF'
<?php
// This would demonstrate the bug:
$schema_json = <<<JSON
{
    "type": "record",
    "name": "test",
    "fields": [
        {"name": "f1", "type": "string", "aliases": ["field_one"]},
        {"name": "f2", "type": "int"}
    ]
}
JSON;

// Parse and re-serialize
$parsed_array = json_decode($schema_json, true);
// Expected: $parsed_array["fields"][0] still has "aliases": ["field_one"]

// If AvroField->toAvro() doesn't include aliases:
// $schema->toAvro()["fields"][0] would NOT have "aliases" key
// This breaks round-trip: schema->toAvro() != original
EOF
cat /tmp/test_round_trip.php

Repository: martin-augment/avro

Length of output: 974


🏁 Script executed:

#!/bin/bash
# The Apache Avro specification definitely includes field aliases
# Let's verify by looking at what the implementation expects
echo "=== Check if AvroNamedSchema (parent of AvroField) has ALIASES_ATTR handling ===" 
rg -n 'ALIASES_ATTR|aliases' lang/php/lib/Schema/AvroNamedSchema.php | head -20

echo -e "\n=== Check parent class toAvro ===" 
rg -n -A20 'public function toAvro' lang/php/lib/Schema/AvroNamedSchema.php

Repository: martin-augment/avro

Length of output: 1156


🏁 Script executed:

#!/bin/bash
# Perfect! Let me verify this is indeed a bug by checking:
# 1. Does AvroField implement AvroAliasedSchema?
# 2. What does AvroAliasedSchema interface require?
echo "=== Check AvroAliasedSchema interface ===" 
rg -n -A10 'interface AvroAliasedSchema' lang/php/lib/Schema/

echo -e "\n=== Confirm AvroField implements AvroAliasedSchema ===" 
rg -n 'class AvroField' lang/php/lib/Schema/AvroField.php

Repository: martin-augment/avro

Length of output: 481


🏁 Script executed:

#!/bin/bash
# Now let's check: is there documentation about what fields should be serialized?
echo "=== Apache Avro spec: fields should have aliases ===" 
echo "According to Avro spec, a field definition can have:"
echo "- name (required)"
echo "- type (required)"  
echo "- default (optional)"
echo "- order (optional)"
echo "- aliases (optional)"
echo "- doc (optional)"

echo -e "\n=== Current AvroField->toAvro includes: ===" 
echo "- name (required)"
echo "- type (required)"
echo "- default (if set)"
echo "- order (if set)"
echo "- doc (if not null)"
echo "- aliases (MISSING!)"

echo -e "\n=== Verify by checking other schema types ===" 
rg -n 'ALIASES_ATTR' lang/php/lib/Schema/*.php

Repository: martin-augment/avro

Length of output: 884


Field aliases should be serialized in toAvro() output.

The aliases field is not being serialized, which breaks round-trip serialization for schemas containing field aliases. Although aliases are parsed from the input schema (line 76 in AvroRecordSchema.php), validated, and stored, they're discarded when calling toAvro(). This is inconsistent with:

  • The doc and order attributes being included (lines 134-140)
  • AvroField implementing the AvroAliasedSchema interface with getAliases() method
  • AvroNamedSchema (parent class) including aliases in its toAvro() output (lines 65-66)
  • The Avro specification, which supports optional aliases for fields
  • Runtime usage in AvroIODatumReader, which retrieves field aliases via fieldsByAlias()

Add aliases serialization:

if (!is_null($this->aliases)) {
    $avro[AvroSchema::ALIASES_ATTR] = $this->aliases;
}
🤖 Prompt for AI Agents
In @lang/php/lib/Schema/AvroField.php around lines 138 - 141, The toAvro()
output for AvroField currently omits field aliases; update AvroField::toAvro()
to include aliases when present by checking $this->aliases (or getAliases()) and
adding AvroSchema::ALIASES_ATTR => $this->aliases to the $avro array alongside
the existing doc and order handling so field aliases are serialized and preserve
round-trip behavior.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:bug; feedback:The CodeRabbit AI reviewer is correct! For consistency with the other fields of this class the old $aliases field should also be serialized in the JSON representation. Prevents releasing an unfinished feature.

return $avro;
}

/**
* @returns string the name of this field
*/
public function name()
public function name(): ?string
{
return $this->name;
}
Expand All @@ -153,7 +161,7 @@ public function defaultValue()
/**
* @returns boolean true if the field has a default and false otherwise
*/
public function hasDefaultValue()
public function hasDefaultValue(): bool
{
return $this->hasDefault;
}
Expand All @@ -163,7 +171,7 @@ public function getAliases(): ?array
return $this->aliases;
}

public function hasAliases()
public function hasAliases(): bool
{
return null !== $this->aliases;
}
Expand Down
4 changes: 3 additions & 1 deletion lang/php/lib/Schema/AvroRecordSchema.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public static function parseFields(
$type = $field[AvroSchema::TYPE_ATTR] ?? null;
$order = $field[AvroField::ORDER_ATTR] ?? null;
$aliases = $field[AvroSchema::ALIASES_ATTR] ?? null;
$doc = $field[AvroSchema::DOC_ATTR] ?? null;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since $doc comes from decoded JSON, a non-string value will flow into AvroField’s ?string $doc parameter and can raise a TypeError instead of AvroSchemaParseException. Consider validating doc is a string (or null) during parsing to keep error handling consistent for invalid schemas.

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎


$default = null;
$has_default = false;
Expand Down Expand Up @@ -110,7 +111,8 @@ public static function parseFields(
hasDefault: $has_default,
default: $default,
order: $order,
aliases: $aliases
aliases: $aliases,
doc: $doc
);
$field_names[] = $name;
if ($new_field->hasAliases() && array_intersect($alias_names, $new_field->getAliases())) {
Expand Down
43 changes: 32 additions & 11 deletions lang/php/test/SchemaTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@

class SchemaExample
{
public $name;
public $normalized_schema_string;
public string $name;
public string $normalized_schema_string;

public function __construct(
public $schema_string,
public $is_valid,
$normalized_schema_string = null,
$name = null,
public $comment = null
public string $schema_string,
public bool $is_valid,
?string $normalized_schema_string = null,
?string $name = null,
public ?string $comment = null
) {
$this->name = $name ?: $this->schema_string;
$this->normalized_schema_string = $normalized_schema_string ?: json_encode(json_decode((string) $this->schema_string, true));
Expand All @@ -45,8 +45,8 @@ public function __construct(

class SchemaTest extends TestCase
{
private static $examples = [];
private static $valid_examples = [];
private static array $examples = [];
private static array $valid_examples = [];

public function test_json_decode(): void
{
Expand Down Expand Up @@ -232,6 +232,27 @@ public function test_validate_repeated_aliases(): void
);
}

public function test_doc_attribute_on_primitive_fields(): void
{
$schemaJson = <<<JSON
{
"type": "record",
"name": "fruits",
"fields": [
{
"name": "banana",
"type": "string",
"doc": "This is a banana"
}
]
}
JSON;

$schema = AvroSchema::parse($schemaJson);

self::assertEquals($schemaJson, json_encode($schema->toAvro(), flags: JSON_PRETTY_PRINT));
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This compares raw JSON strings, but $schemaJson includes leading indentation/newlines from the heredoc so it may not match json_encode(..., JSON_PRETTY_PRINT) output. Consider normalizing both sides (e.g., compare decoded structures) so the test is robust to whitespace/formatting differences.

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:good-to-have; category:bug; feedback:The Augment AI reviewer is correct! Instead of comparing the JSONs as strings it would be better to compare the decoded version of the JSON values. This way the JSON will be normalized and any leading space and/or different fields order won't break the test.

}

public function test_logical_types_in_record(): void
{
$avro = <<<JSON
Expand Down Expand Up @@ -380,7 +401,7 @@ protected static function make_examples(): void
new SchemaExample('{"no_type": "test"}', false),
new SchemaExample('{"type": "panther"}', false),
],
self::make_primitive_examples()
self::makePrimitiveExamples()
);

$array_examples = [
Expand Down Expand Up @@ -783,7 +804,7 @@ protected static function make_examples(): void
}
}

protected static function make_primitive_examples()
protected static function makePrimitiveExamples(): array
{
$examples = [];
foreach ([
Expand Down