-
Notifications
You must be signed in to change notification settings - Fork 0
3708: Add new code generator from AVRO to PHP #36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,374 @@ | ||
| <?php | ||
|
|
||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * https://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Apache\Avro\Generator; | ||
|
|
||
| use Apache\Avro\Schema\AvroArraySchema; | ||
| use Apache\Avro\Schema\AvroEnumSchema; | ||
| use Apache\Avro\Schema\AvroMapSchema; | ||
| use Apache\Avro\Schema\AvroPrimitiveSchema; | ||
| use Apache\Avro\Schema\AvroRecordSchema; | ||
| use Apache\Avro\Schema\AvroSchema; | ||
| use Apache\Avro\Schema\AvroUnionSchema; | ||
| use PhpParser\BuilderFactory; | ||
| use PhpParser\Node; | ||
| use PhpParser\Node\Scalar\String_; | ||
| use PhpParser\Node\Stmt; | ||
| use PhpParser\PrettyPrinter\Standard; | ||
|
|
||
| class AvroCodeGenerator | ||
| { | ||
| private BuilderFactory $factory; | ||
| private Standard $printer; | ||
|
|
||
| /** @var array<string, AvroSchema> */ | ||
| private array $registry = []; | ||
|
|
||
/**
 * Prepares the php-parser collaborators: a BuilderFactory used to assemble
 * AST nodes and a Standard pretty printer configured for short array syntax.
 */
public function __construct()
{
    $printerOptions = ['shortArraySyntax' => true];

    $this->printer = new Standard($printerOptions);
    $this->factory = new BuilderFactory();
}
|
|
||
/**
 * Generates PHP source code for every record and enum schema reachable from
 * the given root schema.
 *
 * @param AvroSchema $schema       root schema to walk
 * @param string     $path         directory prefix used to build each filename key
 * @param string     $phpNamespace namespace declared in every generated file
 *
 * @return array<string, string> Map of filename to file contents
 *
 * @throws AvroCodeGeneratorException when two schemas would be written to the same file
 */
public function translate(
    AvroSchema $schema,
    string $path,
    string $phpNamespace
): array {
    $this->buildRegistry($schema);

    $files = [];

    foreach ($this->registry as $fullname => $registeredSchema) {
        $node = match (true) {
            $registeredSchema instanceof AvroEnumSchema => $this->buildEnum(
                $registeredSchema,
                $phpNamespace,
                $registeredSchema->symbols()
            ),
            $registeredSchema instanceof AvroRecordSchema => $this->buildRecord(
                $registeredSchema,
                $phpNamespace
            ),
            default => null
        };

        if (null === $node) {
            continue;
        }

        // The generated class/enum is named after the schema's simple name, so
        // the file has to be as well. The registry key is the Avro fullname,
        // which would leak the Avro namespace into the filename
        // (e.g. "com.example.User.php") and no longer match the class inside.
        $filename = $path.'/'.ucwords($registeredSchema->name()).'.php';

        // Two schemas sharing a simple name would both become the same class
        // in $phpNamespace; fail loudly instead of silently dropping one.
        if (array_key_exists($filename, $files)) {
            throw new AvroCodeGeneratorException(sprintf(
                'Schema "%s" would overwrite the already generated file "%s"; simple names must be unique within one PHP namespace',
                $fullname,
                $filename
            ));
        }

        // Plain concatenation avoids relying on the indentation rules of a
        // flexible heredoc for the file preamble.
        $files[$filename] = "<?php\n\ndeclare(strict_types=1);\n\n"
            .$this->printer->prettyPrint([$node])
            ."\n";
    }

    return $files;
}
|
|
||
/**
 * Empties the registry and refills it with the schemas collected from
 * $rootSchema.
 */
private function buildRegistry(AvroSchema $rootSchema): void
{
    // Reset first so schemas from an earlier run do not linger in the registry.
    $this->registry = [];

    $this->collectSchemas($rootSchema);
}
|
|
||
/**
 * Recursively walks $schema and stores every record and enum schema in the
 * registry, keyed by its Avro fullname.
 *
 * Dispatch is done with instanceof, the same way translate() and
 * avroTypeToPhp() decide what a schema is, so a subclass of one of these
 * schema classes is treated like its parent instead of being skipped by an
 * exact class-name comparison.
 */
private function collectSchemas(AvroSchema $schema): void
{
    if ($schema instanceof AvroRecordSchema) {
        // A record that is already registered is not descended into again.
        // NOTE(review): presumably this is also what keeps self-referencing
        // records from recursing endlessly — confirm against the schema parser.
        if (array_key_exists($schema->fullname(), $this->registry)) {
            return;
        }

        $this->registry[$schema->fullname()] = $schema;
        foreach ($schema->fields() as $field) {
            $this->collectSchemas($field->type());
        }

        return;
    }

    if ($schema instanceof AvroEnumSchema) {
        $this->registry[$schema->fullname()] = $schema;

        return;
    }

    if ($schema instanceof AvroArraySchema) {
        $this->collectSchemas($schema->items());

        return;
    }

    if ($schema instanceof AvroMapSchema) {
        $this->collectSchemas($schema->values());

        return;
    }

    if ($schema instanceof AvroUnionSchema) {
        foreach ($schema->schemas() as $unionSchema) {
            $this->collectSchemas($unionSchema);
        }
    }

    // Any other schema kind carries no named type to generate and is ignored.
}
|
|
||
/**
 * Builds the AST for a final class implementing \JsonSerializable that mirrors
 * an Avro record: one private typed property per field, a constructor that
 * assigns every field, a getter named after each field, and jsonSerialize().
 *
 * @param AvroRecordSchema $avroRecord   record to translate
 * @param string           $phpNamespace namespace the generated class is placed in
 *
 * @return Node namespace node wrapping the generated class
 */
private function buildRecord(
    AvroRecordSchema $avroRecord,
    string $phpNamespace
): Node {
    $className = ucwords($avroRecord->name());
    $class = $this->factory->class($className)->makeFinal()->implement('\\JsonSerializable');

    $constructor = $this->factory->method('__construct')->makePublic();
    $constructorParamDocs = [];
    $getters = [];
    $arrayItems = [];

    // One pass over the fields produces the property, constructor parameter,
    // getter and jsonSerialize() entry, so the type mapping runs once per field.
    foreach ($avroRecord->fields() as $field) {
        $fieldName = $field->name();
        $fieldType = $field->type();
        $phpType = $this->avroTypeToPhp($fieldType, $phpNamespace);
        $phpDocType = $this->avroTypeToPhpDoc($fieldType, $phpNamespace);

        $property = $this->factory->property($fieldName)
            ->makePrivate()
            ->setType($phpType);
        $param = $this->factory->param($fieldName)->setType($phpType);

        if ($field->hasDefaultValue()) {
            // Built separately for each so the property and the parameter
            // never share a single AST node instance.
            $property->setDefault(
                $this->buildFieldDefault($fieldType, $field->defaultValue(), $phpNamespace)
            );
            $param->setDefault(
                $this->buildFieldDefault($fieldType, $field->defaultValue(), $phpNamespace)
            );
        }

        $getter = $this->factory->method($fieldName)
            ->makePublic()
            ->setReturnType($phpType)
            ->addStmt(
                new Stmt\Return_(
                    new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $fieldName)
                )
            );

        // PHPDoc is only attached where the native type cannot express the shape.
        if (null !== $phpDocType) {
            $property->setDocComment('/** @var '.$phpDocType.' */');
            $constructorParamDocs[] = '@param '.$phpDocType.' $'.$fieldName;
            $getter->setDocComment('/** @return '.$phpDocType.' */');
        }

        $class->addStmt($property);

        $constructor->addParam($param);
        $constructor->addStmt(
            new Node\Expr\Assign(
                new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $fieldName),
                new Node\Expr\Variable($fieldName)
            )
        );

        $getters[] = $getter;

        $arrayItems[] = new Node\ArrayItem(
            $this->buildJsonSerializeValue($fieldType, $fieldName),
            new String_($fieldName)
        );
    }

    if ([] !== $constructorParamDocs) {
        $constructor->setDocComment(
            "/**\n * ".implode("\n * ", $constructorParamDocs)."\n */"
        );
    }

    // Methods are added in a fixed order: constructor, getters, jsonSerialize().
    $class->addStmt($constructor);
    foreach ($getters as $getter) {
        $class->addStmt($getter);
    }

    $jsonSerialize = $this->factory->method('jsonSerialize')
        ->makePublic()
        ->setReturnType('mixed')
        ->addStmt(
            new Stmt\Return_(
                new Node\Expr\Array_($arrayItems, ['kind' => Node\Expr\Array_::KIND_SHORT])
            )
        );
    $class->addStmt($jsonSerialize);

    return $this->factory->namespace($phpNamespace)
        ->addStmt($class)
        ->getNode();
}

/**
 * Converts an Avro field default into a value the php-parser builders accept,
 * taking the field's schema into account.
 *
 * A string default on an enum-typed field (directly or via a union member)
 * becomes a reference to the generated enum case, because PHP rejects a plain
 * string as the default of an enum-typed property or parameter. Everything
 * else is delegated to buildDefault().
 *
 * NOTE(review): defaults for record-typed fields are still passed through
 * unchanged and may not be valid for a class-typed property — not handled here.
 *
 * @param AvroSchema $fieldType    schema of the field owning the default
 * @param mixed      $value        default value as reported by the field
 * @param string     $phpNamespace namespace of the generated enum
 */
private function buildFieldDefault(AvroSchema $fieldType, mixed $value, string $phpNamespace): mixed
{
    if (!is_string($value)) {
        return $this->buildDefault($value);
    }

    $candidates = $fieldType instanceof AvroUnionSchema ? $fieldType->schemas() : [$fieldType];

    foreach ($candidates as $candidate) {
        if ($candidate instanceof AvroEnumSchema && in_array($value, $candidate->symbols(), true)) {
            // The case name mirrors how enum cases are named when the enum
            // itself is generated: the upper-cased symbol.
            return new Node\Expr\ClassConstFetch(
                new Node\Name\FullyQualified($phpNamespace.'\\'.ucwords($candidate->name())),
                strtoupper($value)
            );
        }

        $isStringLike = $candidate instanceof AvroPrimitiveSchema
            && in_array($candidate->type(), [AvroSchema::STRING_TYPE, AvroSchema::BYTES_TYPE], true);
        if ($isStringLike) {
            // An earlier string/bytes member claims the default as a plain string.
            break;
        }
    }

    return $this->buildDefault($value);
}
|
|
||
/**
 * Produces the expression that jsonSerialize() emits for one field.
 *
 * - enum field                               → $this->field->value
 * - union whose only non-null member is enum → $this->field?->value
 * - every other schema                       → $this->field
 */
private function buildJsonSerializeValue(AvroSchema $fieldType, string $fieldName): Node\Expr
{
    $fieldAccess = new Node\Expr\PropertyFetch(new Node\Expr\Variable('this'), $fieldName);

    if ($fieldType instanceof AvroEnumSchema) {
        return new Node\Expr\PropertyFetch($fieldAccess, 'value');
    }

    if (!$fieldType instanceof AvroUnionSchema) {
        return $fieldAccess;
    }

    // Collect the union members that are not the Avro null primitive.
    $members = [];
    foreach ($fieldType->schemas() as $member) {
        $isNull = $member instanceof AvroPrimitiveSchema && AvroSchema::NULL_TYPE === $member->type();
        if (!$isNull) {
            $members[] = $member;
        }
    }

    $isSoleEnum = 1 === count($members) && $members[0] instanceof AvroEnumSchema;

    return $isSoleEnum
        ? new Node\Expr\NullsafePropertyFetch($fieldAccess, 'value')
        : $fieldAccess;
}
|
Comment on lines
+240
to
+260
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
|
|
||
/**
 * Builds the AST for a string-backed PHP enum mirroring an Avro enum. Each
 * symbol becomes a case named after the upper-cased symbol and backed by the
 * original symbol string.
 *
 * @param AvroEnumSchema $avroEnum     enum schema providing the type name
 * @param string         $phpNamespace namespace the generated enum is placed in
 * @param list<string>   $values       symbols to turn into cases
 *
 * @return Node namespace node wrapping the generated enum
 *
 * @throws AvroCodeGeneratorException when a symbol cannot be turned into a usable, unique case name
 */
private function buildEnum(
    AvroEnumSchema $avroEnum,
    string $phpNamespace,
    array $values
): Node {
    $className = ucwords($avroEnum->name());
    $enum = $this->factory->enum($className)->setScalarType('string');

    /** @var array<string, string> $symbolByCase case name => symbol that produced it */
    $symbolByCase = [];

    foreach ($values as $value) {
        $caseName = strtoupper($value);

        // PHP resolves "::class" (in any letter case) to the class name, so a
        // case called CLASS could never be referenced from generated code.
        if ('CLASS' === $caseName) {
            throw new AvroCodeGeneratorException(sprintf(
                'Enum "%s": symbol "%s" maps to the case name "CLASS", which PHP reserves for class name resolution',
                $className,
                $value
            ));
        }

        // Symbols that differ only by letter case collapse to one case name
        // after upper-casing; duplicate cases would not compile.
        if (array_key_exists($caseName, $symbolByCase)) {
            throw new AvroCodeGeneratorException(sprintf(
                'Enum "%s": symbols "%s" and "%s" both map to the case name "%s"',
                $className,
                $symbolByCase[$caseName],
                $value,
                $caseName
            ));
        }
        $symbolByCase[$caseName] = $value;

        $enum->addStmt(
            $this->factory->enumCase($caseName)->setValue($value)
        );
    }

    return $this->factory->namespace($phpNamespace)
        ->addStmt($enum)
        ->getNode();
}
|
|
||
/**
 * Maps an Avro schema to the native PHP type declaration used in generated
 * code: primitives to scalar types, arrays and maps to "array", records and
 * enums to their fully qualified generated class name, unions to a
 * "|"-joined type, and anything else to "mixed".
 */
private function avroTypeToPhp(AvroSchema $schema, string $phpNamespace): string
{
    if ($schema instanceof AvroPrimitiveSchema) {
        return $this->avroPrimitiveTypeToPhp($schema);
    }

    if ($schema instanceof AvroArraySchema || $schema instanceof AvroMapSchema) {
        return 'array';
    }

    if ($schema instanceof AvroRecordSchema || $schema instanceof AvroEnumSchema) {
        return '\\'.$phpNamespace.'\\'.ucwords($schema->name());
    }

    if ($schema instanceof AvroUnionSchema) {
        return $this->unionToPhp($schema, $phpNamespace);
    }

    return 'mixed';
}
|
|
||
/**
 * Translates an Avro primitive type into the matching PHP scalar type name.
 *
 * @throws AvroCodeGeneratorException when the primitive type has no mapping
 */
private function avroPrimitiveTypeToPhp(AvroPrimitiveSchema $primitiveSchema): string
{
    // Lookup table from Avro primitive type name to PHP type name.
    $phpTypeByAvroType = [
        AvroSchema::NULL_TYPE => 'null',
        AvroSchema::BOOLEAN_TYPE => 'bool',
        AvroSchema::INT_TYPE => 'int',
        AvroSchema::LONG_TYPE => 'int',
        AvroSchema::FLOAT_TYPE => 'float',
        AvroSchema::DOUBLE_TYPE => 'float',
        AvroSchema::STRING_TYPE => 'string',
        AvroSchema::BYTES_TYPE => 'string',
    ];

    $avroType = $primitiveSchema->type();

    return $phpTypeByAvroType[$avroType]
        ?? throw new AvroCodeGeneratorException("Unknown primitive type: ".$avroType);
}
|
|
||
/**
 * Renders an Avro union as a PHP union type: every member is mapped to its
 * native PHP type, duplicates are removed, and the rest is joined with "|".
 */
private function unionToPhp(AvroUnionSchema $union, string $phpNamespace): string
{
    $memberTypes = array_map(
        fn (AvroSchema $member): string => $this->avroTypeToPhp($member, $phpNamespace),
        $union->schemas()
    );

    return implode('|', array_unique($memberTypes));
}
|
|
||
/**
 * Prepares a default value for the php-parser builders: arrays are converted
 * to an AST value node through the factory, any other value is returned as is.
 */
private function buildDefault(mixed $value): mixed
{
    return is_array($value) ? $this->factory->val($value) : $value;
}
|
|
||
/**
 * Gives the PHPDoc type for schemas whose shape PHP's native types cannot
 * express (arrays, maps, and unions containing them). Returns null when the
 * native type declaration already says everything.
 */
private function avroTypeToPhpDoc(AvroSchema $schema, string $phpNamespace): ?string
{
    if ($schema instanceof AvroArraySchema) {
        $itemType = $this->avroTypeToPhpDocInner($schema->items(), $phpNamespace);

        return 'list<'.$itemType.'>';
    }

    if ($schema instanceof AvroMapSchema) {
        $valueType = $this->avroTypeToPhpDocInner($schema->values(), $phpNamespace);

        return 'array<string, '.$valueType.'>';
    }

    if ($schema instanceof AvroUnionSchema) {
        return $this->unionToPhpDoc($schema, $phpNamespace);
    }

    return null;
}
|
|
||
/**
 * Renders the PHPDoc type of a schema that appears inside another PHPDoc
 * type (array items, map values, union members). Unlike avroTypeToPhpDoc(),
 * this always returns a string.
 *
 * Union members are rendered recursively through this method rather than
 * through the native-type mapping, so a nested array or map keeps its generic
 * form (e.g. "list<string>|null") instead of collapsing to a bare "array".
 */
private function avroTypeToPhpDocInner(AvroSchema $schema, string $phpNamespace): string
{
    return match (true) {
        $schema instanceof AvroPrimitiveSchema => $this->avroPrimitiveTypeToPhp($schema),
        $schema instanceof AvroArraySchema => 'list<'.$this->avroTypeToPhpDocInner($schema->items(), $phpNamespace).'>',
        $schema instanceof AvroMapSchema => 'array<string, '.$this->avroTypeToPhpDocInner($schema->values(), $phpNamespace).'>',
        $schema instanceof AvroRecordSchema, $schema instanceof AvroEnumSchema => '\\'.$phpNamespace.'\\'.ucwords($schema->name()),
        $schema instanceof AvroUnionSchema => implode('|', array_unique(array_map(
            fn (AvroSchema $member): string => $this->avroTypeToPhpDocInner($member, $phpNamespace),
            $schema->schemas()
        ))),
        default => 'mixed',
    };
}
|
|
||
/**
 * Builds the PHPDoc type for a union. Only unions with at least one array or
 * map member get one, since those are the members whose shape the native
 * type cannot express; for every other union null is returned.
 */
private function unionToPhpDoc(AvroUnionSchema $union, string $phpNamespace): ?string
{
    $memberDocs = [];
    $needsDoc = false;

    foreach ($union->schemas() as $member) {
        $isContainer = $member instanceof AvroArraySchema || $member instanceof AvroMapSchema;
        $needsDoc = $needsDoc || $isContainer;

        // Containers get their generic PHPDoc form; other members their native type.
        $memberDocs[] = $isContainer
            ? $this->avroTypeToPhpDocInner($member, $phpNamespace)
            : $this->avroTypeToPhp($member, $phpNamespace);
    }

    return $needsDoc ? implode('|', array_unique($memberDocs)) : null;
}
| } | ||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using indented flexible heredocs with empty lines can be fragile. In PHP, every line within the heredoc must have at least the same indentation as the closing identifier. If the 'empty' lines (81, 83, 85) do not contain the exact number of spaces as the closing
`PHP;` tag, a `ParseError` will occur. It is safer to use a non-indented heredoc or ensure the empty lines are properly padded, though the former is much more maintainable.