Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions python/google/protobuf/internal/json_format_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,41 @@ def testParseEnumValue(self):
'for enum type protobuf_unittest.TestAllTypes.NestedEnum.',
json_format.Parse, '{"optionalNestedEnum": 12345}', message)

def testParseUnknownEnumStringValueProto2(self):
message = json_format_pb2.TestNumbers()
# Note that in src/google/protobuf/util/json_format.proto::TestNumbers
# has a field 'optional MyType a = 1', which is different
# from the test below that are using
# src/google/protobuf/util/json_format_proto3.proto::TestMessage.
text = '{"a": "UNKNOWN_STRING_VALUE"}'

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious why the key is a here instead of enumValue like in other tests?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I verified that this is OK and added comment.

    # Note that in src/google/protobuf/util/json_format.proto::TestNumbers
    # has a field 'optional MyType a = 1', which is different
    # from the test below that are using
    # src/google/protobuf/util/json_format_proto3.proto::TestMessage.

json_format.Parse(text, message, ignore_unknown_fields=True)
# In proto2 we can see that the field was not set at all.
self.assertFalse(message.HasField("a"))

def testParseErrorCorrectCatchForUnknownEnumValue(self):
message = json_format_pb2.TestNumbers()
self.assertRaisesRegexp(
json_format.ParseError,
'Invalid enum value',
json_format.Parse, '{"a": "UNKNOWN_STRING_VALUE"}', message)

def testParseUnknownEnumStringValueProto3(self):
message = json_format_proto3_pb2.TestMessage()
text = '{"enumValue": "UNKNOWN_STRING_VALUE"}'
json_format.Parse(text, message, ignore_unknown_fields=True)
# In proto3, there is no difference between the default value and 0.
self.assertEqual(message.enum_value, 0)

# Note that in noom-contracts we banned the repeated enum fields, so this test
# is not adding value for Noom's use of this library.
# Still, we are testing that the behavior here is consistent with what
# binary serialization would do.
def testParseUnknownEnumStringValueRepeatedProto3(self):
message = json_format_proto3_pb2.TestMessage()
text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO", "BAR"]}'

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't an array of enums banned?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question :)
We banned the array of enums in noom-contracts to avoid people getting confused with this behavior. The array length is 2 although there were 3 fields (one of them unknown).

In this repository (protobuf) I am still testing that the behavior that we have with repeated enums is consistent with the behavior in the binary serialization (regardless of the fact that the behavior is confusing).

I'll add the comment in the code.

json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertEquals(len(message.repeated_enum_value), 2)

def testBytes(self):
message = json_format_proto3_pb2.TestMessage()
# Test url base64
Expand Down
56 changes: 45 additions & 11 deletions python/google/protobuf/json_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from operator import methodcaller
import re
import sys
from contextlib import contextmanager

from google.protobuf import descriptor
from google.protobuf import symbol_database
Expand Down Expand Up @@ -116,6 +117,31 @@ class ParseError(Error):
"""Thrown in case of parsing error."""


class _UnknownEnumStringValueParseError(ParseError):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this class inherit from ParseError (it looks like it does)? So we don't break code that tries to catch ParseError

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. Added a comment about that.

Although I think the implementation will never leak _UnknownEnumStringValueParseError, it still gets renamed to ParseError in _ConvertFieldValuePair.. but I wouldn't rely on that, the code is very hard to follow :) It's safer if I just inherit and then I know I didn't change any public interface

"""
Thrown if an unknown enum string value is encountered.
We inherit ParseError to avoid breaking the code that might expect ParseError.
"""


@contextmanager
def _MaybeSuppressUnknownEnumStringValueParseError(should_suppress: bool):
"""
This context manager will suppress _UnknownEnumStringValueParseError if the should_suppress is set to true. Usage:

with _MaybeSuppressUnknownEnumStringValueParseError(True):
run_some_code_that_might_throw()
"""
try:
yield
except _UnknownEnumStringValueParseError as ex:
if should_suppress:
# Here we suppress _UnknownEnumStringValueParseError.
pass
else:
raise


def MessageToJson(
message,
including_default_value_fields=False,
Expand Down Expand Up @@ -598,8 +624,9 @@ def _ConvertFieldValuePair(self, js, message):
if item is None:
raise ParseError('null is not allowed to be used as an element'
' in a repeated field.')
getattr(message, field.name).append(
_ConvertScalarFieldValue(item, field))
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
getattr(message, field.name).append(
_ConvertScalarFieldValue(item, field))
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
if field.is_extension:
sub_message = message.Extensions[field]
Expand All @@ -608,10 +635,11 @@ def _ConvertFieldValuePair(self, js, message):
sub_message.SetInParent()
self.ConvertMessage(value, sub_message)
else:
if field.is_extension:
message.Extensions[field] = _ConvertScalarFieldValue(value, field)
else:
setattr(message, field.name, _ConvertScalarFieldValue(value, field))
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
if field.is_extension:
message.Extensions[field] = _ConvertScalarFieldValue(value, field)
else:
setattr(message, field.name, _ConvertScalarFieldValue(value, field))
except ParseError as e:
if field and field.containing_oneof is None:
raise ParseError('Failed to parse {0} field: {1}.'.format(name, e))
Expand Down Expand Up @@ -698,7 +726,8 @@ def _ConvertStructMessage(self, value, message):
def _ConvertWrapperMessage(self, value, message):
"""Convert a JSON representation into Wrapper message."""
field = message.DESCRIPTOR.fields_by_name['value']
setattr(message, 'value', _ConvertScalarFieldValue(value, field))
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
setattr(message, 'value', _ConvertScalarFieldValue(value, field))

def _ConvertMapFieldValue(self, value, message, field):
"""Convert map field value for a message map field.
Expand All @@ -718,13 +747,15 @@ def _ConvertMapFieldValue(self, value, message, field):
key_field = field.message_type.fields_by_name['key']
value_field = field.message_type.fields_by_name['value']
for key in value:
key_value = _ConvertScalarFieldValue(key, key_field, True)
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
key_value = _ConvertScalarFieldValue(key, key_field, True)
if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
self.ConvertMessage(value[key], getattr(
message, field.name)[key_value])
else:
getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
value[key], value_field)
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
value[key], value_field)


def _ConvertScalarFieldValue(value, field, require_str=False):
Expand All @@ -740,6 +771,7 @@ def _ConvertScalarFieldValue(value, field, require_str=False):

Raises:
ParseError: In case of convert problems.
_UnknownEnumStringValueParseError: If unknown enum string value is encountered during parsing.
"""
if field.cpp_type in _INT_TYPES:
return _ConvertInteger(value)
Expand Down Expand Up @@ -770,7 +802,9 @@ def _ConvertScalarFieldValue(value, field, require_str=False):
number = int(value)
enum_value = field.enum_type.values_by_number.get(number, None)
except ValueError:
raise ParseError('Invalid enum value {0} for enum type {1}.'.format(
# The ValueError will be raised by the conversion to int.
# That means that here we know that we have an unknown enum string value.
raise _UnknownEnumStringValueParseError('Invalid enum value {0} for enum type {1}.'.format(
value, field.enum_type.full_name))
if enum_value is None:
if field.file.syntax == 'proto3':
Expand Down