From 290e414b66f1d19359bdeb022de39b21a39803f1 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Fri, 8 Oct 2021 15:17:15 -0400 Subject: [PATCH 01/12] loosen validation to support unrecognized enum values --- .../naptime/courier/CourierFormats.scala | 1 - .../naptime/courier/CourierSerializer.scala | 7 +- .../courier/validation/CoercionMode.scala | 16 + .../courier/validation/RequiredMode.scala | 16 + .../ValidateDataAgainstSchema.scala | 493 ++++++++++++++++++ .../validation/ValidationOptions.scala | 14 + .../naptime/courier/TestArrayOfEnum.courier | 5 + .../coursera/naptime/courier/TestEnum.courier | 6 + .../naptime/courier/TestWrappedEnum.courier | 5 + .../naptime/courier/CourierFormatsTest.scala | 47 ++ 10 files changed, 606 insertions(+), 4 deletions(-) create mode 100644 naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala create mode 100644 naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala create mode 100644 naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala create mode 100644 naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala create mode 100644 naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestArrayOfEnum.courier create mode 100644 naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestEnum.courier create mode 100644 naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestWrappedEnum.courier diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala index d06aa882..9467d1c5 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala @@ -17,7 +17,6 @@ package org.coursera.naptime.courier import java.io.IOException -import 
java.time.Clock import com.linkedin.data.ByteString import com.linkedin.data.DataList diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierSerializer.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierSerializer.scala index 483753bc..f501a6a2 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierSerializer.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierSerializer.scala @@ -21,14 +21,14 @@ import com.linkedin.data.codec.JacksonDataCodec import com.linkedin.data.codec.TextDataCodec import com.linkedin.data.schema.DataSchema import com.linkedin.data.schema.TyperefDataSchema -import com.linkedin.data.schema.validation.ValidateDataAgainstSchema -import com.linkedin.data.schema.validation.ValidationOptions import com.linkedin.data.schema.validation.ValidationResult import com.linkedin.data.schema.validator.DataSchemaAnnotationValidator import com.linkedin.data.template.DataTemplate import com.linkedin.data.template.UnionTemplate import org.coursera.courier.templates.DataTemplates.DataConversion import org.coursera.courier.templates.DataValidationException +import org.coursera.naptime.courier.validation.ValidateDataAgainstSchema +import org.coursera.naptime.courier.validation.ValidationOptions import org.coursera.pegasus.TypedDefinitionCodec import scala.reflect.ClassTag @@ -97,7 +97,7 @@ object CourierSerializer { } } - private[this] val recordValidationOptions = new ValidationOptions() + private[this] val recordValidationOptions = ValidationOptions() class TemplateBuilder[T <: DataTemplate[_ <: AnyRef]](private val clazz: Class[T]) { private[this] val companionInstance = companion(clazz) @@ -122,6 +122,7 @@ object CourierSerializer { schema, recordValidationOptions, annotationValidator) + if (!validationResult.isValid) { Left(validationResult) } else { diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala 
b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala new file mode 100644 index 00000000..fb59816d --- /dev/null +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala @@ -0,0 +1,16 @@ +package org.coursera.naptime.courier.validation + +import com.linkedin.data.schema.validation.{CoercionMode => PegasusCoercionMode} + +sealed trait CoercionMode { + def toPegasus: PegasusCoercionMode +} + +object CoercionMode { + case object NORMAL extends CoercionMode { + override def toPegasus: PegasusCoercionMode = PegasusCoercionMode.NORMAL + } + case object STRING_TO_PRIMITIVE extends CoercionMode { + override def toPegasus: PegasusCoercionMode = PegasusCoercionMode.STRING_TO_PRIMITIVE + } +} diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala new file mode 100644 index 00000000..1136654c --- /dev/null +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala @@ -0,0 +1,16 @@ +package org.coursera.naptime.courier.validation + +import com.linkedin.data.schema.validation.{RequiredMode => PegasusRequiredMode} + +sealed trait RequiredMode { + def toPegasus: PegasusRequiredMode +} + +object RequiredMode { + case object CAN_BE_ABSENT_IF_HAS_DEFAULT extends RequiredMode { + override def toPegasus: PegasusRequiredMode = PegasusRequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT + } + case object FIXUP_ABSENT_WITH_DEFAULT extends RequiredMode { + override def toPegasus: PegasusRequiredMode = PegasusRequiredMode.FIXUP_ABSENT_WITH_DEFAULT + } +} diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala new file mode 100644 index 00000000..a39043fc --- /dev/null +++ 
b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala @@ -0,0 +1,493 @@ +package org.coursera.naptime.courier.validation + +import java.math.BigDecimal +import java.util +import java.util.Collections + +import com.linkedin.data.ByteString +import com.linkedin.data.Data +import com.linkedin.data.DataList +import com.linkedin.data.DataMap +import com.linkedin.data.element.DataElement +import com.linkedin.data.element.MutableDataElement +import com.linkedin.data.element.SimpleDataElement +import com.linkedin.data.it.IterationOrder +import com.linkedin.data.it.ObjectIterator +import com.linkedin.data.message.Message +import com.linkedin.data.message.MessageList +import com.linkedin.data.schema.DataSchema +import com.linkedin.data.schema.DataSchemaConstants +import com.linkedin.data.schema.FixedDataSchema +import com.linkedin.data.schema.RecordDataSchema +import com.linkedin.data.schema.TyperefDataSchema +import com.linkedin.data.schema.UnionDataSchema +import com.linkedin.data.schema.validation.ValidationResult +import com.linkedin.data.schema.validator.Validator +import com.linkedin.data.schema.validator.ValidatorContext +import com.linkedin.data.schema.validation.{ValidationOptions => PegasusValidationOptions} + +import scala.collection.JavaConverters._ + +object ValidateDataAgainstSchema { + + val _primitiveTypeToClassMap = Map( + DataSchema.Type.INT -> classOf[Integer], + DataSchema.Type.LONG -> classOf[java.lang.Long], + DataSchema.Type.FLOAT -> classOf[java.lang.Float], + DataSchema.Type.DOUBLE -> classOf[java.lang.Double], + DataSchema.Type.STRING -> classOf[java.lang.String], + DataSchema.Type.BOOLEAN -> classOf[java.lang.Boolean], + DataSchema.Type.NULL -> classOf[com.linkedin.data.Null]) + + def validate( + obj: AnyRef, + schema: DataSchema, + options: ValidationOptions, + validator: Validator): ValidationResult = + validate(new SimpleDataElement(obj, schema), options, validator) + + def validate(obj: AnyRef, 
schema: DataSchema, options: ValidationOptions): ValidationResult = + validate(obj, schema, options, null) + + def validate( + element: DataElement, + options: ValidationOptions, + validator: Validator): ValidationResult = { + val state = new State(options, validator) + state.validate(element) + state + } + + private class State(options: ValidationOptions, validator: Validator) extends ValidationResult { + + private var _fixed: AnyRef = null + private var _hasFixupReadOnlyError = false + private var _hasFix: Boolean = false + private var _valid: Boolean = true + private val _messages: MessageList[Message] = new MessageList[Message]() + private val context: Context = if (validator == null) { + null + } else { + new Context() + } + + override def hasFix: Boolean = _hasFix + + override def getMessages: util.Collection[Message] = Collections.unmodifiableList(_messages) + + override def hasFixupReadOnlyError: Boolean = _hasFixupReadOnlyError + + override def getFixed: AnyRef = _fixed + + override def isValid: Boolean = _valid + + def validate(element: DataElement): Unit = validateIterative(element) + + // maybe don't need this? + def validateIterative(element: DataElement): Unit = { + _fixed = element.getValue + val it = new ObjectIterator(element, IterationOrder.POST_ORDER) + var nextElement: DataElement = it.next() + while (nextElement != null) { + // do stuff here + val nextElementSchema = nextElement.getSchema + if (nextElementSchema != null) { + validate(nextElement, nextElementSchema, nextElement.getValue) + } + nextElement = it.next() + } + } + + def validate(element: DataElement, schema: DataSchema, obj: AnyRef): AnyRef = { + // can we do dynamic dispatch here? 
+ val fixed: AnyRef = schema.getType match { + case DataSchema.Type.ARRAY => + validateArray(element, obj) + case DataSchema.Type.BYTES => + validateBytes(element, obj) + case DataSchema.Type.ENUM => validateEnum(element, obj) + case DataSchema.Type.FIXED => + validateFixed( + element, + schema + .asInstanceOf[FixedDataSchema], + obj) + case DataSchema.Type.MAP => validateMap(element, obj) + case DataSchema.Type.RECORD => + validateRecord( + element, + schema + .asInstanceOf[RecordDataSchema], + obj) + case DataSchema.Type.TYPEREF => + validateTyperef( + element, + schema + .asInstanceOf[TyperefDataSchema], + obj) + case DataSchema.Type.UNION => + validateUnion( + element, + schema + .asInstanceOf[UnionDataSchema], + obj) + case _ => validatePrimitive(element, schema, obj) + } + if (fixed ne obj) { // identity: Scala != equates boxed 1L and 1 + fixValue(element, fixed) + } + if (validator != null && element.getSchema == schema) { + val validatorElement = if (fixed eq obj) { + element + } else { + element match { + case mutableElement: MutableDataElement => + mutableElement.setValue(fixed) + element + case _ => + new SimpleDataElement(fixed, element.getName, schema, element.getParent) + } + } + context._el = validatorElement + validator.validate(context) + } + fixed + } + + protected def validatePrimitive( + element: DataElement, + schema: DataSchema, + obj: AnyRef): AnyRef = { + val primitiveClass = _primitiveTypeToClassMap(schema.getType) + var fixed = obj + if (obj.getClass != primitiveClass) { + fixed = fixupPrimitive(schema, obj) + if (fixed eq obj) { + addMessage( + element, + "%1$s cannot be coerced to %2$s", + String.valueOf(obj), + primitiveClass.getSimpleName) + } + } + fixed + } + + protected def fixupPrimitive(schema: DataSchema, obj: AnyRef): AnyRef = { + val schemaType = schema.getType + try schemaType match { + case DataSchema.Type.INT => + if (obj.isInstanceOf[Number]) { + new java.lang.Integer(obj.asInstanceOf[Number].intValue) + } else if ((obj.getClass eq classOf[String]) && +
(options.coercionMode == CoercionMode.STRING_TO_PRIMITIVE)) { + new java.lang.Integer(new BigDecimal(obj.asInstanceOf[String]).intValue) + } else { + obj + } + case DataSchema.Type.LONG => + if (obj.isInstanceOf[Number]) { + new java.lang.Long(obj.asInstanceOf[Number].longValue) + } else if ((obj.getClass eq classOf[String]) && + (options.coercionMode == CoercionMode.STRING_TO_PRIMITIVE)) { + new java.lang.Long(new BigDecimal(obj.asInstanceOf[String]).longValue) + } else { + obj + } + case DataSchema.Type.FLOAT => + if (obj.isInstanceOf[Number]) { + new java.lang.Float(obj.asInstanceOf[Number].floatValue) + } else if ((obj.getClass eq classOf[String]) && + (options.coercionMode == CoercionMode.STRING_TO_PRIMITIVE)) { + new java.lang.Float( + new BigDecimal( + obj + .asInstanceOf[String]).floatValue) + } else { + obj + } + case DataSchema.Type.DOUBLE => + if (obj.isInstanceOf[Number]) { + new java.lang.Double(obj.asInstanceOf[Number].doubleValue) + } else if ((obj.getClass eq classOf[String]) && + (options.coercionMode == CoercionMode.STRING_TO_PRIMITIVE)) { + new java.lang.Double( + new BigDecimal( + obj + .asInstanceOf[String]).doubleValue) + } else { + obj + } + case DataSchema.Type.BOOLEAN => + if ((obj.getClass eq classOf[String]) && + (options.coercionMode == CoercionMode.STRING_TO_PRIMITIVE)) { + val string = obj.asInstanceOf[String] + if ("true".equalsIgnoreCase(string)) + java.lang.Boolean.TRUE + else if ("false".equalsIgnoreCase(string)) java.lang.Boolean.FALSE + else obj + } else { + obj + } + case DataSchema.Type.STRING | DataSchema.Type.NULL | _ => + obj + } catch { + case _: NumberFormatException => + obj + } + } + + protected def validateUnion( + element: DataElement, + schema: UnionDataSchema, + obj: AnyRef): AnyRef = { + obj match { + case Data.NULL => + if (schema.getType(DataSchemaConstants.NULL_TYPE) == null) { + addMessage(element, "null is not a member type of union %1$s", schema) + } + case map: DataMap => + if (map.size != 1) { + 
addMessage(element, "DataMap should have exactly one entry for a union type") + } else { + val entry = map.entrySet.iterator.next + val key = entry.getKey + val memberSchema = schema.getType(key) + if (memberSchema == null) { + addMessage(element, "\"%1$s\" is not a member type of union %2$s", key, schema) + } + } + case _ => addMessage(element, "union type is not backed by a DataMap or null") + } + obj + } + + protected def validateTyperef( + element: DataElement, + schema: TyperefDataSchema, + obj: AnyRef): AnyRef = validate(element, schema.getRef, obj) + + def validateArray(element: DataElement, obj: AnyRef): AnyRef = { + if (!obj.isInstanceOf[DataList]) { + addMessage(element, "array type is not backed by a DataList") + } + obj + } + + def validateBytes(element: DataElement, obj: AnyRef): AnyRef = { + var fixed: AnyRef = obj + val clazz = obj.getClass + if (clazz == classOf[String]) { + val str = obj.asInstanceOf[String] + var error = false + val bytes = ByteString.copyAvroString(str, true) + if (bytes != null) { + _hasFix = true + fixed = bytes + } else { + error = true + } + if (error) { + addMessage(element, "\"%1$s\" is not a valid string representation of bytes", str) + } + } else if (clazz != classOf[ByteString]) { + addMessage(element, "bytes type is not backed by a String or ByteString") + } + fixed + } + + protected def validateMap(element: DataElement, obj: AnyRef): AnyRef = { + if (!obj.isInstanceOf[DataMap]) { + addMessage(element, "map type is not backed by a DataMap") + } + obj + } + + def validateEnum(element: DataElement, obj: AnyRef): AnyRef = { + if (!obj.isInstanceOf[String]) { + addMessage(element, "enum type is not backed by a String") + } + obj + } + + def validateFixed(element: DataElement, schema: FixedDataSchema, obj: AnyRef): AnyRef = { + var fixed = obj + val clazz = obj.getClass + val size = schema.getSize + if (clazz == classOf[String]) { + val str = obj.asInstanceOf[String] + var error = false + if (str.length != size) { + 
+ addMessage( + element, + "\"%1$s\" length (%2$d) is inconsistent with expected fixed size of %3$d", + str, + new java.lang.Integer(str.length), + new java.lang.Integer(size)) + } else { + val bytes = ByteString.copyAvroString(str, true) + if (bytes != null) { + _hasFix = true + fixed = bytes + } else { + error = true + } + } + if (error) { + addMessage(element, "\"%1$s\" is not a valid string representation of bytes", str) + } + } else if (clazz == classOf[ByteString]) { + val bytes = obj.asInstanceOf[ByteString] + if (bytes.length != size) { + addMessage( + element, + "\"%1$s\" length (%2$d) is inconsistent with expected fixed size of %3$d", + bytes, + new Integer(bytes.length), + new Integer(size)) + } + } else { + addMessage(element, "fixed type is not backed by a String or ByteString") + } + fixed + } + + protected def validateRecord( + element: DataElement, + schema: RecordDataSchema, + obj: AnyRef): AnyRef = { + obj match { + case map: DataMap => + for (field <- schema.getFields.asScala) { + if (!isFieldOptional(field, element) && !map.containsKey(field.getName)) { + options.requiredMode match { + case RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT => + if (field.getDefault == null) { + addIsRequiredMessage( + element, + field, + "field is required but not found and has no default value") + } + case RequiredMode.FIXUP_ABSENT_WITH_DEFAULT => + val defaultValue = field.getDefault + if (defaultValue == null) { + addIsRequiredMessage( + element, + field, + "field is required but not found and has no default value") + } else if (map.isReadOnly) { + _hasFix = true + _hasFixupReadOnlyError = true + addIsRequiredMessage( + element, + field, + "field is required and has default value but not found and cannot be fixed because DataMap of record is read-only") + + } else { + _hasFix = true + map.put(field.getName, defaultValue) + } + + } + } + } + case _ => addMessage(element, "record type is not backed by a DataMap") + } + obj + } + + private def isFieldOptional(field:
RecordDataSchema.Field, element: DataElement): Boolean = { + if (field.getOptional) { + true + } else { + options.treatOptional + .evaluate(new SimpleDataElement(null, field.getName, field.getType, element)) + } + } + + protected def addIsRequiredMessage( + element: DataElement, + field: RecordDataSchema.Field, + msg: String): Unit = { + _messages.add(new Message(element.path(field.getName), msg)) + _valid = false + } + + def addMessage(element: DataElement, format: String): Unit = { + _messages.add(new Message(element.path(), format)) + _valid = false + } + + def addMessage(element: DataElement, format: String, arg: AnyRef): Unit = { + _messages.add(new Message(element.path(), format, arg)) + _valid = false + } + + def addMessage(element: DataElement, format: String, arg1: AnyRef, arg2: AnyRef): Unit = { + _messages.add(new Message(element.path(), format, arg1, arg2)) + _valid = false + } + + def addMessage( + element: DataElement, + format: String, + arg1: AnyRef, + arg2: AnyRef, + arg3: AnyRef): Unit = { + _messages.add(new Message(element.path(), format, arg1, arg2, arg3)) + _valid = false + } + + def fixValue(element: DataElement, fixed: AnyRef): Unit = { + _hasFix = true + val parentElement = element.getParent + if (parentElement == null) { + _fixed = fixed + } else { + val parent = parentElement.getValue + if (parent.getClass == classOf[DataMap]) { + val map = parent.asInstanceOf[DataMap] + if (map.isReadOnly) { + _hasFixupReadOnlyError = true + addMessage( + element, + "cannot be fixed because DataMap backing %1$s type is read-only", + parentElement.getSchema.getUnionMemberKey) + } else { + map.put(element.getName.asInstanceOf[String], fixed) + } + } else if (parent.getClass == classOf[DataList]) { + val list = parent.asInstanceOf[DataList] + if (list.isReadOnly) { + _hasFixupReadOnlyError = true + addMessage( + element, + "cannot be fixed because DataList backing an array type is read-only") + } else { + list.set(element.getName.asInstanceOf[Integer], 
fixed) + } + } + } + } + + private class Context extends ValidatorContext { + var _el: DataElement = null + + override def addResult(message: Message): Unit = { + _messages.add(message) + if (message.isError) { + _valid = false + } + } + + override def validationOptions(): PegasusValidationOptions = + options.toPegasus + + override def dataElement(): DataElement = _el + + override def setHasFix(value: Boolean): Unit = _hasFix = value + + override def setHasFixupReadOnlyError(value: Boolean): Unit = _hasFixupReadOnlyError = value + } + } +} diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala new file mode 100644 index 00000000..392e5290 --- /dev/null +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala @@ -0,0 +1,14 @@ +package org.coursera.naptime.courier.validation + +import com.linkedin.data.it.Predicate +import com.linkedin.data.it.Predicates +import com.linkedin.data.schema.validation.{ValidationOptions => PegasusValidationOptions} + +case class ValidationOptions( + coercionMode: CoercionMode = CoercionMode.NORMAL, + requiredMode: RequiredMode = RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT, + treatOptional: Predicate = Predicates.alwaysFalse()) { + def toPegasus: PegasusValidationOptions = { + new PegasusValidationOptions(requiredMode.toPegasus, coercionMode.toPegasus) + } +} diff --git a/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestArrayOfEnum.courier b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestArrayOfEnum.courier new file mode 100644 index 00000000..a9abc295 --- /dev/null +++ b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestArrayOfEnum.courier @@ -0,0 +1,5 @@ +namespace org.coursera.naptime.courier + +record TestArrayOfEnum { + value: array[TestEnum] +} diff --git 
a/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestEnum.courier b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestEnum.courier new file mode 100644 index 00000000..d41b79cf --- /dev/null +++ b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestEnum.courier @@ -0,0 +1,6 @@ +namespace org.coursera.naptime.courier + +enum TestEnum { + FIRST + SECOND +} diff --git a/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestWrappedEnum.courier b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestWrappedEnum.courier new file mode 100644 index 00000000..72157fe7 --- /dev/null +++ b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestWrappedEnum.courier @@ -0,0 +1,5 @@ +namespace org.coursera.naptime.courier + +record TestWrappedEnum { + value: TestEnum +} diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala index 7cfa09af..b9479182 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala @@ -16,6 +16,8 @@ import org.coursera.naptime.courier.Exceptions.ReadException import org.junit.Test import org.scalatest.junit.AssertionsForJUnit import play.api.libs.json.JsError +import play.api.libs.json.JsObject +import play.api.libs.json.JsString import play.api.libs.json.JsSuccess import play.api.libs.json.Json import play.api.libs.json.OFormat @@ -384,6 +386,51 @@ class CourierFormatsTest extends AssertionsForJUnit { assertThrows[ReadException](CourierFormats.bigDecimalToNumber(decimalTooBigForDouble)) assertThrows[ReadException](CourierFormats.bigDecimalToNumber(decimalTooSmallForDouble)) } + + @Test + def testEnum(): Unit = { + val reader = CourierFormats.enumerationFormat(TestEnum) + val enum = """ "FIRST" """ + 
assertResult(JsSuccess(TestEnum.FIRST))(Json.parse(enum).validate[TestEnum](reader)) + } + + @Test + def testUnknownEnum(): Unit = { + val reader = CourierFormats.enumerationFormat(TestEnum) + val enum = """ "FOO_BAR" """ + assertResult(JsSuccess(TestEnum.$UNKNOWN))(Json.parse(enum).validate[TestEnum](reader)) + } + + @Test + def testNestedEnum(): Unit = { + val reader = CourierFormats.recordTemplateFormats[TestWrappedEnum] + val recordOfEnum = """ {"value": "FIRST"} """ + assertResult(JsSuccess(TestWrappedEnum(TestEnum.FIRST)))( + Json.parse(recordOfEnum).validate[TestWrappedEnum](reader)) + } + + @Test + def deserializeUnknownNestedEnum(): Unit = { + val reader = CourierFormats.recordTemplateFormats[TestWrappedEnum] + val value = """ {"value": "FOO_BAR"} """ + assertResult(JsSuccess(TestWrappedEnum(TestEnum.$UNKNOWN)))( + Json.parse(value).validate[TestWrappedEnum](reader)) + } + + @Test + def serializeUnknownNestedEnum(): Unit = { + val writer = CourierFormats.recordTemplateFormats[TestWrappedEnum] + assertResult(JsObject(List("value" -> JsString("$UNKNOWN"))))( + writer.writes(TestWrappedEnum(TestEnum.$UNKNOWN))) + } + + @Test + def deserializeUnknownArrayOfEnum(): Unit = { + val reader = CourierFormats.recordTemplateFormats[TestArrayOfEnum] + val record = """ {"value": ["FOO_BAR"]} """ + assertResult(JsSuccess(TestArrayOfEnum(List(TestEnum.$UNKNOWN))))( + Json.parse(record).validate[TestArrayOfEnum](reader)) + } } object CourierFormatsTest { From 3a1752759df7f4838cc756bceb3d310b2b84fa35 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Wed, 20 Oct 2021 10:57:26 -0400 Subject: [PATCH 02/12] added tests for ValidateDataAgainstSchema. one is failing. 
--- .../ValidateDataAgainstSchema.scala | 2 +- .../ValidateDataAgainstSchemaTest.scala | 1236 +++++++++++++++++ 2 files changed, 1237 insertions(+), 1 deletion(-) create mode 100644 naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala index a39043fc..f8320f51 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala @@ -160,7 +160,7 @@ object ValidateDataAgainstSchema { obj: AnyRef): AnyRef = { val primitiveClass = _primitiveTypeToClassMap(schema.getType) var fixed = obj - if (obj.getClass != primitiveClass) { + if (obj.getClass ne primitiveClass) { fixed = fixupPrimitive(schema, obj) if (fixed eq obj) { addMessage( diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala new file mode 100644 index 00000000..acea707d --- /dev/null +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -0,0 +1,1236 @@ +package org.coursera.naptime.courier.validation + +import org.scalatest.junit.AssertionsForJUnit +import com.linkedin.data.element.DataElement +import com.linkedin.data.schema.validation.ValidationResult +import com.linkedin.data.schema.DataSchema +import com.linkedin.data.DataMap +import com.linkedin.data.schema.RecordDataSchema +import com.linkedin.data.schema.SchemaParser +import com.linkedin.data.ByteString +import org.junit.Test +import com.linkedin.data.message.Message +import com.linkedin.data.Data +import 
java.io.ByteArrayInputStream +import com.linkedin.data.codec.JacksonDataCodec + +import com.linkedin.data.DataComplex +import com.linkedin.data.DataList +import com.linkedin.data.element.DataElementUtil + +class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { + import ValidateDataAgainstSchemaTest._ + + def testCoercionValidation( + schemaText: String, + key: String, + inputs: List[(AnyRef, AnyRef)], + badObjects: List[AnyRef], + coercionMode: CoercionMode): Unit = { + val options = normalCoercionValidationOption.copy(coercionMode = coercionMode) + val schema = dataSchemaFromString(schemaText).asInstanceOf[RecordDataSchema] + assert(schema !== null) + val map = new DataMap + for (row <- inputs) { + map.put(key, row._1) + val result = validate(map, schema, options) + assert(result.isValid) + if (result.hasFix) { + val fixedMap = result.getFixed.asInstanceOf[DataMap] + assert(fixedMap.getClass eq classOf[DataMap]) + val fixed = fixedMap.get(key) + assert(fixed !== null) + val fixedClass = fixed.getClass + val goodClass = row._1.getClass + schema.getField(key).getType.getDereferencedType match { + case DataSchema.Type.BYTES | DataSchema.Type.FIXED => + // String to ByteString conversion check + assert(goodClass ne fixedClass) + assert(goodClass eq classOf[java.lang.String]) + assert(fixedClass eq classOf[ByteString]) + assert(fixed.asInstanceOf[ByteString].asAvroString === row._1) + + case DataSchema.Type.INT => + // convert numbers to Integer + assert(goodClass ne fixedClass) + assertAllowedClass(coercionMode, goodClass) + assert(fixedClass eq classOf[java.lang.Integer]) + + case DataSchema.Type.LONG => + // convert numbers to Long + assert(goodClass ne fixedClass) + assertAllowedClass(coercionMode, goodClass) + assert(fixedClass eq classOf[java.lang.Long]) + + case DataSchema.Type.FLOAT => + // convert numbers to Float + assert(goodClass ne fixedClass) + assertAllowedClass(coercionMode, goodClass) + assert(fixedClass eq classOf[java.lang.Float]) + + case 
DataSchema.Type.DOUBLE => + // convert numbers to Double + assert(goodClass ne fixedClass) + assertAllowedClass(coercionMode, goodClass) + assert(fixedClass eq classOf[java.lang.Double]) + + case DataSchema.Type.BOOLEAN => + if (coercionMode === CoercionMode.STRING_TO_PRIMITIVE) { + assert(goodClass ne fixedClass) + assert(goodClass === classOf[java.lang.String]) + assert(fixedClass eq classOf[java.lang.Boolean]) + } + + case DataSchema.Type.RECORD | DataSchema.Type.ARRAY | DataSchema.Type.MAP | + DataSchema.Type.UNION => + assert(goodClass eq fixedClass) + + case _ => + throw new IllegalStateException("unknown conversion") + } + assert(fixed === row._2) + } else assert(map eq result.getFixed) + } + for (bad <- badObjects) { + map.put(key, bad) + val result = validate(map, schema, options) + assert(!result.isValid) + assert(map eq result.getFixed) + } + } + + // Tests for CoercionMode.NORMAL + def testNormalCoercionValidation( + schemaText: String, + key: String, + inputs: List[(AnyRef, AnyRef)], + badObjects: List[AnyRef]): Unit = + testCoercionValidation(schemaText, key, inputs, badObjects, CoercionMode.NORMAL) + + // Tests for CoercionMode.STRING_TO_PRIMITIVE + def testStringToPrimitiveCoercionValidation( + schemaText: String, + key: String, + inputs: List[(AnyRef, AnyRef)], + badObjects: List[AnyRef]): Unit = + testCoercionValidation(schemaText, key, inputs, badObjects, CoercionMode.STRING_TO_PRIMITIVE) + + def testCoercionValidation( + schemaText: String, + key: String, + goodObjects: Seq[AnyRef], + badObjects: Seq[AnyRef], + options: ValidationOptions): Unit = { + val schema = dataSchemaFromString(schemaText).asInstanceOf[RecordDataSchema] + assert(schema != null) + val map = new DataMap + for (good <- goodObjects) { + map.put(key, good) + val result = validate(map, schema, options) + assert(result.isValid) + assert(!result.hasFix) + assert(map eq result.getFixed) + } + for (bad <- badObjects) { + map.put(key, bad) + val result = validate(map, schema, 
options) + assert(!result.isValid) + assert(map eq result.getFixed) + } + } + + @Test + def testStringValidation(): Unit = { + val goodObjects = List("a valid string") + val badObjects = List( + FALSE, + I1, + L1, + F1, + D1, + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + // There is no coercion for this type. + // Test with all coercion modes, result should be the same for all cases. + testCoercionValidation( + STRING_SCHEMA, + "bar", + goodObjects, + badObjects, + normalCoercionValidationOption) + testCoercionValidation( + STRING_SCHEMA, + "bar", + goodObjects, + badObjects, + stringToPrimitiveCoercionValidationOption) + } + + @Test + def testBooleanValidation(): Unit = { + val goodObjects = List(TRUE, FALSE) + val badObjects = Array( + I1, + L1, + F1, + D1, + new String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testCoercionValidation( + BOOLEAN_SCHEMA, + "bar", + goodObjects, + badObjects, + normalCoercionValidationOption) + } + + @Test + def testBooleanStringToPrimitiveFixupValidation(): Unit = { + val input = List( + (new String("true"), java.lang.Boolean.TRUE), + (new String("false"), java.lang.Boolean.FALSE)) + val badObjects = List(I1, L1, F1, D1, new String("abc"), new DataMap, new DataList) + testStringToPrimitiveCoercionValidation(BOOLEAN_SCHEMA, "bar", input, badObjects) + } + + @Test + def testIntegerNormalCoercionValidation(): Unit = { + val input = List( + (I1, I1), + (new java.lang.Integer(-1), new java.lang.Integer(-1)), + ( + new java.lang.Integer(java.lang.Integer.MAX_VALUE), + new java.lang.Integer(java.lang.Integer.MAX_VALUE)), + ( + new java.lang.Integer(java.lang.Integer.MAX_VALUE - 1), + new java.lang.Integer(java.lang.Integer.MAX_VALUE - 1)), + (new java.lang.Long(1), new Integer(1)), + (new java.lang.Float(1), new Integer(1)), + (new java.lang.Double(1), new Integer(1)) + ) + + val badObjects = List( + TRUE, + new java.lang.String("abc"), + 
ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testNormalCoercionValidation(INTEGER_SCHEMA, "bar", input, badObjects) + } + + @Test + def testIntegerStringToPrimitiveCoercionValidation(): Unit = { + val input = List( + (new java.lang.String("1"), new java.lang.Integer(1)), + (new java.lang.String("-1"), new java.lang.Integer(-1)), + (new java.lang.String("" + Integer.MAX_VALUE), new java.lang.Integer(Integer.MAX_VALUE)), + ( + new java.lang.String("" + (Integer.MAX_VALUE - 1)), + new java.lang.Integer(Integer.MAX_VALUE - 1)), + (new java.lang.String("1.5"), new java.lang.Integer(1)), + (new java.lang.String("-1.5"), new java.lang.Integer(-1)), + (new java.lang.Integer(1), new java.lang.Integer(1)), + (new java.lang.Integer(-1), new java.lang.Integer(-1)), + (new java.lang.Integer(Integer.MAX_VALUE), new java.lang.Integer(Integer.MAX_VALUE)), + ( + new java.lang.Integer(Integer.MAX_VALUE - 1), + new java.lang.Integer( + Integer.MAX_VALUE - + 1)), + (new java.lang.Long(1), new java.lang.Integer(1)), + (new java.lang.Float(1), new java.lang.Integer(1)), + (new java.lang.Double(1), new java.lang.Integer(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testStringToPrimitiveCoercionValidation(INTEGER_SCHEMA, "bar", input, badObjects) + } + + @Test + def testLongNormalCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"long\" } ] }" + val inputs = List( + (new java.lang.Long(1), new java.lang.Long(1)), + (new java.lang.Long(-1), new java.lang.Long(-1)), + (new java.lang.Integer(1), new java.lang.Long(1)), + (new java.lang.Float(1), new java.lang.Long(1)), + (new java.lang.Double(1), new java.lang.Long(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + 
ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testLongStringToPrimitiveCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"long\" } ] }" + val inputs = List( + (new java.lang.String("1"), new java.lang.Long(1)), + (new java.lang.String("-1"), new java.lang.Long(-1)), + (new java.lang.String("" + Long.MaxValue), new java.lang.Long(Long.MaxValue)), + (new java.lang.Long(1), new java.lang.Long(1)), + (new java.lang.Long(-1), new java.lang.Long(-1)), + (new java.lang.Integer(1), new java.lang.Long(1)), + (new java.lang.Float(1), new java.lang.Long(1)), + (new java.lang.Double(1), new java.lang.Long(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testFloatNormalCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"float\" } ] }" + val inputs = List( + (new java.lang.Float(1), new java.lang.Float(1)), + (new java.lang.Float(-1), new java.lang.Float(-1)), + (new java.lang.Integer(1), new java.lang.Float(1)), + (new java.lang.Long(1), new java.lang.Float(1)), + (new java.lang.Double(1), new java.lang.Float(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testFloatStringToPrimitiveCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"float\" } 
] }" + val inputs = List( + (new java.lang.String("1"), new java.lang.Float(1)), + (new java.lang.String("-1"), new java.lang.Float(-1)), + (new java.lang.String("1.01"), new java.lang.Float(1.01)), + (new java.lang.String("-1.01"), new java.lang.Float(-1.01)), + (new java.lang.String("" + Float.MaxValue), new java.lang.Float(Float.MaxValue)), + (new java.lang.Float(1), new java.lang.Float(1)), + (new java.lang.Float(1), new java.lang.Float(1)), + (new java.lang.Float(-1), new java.lang.Float(-1)), + (new java.lang.Integer(1), new java.lang.Float(1)), + (new java.lang.Long(1), new java.lang.Float(1)), + (new java.lang.Double(1), new java.lang.Float(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testDoubleNormalCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"double\" } ] }" + val inputs = List( + (new java.lang.Double(1), new java.lang.Double(1)), + (new java.lang.Double(-1), new java.lang.Double(-1)), + (new java.lang.Integer(1), new java.lang.Double(1)), + (new java.lang.Long(1), new java.lang.Double(1)), + (new java.lang.Float(1), new java.lang.Double(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testDoubleStringToPrimitiveCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"double\" } ] }" + val inputs = List( + (new java.lang.String("1"), new java.lang.Double(1)), + (new java.lang.String("-1"), new java.lang.Double(-1)), + (new 
java.lang.String("1.01"), new java.lang.Double(1.01)), + (new java.lang.String("-1.01"), new java.lang.Double(-1.01)), + (new java.lang.String("" + Double.MaxValue), new java.lang.Double(Double.MaxValue)), + (new java.lang.Double(1), new java.lang.Double(1)), + (new java.lang.Double(-1), new java.lang.Double(-1)), + (new java.lang.Integer(1), new java.lang.Double(1)), + (new java.lang.Long(1), new java.lang.Double(1)), + (new java.lang.Float(1), new java.lang.Double(1))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testBytesValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : \"bytes\" } ] }" + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.Integer(1), + new java.lang.Long(1), + new java.lang.Float(1), + new java.lang.Double(1), + new DataMap, + new DataList, + new java.lang.String("\u0100"), + new java.lang.String("ab\u0100c"), + new java.lang.String("ab\u0100c\u0200")) + val inputs = List( + (ByteString.copyAvroString("abc", false), ByteString.copyAvroString("abc", false)), + ("abc", ByteString.copyAvroString("abc", false))) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testFixedValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"fixed4\", \"type\" : \"fixed\", \"size\"" + + " : 4 } } ] }" + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.Integer(1), + new java.lang.Long(1), + new java.lang.Float(1), + new java.lang.Double(1), + new DataMap, + new DataList, + new java.lang.String, + "1", + 
"12", + "123", + "12345", + "\u0100", + "ab\u0100c", + "b\u0100c\u0200", + ByteString.empty, + ByteString.copyAvroString("1", false), + ByteString.copyAvroString("12", false), + ByteString.copyAvroString("123", false), + ByteString.copyAvroString("12345", false)) + val inputs = List( + ("abcd", ByteString.copyAvroString("abcd", false)), + ("\u0001\u0002\u0003\u0004", ByteString.copyAvroString("\u0001\u0002\u0003\u0004", false)), + (ByteString.copyAvroString("abcd", false), ByteString.copyAvroString("abcd", false)), + ( + ByteString.copyAvroString("\u0001\u0002\u0003\u0004", false), + ByteString.copyAvroString("\u0001\u0002\u0003\u0004", false))) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testEnumCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"fruits\", \"type\" : \"enum\", " + + "\"symbols\" : [ \"apple\", \"orange\", \"banana\" ] } } ] }" + val goodObjects = List( + new java.lang.String("apple"), + new java.lang.String("orange"), + new java.lang.String("banana")) + // There are no strings in the list of bad objects because all strings are accepted as valid + // enum values, regardless of schema. They are deserialized to $UNKNOWN. + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.Integer(1), + new java.lang.Long(1), + new java.lang.Float(1), + new java.lang.Double(1), + new DataMap, + new DataList) + // There is no coercion for this type. + // Test with all coercion validation options, result should be the same for all cases. 
+ testCoercionValidation( + schemaText, + "bar", + goodObjects, + badObjects, + normalCoercionValidationOption) + testCoercionValidation( + schemaText, + "bar", + goodObjects, + badObjects, + stringToPrimitiveCoercionValidationOption) + } + + @Test + def testArrayNormalCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" } } ] }" + val inputs = List( + (new DataList, new DataList), + (new DataList(asList(1)), new DataList(asList(1))), + (new DataList(asList(2, 3)), new DataList(asList(2, 3))), + (new DataList(asList(1L)), new DataList(asList(1))), + (new DataList(asList(1.0f)), new DataList(asList(1))), + (new DataList(asList(1.0)), new DataList(asList(1)))) + val badObjects = List( + new java.lang.Boolean(true), + new java.lang.Integer(1), + new java.lang.Long(1), + new java.lang.Float(1), + new java.lang.Double(1), + new java.lang.String, + new DataMap, + new DataList(asList(TRUE)), + new DataList(asList(new String("1"))), + new DataList(asList(new DataMap)), + new DataList(asList(new DataList)), + new DataList(asList(TRUE, I1)), + new DataList(asList(new Integer(1), TRUE))) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testArrayStringToPrimitiveCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" } } ] }" + val inputs = List( + (new DataList(asList("1")), new DataList(asList(1))), + (new DataList(asList("1", "2", "3")), new DataList(asList(1, 2, 3))), + (new DataList, new DataList), + (new DataList(asList(1)), new DataList(asList(1))), + (new DataList(asList(2, 3)), new DataList(asList(2, 3))), + (new DataList(asList(1L)), new DataList(asList(1))), + (new DataList(asList(1.0f)), new DataList(asList(1))), + (new DataList(asList(1.0)), new 
DataList(asList(1)))) + val badObjects = List( + TRUE, + I1, + L1, + F1, + D1, + new java.lang.String, + new DataMap, + new DataList(asList(TRUE)), + new DataList(asList(new DataMap)), + new DataList(asList(new DataList)), + new DataList(asList(TRUE, I1)), + new DataList(asList(I1, TRUE))) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testMapNormalCoercionValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" } } ] }" + val inputs = List( + (new DataMap, new DataMap), + (new DataMap(asMap("key1" -> 1)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1, "key2" -> 2)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), + (new DataMap(asMap("key1" -> 1L)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1.0)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1.0f)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1, "key2" -> 2L)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), + ( + new DataMap(asMap("key1" -> 1L, "key2" -> 2.0)), + new DataMap(asMap("key1" -> 1, "key2" -> 2)))) + val badObjects = List( + TRUE, + I1, + L1, + F1, + D1, + new java.lang.String, + new DataList, + new DataMap(asMap("key1" -> TRUE)), + new DataMap(asMap("key1" -> new java.lang.String("1"))), + new DataMap(asMap("key1" -> new DataMap)), + new DataMap(asMap("key1" -> new DataList))) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testMapStringToPrimitiveValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + + "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" } } ] }" + val inputs = List( + (new DataMap(asMap("key1" -> "1")), new DataMap(asMap("key1" -> 1))), + ( + new DataMap(asMap("key1" -> "1", "key2" -> "2")), + new 
DataMap(asMap("key1" -> 1, "key2" -> 2))), + (new DataMap, new DataMap), + (new DataMap(asMap("key1" -> 1)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1, "key2" -> 2)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), + (new DataMap(asMap("key1" -> 1L)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1.0)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1.0f)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1, "key2" -> 2L)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), + ( + new DataMap(asMap("key1" -> 1L, "key2" -> 2.0)), + new DataMap(asMap("key1" -> 1, "key2" -> 2)))) + val badObjects = List( + TRUE, + I1, + L1, + F1, + D1, + new java.lang.String, + new DataList, + new DataMap(asMap("key1" -> TRUE)), + new DataMap(asMap("key1" -> new DataMap)), + new DataMap(asMap("key1" -> new DataList))) + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + } + + @Test + def testUnionNormalCoercionValidation(): Unit = { + val inputs = List( + (Data.NULL, Data.NULL), + (new DataMap(asMap("int" -> 1)), new DataMap(asMap("int" -> 1))), + (new DataMap(asMap("string" -> "x")), new DataMap(asMap("string" -> "x"))), + (new DataMap(asMap("Fruits" -> "APPLE")), new DataMap(asMap("Fruits" -> "APPLE"))), + (new DataMap(asMap("Fruits" -> "ORANGE")), new DataMap(asMap("Fruits" -> "ORANGE"))), + (new DataMap(asMap("int" -> 1L)), new DataMap(asMap("int" -> 1))), + (new DataMap(asMap("int" -> 1.0f)), new DataMap(asMap("int" -> 1))), + (new DataMap(asMap("int" -> 1.0)), new DataMap(asMap("int" -> 1)))) + val badObjects = List( + TRUE, + I1, + L1, + F1, + D1, + new java.lang.String, + new DataList, + new DataMap(asMap("int" -> TRUE)), + new DataMap(asMap("int" -> new java.lang.String("1"))), + new DataMap(asMap("int" -> new DataMap)), + new DataMap(asMap("int" -> new DataList)), + new DataMap(asMap("string" -> TRUE)), + new DataMap(asMap("string" -> I1)), + new DataMap(asMap("string" -> 
L1)), + new DataMap(asMap("string" -> F1)), + new DataMap(asMap("string" -> D1)), + new DataMap(asMap("string" -> new DataMap)), + new DataMap(asMap("string" -> new DataList)), + new DataMap(asMap("Fruits" -> I1)), + new DataMap(asMap("Fruits" -> new DataMap)), + new DataMap(asMap("Fruits" -> new DataList)), + new DataMap(asMap("int" -> I1, "string" -> "x")), + new DataMap(asMap("x" -> I1, "y" -> L1))) + testNormalCoercionValidation(UNION_SCHEMA, "bar", inputs, badObjects) + } + + @Test + def testTyperefNormalCoercionValidation(): Unit = { + val inputs = + List( + (I1, I1), + (new java.lang.Integer(-1), new java.lang.Integer(-1)), + (L1, I1), + (F1, I1), + (D1, I1)) + val badObjects = List( + TRUE, + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testNormalCoercionValidation(TYPEREF_SCHEMA, "bar1", inputs, badObjects) + testNormalCoercionValidation(TYPEREF_SCHEMA, "bar2", inputs, badObjects) + testNormalCoercionValidation(TYPEREF_SCHEMA, "bar3", inputs, badObjects) + testNormalCoercionValidation(TYPEREF_SCHEMA, "bar4", inputs, badObjects) + } + + @Test + def testTyperefStringToPrimitiveCoercionValidation(): Unit = { + val inputs = List( + (new java.lang.String("1"), I1), + (I1, new Integer(1)), + (new Integer(-1), new Integer(-1)), + (L1, I1), + (F1, I1), + (D1, I1)) + val badObjects = List( + TRUE, + new String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + testStringToPrimitiveCoercionValidation(TYPEREF_SCHEMA, "bar1", inputs, badObjects) + testStringToPrimitiveCoercionValidation(TYPEREF_SCHEMA, "bar2", inputs, badObjects) + testStringToPrimitiveCoercionValidation(TYPEREF_SCHEMA, "bar3", inputs, badObjects) + testStringToPrimitiveCoercionValidation(TYPEREF_SCHEMA, "bar4", inputs, badObjects) + } + + @Test + def testRecordValidation(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + "[ " + + "{ \"name\" : \"bar\", \"type\" : 
{ \"name\" : \"barType\", \"type\" : \"record\", " + + "\"fields\" : [" + + "{ \"name\" : \"requiredInt\", \"type\" : \"int\" }," + + "{ \"name\" : \"requiredString\", \"type\" : \"string\" }," + + "{ \"name\" : \"defaultString\", \"type\" : \"string\", \"default\" : \"apple\" }," + + "{ \"name\" : \"optionalBoolean\", \"type\" : \"boolean\", \"optional\" : true }," + + "{ \"name\" : \"optionalDouble\", \"type\" : \"double\", \"optional\" : true }," + + "{ \"name\" : \"optionalWithDefaultString\", \"type\" : \"string\", \"optional\" : true, " + + "\"default\" : \"orange\" }" + + "] } } ] }" + val good = List( + List( + ValidationOptions( + requiredMode = RequiredMode.FIXUP_ABSENT_WITH_DEFAULT, + coercionMode = CoercionMode.NORMAL)) -> + List( + new DataMap(asMap("requiredInt" -> 12, "requiredString" -> "")), + new DataMap(asMap("requiredInt" -> 34, "requiredString" -> "cow")), + new DataMap( + asMap("requiredInt" -> 56, "requiredString" -> "cat", "optionalBoolean" -> false)), + new DataMap( + asMap( + "requiredInt" -> + 78, + "requiredString" -> + "dog", + "optionalBoolean" -> + true, + "optionalDouble" -> + 999.5)), + new DataMap( + asMap( + "requiredInt" -> + 78, + "requiredString" -> + "dog", + "optionalBoolean" -> + true, + "optionalDouble" -> + 999.5, + "optionalWithDefaultString" -> + "tag")), + new DataMap( + asMap( + "requiredInt" -> + 78, + "requiredString" -> + "dog", + "extra1" -> + TRUE)), + new DataMap( + asMap( + "requiredInt" -> + 78, + "requiredString" -> + "dog", + "optionalBoolean" -> + true, + "optionalDouble" -> + 999.5, + "extra1" -> + TRUE)))) + // All bad examples used CoercionMode.OFF which is unimplemented. So they have been skipped. 
+ testValidationWithDifferentValidationOptions(schemaText, "bar", good, List()) + } + + @Test + def testValidationWithNormalCoercion(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : \n" + + "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"barType\", \"type\" : \"record\", " + + "\"fields\" : [ \n" + + "{ \"name\" : \"boolean\", \"type\" : \"boolean\", \"optional\" : true }, \n" + + "{ \"name\" : \"int\", \"type\" : \"int\", \"optional\" : true }, \n" + + "{ \"name\" : \"long\", \"type\" : \"long\", \"optional\" : true }, \n" + + "{ \"name\" : \"float\", \"type\" : \"float\", \"optional\" : true }, \n" + + "{ \"name\" : \"double\", \"type\" : \"double\", \"optional\" : true }, \n" + + "{ \"name\" : \"string\", \"type\" : \"string\", \"optional\" : true }, \n" + + "{ \"name\" : \"bytes\", \"type\" : \"bytes\", \"optional\" : true }, \n" + + "{ \"name\" : \"array\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" }, " + + "\"optional\" : true }, \n" + + "{ \"name\" : \"enum\", \"type\" : { \"type\" : \"enum\", \"name\" : \"enumType\", " + + "\"symbols\" : [ \"apple\", \"orange\", \"banana\" ] }, \"optional\" : true }, \n" + + "{ \"name\" : \"fixed\", \"type\" : { \"type\" : \"fixed\", \"name\" : \"fixedType\", " + + "\"size\" : 4 }, \"optional\" : true }, \n" + + "{ \"name\" : \"map\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" }, " + + "\"optional\" : true }, \n" + + "{ \"name\" : \"record\", \"type\" : { \"type\" : \"record\", \"name\" : \"recordType\", " + + "\"fields\" : [ { \"name\" : \"int\", \"type\" : \"int\" } ] }, \"optional\" : true }, \n" + + "{ \"name\" : \"union\", \"type\" : [ \"int\", \"recordType\", \"enumType\", \"fixedType\" " + + "], \"optional\" : true }, \n" + + "{ \"name\" : \"unionWithNull\", \"type\" : [ \"null\", \"enumType\", \"fixedType\" ], " + + "\"optional\" : true } \n" + + "] } } ] }" + val key = "bar" + val schema = dataSchemaFromString(schemaText) + val input = + List( + 
ValidationOptions(requiredMode = RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT), + ValidationOptions(requiredMode = RequiredMode.FIXUP_ABSENT_WITH_DEFAULT)) -> + // int + List( + new DataMap(asMap("int" -> 1L)) -> new DataMap(asMap("int" -> 1)), + new DataMap(asMap("int" -> 1.0f)) -> new DataMap(asMap("int" -> 1)), + new DataMap(asMap("int" -> 1.0)) -> new DataMap(asMap("int" -> 1)), + // long + new DataMap(asMap("long" -> 1)) -> new DataMap(asMap("long" -> 1L)), + new DataMap(asMap("long" -> 1.0f)) -> new DataMap(asMap("long" -> 1L)), + new DataMap(asMap("long" -> 1.0)) -> new DataMap(asMap("long" -> 1L)), + // float + new DataMap(asMap("float" -> 1)) -> new DataMap(asMap("float" -> 1.0f)), + new DataMap(asMap("float" -> 1L)) -> new DataMap(asMap("float" -> 1.0f)), + new DataMap(asMap("float" -> 1.0)) -> new DataMap(asMap("float" -> 1.0f)), + // double + new DataMap(asMap("double" -> 1)) -> new DataMap(asMap("double" -> 1.0)), + new DataMap(asMap("double" -> 1L)) -> new DataMap(asMap("double" -> 1.0)), + new DataMap(asMap("double" -> 1.0f)) -> new DataMap(asMap("double" -> 1.0)), + // array of int's + new DataMap( + asMap("array" -> + new DataList(asList(1, 2, 3, 1.0, 2.0, 3.0, 1.0f, 2.0f, 3.0f, 1.0, 2.0, 3.0)))) -> + new DataMap( + asMap("array" -> + new DataList(asList(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3)))), + // map of int's + new DataMap( + asMap( + "map"-> + new DataMap(asMap("int1"-> 1, "long"-> 1L, "float"-> 1.0f, "double"-> 1.0)))) -> + new DataMap( + asMap( + "map"-> + new DataMap( + asMap( + "int1"-> + 1, + "long"-> + 1, + "float"-> + 1, + "double"-> + 1)))), + // record with int fields + new DataMap(asMap("record"-> new DataMap(asMap("int"-> 1L)))) -> + new DataMap(asMap("record"-> new DataMap(asMap("int"-> 1)))), + + new DataMap(asMap("record"-> new DataMap(asMap("int"-> 1.0f)))) -> + new DataMap(asMap("record"-> new DataMap(asMap("int"-> 1)))), + + new DataMap(asMap("record"-> new DataMap(asMap("int"-> 1.0)))) -> + new DataMap(asMap("record"-> new 
DataMap(asMap("int"-> 1)))), + // union with int + + new DataMap(asMap("union"-> new DataMap(asMap("int"-> 1L)))) -> + new DataMap(asMap("union"-> new DataMap(asMap("int"-> 1)))), + + new DataMap(asMap("union"-> new DataMap(asMap("int"-> 1.0f)))) -> + new DataMap(asMap("union"-> new DataMap(asMap("int"-> 1)))), + + new DataMap(asMap("union"-> new DataMap(asMap("int"-> 1.0)))) -> + new DataMap( + asMap( + "union"-> + new DataMap(asMap("int"-> 1)))), + // union with record containing int + + new DataMap( + asMap( + "union"-> + new DataMap(asMap("recordType"-> new DataMap(asMap("int"-> 1L)))))) -> + new DataMap( + asMap( + "union"-> + new DataMap(asMap("recordType"-> new DataMap(asMap("int"-> 1)))))), + + new DataMap( + asMap( + "union"-> + new DataMap(asMap("recordType"-> new DataMap(asMap("int"-> 1.0f)))))) -> + new DataMap( + asMap( + "union"-> + new DataMap(asMap("recordType"-> new DataMap(asMap("int"-> 1)))))), + + new DataMap( + asMap( + "union"-> + new DataMap(asMap("recordType"-> new DataMap(asMap("int"-> 1.0)))))) -> + new DataMap( + asMap( + "union"-> + new DataMap(asMap("recordType"-> new DataMap(asMap("int"-> 1)))))) + ) + testValidationWithNormalCoercionHelper(schema, key, input) + } + + @Test + def testValidationWithFixupAbsentWithDefault(): Unit = { + val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : \n" + + "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"barType\", \"type\" : \"record\", " + + "\"fields\" : [ \n" + + "{ \"name\" : \"boolean\", \"type\" : \"boolean\", \"default\" : true }, \n" + + "{ \"name\" : \"int\", \"type\" : \"int\", \"default\" : 1 }, \n" + + "{ \"name\" : \"long\", \"type\" : \"long\", \"default\" : 2 }, \n" + + "{ \"name\" : \"float\", \"type\" : \"float\", \"default\" : 3.0 }, \n" + + "{ \"name\" : \"double\", \"type\" : \"double\", \"default\" : 4.0 }, \n" + + "{ \"name\" : \"string\", \"type\" : \"string\", \"default\" : \"cow\" }, \n" + + "{ \"name\" : \"bytes\", \"type\" : \"bytes\", 
\"default\" : \"dog\" }, \n" + + "{ \"name\" : \"array\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" }, " + + "\"default\" : [ -1, -2, -3 ] }, \n" + + "{ \"name\" : \"enum\", \"type\" : { \"type\" : \"enum\", \"name\" : \"enumType\", " + + "\"symbols\" : [ \"apple\", \"orange\", \"banana\" ] }, \"default\" : \"apple\" }, \n" + + "{ \"name\" : \"fixed\", \"type\" : { \"type\" : \"fixed\", \"name\" : \"fixedType\", " + + "\"size\" : 4 }, \"default\" : \"1234\" }, \n" + + "{ \"name\" : \"map\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" }, \"default\"" + + " : { \"1\" : 1, \"2\" : 2 } }, \n" + + "{ \"name\" : \"record\", \"type\" : { \"type\" : \"record\", \"name\" : \"recordType\", " + + "\"fields\" : [ { \"name\" : \"int\", \"type\" : \"int\" } ] }, \"default\" : { \"int\" : 1" + + " } }, \n" + + "{ \"name\" : \"union\", \"type\" : [ \"int\", \"recordType\", \"enumType\", \"fixedType\" " + + "], \"default\" : { \"enumType\" : \"orange\" } }, \n" + + "{ \"name\" : \"unionWithNull\", \"type\" : [ \"null\", \"enumType\", \"fixedType\" ], " + + "\"default\" : null }, \n" + + "{ \"name\" : \"optionalInt\", \"type\" : \"int\", \"optional\" : true }, \n" + + "{ \"name\" : \"optionalDefaultInt\", \"type\" : \"int\", \"optional\" : true, \"default\" " + + ": 42 } \n" + + "] } } ] }" + val key = "bar" + val schema = dataSchemaFromString(schemaText) + assert(schema != null) + val input = + List( + ValidationOptions(requiredMode = + RequiredMode.FIXUP_ABSENT_WITH_DEFAULT)) -> + List(new DataMap -> new DataMap( + asMap( + "boolean" -> + true, + "int" -> + 1, + "long" -> + 2L, + "float" -> + 3.0f, + "double" -> + 4.0, + "string" -> + "cow", + "bytes" -> + ByteString.copyAvroString("dog", false), + "array" -> + new DataList(asList(-1, -2, -3)), + "enum" -> + "apple", + "fixed" -> + ByteString.copyAvroString("1234", false), + "map" -> + new DataMap(asMap("1"-> 1, "2"-> 2)), + "record" -> + new DataMap(asMap("int"-> 1)), + "union" -> + new 
DataMap(asMap("enumType" -> "orange")), + "unionWithNull" -> + Data.NULL))) + testValidationWithNormalCoercionHelper(schema, key, input) + } + + @Test + def testNonRootStartDataElement(): Unit = { + val schemaText = "{\n" + " \"name\" : \"Foo\",\n" + " \"type\" : \"record\",\n" + + " \"fields\" : [\n" + + " { \"name\" : \"intField\", \"type\" : \"int\", \"optional\" : true },\n" + + " { \"name\" : \"stringField\", \"type\" : \"string\", \"optional\" : true },\n" + + " { \"name\" : \"arrayField\", \"type\" : { \"type\" : \"array\", \"items\" : \"Foo\" }," + + " \"optional\" : true },\n" + + " { \"name\" : \"mapField\", \"type\" : { \"type\" : \"map\", \"values\" : \"Foo\" }, " + + "\"optional\" : true },\n" + + " { \"name\" : \"unionField\", \"type\" : [ \"int\", \"string\", \"Foo\" ], \"optional\"" + + " : true },\n" + + " { \"name\" : \"fooField\", \"type\" : \"Foo\", \"optional\" : true }\n" + " ]\n" + "}\n" + val empty = List() + val input: List[(String, String, List[String], List[String])] = List( + ( + "{ \"intField\" : \"bad\", \"fooField\" : { \"intField\" : 32 } }", + "/fooField", + empty, + List("ERROR")), + ( + "{ \"intField\" : 32, \"fooField\" : { \"intField\" : \"bad\" } }", + "/fooField", + List[String]("ERROR", "/fooField/intField"), + empty), + ( + "{\n" + " \"stringField\" : 32,\n" + + " \"arrayField\" : [ { \"intField\" : \"bad0\" }, { \"intField\" : \"bad1\" } ]\n" + + "}\n", + "/arrayField/0", + List[String]("ERROR", "/arrayField/0/intField"), + List[String]("/stringField", "/arrayField/1/intField")), + ( + "{\n" + " \"stringField\" : 32,\n" + + " \"mapField\" : { \"m0\" : { \"intField\" : \"bad0\" }, \"m1\" : { \"intField\" " + + ": \"bad1\" } }\n" + + "}\n", + "/mapField/m1", + List[String]("ERROR", "/mapField/m1/intField"), + List[String]("/stringField", "/mapField/m0/intField")), + ( + "{\n" + " \"stringField\" : 32,\n" + " \"arrayField\" : [\n" + + " { \"unionField\" : { \"Foo\" : { \"intField\" : \"bad0\" } } },\n" + + " { \"unionField\" : 
{ \"int\" : \"bad1\" } }\n" + " ]\n" + "}\n", + "/arrayField/0/unionField", + List[String]("ERROR", "/arrayField/0/unionField/Foo/intField"), + List[String]("/stringField", "/arrayField/1/unionField/int")), + ( + "{\n" + " \"stringField\" : 32,\n" + " \"fooField\" : {\n" + + " \"stringField\" : 45,\n" + " \"fooField\" : { \"intField\" : \"bad1\" } }\n" + + " }\n" + "}\n", + "/fooField/fooField", + List[String]("ERROR", "/fooField/fooField/intField"), + List[String]("/stringField", "/fooField/stringField"))) + val schema = dataSchemaFromString(schemaText) + for (row <- input) { + val (dataString, startPath, expectedStrings, notExpectedStrings) = row + val map = dataMapFromString(dataString) + val startElement = DataElementUtil.element(map, schema, startPath) + assert(startElement ne null) + val result = validate(startElement, ValidationOptions()) + val message = result.getMessages.toString + for (expected <- expectedStrings) { + assert(message.contains(expected), message + " does not contain " + expected) + } + for (notExpected <- notExpectedStrings) { + assert(!message.contains(notExpected), message + " contains " + notExpected) + } + } + } +} + +object ValidateDataAgainstSchemaTest { + import collection.JavaConverters._ + + val STRING_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "string"}]}""" + + val BOOLEAN_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "boolean"}]}""" + + val INTEGER_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "int"}]}""" + + val UNION_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": ["null", "int", + |"string", {"type": "enum", "name": "Fruits", "symbols": ["APPLE", "ORANGE"]}]}]}""".stripMargin + + val TYPEREF_SCHEMA: String = + """{ "type" : "record", "name" : "foo", "fields" : [ + | { "name" : "bar1", "type" : { "type" : "typeref", "name" : "int2", "ref": "int" }, 
"optional" : true }, + | { "name" : "bar2", "type" : "int2", "optional" : true }, + | { "name" : "bar3", "type" : { "type" : "typeref", "name" : "int3", "ref": "int2" }, "optional" : true }, + | { "name" : "bar4", "type" : "int3", "optional" : true }] }""".stripMargin + + val FALSE = new java.lang.Boolean(false) + val TRUE = new java.lang.Boolean(true) + + val I1 = new java.lang.Integer(1) + val F1 = new java.lang.Float(1) + val L1 = new java.lang.Long(1) + val D1 = new java.lang.Double(1) + + private val codec = new JacksonDataCodec() + + def inputStreamFromString(s: String): ByteArrayInputStream = { + val bytes = s.getBytes(Data.UTF_8_CHARSET) + val bais = new ByteArrayInputStream(bytes) + bais + } + + def schemaParserFromString(s: String): SchemaParser = { + val parser = new SchemaParser + parser.parse(inputStreamFromString(s)) + parser + } + + def dataSchemaFromString(s: String): DataSchema = { + val parser = schemaParserFromString(s) + if (parser.hasError) { + println("ERROR: " + parser.errorMessage) + null + } else { + parser.topLevelDataSchemas.get(parser.topLevelDataSchemas.size - 1) + } + } + + def validate(map: DataMap, schema: DataSchema, options: ValidationOptions): ValidationResult = + ValidateDataAgainstSchema.validate(map, schema, options) + + def validate(element: DataElement, options: ValidationOptions): ValidationResult = + ValidateDataAgainstSchema.validate(element, options, null) + + def normalCoercionValidationOption: ValidationOptions = { + val options = ValidationOptions() + assert(options.requiredMode == RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT) + assert(options.coercionMode == CoercionMode.NORMAL) + options + } + + def stringToPrimitiveCoercionValidationOption: ValidationOptions = { + val options = ValidationOptions(coercionMode = CoercionMode.STRING_TO_PRIMITIVE) + assert(options.requiredMode == RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT) + options + } + + private def assertAllowedClass(coercionMode: CoercionMode, clazz: Class[_]): Unit = 
{ + assert((clazz eq classOf[java.lang.Integer]) || (clazz eq classOf[java.lang.Long]) || (clazz eq + classOf[java.lang.Float]) || + (clazz eq classOf[java.lang.Double]) || + ((coercionMode == CoercionMode.STRING_TO_PRIMITIVE) && (clazz eq classOf[java.lang.String]))) + } + + def asList[T](refs: T*): java.util.List[T] = { + val list = new java.util.ArrayList[T]() + for (ref <- refs) { + list.add(ref) + } + list + } + + def asMap[V](kvs: (String, V)*): java.util.Map[String, V] = { + val jmap = new java.util.HashMap[String, V]() + for (kv <- kvs) { + jmap.put(kv._1, kv._2) + } + jmap + } + + def testValidationWithDifferentValidationOptions( + schemaText: String, + key: String, + goodInput: Seq[(Seq[ValidationOptions], Seq[AnyRef])], + badInput: Seq[(Seq[ValidationOptions], Seq[AnyRef], String, Seq[Seq[String]])]): Unit = { + val schema = dataSchemaFromString(schemaText) + assert(schema != null) + val map = new DataMap + for (rows <- goodInput) { + val modes = rows._1 + val dataObjects = rows._2 + for (mode <- modes) { + for (dataObject <- dataObjects) { + map.put(key, dataObject) + val result = validate(map, schema, mode) + assert(result.isValid) + if (!result.hasFix) { + assert(map eq result.getFixed) + } + } + } + } + for (rows <- badInput) { + val modes = rows._1 + val dataObjects = rows._2 + val expectedString = rows._3 + val errorPaths = rows._4 + for (mode <- modes) { + var index = 0 + for (dataObject <- dataObjects) { + map.put(key, dataObject) + val result = validate(map, schema, mode) + assert(!result.isValid) + assert(map eq result.getFixed) + checkMessages(result.getMessages.asScala, expectedString) + if (index < errorPaths.length) + checkMessagesErrorPath(result.getMessages.asScala, errorPaths(index)) + index += 1 + } + } + } + } + + private def checkMessages(messages: Iterable[Message], expectedString: String): Unit = { + for (m <- messages) { + assert(m.getFormat.contains(expectedString)) + } + } + + private def checkMessagesErrorPath(messages: 
Iterable[Message], errorPaths: Seq[String]): Unit = { + for ((m, errorPath) <- messages.zip(errorPaths)) { + val path = pathAsString(m.getPath) + assert(path == errorPath) + } + } + + private def pathAsString(path: Array[AnyRef]): String = { + val sb = new StringBuilder + for (component <- path) { + sb.append(DataElement.SEPARATOR) + sb.append(component.toString) + } + sb.toString + } + + private def testValidationWithNormalCoercionHelper( + schema: DataSchema, + key: String, + input: (Seq[ValidationOptions], Seq[(DataMap, DataMap)])): Unit = { + val (optionsList, pairs) = input + for (options <- optionsList) { + // Data object is read-only. + for (pair <- pairs) { + val foo = new DataMap + foo.put(key, pair._1) + foo.makeReadOnly() + assert(foo.isReadOnly) + assert(pair._1.asInstanceOf[DataComplex].isReadOnly) + assert(foo.get(key) eq pair._1) + val result = ValidateDataAgainstSchema.validate(foo, schema, options) + System.out.println(result) + assert(!result.isValid) + assert(result.hasFix) + assert(result.hasFixupReadOnlyError) + assert(foo.isReadOnly) + assert(pair._1.asInstanceOf[DataComplex].isReadOnly) + val fooFixed = result.getFixed.asInstanceOf[DataMap] + val barFixed = fooFixed.get(key) + assert(pair._1 == barFixed) // not changed + + assert(fooFixed eq foo) + assert(fooFixed.isReadOnly) + assert(barFixed.asInstanceOf[DataComplex].isReadOnly) + assert(barFixed eq pair._1) + } + // Data object is read-write + for (pair <- pairs) { + val foo = new DataMap + val pair0 = pair._1.asInstanceOf[DataMap].copy // get read-write clone + assert(!pair0.isReadOnly) + foo.put(key, pair0) + val result = validate(foo, schema, options) + assert(result.isValid) + val fooFixed = result.getFixed.asInstanceOf[DataMap] + val barFixed = fooFixed.get(key) + assert(result.isValid) + assert(result.hasFix) + assert(!result.hasFixupReadOnlyError) + assert(!foo.isReadOnly) + assert(!pair0.isReadOnly) + assert(pair._2 == barFixed) + assert(result.getFixed eq foo) // modify in place + 
+ assert(!barFixed.asInstanceOf[DataComplex].isReadOnly) + assert(barFixed eq pair0) + } + } + + } + + def dataMapFromString(json: String): DataMap = codec.stringToMap(json) +} From 7acf659e73dad48f50849c3b9746eee7ad4069fb Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Mon, 25 Oct 2021 21:02:32 -0400 Subject: [PATCH 03/12] fix equality bug --- .../ValidateDataAgainstSchema.scala | 2 +- .../ValidateDataAgainstSchemaTest.scala | 199 +++++++++++++----- 2 files changed, 148 insertions(+), 53 deletions(-) diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala index f8320f51..e6d71807 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala @@ -133,7 +133,7 @@ object ValidateDataAgainstSchema { obj) case _ => validatePrimitive(element, schema, obj) } - if (fixed != obj) { + if (fixed ne obj) { fixValue(element, fixed) } if (validator != null && element.getSchema == schema) { diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index acea707d..892ea97b 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -745,33 +745,8 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testValidationWithNormalCoercion(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : \n" + - "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"barType\", \"type\" : 
\"record\", " + - "\"fields\" : [ \n" + - "{ \"name\" : \"boolean\", \"type\" : \"boolean\", \"optional\" : true }, \n" + - "{ \"name\" : \"int\", \"type\" : \"int\", \"optional\" : true }, \n" + - "{ \"name\" : \"long\", \"type\" : \"long\", \"optional\" : true }, \n" + - "{ \"name\" : \"float\", \"type\" : \"float\", \"optional\" : true }, \n" + - "{ \"name\" : \"double\", \"type\" : \"double\", \"optional\" : true }, \n" + - "{ \"name\" : \"string\", \"type\" : \"string\", \"optional\" : true }, \n" + - "{ \"name\" : \"bytes\", \"type\" : \"bytes\", \"optional\" : true }, \n" + - "{ \"name\" : \"array\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" }, " + - "\"optional\" : true }, \n" + - "{ \"name\" : \"enum\", \"type\" : { \"type\" : \"enum\", \"name\" : \"enumType\", " + - "\"symbols\" : [ \"apple\", \"orange\", \"banana\" ] }, \"optional\" : true }, \n" + - "{ \"name\" : \"fixed\", \"type\" : { \"type\" : \"fixed\", \"name\" : \"fixedType\", " + - "\"size\" : 4 }, \"optional\" : true }, \n" + - "{ \"name\" : \"map\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" }, " + - "\"optional\" : true }, \n" + - "{ \"name\" : \"record\", \"type\" : { \"type\" : \"record\", \"name\" : \"recordType\", " + - "\"fields\" : [ { \"name\" : \"int\", \"type\" : \"int\" } ] }, \"optional\" : true }, \n" + - "{ \"name\" : \"union\", \"type\" : [ \"int\", \"recordType\", \"enumType\", \"fixedType\" " + - "], \"optional\" : true }, \n" + - "{ \"name\" : \"unionWithNull\", \"type\" : [ \"null\", \"enumType\", \"fixedType\" ], " + - "\"optional\" : true } \n" + - "] } } ] }" val key = "bar" - val schema = dataSchemaFromString(schemaText) + val schema = dataSchemaFromString(SCHEMA_FOR_NORMAL_COERCION) val input = List( ValidationOptions(requiredMode = RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT), @@ -956,18 +931,17 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { " { \"name\" : \"unionField\", \"type\" : [ \"int\", \"string\", \"Foo\" ], 
\"optional\"" + " : true },\n" + " { \"name\" : \"fooField\", \"type\" : \"Foo\", \"optional\" : true }\n" + " ]\n" + "}\n" - val empty = List() val input: List[(String, String, List[String], List[String])] = List( ( "{ \"intField\" : \"bad\", \"fooField\" : { \"intField\" : 32 } }", "/fooField", - empty, + List.empty, List("ERROR")), ( "{ \"intField\" : 32, \"fooField\" : { \"intField\" : \"bad\" } }", "/fooField", List[String]("ERROR", "/fooField/intField"), - empty), + List.empty), ( "{\n" + " \"stringField\" : 32,\n" + " \"arrayField\" : [ { \"intField\" : \"bad0\" }, { \"intField\" : \"bad1\" } ]\n" + @@ -1018,26 +992,6 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { object ValidateDataAgainstSchemaTest { import collection.JavaConverters._ - val STRING_SCHEMA: String = - """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "string"}]}""" - - val BOOLEAN_SCHEMA: String = - """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "boolean"}]}""" - - val INTEGER_SCHEMA: String = - """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "int"}]}""" - - val UNION_SCHEMA: String = - """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": ["null", "int", - |"string", {"type": "enum", "name": "Fruits", "symbols": ["APPLE", "ORANGE"]}]}]}""".stripMargin - - val TYPEREF_SCHEMA: String = - """{ "type" : "record", "name" : "foo", "fields" : [ - | { "name" : "bar1", "type" : { "type" : "typeref", "name" : "int2", "ref": "int" }, "optional" : true }, - | { "name" : "bar2", "type" : "int2", "optional" : true }, - | { "name" : "bar3", "type" : { "type" : "typeref", "name" : "int3", "ref": "int2" }, "optional" : true }, - | { "name" : "bar4", "type" : "int3", "optional" : true }] }""".stripMargin - val FALSE = new java.lang.Boolean(false) val TRUE = new java.lang.Boolean(true) @@ -1184,6 +1138,7 @@ object ValidateDataAgainstSchemaTest { val (optionsList, pairs) = input for (options <- 
optionsList) { // Data object is read-only. + assert(options.coercionMode == CoercionMode.NORMAL) for (pair <- pairs) { val foo = new DataMap foo.put(key, pair._1) @@ -1192,7 +1147,6 @@ object ValidateDataAgainstSchemaTest { assert(pair._1.asInstanceOf[DataComplex].isReadOnly) assert(foo.get(key) eq pair._1) val result = ValidateDataAgainstSchema.validate(foo, schema, options) - System.out.println(result) assert(!result.isValid) assert(result.hasFix) assert(result.hasFixupReadOnlyError) @@ -1224,13 +1178,154 @@ object ValidateDataAgainstSchemaTest { assert(!pair0.isReadOnly) assert(pair._2 == barFixed) assert(result.getFixed eq foo) // modify in place - assert(!barFixed.asInstanceOf[DataComplex].isReadOnly) assert(barFixed eq pair0) } } - } def dataMapFromString(json: String): DataMap = codec.stringToMap(json) + + val STRING_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "string"}]}""" + + val BOOLEAN_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "boolean"}]}""" + + val INTEGER_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "int"}]}""" + + val UNION_SCHEMA: String = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": ["null", "int", + |"string", {"type": "enum", "name": "Fruits", "symbols": ["APPLE", "ORANGE"]}]}]}""".stripMargin + + val TYPEREF_SCHEMA: String = + """{ "type" : "record", "name" : "foo", "fields" : [ + | { "name" : "bar1", "type" : { "type" : "typeref", "name" : "int2", "ref": "int" }, "optional" : true }, + | { "name" : "bar2", "type" : "int2", "optional" : true }, + | { "name" : "bar3", "type" : { "type" : "typeref", "name" : "int3", "ref": "int2" }, "optional" : true }, + | { "name" : "bar4", "type" : "int3", "optional" : true }] }""".stripMargin + + val SCHEMA_FOR_NORMAL_COERCION = + """ { + | "type": "record", + | "name": "foo", + | "fields": [ + | { + | "name": "bar", + | "type": { + | 
"name": "barType", + | "type": "record", + | "fields": [ + | { + | "name": "boolean", + | "type": "boolean", + | "optional": true + | }, + | { + | "name": "int", + | "type": "int", + | "optional": true + | }, + | { + | "name": "long", + | "type": "long", + | "optional": true + | }, + | { + | "name": "float", + | "type": "float", + | "optional": true + | }, + | { + | "name": "double", + | "type": "double", + | "optional": true + | }, + | { + | "name": "string", + | "type": "string", + | "optional": true + | }, + | { + | "name": "bytes", + | "type": "bytes", + | "optional": true + | }, + | { + | "name": "array", + | "type": { + | "type": "array", + | "items": "int" + | }, + | "optional": true + | }, + | { + | "name": "enum", + | "type": { + | "type": "enum", + | "name": "enumType", + | "symbols": [ + | "apple", + | "orange", + | "banana" + | ] + | }, + | "optional": true + | }, + | { + | "name": "fixed", + | "type": { + | "type": "fixed", + | "name": "fixedType", + | "size": 4 + | }, + | "optional": true + | }, + | { + | "name": "map", + | "type": { + | "type": "map", + | "values": "int" + | }, + | "optional": true + | }, + | { + | "name": "record", + | "type": { + | "type": "record", + | "name": "recordType", + | "fields": [ + | { + | "name": "int", + | "type": "int" + | } + | ] + | }, + | "optional": true + | }, + | { + | "name": "union", + | "type": [ + | "int", + | "recordType", + | "enumType", + | "fixedType" + | ], + | "optional": true + | }, + | { + | "name": "unionWithNull", + | "type": [ + | "null", + | "enumType", + | "fixedType" + | ], + | "optional": true + | } + | ] + | } + | } + | ] + |} """.stripMargin } From 553f49c875b538b3d085fd3a891fd8fe1cace5d4 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Mon, 25 Oct 2021 21:56:11 -0400 Subject: [PATCH 04/12] fix string formatting --- .../ValidateDataAgainstSchema.scala | 3 - .../ValidateDataAgainstSchemaTest.scala | 211 +++++++++--------- 2 files changed, 111 insertions(+), 103 deletions(-) diff --git 
a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala index e6d71807..4bece15f 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala @@ -83,13 +83,11 @@ object ValidateDataAgainstSchema { def validate(element: DataElement): Unit = validateIterative(element) - // maybe don't need this? def validateIterative(element: DataElement): Unit = { _fixed = element.getValue val it = new ObjectIterator(element, IterationOrder.POST_ORDER) var nextElement: DataElement = it.next() while (nextElement != null) { - // do stuff here val nextElementSchema = nextElement.getSchema if (nextElementSchema != null) { validate(nextElement, nextElementSchema, nextElement.getValue) @@ -99,7 +97,6 @@ object ValidateDataAgainstSchema { } def validate(element: DataElement, schema: DataSchema, obj: AnyRef): AnyRef = { - // can we do dynamic dispatch here? 
val fixed: AnyRef = schema.getType match { case DataSchema.Type.ARRAY => validateArray(element, obj) diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index 892ea97b..8bb3c310 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -254,8 +254,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testLongNormalCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"long\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : "long" }]}""".stripMargin val inputs = List( (new java.lang.Long(1), new java.lang.Long(1)), (new java.lang.Long(-1), new java.lang.Long(-1)), @@ -273,8 +274,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testLongStringToPrimitiveCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"long\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : "long" } ] }""".stripMargin val inputs = List( (new java.lang.String("1"), new java.lang.Long(1)), (new java.lang.String("-1"), new java.lang.Long(-1)), @@ -295,8 +297,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testFloatNormalCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"float\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" 
: + |[ { "name" : "bar", "type" : "float" } ] }""".stripMargin val inputs = List( (new java.lang.Float(1), new java.lang.Float(1)), (new java.lang.Float(-1), new java.lang.Float(-1)), @@ -314,8 +317,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testFloatStringToPrimitiveCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"float\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : "float" } ] }""".stripMargin val inputs = List( (new java.lang.String("1"), new java.lang.Float(1)), (new java.lang.String("-1"), new java.lang.Float(-1)), @@ -339,8 +343,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testDoubleNormalCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"double\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : "double" } ] }""".stripMargin val inputs = List( (new java.lang.Double(1), new java.lang.Double(1)), (new java.lang.Double(-1), new java.lang.Double(-1)), @@ -358,8 +363,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testDoubleStringToPrimitiveCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"double\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : "double" } ] }""".stripMargin val inputs = List( (new java.lang.String("1"), new java.lang.Double(1)), (new java.lang.String("-1"), new java.lang.Double(-1)), @@ -382,8 +388,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testBytesValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", 
\"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : \"bytes\" } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : "bytes" } ] }""".stripMargin val badObjects = List( new java.lang.Boolean(true), new java.lang.Integer(1), @@ -404,9 +411,10 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testFixedValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"fixed4\", \"type\" : \"fixed\", \"size\"" + - " : 4 } } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : { "name" : "fixed4", "type" : "fixed", "size" : 4 } } ] } + |""".stripMargin val badObjects = List( new java.lang.Boolean(true), new java.lang.Integer(1), @@ -441,9 +449,13 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testEnumCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"fruits\", \"type\" : \"enum\", " + - "\"symbols\" : [ \"apple\", \"orange\", \"banana\" ] } } ] }" + val schemaText = + """{ "type": "record", "name": "foo", "fields": + |[ { "name": "bar", + | "type": { "name" : "fruits", + | "type" : "enum", + | "symbols" : [ "apple", "orange", "banana" ] } + | } ] }""".stripMargin val goodObjects = List( new java.lang.String("apple"), new java.lang.String("orange"), @@ -476,8 +488,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testArrayNormalCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" } } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + | [ { "name" : "bar", "type" : { "type" : 
"array", "items" : "int" } } ] }""".stripMargin val inputs = List( (new DataList, new DataList), (new DataList(asList(1)), new DataList(asList(1))), @@ -504,8 +517,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testArrayStringToPrimitiveCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" } } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : { "type" : "array", "items" : "int" } } ] }""".stripMargin val inputs = List( (new DataList(asList("1")), new DataList(asList(1))), (new DataList(asList("1", "2", "3")), new DataList(asList(1, 2, 3))), @@ -533,8 +547,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testMapNormalCoercionValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" } } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : { "type" : "map", "values" : "int" } } ] }""".stripMargin val inputs = List( (new DataMap, new DataMap), (new DataMap(asMap("key1" -> 1)), new DataMap(asMap("key1" -> 1))), @@ -563,8 +578,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testMapStringToPrimitiveValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + - "[ { \"name\" : \"bar\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" } } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[ { "name" : "bar", "type" : { "type" : "map", "values" : "int" } } ] }""".stripMargin val inputs = List( (new DataMap(asMap("key1" -> "1")), new DataMap(asMap("key1" -> 1))), ( @@ -596,6 +612,9 @@ class ValidateDataAgainstSchemaTest extends 
AssertionsForJUnit { @Test def testUnionNormalCoercionValidation(): Unit = { + val schemaText = + """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": ["null", "int", + |"string", {"type": "enum", "name": "Fruits", "symbols": ["APPLE", "ORANGE"]}]}]}""".stripMargin val inputs = List( (Data.NULL, Data.NULL), (new DataMap(asMap("int" -> 1)), new DataMap(asMap("int" -> 1))), @@ -629,7 +648,7 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { new DataMap(asMap("Fruits" -> new DataList)), new DataMap(asMap("int" -> I1, "string" -> "x")), new DataMap(asMap("x" -> I1, "y" -> L1))) - testNormalCoercionValidation(UNION_SCHEMA, "bar", inputs, badObjects) + testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) } @Test @@ -676,17 +695,16 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testRecordValidation(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : " + "[ " + - "{ \"name\" : \"bar\", \"type\" : { \"name\" : \"barType\", \"type\" : \"record\", " + - "\"fields\" : [" + - "{ \"name\" : \"requiredInt\", \"type\" : \"int\" }," + - "{ \"name\" : \"requiredString\", \"type\" : \"string\" }," + - "{ \"name\" : \"defaultString\", \"type\" : \"string\", \"default\" : \"apple\" }," + - "{ \"name\" : \"optionalBoolean\", \"type\" : \"boolean\", \"optional\" : true }," + - "{ \"name\" : \"optionalDouble\", \"type\" : \"double\", \"optional\" : true }," + - "{ \"name\" : \"optionalWithDefaultString\", \"type\" : \"string\", \"optional\" : true, " + - "\"default\" : \"orange\" }" + - "] } } ] }" + val schemaText = + """{ "type" : "record", "name" : "foo", "fields" : + |[{ "name" : "bar", "type" : { "name" : "barType", "type" : "record", "fields" : + | [{ "name" : "requiredInt", "type" : "int" }, + | { "name" : "requiredString", "type" : "string" }, + | { "name" : "defaultString", "type" : "string", "default" : "apple" }, + | { "name" : "optionalBoolean", "type" : 
"boolean", "optional" : true }, + | { "name" : "optionalDouble", "type" : "double", "optional" : true }, + | { "name" : "optionalWithDefaultString", "type" : "string", "optional" : true, + | "default" : "orange" }] } } ] }""".stripMargin val good = List( List( ValidationOptions( @@ -849,35 +867,35 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testValidationWithFixupAbsentWithDefault(): Unit = { - val schemaText = "{ \"type\" : \"record\", \"name\" : \"foo\", \"fields\" : \n" + - "[ { \"name\" : \"bar\", \"type\" : { \"name\" : \"barType\", \"type\" : \"record\", " + - "\"fields\" : [ \n" + - "{ \"name\" : \"boolean\", \"type\" : \"boolean\", \"default\" : true }, \n" + - "{ \"name\" : \"int\", \"type\" : \"int\", \"default\" : 1 }, \n" + - "{ \"name\" : \"long\", \"type\" : \"long\", \"default\" : 2 }, \n" + - "{ \"name\" : \"float\", \"type\" : \"float\", \"default\" : 3.0 }, \n" + - "{ \"name\" : \"double\", \"type\" : \"double\", \"default\" : 4.0 }, \n" + - "{ \"name\" : \"string\", \"type\" : \"string\", \"default\" : \"cow\" }, \n" + - "{ \"name\" : \"bytes\", \"type\" : \"bytes\", \"default\" : \"dog\" }, \n" + - "{ \"name\" : \"array\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\" }, " + - "\"default\" : [ -1, -2, -3 ] }, \n" + - "{ \"name\" : \"enum\", \"type\" : { \"type\" : \"enum\", \"name\" : \"enumType\", " + - "\"symbols\" : [ \"apple\", \"orange\", \"banana\" ] }, \"default\" : \"apple\" }, \n" + - "{ \"name\" : \"fixed\", \"type\" : { \"type\" : \"fixed\", \"name\" : \"fixedType\", " + - "\"size\" : 4 }, \"default\" : \"1234\" }, \n" + - "{ \"name\" : \"map\", \"type\" : { \"type\" : \"map\", \"values\" : \"int\" }, \"default\"" + - " : { \"1\" : 1, \"2\" : 2 } }, \n" + - "{ \"name\" : \"record\", \"type\" : { \"type\" : \"record\", \"name\" : \"recordType\", " + - "\"fields\" : [ { \"name\" : \"int\", \"type\" : \"int\" } ] }, \"default\" : { \"int\" : 1" + - " } }, \n" + - "{ \"name\" : \"union\", 
\"type\" : [ \"int\", \"recordType\", \"enumType\", \"fixedType\" " + - "], \"default\" : { \"enumType\" : \"orange\" } }, \n" + - "{ \"name\" : \"unionWithNull\", \"type\" : [ \"null\", \"enumType\", \"fixedType\" ], " + - "\"default\" : null }, \n" + - "{ \"name\" : \"optionalInt\", \"type\" : \"int\", \"optional\" : true }, \n" + - "{ \"name\" : \"optionalDefaultInt\", \"type\" : \"int\", \"optional\" : true, \"default\" " + - ": 42 } \n" + - "] } } ] }" + val schemaText = + """{ "type": "record", "name": "foo", "fields": + | [ { "name": "bar", "type": { "name": "barType", "type": "record", "fields": + | [ {"name": "boolean", "type": "boolean", "default": true }, + | {"name": "int", "type": "int", "default": 1 }, + | {"name": "long", "type": "long", "default": 2 }, + | {"name": "float", "type": "float", "default": 3.0 }, + | {"name": "double", "type": "double", "default": 4.0 }, + | {"name": "string", "type": "string", "default": "cow" }, + | {"name": "bytes", "type": "bytes", "default": "dog" }, + | {"name": "array", "type": { "type": "array", "items": "int" }, + | "default": [ -1, -2, -3 ] }, + | {"name": "enum", "type": { "type": "enum", "name": "enumType", + | "symbols": [ "apple", "orange", "banana" ] }, + | "default": "apple" }, + | {"name": "fixed", "type": { "type": "fixed", "name": "fixedType", "size": 4 }, + | "default": "1234" }, + | {"name": "map", "type": { "type": "map", "values": "int" }, + | "default": { "1": 1, "2": 2 } }, + | {"name": "record", "type": { "type": "record", "name": "recordType", + | "fields": [ { "name": "int", "type": "int" } ] }, + | "default": { "int": 1 } }, + | {"name" : "union", "type" : [ "int", "recordType", "enumType", "fixedType"], + | "default" : { "enumType" : "orange" } }, + | {"name" : "unionWithNull", "type" : [ "null", "enumType", "fixedType" ], + | "default" : null }, + | {"name" : "optionalInt", "type" : "int", "optional" : true }, + | {"name" : "optionalDefaultInt", "type" : "int", "optional" : true, + | 
"default": 42 } ] } } ] }""".stripMargin val key = "bar" val schema = dataSchemaFromString(schemaText) assert(schema != null) @@ -920,54 +938,51 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { @Test def testNonRootStartDataElement(): Unit = { - val schemaText = "{\n" + " \"name\" : \"Foo\",\n" + " \"type\" : \"record\",\n" + - " \"fields\" : [\n" + - " { \"name\" : \"intField\", \"type\" : \"int\", \"optional\" : true },\n" + - " { \"name\" : \"stringField\", \"type\" : \"string\", \"optional\" : true },\n" + - " { \"name\" : \"arrayField\", \"type\" : { \"type\" : \"array\", \"items\" : \"Foo\" }," + - " \"optional\" : true },\n" + - " { \"name\" : \"mapField\", \"type\" : { \"type\" : \"map\", \"values\" : \"Foo\" }, " + - "\"optional\" : true },\n" + - " { \"name\" : \"unionField\", \"type\" : [ \"int\", \"string\", \"Foo\" ], \"optional\"" + - " : true },\n" + - " { \"name\" : \"fooField\", \"type\" : \"Foo\", \"optional\" : true }\n" + " ]\n" + "}\n" + val schemaText = + """{ "name": "Foo", "type": "record", "fields": + |[{ "name": "intField", "type": "int", "optional": true }, + | { "name": "stringField", "type": "string", "optional": true }, + | { "name": "arrayField", "type": { "type": "array", "items": "Foo" }, "optional": true }, + | { "name": "mapField", "type": { "type": "map", "values": "Foo" }, "optional": true }, + | { "name": "unionField", "type": [ "int", "string", "Foo" ], "optional": true }, + | { "name": "fooField", "type": "Foo", "optional": true } ]}""".stripMargin val input: List[(String, String, List[String], List[String])] = List( ( - "{ \"intField\" : \"bad\", \"fooField\" : { \"intField\" : 32 } }", + """{ "intField" : "bad", "fooField" : { "intField" : 32 } }""", "/fooField", List.empty, List("ERROR")), ( - "{ \"intField\" : 32, \"fooField\" : { \"intField\" : \"bad\" } }", + """{ "intField" : 32, "fooField" : { "intField" : "bad" } }""", "/fooField", List[String]("ERROR", "/fooField/intField"), List.empty), ( - "{\n" 
+ " \"stringField\" : 32,\n" + - " \"arrayField\" : [ { \"intField\" : \"bad0\" }, { \"intField\" : \"bad1\" } ]\n" + - "}\n", + """{"stringField": 32, "arrayField": [{"intField": "bad0"}, {"intField": "bad1"}]}""", "/arrayField/0", List[String]("ERROR", "/arrayField/0/intField"), List[String]("/stringField", "/arrayField/1/intField")), ( - "{\n" + " \"stringField\" : 32,\n" + - " \"mapField\" : { \"m0\" : { \"intField\" : \"bad0\" }, \"m1\" : { \"intField\" " + - ": \"bad1\" } }\n" + - "}\n", + """{"stringField" : 32, + | "mapField" : { "m0" : { "intField" : "bad0" }, + | "m1" : { "intField" : "bad1" } }}""" + .stripMargin, "/mapField/m1", List[String]("ERROR", "/mapField/m1/intField"), List[String]("/stringField", "/mapField/m0/intField")), ( - "{\n" + " \"stringField\" : 32,\n" + " \"arrayField\" : [\n" + - " { \"unionField\" : { \"Foo\" : { \"intField\" : \"bad0\" } } },\n" + - " { \"unionField\" : { \"int\" : \"bad1\" } }\n" + " ]\n" + "}\n", + """ + |{"stringField": 32, + | "arrayField": [{"unionField": {"Foo": {"intField": "bad0"}}}, + | { "unionField": {"int": "bad1"}}]}""".stripMargin, "/arrayField/0/unionField", List[String]("ERROR", "/arrayField/0/unionField/Foo/intField"), List[String]("/stringField", "/arrayField/1/unionField/int")), ( - "{\n" + " \"stringField\" : 32,\n" + " \"fooField\" : {\n" + - " \"stringField\" : 45,\n" + " \"fooField\" : { \"intField\" : \"bad1\" } }\n" + - " }\n" + "}\n", + """ + |{"stringField" : 32, + | "fooField" : {"stringField" : 45, + | "fooField" : {"intField" : "bad1" }}}}""".stripMargin, "/fooField/fooField", List[String]("ERROR", "/fooField/fooField/intField"), List[String]("/stringField", "/fooField/stringField"))) @@ -1195,10 +1210,6 @@ object ValidateDataAgainstSchemaTest { val INTEGER_SCHEMA: String = """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": "int"}]}""" - val UNION_SCHEMA: String = - """{"type": "record", "name": "foo", "fields": [{"name": "bar", "type": ["null", "int", - |"string", 
{"type": "enum", "name": "Fruits", "symbols": ["APPLE", "ORANGE"]}]}]}""".stripMargin - val TYPEREF_SCHEMA: String = """{ "type" : "record", "name" : "foo", "fields" : [ | { "name" : "bar1", "type" : { "type" : "typeref", "name" : "int2", "ref": "int" }, "optional" : true }, @@ -1206,7 +1217,7 @@ object ValidateDataAgainstSchemaTest { | { "name" : "bar3", "type" : { "type" : "typeref", "name" : "int3", "ref": "int2" }, "optional" : true }, | { "name" : "bar4", "type" : "int3", "optional" : true }] }""".stripMargin - val SCHEMA_FOR_NORMAL_COERCION = + val SCHEMA_FOR_NORMAL_COERCION: String = """ { | "type": "record", | "name": "foo", From b1ea8b4078381f305dddc87c509372cea738b044 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Mon, 25 Oct 2021 22:01:51 -0400 Subject: [PATCH 05/12] fixups --- .../ValidateDataAgainstSchemaTest.scala | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index 8bb3c310..7f493f1a 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -946,6 +946,8 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { | { "name": "mapField", "type": { "type": "map", "values": "Foo" }, "optional": true }, | { "name": "unionField", "type": [ "int", "string", "Foo" ], "optional": true }, | { "name": "fooField", "type": "Foo", "optional": true } ]}""".stripMargin + val stringField = "/stringField" + val error = "ERROR" val input: List[(String, String, List[String], List[String])] = List( ( """{ "intField" : "bad", "fooField" : { "intField" : 32 } }""", @@ -955,37 +957,37 @@ class ValidateDataAgainstSchemaTest extends 
AssertionsForJUnit { ( """{ "intField" : 32, "fooField" : { "intField" : "bad" } }""", "/fooField", - List[String]("ERROR", "/fooField/intField"), + List[String](error, "/fooField/intField"), List.empty), ( """{"stringField": 32, "arrayField": [{"intField": "bad0"}, {"intField": "bad1"}]}""", "/arrayField/0", - List[String]("ERROR", "/arrayField/0/intField"), - List[String]("/stringField", "/arrayField/1/intField")), + List[String](error, "/arrayField/0/intField"), + List[String](stringField, "/arrayField/1/intField")), ( """{"stringField" : 32, | "mapField" : { "m0" : { "intField" : "bad0" }, | "m1" : { "intField" : "bad1" } }}""" .stripMargin, "/mapField/m1", - List[String]("ERROR", "/mapField/m1/intField"), - List[String]("/stringField", "/mapField/m0/intField")), + List[String](error, "/mapField/m1/intField"), + List[String](stringField, "/mapField/m0/intField")), ( """ |{"stringField": 32, | "arrayField": [{"unionField": {"Foo": {"intField": "bad0"}}}, - | { "unionField": {"int": "bad1"}}]}""".stripMargin, + | { "unionField": {"int": "bad1"}}]}""".stripMargin, "/arrayField/0/unionField", - List[String]("ERROR", "/arrayField/0/unionField/Foo/intField"), - List[String]("/stringField", "/arrayField/1/unionField/int")), + List[String](error, "/arrayField/0/unionField/Foo/intField"), + List[String](stringField, "/arrayField/1/unionField/int")), ( """ |{"stringField" : 32, | "fooField" : {"stringField" : 45, - | "fooField" : {"intField" : "bad1" }}}}""".stripMargin, + | "fooField": {"intField": "bad1" }}}}""".stripMargin, "/fooField/fooField", - List[String]("ERROR", "/fooField/fooField/intField"), - List[String]("/stringField", "/fooField/stringField"))) + List[String](error, "/fooField/fooField/intField"), + List[String](stringField, "/fooField/stringField"))) val schema = dataSchemaFromString(schemaText) for (row <- input) { val (dataString, startPath, expectedStrings, notExpectedStrings) = row From d9f78aa9804cb924ed6a1dca9133a19fd6e39932 Mon Sep 17 00:00:00 
2001 From: Zachary Drudi Date: Mon, 25 Oct 2021 22:04:41 -0400 Subject: [PATCH 06/12] cleanup --- .../validation/ValidateDataAgainstSchemaTest.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index 7f493f1a..76879ece 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -1092,8 +1092,7 @@ object ValidateDataAgainstSchemaTest { assert(schema != null) val map = new DataMap for (rows <- goodInput) { - val modes = rows._1 - val dataObjects = rows._2 + val (modes, dataObjects) = rows for (mode <- modes) { for (dataObject <- dataObjects) { map.put(key, dataObject) @@ -1106,10 +1105,7 @@ object ValidateDataAgainstSchemaTest { } } for (rows <- badInput) { - val modes = rows._1 - val dataObjects = rows._2 - val expectedString = rows._3 - val errorPaths = rows._4 + val (modes, dataObjects, expectedString, errorPaths) = rows for (mode <- modes) { var index = 0 for (dataObject <- dataObjects) { From 17d522e0eda2e80a9e39256598fd38cb49f071d1 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Tue, 23 Nov 2021 11:56:29 -0500 Subject: [PATCH 07/12] update version number --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 01eff169..067b3812 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "0.11.5" +version in ThisBuild := "0.11.6-alpha1" From f3419ef9dd39de1e0cfd68334cd145044b7fb445 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Wed, 24 Nov 2021 18:39:16 -0500 Subject: [PATCH 08/12] clean up unit test a little --- .../ValidateDataAgainstSchemaTest.scala | 363 
++++++------------ 1 file changed, 124 insertions(+), 239 deletions(-) diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index 76879ece..003adbc0 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -1,22 +1,21 @@ package org.coursera.naptime.courier.validation -import org.scalatest.junit.AssertionsForJUnit -import com.linkedin.data.element.DataElement -import com.linkedin.data.schema.validation.ValidationResult -import com.linkedin.data.schema.DataSchema -import com.linkedin.data.DataMap -import com.linkedin.data.schema.RecordDataSchema -import com.linkedin.data.schema.SchemaParser -import com.linkedin.data.ByteString -import org.junit.Test -import com.linkedin.data.message.Message -import com.linkedin.data.Data import java.io.ByteArrayInputStream -import com.linkedin.data.codec.JacksonDataCodec +import com.linkedin.data.ByteString +import com.linkedin.data.Data import com.linkedin.data.DataComplex import com.linkedin.data.DataList +import com.linkedin.data.DataMap +import com.linkedin.data.codec.JacksonDataCodec +import com.linkedin.data.element.DataElement import com.linkedin.data.element.DataElementUtil +import com.linkedin.data.schema.DataSchema +import com.linkedin.data.schema.RecordDataSchema +import com.linkedin.data.schema.SchemaParser +import com.linkedin.data.schema.validation.ValidationResult +import org.junit.Test +import org.scalatest.junit.AssertionsForJUnit class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { import ValidateDataAgainstSchemaTest._ @@ -257,19 +256,7 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { val schemaText = """{ "type" : "record", "name" : 
"foo", "fields" : |[ { "name" : "bar", "type" : "long" }]}""".stripMargin - val inputs = List( - (new java.lang.Long(1), new java.lang.Long(1)), - (new java.lang.Long(-1), new java.lang.Long(-1)), - (new java.lang.Integer(1), new java.lang.Long(1)), - (new java.lang.Float(1), new java.lang.Long(1)), - (new java.lang.Double(1), new java.lang.Long(1))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.String("abc"), - ByteString.copyAvroString("bytes", false), - new DataMap, - new DataList) - testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + testNormalCoercionValidation(schemaText, "bar", LONG_TEST_INPUTS, BAD_OBJECTS_FOR_NUMERIC) } @Test @@ -280,19 +267,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { val inputs = List( (new java.lang.String("1"), new java.lang.Long(1)), (new java.lang.String("-1"), new java.lang.Long(-1)), - (new java.lang.String("" + Long.MaxValue), new java.lang.Long(Long.MaxValue)), - (new java.lang.Long(1), new java.lang.Long(1)), - (new java.lang.Long(-1), new java.lang.Long(-1)), - (new java.lang.Integer(1), new java.lang.Long(1)), - (new java.lang.Float(1), new java.lang.Long(1)), - (new java.lang.Double(1), new java.lang.Long(1))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.String("abc"), - ByteString.copyAvroString("bytes", false), - new DataMap, - new DataList) - testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + (new java.lang.String("" + Long.MaxValue), new java.lang.Long(Long.MaxValue))) ++ + LONG_TEST_INPUTS + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, BAD_OBJECTS_FOR_NUMERIC) } @Test @@ -300,19 +277,7 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { val schemaText = """{ "type" : "record", "name" : "foo", "fields" : |[ { "name" : "bar", "type" : "float" } ] }""".stripMargin - val inputs = List( - (new java.lang.Float(1), new java.lang.Float(1)), - (new java.lang.Float(-1), 
new java.lang.Float(-1)), - (new java.lang.Integer(1), new java.lang.Float(1)), - (new java.lang.Long(1), new java.lang.Float(1)), - (new java.lang.Double(1), new java.lang.Float(1))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.String("abc"), - ByteString.copyAvroString("bytes", false), - new DataMap, - new DataList) - testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + testNormalCoercionValidation(schemaText, "bar", FLOAT_TEST_INPUTS, BAD_OBJECTS_FOR_NUMERIC) } @Test @@ -325,20 +290,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { (new java.lang.String("-1"), new java.lang.Float(-1)), (new java.lang.String("1.01"), new java.lang.Float(1.01)), (new java.lang.String("-1.01"), new java.lang.Float(-1.01)), - (new java.lang.String("" + Float.MaxValue), new java.lang.Float(Float.MaxValue)), - (new java.lang.Float(1), new java.lang.Float(1)), - (new java.lang.Float(1), new java.lang.Float(1)), - (new java.lang.Float(-1), new java.lang.Float(-1)), - (new java.lang.Integer(1), new java.lang.Float(1)), - (new java.lang.Long(1), new java.lang.Float(1)), - (new java.lang.Double(1), new java.lang.Float(1))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.String("abc"), - ByteString.copyAvroString("bytes", false), - new DataMap, - new DataList) - testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + (new java.lang.String("" + Float.MaxValue), new java.lang.Float(Float.MaxValue))) ++ + FLOAT_TEST_INPUTS + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, BAD_OBJECTS_FOR_NUMERIC) } @Test @@ -346,19 +300,7 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { val schemaText = """{ "type" : "record", "name" : "foo", "fields" : |[ { "name" : "bar", "type" : "double" } ] }""".stripMargin - val inputs = List( - (new java.lang.Double(1), new java.lang.Double(1)), - (new java.lang.Double(-1), new java.lang.Double(-1)), - (new 
java.lang.Integer(1), new java.lang.Double(1)), - (new java.lang.Long(1), new java.lang.Double(1)), - (new java.lang.Float(1), new java.lang.Double(1))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.String("abc"), - ByteString.copyAvroString("bytes", false), - new DataMap, - new DataList) - testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + testNormalCoercionValidation(schemaText, "bar", DOUBLE_TEST_INPUTS, BAD_OBJECTS_FOR_NUMERIC) } @Test @@ -371,19 +313,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { (new java.lang.String("-1"), new java.lang.Double(-1)), (new java.lang.String("1.01"), new java.lang.Double(1.01)), (new java.lang.String("-1.01"), new java.lang.Double(-1.01)), - (new java.lang.String("" + Double.MaxValue), new java.lang.Double(Double.MaxValue)), - (new java.lang.Double(1), new java.lang.Double(1)), - (new java.lang.Double(-1), new java.lang.Double(-1)), - (new java.lang.Integer(1), new java.lang.Double(1)), - (new java.lang.Long(1), new java.lang.Double(1)), - (new java.lang.Float(1), new java.lang.Double(1))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.String("abc"), - ByteString.copyAvroString("bytes", false), - new DataMap, - new DataList) - testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + (new java.lang.String("" + Double.MaxValue), new java.lang.Double(Double.MaxValue))) ++ + DOUBLE_TEST_INPUTS + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, BAD_OBJECTS_FOR_NUMERIC) } @Test @@ -491,28 +423,8 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { val schemaText = """{ "type" : "record", "name" : "foo", "fields" : | [ { "name" : "bar", "type" : { "type" : "array", "items" : "int" } } ] }""".stripMargin - val inputs = List( - (new DataList, new DataList), - (new DataList(asList(1)), new DataList(asList(1))), - (new DataList(asList(2, 3)), new DataList(asList(2, 3))), - (new 
DataList(asList(1L)), new DataList(asList(1))), - (new DataList(asList(1.0f)), new DataList(asList(1))), - (new DataList(asList(1.0)), new DataList(asList(1)))) - val badObjects = List( - new java.lang.Boolean(true), - new java.lang.Integer(1), - new java.lang.Long(1), - new java.lang.Float(1), - new java.lang.Double(1), - new java.lang.String, - new DataMap, - new DataList(asList(TRUE)), - new DataList(asList(new String("1"))), - new DataList(asList(new DataMap)), - new DataList(asList(new DataList)), - new DataList(asList(TRUE, I1)), - new DataList(asList(new Integer(1), TRUE))) - testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + val badObjects = List(new DataList(asList(new String("1")))) ++ ARRAY_BAD_OBJECTS + testNormalCoercionValidation(schemaText, "bar", ARRAY_TEST_INPUTS, badObjects) } @Test @@ -522,27 +434,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { |[ { "name" : "bar", "type" : { "type" : "array", "items" : "int" } } ] }""".stripMargin val inputs = List( (new DataList(asList("1")), new DataList(asList(1))), - (new DataList(asList("1", "2", "3")), new DataList(asList(1, 2, 3))), - (new DataList, new DataList), - (new DataList(asList(1)), new DataList(asList(1))), - (new DataList(asList(2, 3)), new DataList(asList(2, 3))), - (new DataList(asList(1L)), new DataList(asList(1))), - (new DataList(asList(1.0f)), new DataList(asList(1))), - (new DataList(asList(1.0)), new DataList(asList(1)))) - val badObjects = List( - TRUE, - I1, - L1, - F1, - D1, - new java.lang.String, - new DataMap, - new DataList(asList(TRUE)), - new DataList(asList(new DataMap)), - new DataList(asList(new DataList)), - new DataList(asList(TRUE, I1)), - new DataList(asList(I1, TRUE))) - testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + (new DataList(asList("1", "2", "3")), new DataList(asList(1, 2, 3)))) ++ + ARRAY_TEST_INPUTS + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, ARRAY_BAD_OBJECTS) } 
@Test @@ -550,30 +444,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { val schemaText = """{ "type" : "record", "name" : "foo", "fields" : |[ { "name" : "bar", "type" : { "type" : "map", "values" : "int" } } ] }""".stripMargin - val inputs = List( - (new DataMap, new DataMap), - (new DataMap(asMap("key1" -> 1)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1, "key2" -> 2)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), - (new DataMap(asMap("key1" -> 1L)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1.0)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1.0f)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1, "key2" -> 2L)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), - ( - new DataMap(asMap("key1" -> 1L, "key2" -> 2.0)), - new DataMap(asMap("key1" -> 1, "key2" -> 2)))) - val badObjects = List( - TRUE, - I1, - L1, - F1, - D1, - new java.lang.String, - new DataList, - new DataMap(asMap("key1" -> TRUE)), - new DataMap(asMap("key1" -> new java.lang.String("1"))), - new DataMap(asMap("key1" -> new DataMap)), - new DataMap(asMap("key1" -> new DataList))) - testNormalCoercionValidation(schemaText, "bar", inputs, badObjects) + val badObjects = MAP_BAD_OBJECTS ++ + List(new DataMap(asMap("key1" -> new java.lang.String("1")))) + testNormalCoercionValidation(schemaText, "bar", MAP_TEST_INPUTS, badObjects) } @Test @@ -585,29 +458,9 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { (new DataMap(asMap("key1" -> "1")), new DataMap(asMap("key1" -> 1))), ( new DataMap(asMap("key1" -> "1", "key2" -> "2")), - new DataMap(asMap("key1" -> 1, "key2" -> 2))), - (new DataMap, new DataMap), - (new DataMap(asMap("key1" -> 1)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1, "key2" -> 2)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), - (new DataMap(asMap("key1" -> 1L)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1.0)), new 
DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1.0f)), new DataMap(asMap("key1" -> 1))), - (new DataMap(asMap("key1" -> 1, "key2" -> 2L)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), - ( - new DataMap(asMap("key1" -> 1L, "key2" -> 2.0)), - new DataMap(asMap("key1" -> 1, "key2" -> 2)))) - val badObjects = List( - TRUE, - I1, - L1, - F1, - D1, - new java.lang.String, - new DataList, - new DataMap(asMap("key1" -> TRUE)), - new DataMap(asMap("key1" -> new DataMap)), - new DataMap(asMap("key1" -> new DataList))) - testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, badObjects) + new DataMap(asMap("key1" -> 1, "key2" -> 2)))) ++ + MAP_TEST_INPUTS + testStringToPrimitiveCoercionValidation(schemaText, "bar", inputs, MAP_BAD_OBJECTS) } @Test @@ -676,8 +529,8 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { def testTyperefStringToPrimitiveCoercionValidation(): Unit = { val inputs = List( (new java.lang.String("1"), I1), - (I1, new Integer(1)), - (new Integer(-1), new Integer(-1)), + (I1, I1), + (IM1, IM1), (L1, I1), (F1, I1), (D1, I1)) @@ -705,11 +558,10 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { | { "name" : "optionalDouble", "type" : "double", "optional" : true }, | { "name" : "optionalWithDefaultString", "type" : "string", "optional" : true, | "default" : "orange" }] } } ] }""".stripMargin - val good = List( - List( + val input = List( ValidationOptions( requiredMode = RequiredMode.FIXUP_ABSENT_WITH_DEFAULT, - coercionMode = CoercionMode.NORMAL)) -> + coercionMode = CoercionMode.NORMAL) -> List( new DataMap(asMap("requiredInt" -> 12, "requiredString" -> "")), new DataMap(asMap("requiredInt" -> 34, "requiredString" -> "cow")), @@ -758,7 +610,7 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { "extra1" -> TRUE)))) // All bad examples used CoercionMode.OFF which is unimplemented. So they have been skipped. 
- testValidationWithDifferentValidationOptions(schemaText, "bar", good, List()) + testValidationWithDifferentValidationOptions(schemaText, "bar", input) } @Test @@ -1007,16 +859,90 @@ class ValidateDataAgainstSchemaTest extends AssertionsForJUnit { } object ValidateDataAgainstSchemaTest { - import collection.JavaConverters._ val FALSE = new java.lang.Boolean(false) val TRUE = new java.lang.Boolean(true) val I1 = new java.lang.Integer(1) + val IM1 = new java.lang.Integer(-1) val F1 = new java.lang.Float(1) val L1 = new java.lang.Long(1) val D1 = new java.lang.Double(1) + val LONG_TEST_INPUTS = List( + (new java.lang.Long(1), new java.lang.Long(1)), + (new java.lang.Long(-1), new java.lang.Long(-1)), + (new java.lang.Integer(1), new java.lang.Long(1)), + (new java.lang.Float(1), new java.lang.Long(1)), + (new java.lang.Double(1), new java.lang.Long(1))) + + val BAD_OBJECTS_FOR_NUMERIC = List( + new java.lang.Boolean(true), + new java.lang.String("abc"), + ByteString.copyAvroString("bytes", false), + new DataMap, + new DataList) + + val FLOAT_TEST_INPUTS = List( + (new java.lang.Float(1), new java.lang.Float(1)), + (new java.lang.Float(-1), new java.lang.Float(-1)), + (new java.lang.Integer(1), new java.lang.Float(1)), + (new java.lang.Long(1), new java.lang.Float(1)), + (new java.lang.Double(1), new java.lang.Float(1))) + + val DOUBLE_TEST_INPUTS = List( + (new java.lang.Double(1), new java.lang.Double(1)), + (new java.lang.Double(-1), new java.lang.Double(-1)), + (new java.lang.Integer(1), new java.lang.Double(1)), + (new java.lang.Long(1), new java.lang.Double(1)), + (new java.lang.Float(1), new java.lang.Double(1))) + + val MAP_TEST_INPUTS = List( + (new DataMap, new DataMap), + (new DataMap(asMap("key1" -> 1)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1, "key2" -> 2)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), + (new DataMap(asMap("key1" -> 1L)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1.0)), new 
DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1.0f)), new DataMap(asMap("key1" -> 1))), + (new DataMap(asMap("key1" -> 1, "key2" -> 2L)), new DataMap(asMap("key1" -> 1, "key2" -> 2))), + ( + new DataMap(asMap("key1" -> 1L, "key2" -> 2.0)), + new DataMap(asMap("key1" -> 1, "key2" -> 2)))) + + val MAP_BAD_OBJECTS = List( + TRUE, + I1, + L1, + F1, + D1, + new java.lang.String, + new DataList, + new DataMap(asMap("key1" -> TRUE)), + new DataMap(asMap("key1" -> new DataMap)), + new DataMap(asMap("key1" -> new DataList))) + + val ARRAY_TEST_INPUTS = List( + (new DataList, new DataList), + (new DataList(asList(1)), new DataList(asList(1))), + (new DataList(asList(2, 3)), new DataList(asList(2, 3))), + (new DataList(asList(1L)), new DataList(asList(1))), + (new DataList(asList(1.0f)), new DataList(asList(1))), + (new DataList(asList(1.0)), new DataList(asList(1)))) + + val ARRAY_BAD_OBJECTS = List( + TRUE, + I1, + L1, + F1, + D1, + new java.lang.String, + new DataMap, + new DataList(asList(TRUE)), + new DataList(asList(new DataMap)), + new DataList(asList(new DataList)), + new DataList(asList(TRUE, I1)), + new DataList(asList(I1, TRUE))) + private val codec = new JacksonDataCodec() def inputStreamFromString(s: String): ByteArrayInputStream = { @@ -1084,66 +1010,25 @@ object ValidateDataAgainstSchemaTest { } def testValidationWithDifferentValidationOptions( - schemaText: String, - key: String, - goodInput: Seq[(Seq[ValidationOptions], Seq[AnyRef])], - badInput: Seq[(Seq[ValidationOptions], Seq[AnyRef], String, Seq[Seq[String]])]): Unit = { + schemaText: String, + key: String, + input: Seq[(ValidationOptions, Seq[AnyRef])]): Unit = { val schema = dataSchemaFromString(schemaText) assert(schema != null) val map = new DataMap - for (rows <- goodInput) { - val (modes, dataObjects) = rows - for (mode <- modes) { - for (dataObject <- dataObjects) { - map.put(key, dataObject) - val result = validate(map, schema, mode) - assert(result.isValid) - if (!result.hasFix) { 
- assert(map eq result.getFixed) - } - } - } - } - for (rows <- badInput) { - val (modes, dataObjects, expectedString, errorPaths) = rows - for (mode <- modes) { - var index = 0 - for (dataObject <- dataObjects) { - map.put(key, dataObject) - val result = validate(map, schema, mode) - assert(!result.isValid) + for (rows <- input) { + val (mode, dataObjects) = rows + for (dataObject <- dataObjects) { + map.put(key, dataObject) + val result = validate(map, schema, mode) + assert(result.isValid) + if (!result.hasFix) { assert(map eq result.getFixed) - checkMessages(result.getMessages.asScala, expectedString) - if (index < errorPaths.length) - checkMessagesErrorPath(result.getMessages.asScala, errorPaths(index)) - index += 1 } } } } - private def checkMessages(messages: Iterable[Message], expectedString: String): Unit = { - for (m <- messages) { - assert(m.getFormat.contains(expectedString)) - } - } - - private def checkMessagesErrorPath(messages: Iterable[Message], errorPaths: Seq[String]): Unit = { - for ((m, errorPath) <- messages.zip(errorPaths)) { - val path = pathAsString(m.getPath) - assert(path == errorPath) - } - } - - private def pathAsString(path: Array[AnyRef]): String = { - val sb = new StringBuilder - for (component <- path) { - sb.append(DataElement.SEPARATOR) - sb.append(component.toString) - } - sb.toString - } - private def testValidationWithNormalCoercionHelper( schema: DataSchema, key: String, From c10be3ff0caadd2ab4a4871df88461152f2346b1 Mon Sep 17 00:00:00 2001 From: Zachary Drudi Date: Fri, 26 Nov 2021 09:35:56 -0500 Subject: [PATCH 09/12] add attribution headers for copied source files. 
--- .../naptime/courier/validation/CoercionMode.scala | 14 ++++++++++++++ .../naptime/courier/validation/RequiredMode.scala | 14 ++++++++++++++ .../validation/ValidateDataAgainstSchema.scala | 14 ++++++++++++++ .../courier/validation/ValidationOptions.scala | 14 ++++++++++++++ .../validation/ValidateDataAgainstSchemaTest.scala | 14 ++++++++++++++ 5 files changed, 70 insertions(+) diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala index fb59816d..1fc4f617 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala @@ -1,3 +1,17 @@ +/* + Copyright (c) 2021 Coursera Inc. + + Copyright (c) 2012 LinkedIn Corp. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + + This file has been modified by Coursera Inc. to loosen + validation of enums. + */ + package org.coursera.naptime.courier.validation import com.linkedin.data.schema.validation.{CoercionMode => PegasusCoercionMode} diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala index 1136654c..7d04632f 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala @@ -1,3 +1,17 @@ +/* + Copyright (c) 2021 Coursera Inc. + + Copyright (c) 2012 LinkedIn Corp. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + + This file has been modified by Coursera Inc. to loosen + validation of enums. + */ + package org.coursera.naptime.courier.validation import com.linkedin.data.schema.validation.{RequiredMode => PegasusRequiredMode} diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala index 4bece15f..af87ebe4 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala @@ -1,3 +1,17 @@ +/* + Copyright (c) 2021 Coursera Inc. + + Copyright (c) 2012 LinkedIn Corp. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + + This file has been modified by Coursera Inc. to loosen + validation of enums. + */ + package org.coursera.naptime.courier.validation import java.math.BigDecimal diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala index 392e5290..43f1ec87 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala @@ -1,3 +1,17 @@ +/* + Copyright (c) 2021 Coursera Inc. + + Copyright (c) 2012 LinkedIn Corp. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + + This file has been modified by Coursera Inc. to loosen + validation of enums. + */ + package org.coursera.naptime.courier.validation import com.linkedin.data.it.Predicate diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index 003adbc0..7085eb44 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -1,3 +1,17 @@ +/* + Copyright (c) 2021 Coursera Inc. + + Copyright (c) 2012 LinkedIn Corp. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + + This file has been modified by Coursera Inc. to loosen + validation of enums. 
+ */ + package org.coursera.naptime.courier.validation import java.io.ByteArrayInputStream From f47d00ca5ee33580110740e3e4101074869171c2 Mon Sep 17 00:00:00 2001 From: Deepkanwal Plaha Date: Wed, 23 Mar 2022 19:57:30 -0400 Subject: [PATCH 10/12] update copyright notice --- .../courier/validation/CoercionMode.scala | 29 ++++++++++++++++--- .../courier/validation/RequiredMode.scala | 29 ++++++++++++++++--- .../ValidateDataAgainstSchema.scala | 29 ++++++++++++++++--- .../validation/ValidationOptions.scala | 29 ++++++++++++++++--- .../ValidateDataAgainstSchemaTest.scala | 29 ++++++++++++++++--- 5 files changed, 125 insertions(+), 20 deletions(-) diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala index 1fc4f617..33de5e2b 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/CoercionMode.scala @@ -1,16 +1,37 @@ /* - Copyright (c) 2021 Coursera Inc. + Copyright (c) 2021 Coursera, Inc. + This file has been modified by Coursera, Inc. to loosen + validation of enums. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 - This file has been modified by Coursera Inc. to loosen - validation of enums. - */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ package org.coursera.naptime.courier.validation diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala index 7d04632f..af137328 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/RequiredMode.scala @@ -1,16 +1,37 @@ /* - Copyright (c) 2021 Coursera Inc. + Copyright (c) 2021 Coursera, Inc. + This file has been modified by Coursera, Inc. to loosen + validation of enums. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 - This file has been modified by Coursera Inc. 
to loosen - validation of enums. - */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ package org.coursera.naptime.courier.validation diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala index af87ebe4..08088b66 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchema.scala @@ -1,16 +1,37 @@ /* - Copyright (c) 2021 Coursera Inc. + Copyright (c) 2021 Coursera, Inc. + This file has been modified by Coursera, Inc. to loosen + validation of enums. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 - This file has been modified by Coursera Inc. to loosen - validation of enums. 
- */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ package org.coursera.naptime.courier.validation diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala index 43f1ec87..249a58d8 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/validation/ValidationOptions.scala @@ -1,16 +1,37 @@ /* - Copyright (c) 2021 Coursera Inc. + Copyright (c) 2021 Coursera, Inc. + This file has been modified by Coursera, Inc. to loosen + validation of enums. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 - This file has been modified by Coursera Inc. to loosen - validation of enums. 
- */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ package org.coursera.naptime.courier.validation diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala index 7085eb44..cdb45654 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala +++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/validation/ValidateDataAgainstSchemaTest.scala @@ -1,16 +1,37 @@ /* - Copyright (c) 2021 Coursera Inc. + Copyright (c) 2021 Coursera, Inc. + This file has been modified by Coursera, Inc. to loosen + validation of enums. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 - This file has been modified by Coursera Inc. to loosen - validation of enums. 
- */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ package org.coursera.naptime.courier.validation From 2cca58b6d510d5deca9be408298e84b07f4afc7e Mon Sep 17 00:00:00 2001 From: Deepkanwal Plaha Date: Mon, 4 Apr 2022 16:05:47 -0400 Subject: [PATCH 11/12] relax enum validation for StringKey --- .../naptime/courier/CourierFormats.scala | 15 +++++++------- .../courier/TestRecordWithEnum.courier | 6 ++++++ .../naptime/courier/CourierFormatsTest.scala | 20 +++++++++++++++++++ version.sbt | 2 +- 4 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestRecordWithEnum.courier diff --git a/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala b/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala index 9467d1c5..f819561d 100644 --- a/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala +++ b/naptime-models/src/main/scala/org/coursera/naptime/courier/CourierFormats.scala @@ -39,10 +39,6 @@ import com.linkedin.data.schema.RecordDataSchema import com.linkedin.data.schema.StringDataSchema import com.linkedin.data.schema.TyperefDataSchema import com.linkedin.data.schema.UnionDataSchema -import com.linkedin.data.schema.validation.CoercionMode -import com.linkedin.data.schema.validation.RequiredMode -import com.linkedin.data.schema.validation.ValidateDataAgainstSchema -import com.linkedin.data.schema.validation.ValidationOptions import com.linkedin.data.template.DataTemplate import com.linkedin.data.template.DataTemplateUtil import com.linkedin.data.template.RecordTemplate @@ -56,6 +52,10 @@ import org.coursera.courier.templates.ScalaEnumTemplate import 
org.coursera.courier.templates.ScalaEnumTemplateSymbol import org.coursera.naptime.courier.CourierUtils._ import org.coursera.naptime.courier.Exceptions._ +import org.coursera.naptime.courier.validation.CoercionMode +import org.coursera.naptime.courier.validation.RequiredMode +import org.coursera.naptime.courier.validation.ValidationOptions +import org.coursera.naptime.courier.validation.ValidateDataAgainstSchema import play.api.libs.json.JsonValidationError import play.api.libs.json.Format import play.api.libs.json.IdxPathNode @@ -855,10 +855,11 @@ object CourierFormats extends StrictLogging { } } - private[this] val validationOptions = - new ValidationOptions(RequiredMode.FIXUP_ABSENT_WITH_DEFAULT, CoercionMode.STRING_TO_PRIMITIVE) + private[this] val validationOptions = ValidationOptions( + requiredMode = RequiredMode.FIXUP_ABSENT_WITH_DEFAULT, + coercionMode = CoercionMode.STRING_TO_PRIMITIVE) - private[this] def validateAndFixUp(data: Any, schema: DataSchema): Unit = { + private[this] def validateAndFixUp(data: AnyRef, schema: DataSchema): Unit = { val result = ValidateDataAgainstSchema.validate(data, schema, validationOptions) if (!result.isValid) { throw new DataValidationException(result) diff --git a/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestRecordWithEnum.courier b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestRecordWithEnum.courier new file mode 100644 index 00000000..2e1ec70f --- /dev/null +++ b/naptime-models/src/test/pegasus/org/coursera/naptime/courier/TestRecordWithEnum.courier @@ -0,0 +1,6 @@ +namespace org.coursera.naptime.courier + +record TestRecordWithEnum { + enumField: TestEnum + stringField: string +} diff --git a/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala b/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala index b9479182..f17b2d52 100644 --- a/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala 
+++ b/naptime-models/src/test/scala/org/coursera/naptime/courier/CourierFormatsTest.scala @@ -409,6 +409,26 @@ class CourierFormatsTest extends AssertionsForJUnit { Json.parse(recordOfEnum).validate[TestWrappedEnum](reader)) } + @Test + def testStringKeyWithEnum(): Unit = { + implicit val converter = + CourierFormats.recordTemplateStringKeyFormat[TestRecordWithEnum] + + val validString = "FIRST~value" + val validStringKey = StringKey(validString) + val validRecord = validStringKey.asOpt[TestRecordWithEnum].get + assert(validRecord.data().size() === 2) + assert(validRecord.enumField == TestEnum.FIRST) + assert(validRecord.stringField == "value") + + val unknownEnumString = "THIRD~value" + val unknownEnumStringKey = StringKey(unknownEnumString) + val unknownEnumRecord = unknownEnumStringKey.asOpt[TestRecordWithEnum].get + assert(unknownEnumRecord.data().size() === 2) + assert(unknownEnumRecord.enumField == TestEnum.$UNKNOWN) + assert(unknownEnumRecord.stringField == "value") + } + @Test def deserializeUnknownNestedEnum(): Unit = { val reader = CourierFormats.recordTemplateFormats[TestWrappedEnum] diff --git a/version.sbt b/version.sbt index 067b3812..d65b03dd 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "0.11.6-alpha1" +version in ThisBuild := "0.11.6-alpha4" From 48a195182eaab54bd2ad880743f0adf31b42b5b2 Mon Sep 17 00:00:00 2001 From: Deepkanwal Plaha Date: Wed, 20 Apr 2022 19:12:36 -0400 Subject: [PATCH 12/12] bump version to 0.11.6 --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index d65b03dd..a2fbae3c 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "0.11.6-alpha4" +version in ThisBuild := "0.11.6"