From 89f9f5189be1c2757d998d148ab685832fedc954 Mon Sep 17 00:00:00 2001 From: Ian Duncan Date: Fri, 22 Jul 2022 10:10:48 -0500 Subject: [PATCH 1/3] First pass at implementing toJSON support --- src/Data/Avro/Deriving.hs | 49 +++- src/Data/Avro/Encoding/ToAvro.hs | 2 +- src/Data/Avro/JSON.hs | 369 +++++++++++++++++++++++++++++-- src/Data/Avro/Schema/Decimal.hs | 17 +- 4 files changed, 409 insertions(+), 28 deletions(-) diff --git a/src/Data/Avro/Deriving.hs b/src/Data/Avro/Deriving.hs index 8d1f705..0712875 100644 --- a/src/Data/Avro/Deriving.hs +++ b/src/Data/Avro/Deriving.hs @@ -45,6 +45,7 @@ import qualified Data.Aeson as J import Data.Avro hiding (decode, encode) import Data.Avro.Encoding.ToAvro (ToAvro (..)) import Data.Avro.Internal.EncodeRaw (putI) +import Data.Avro.JSON import Data.Avro.Schema.Schema as S import Data.ByteString (ByteString) import qualified Data.ByteString as B @@ -56,6 +57,7 @@ import qualified Data.List.NonEmpty as NE import Data.Map (Map) import Data.Maybe (fromMaybe) import Data.Semigroup ((<>)) +import Data.String (IsString (..)) import qualified Data.Text as Text import Data.Time (Day, DiffTime, LocalTime, UTCTime) import Data.UUID (UUID) @@ -366,16 +368,27 @@ newNames base n = sequence [newName (base ++ show i) | i <- [1..n]] ------------------------- ToAvro ------------------------------------------------ genToAvro :: DeriveOptions -> Schema -> Q [Dec] -genToAvro opts s@(S.Enum n _ _ _) = - encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) +genToAvro opts s@(S.Enum n _ _ _) = do + baseInstance <- encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) + jsonInstance <- encodeAvroJsonInstance (mkSchemaValueName (namespaceBehavior opts) n) + pure (baseInstance ++ jsonInstance) where encodeAvroInstance sname = [d| instance ToAvro $(conT $ mkDataTypeName (namespaceBehavior opts) n) where toAvro = $([| \_ x -> putI (fromEnum x) |]) |] + encodeAvroJsonInstance sname = + [d| instance ToAvroJSON $(conT $ mkDataTypeName (namespaceBehavior opts) n) where + toAvroJSON (S.Enum _ _ _ vs) x = case vs V.!? fromEnum x of + Nothing -> error "toAvroJson: unable to find equivalent enum value in new schema" + Just _ -> toJSON originalName + toAvroJSON _ _ = error "ToAvroJson for enum only works for Enum schema values" + |] -genToAvro opts s@(S.Record n _ _ fs) = - encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) +genToAvro opts s@(S.Record n _ _ fs) = do + baseInstance <- encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) + jsonInstance <- encodeAvroJsonInstance (mkSchemaValueName (namespaceBehavior opts) n) + pure (baseInstance ++ jsonInstance) where encodeAvroInstance sname = [d| instance ToAvro $(conT $ mkDataTypeName (namespaceBehavior opts) n) where @@ -390,9 +403,26 @@ genToAvro opts s@(S.Record n _ _ fs) = in listE $ build <$> zip fs names ) |] + encodeAvroJsonInstance sname = + [d| instance ToAvroJSON $(conT $ mkDataTypeName (namespaceBehavior opts) n) where + toAvroJSON = $(encodeAvroJsonFieldsExp sname) + |] + encodeAvroJsonFieldsExp sname = do + names <- newNames "p_" (length fs) + wn <- varP <$> newName "_" + let con = conP (mkDataTypeName (namespaceBehavior opts) n) (varP <$> names) + lamE [wn, con] + [| J.object + $( let build (fld, n) = [| fromString $(stringE $ Text.unpack $ fldName fld) J..= toAvroJSON (fldType fld) $(varE n) |] + in listE $ build <$> zip fs names + ) + |] + -genToAvro opts s@(S.Fixed n _ _ _) = - encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) +genToAvro opts s@(S.Fixed n _ _ _) = do + baseInstance <- encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) + jsonInstance <- encodeAvroJsonInstance (mkSchemaValueName (namespaceBehavior opts) n) + pure (baseInstance ++ jsonInstance) where encodeAvroInstance sname = [d| instance ToAvro $(conT $ mkDataTypeName (namespaceBehavior opts) n) where @@ -401,6 +431,13 @@ genToAvro opts s@(S.Fixed n _ _ _) = wc <- newName "_" lamE [varP wc, conP (mkDataTypeName (namespaceBehavior opts) n) [varP x]] [| toAvro $(varE sname) $(varE x) |]) |] + encodeAvroJsonInstance sname = + [d| instance ToAvroJSON $(conT $ mkDataTypeName (namespaceBehavior opts) n) where + toAvroJSON = $(do + x <- newName "x" + wc <- newName "_" + lamE [varP wc, conP (mkDataTypeName (namespaceBehavior opts) n) [varP x]] [| toAvroJSON $(varE sname) $(varE x) |]) + |] genToAvro _ _ = pure [] schemaDef :: Name -> Schema -> Q [Dec] diff --git a/src/Data/Avro/Encoding/ToAvro.hs b/src/Data/Avro/Encoding/ToAvro.hs index acc301b..2c15a57 100644 --- a/src/Data/Avro/Encoding/ToAvro.hs +++ b/src/Data/Avro/Encoding/ToAvro.hs @@ -234,7 +234,7 @@ instance ToAvro a => ToAvro (Map.Map Text a) where toAvro (S.Map s) hm = if Map.null hm then long0 else putI (F.length hm) <> foldMap putKV (Map.toList hm) <> long0 where putKV (k,v) = toAvro S.String' k <> toAvro s v - toAvro s _ = error ("Unable to encode HashMap as: " <> show s) + toAvro s _ = error ("Unable to encode Map as: " <> show s) instance ToAvro a => ToAvro (HashMap Text a) where toAvro (S.Map s) hm = diff --git a/src/Data/Avro/JSON.hs b/src/Data/Avro/JSON.hs index dfa1b1c..419fc4a 100644 --- a/src/Data/Avro/JSON.hs +++ b/src/Data/Avro/JSON.hs @@ -1,5 +1,7 @@ +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} -- | Avro supports a JSON representation of Avro objects alongside the -- Avro binary format. An Avro schema can be used to generate and -- validate JSON representations of Avro objects. @@ -59,22 +61,38 @@ -- @ module Data.Avro.JSON where +import Control.Monad.Identity (Identity (..)) import qualified Data.Aeson as Aeson import qualified Data.Aeson.Key as K import qualified Data.Aeson.KeyMap as KM -import Data.ByteString.Lazy (ByteString) +import qualified Data.Array as Ar +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as BS import qualified Data.Foldable as Foldable import Data.HashMap.Strict ((!)) import qualified Data.HashMap.Strict as HashMap import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as NE +import qualified Data.Map as Map +import Data.Maybe (fromJust) import Data.Tagged import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text +import qualified Data.Text.Lazy as LazyText +import qualified Data.Time as Time import qualified Data.Avro.HasAvroSchema as Schema -import Data.Avro.Schema.Schema (DefaultValue (..), Result (..), Schema, parseAvroJSON) -import qualified Data.Avro.Schema.Schema as Schema -import qualified Data.Vector as V +import Data.Avro.EitherN +import Data.Avro.Internal.Time +import Data.Avro.Schema.Decimal as D +import Data.Avro.Schema.Schema (DefaultValue (..), Result (..), Schema, parseAvroJSON) +import qualified Data.Avro.Schema.Schema as Schema +import qualified Data.Vector as V +import qualified Data.Vector.Unboxed as U +import Data.Int +import Data.Word +import qualified Data.UUID as UUID +import GHC.TypeLits decodeAvroJSON :: Schema -> Aeson.Value -> Result DefaultValue decodeAvroJSON schema json = @@ -119,15 +137,333 @@ decodeAvroJSON schema json = isBuiltIn name = name `elem` [ "null", "boolean", "int", "long", "float" , "double", "bytes", "string", "array", "map" ] --- -- | Convert a 'Aeson.Value' into a type that has an Avro schema. The --- -- schema is used to validate the JSON and will return an 'Error' if --- -- the JSON object is not encoded correctly or does not match the schema. --- fromJSON :: forall a. (FromAvro a) => Aeson.Value -> Result a --- fromJSON json = do --- value <- decodeAvroJSON schema json --- fromAvro value --- where --- schema = untag (Schema.schema :: Tagged a Schema) +class ToAvroJSON a where + -- | Convert an object with an Avro schema to JSON using that schema. + -- + -- We always need the schema to /encode/ to JSON because representing + -- unions requires using the names of named types. + toAvroJSON :: Schema -> a -> Aeson.Value + toAvroEncoding :: Schema -> a -> Aeson.Encoding + toAvroEncoding s = Aeson.toEncoding . toAvroJSON s + +instance ToAvroJSON Int where + -- It might seem surprising that we use fromIntegral here, but it's + -- used to ensure that overflow is handled correctly. + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Int64 (fromIntegral i) + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Int32 (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Int as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Int32 where + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Int64 (fromIntegral i) + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Int32 i + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Int32 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Int64 where + toAvroJSON (Schema.Long _) i = Aeson.toJSON i + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Int64 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word8 where + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Word8 i + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 (fromIntegral i) + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word8 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word16 where + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Word16 i + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 (fromIntegral i) + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word16 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word32 where + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Word32 i + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 (fromIntegral i) + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word32 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word64 where + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 i + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word64 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Double where + toAvroJSON Schema.Double i = Aeson.toJSON @Double i + toAvroJSON s _ = error ("Unable to encode Double as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Float where + toAvroJSON Schema.Float i = Aeson.toJSON @Float i + toAvroJSON Schema.Double i = Aeson.toJSON @Double $ realToFrac i + toAvroJSON s _ = error ("Unable to encode Float as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON () where + toAvroJSON Schema.Null () = Aeson.Null + toAvroJSON s () = error ("Unable to encode () as: " <> show s) + {-# INLINE toAvroJSON #-} + + +instance ToAvroJSON Bool where + toAvroJSON Schema.Boolean v = Aeson.toJSON v + toAvroJSON s _ = error ("Unable to encode Bool as: " <> show s) + {-# INLINE toAvroJSON #-} + toAvroEncoding Schema.Boolean v = Aeson.toEncoding v + toAvroEncoding s _ = error ("Unable to encode Bool as: " <> show s) + {-# INLINE toAvroEncoding #-} + +instance ToAvroJSON Text.Text where + toAvroJSON (Schema.Bytes _) v = Aeson.toJSON $ Schema.serializeBytes $ Text.encodeUtf8 v + toAvroJSON (Schema.String _) v = Aeson.toJSON v + toAvroJSON s _ = error ("Unable to encode Text as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON LazyText.Text where + toAvroJSON (Schema.Bytes _) v = Aeson.toJSON $ Schema.serializeBytes $ Text.encodeUtf8 $ LazyText.toStrict v + toAvroJSON (Schema.String _) v = Aeson.toJSON v + toAvroJSON s _ = error ("Unable to encode Text as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON BS.ByteString where + toAvroJSON s bs = case s of + (Schema.Bytes _) -> Aeson.toJSON $ Schema.serializeBytes bs + (Schema.String _) -> Aeson.toJSON $ Text.decodeUtf8 bs + Schema.Fixed _ _ l _ | l == BS.length bs -> Aeson.toJSON $ Schema.serializeBytes bs + Schema.Fixed _ _ l _ -> error ("Unable to encode ByteString as Fixed(" <> show l <> ") because its length is " <> show (BS.length bs)) + _ -> error ("Unable to encode ByteString as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON BL.ByteString where + toAvroJSON s bs = toAvroJSON s (BL.toStrict bs) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Time.UTCTime where + toAvroJSON s@(Schema.Long (Just Schema.TimestampMicros)) = toAvroJSON @Int64 s . fromIntegral . utcTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.TimestampMillis)) = toAvroJSON @Int64 s . fromIntegral . utcTimeToMillis + toAvroJSON s = error ("Unable to encode UTCTime as " <> show s) + +instance ToAvroJSON Time.LocalTime where + toAvroJSON s@(Schema.Long (Just Schema.LocalTimestampMicros)) = + toAvroJSON @Int64 s . fromIntegral . localTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.LocalTimestampMillis)) = + toAvroJSON @Int64 s . fromIntegral . localTimeToMillis + toAvroJSON s = + error ("Unable to encode LocalTime as " <> show s) + +instance ToAvroJSON Time.DiffTime where + toAvroJSON s@(Schema.Long (Just Schema.TimeMicros)) = toAvroJSON @Int64 s . fromIntegral . diffTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.TimestampMicros)) = toAvroJSON @Int64 s . fromIntegral . diffTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.TimestampMillis)) = toAvroJSON @Int64 s . fromIntegral . diffTimeToMillis + toAvroJSON s@(Schema.Int (Just Schema.TimeMillis)) = toAvroJSON @Int32 s . fromIntegral . diffTimeToMillis + toAvroJSON s = error ("Unble to encode DiffTime as " <> show s) + +instance ToAvroJSON Time.Day where + toAvroJSON s = toAvroJSON @Int32 s . fromIntegral . daysSinceEpoch + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON UUID.UUID where + toAvroJSON s = toAvroJSON s . UUID.toText + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON a => ToAvroJSON [a] where + toAvroJSON (Schema.Array s) as = Aeson.toJSON $ fmap (toAvroJSON s) as + toAvroJSON s _ = error ("Unable to encode Haskell list as: " <> show s) + +instance (ToAvroJSON a) => ToAvroJSON (Identity a) where + toAvroJSON (Schema.Union opts) e@(Identity a) = + if V.length opts == 1 + then toAvroUnionJSON (V.unsafeIndex opts 0) a + else error ("Unable to encode Identity as a single-value union: " <> show opts) + toAvroJSON s _ = error ("Unable to encode Identity value as " <> show s) + +instance ToAvroJSON a => ToAvroJSON (Maybe a) where + toAvroJSON (Schema.Union opts) v = + case Foldable.toList opts of + [Schema.Null, s] -> maybe (toAvroJSON Schema.Null ()) (toAvroJSON s) v + [s, Schema.Null] -> maybe (toAvroJSON Schema.Null ()) (toAvroJSON s) v + wrongOpts -> error ("Unable to encode Maybe as " <> show wrongOpts) + toAvroJSON s _ = error ("Unable to encode Maybe as " <> show s) + +instance (U.Unbox a, ToAvroJSON a) => ToAvroJSON (U.Vector a) where + toAvroJSON (Schema.Array s) as = Aeson.toJSON @(V.Vector Aeson.Value) $ fmap (toAvroJSON s) $ U.convert as + toAvroJSON s _ = error ("Unable to encode Vector list as: " <> show s) + +instance (ToAvroJSON a) => ToAvroJSON (V.Vector a) where + toAvroJSON (Schema.Array s) as = Aeson.toJSON $ fmap (toAvroJSON s) as + toAvroJSON s _ = error ("Unable to encode Vector list as: " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b) => ToAvroJSON (Either a b) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 2 + then case v of + Left a -> toAvroUnionJSON (V.unsafeIndex opts 0) a + Right b -> toAvroUnionJSON (V.unsafeIndex opts 1) b + else error ("Unable to encode Either as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either as " <> show s) + +instance ToAvroJSON a => ToAvroJSON (Map.Map Text.Text a) where + toAvroJSON (Schema.Map s) m = Aeson.toJSON $ fmap (toAvroJSON s) m + toAvroJSON s _ = error ("Unable to encode Map as: " <> show s) + +instance ToAvroJSON a => ToAvroJSON (HashMap.HashMap Text.Text a) where + toAvroJSON (Schema.Map s) hm = Aeson.toJSON $ fmap (toAvroJSON s) hm + toAvroJSON s _ = error ("Unable to encode HashMap as: " <> show s) + +instance (Ar.Ix i, ToAvroJSON a) => ToAvroJSON (Ar.Array i a) where + toAvroJSON (Schema.Array s) a = Aeson.toJSON $ fmap (toAvroJSON s) $ Foldable.toList a + toAvroJSON s _ = error ("Unable to encode indexed Array list as: " <> show s) + +-- TODO, I don't know if this is the right way to do this. +instance (KnownNat p, KnownNat s) => ToAvroJSON (Decimal p s) where + toAvroJSON s = toAvroJSON @Int64 s . fromIntegral . fromJust . D.underlyingValue + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c) => ToAvroJSON (Either3 a b c) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 3 + then case v of + E3_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E3_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E3_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + else error ("Unable to encode Either3 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either3 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d) => ToAvroJSON (Either4 a b c d) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 4 + then case v of + E4_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E4_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E4_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E4_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + else error ("Unable to encode Either4 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either4 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e) => ToAvroJSON (Either5 a b c d e) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 5 + then case v of + E5_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E5_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E5_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E5_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E5_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + else error ("Unable to encode Either5 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either5 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f) => ToAvroJSON (Either6 a b c d e f) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 6 + then case v of + E6_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E6_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E6_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E6_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E6_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E6_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + else error ("Unable to encode Either6 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either6 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g) => ToAvroJSON (Either7 a b c d e f g) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 7 + then case v of + E7_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E7_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E7_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E7_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E7_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E7_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E7_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + else error ("Unable to encode Either7 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either7 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h) => ToAvroJSON (Either8 a b c d e f g h) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 8 + then case v of + E8_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E8_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E8_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E8_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E8_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E8_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E8_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + E8_8 x -> toAvroUnionJSON (V.unsafeIndex opts 7) x + else error ("Unable to encode Either8 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either8 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h, ToAvroJSON i) => ToAvroJSON (Either9 a b c d e f g h i) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 9 + then case v of + E9_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E9_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E9_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E9_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E9_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E9_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E9_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + E9_8 x -> toAvroUnionJSON (V.unsafeIndex opts 7) x + E9_9 x -> toAvroUnionJSON (V.unsafeIndex opts 8) x + else error ("Unable to encode Either9 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either9 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h, ToAvroJSON i, ToAvroJSON j) => ToAvroJSON (Either10 a b c d e f g h i j) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 9 + then case v of + E10_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E10_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E10_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E10_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E10_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E10_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E10_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + E10_8 x -> toAvroUnionJSON (V.unsafeIndex opts 7) x + E10_9 x -> toAvroUnionJSON (V.unsafeIndex opts 8) x + E10_10 x -> toAvroUnionJSON (V.unsafeIndex opts 9) x + else error ("Unable to encode Either10 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either10 as " <> show s) + +toAvroUnionJSON :: ToAvroJSON a => Schema -> a -> Aeson.Value +toAvroUnionJSON s x = case s of + Schema.Null -> toAvroJSON s x + -- TODO, not sure if this is supposed to use logical types, need to + -- compare with other language implementations + _ -> Aeson.toJSON $ HashMap.singleton (Schema.typeName s) (toAvroJSON s x) + +toJSON :: forall a. (Schema.HasAvroSchema a, ToAvroJSON a) => a -> Aeson.Value +toJSON = toAvroJSON schema + where + schema = untag (Schema.schema :: Tagged a Schema) + +toEncoding :: forall a. (Schema.HasAvroSchema a, ToAvroJSON a) => a -> Aeson.Encoding +toEncoding = toAvroEncoding schema + where + schema = untag (Schema.schema :: Tagged a Schema) + +class FromAvroJSON a where + -- | Convert a 'Aeson.Value' into a type that has an Avro schema. The + -- schema is used to validate the JSON and will return an 'Error' if + -- the JSON object is not encoded correctly or does not match the schema. + fromAvroJSON :: Schema -> Aeson.Value -> Result a + +fromJSON :: forall a. (Schema.HasAvroSchema a, FromAvroJSON a) => Aeson.Value -> Result a +fromJSON = fromAvroJSON schema + where + schema = untag (Schema.schema :: Tagged a Schema) -- -- | Parse a 'ByteString' as JSON and convert it to a type with an -- -- Avro schema. Will return 'Error' if the input is not valid JSON or @@ -136,10 +472,3 @@ decodeAvroJSON schema json = -- parseJSON input = case Aeson.eitherDecode input of -- Left msg -> Error msg -- Right value -> fromJSON value - --- -- | Convert an object with an Avro schema to JSON using that schema. --- -- --- -- We always need the schema to /encode/ to JSON because representing --- -- unions requires using the names of named types. --- toJSON :: forall a. (ToAvro a) => a -> Aeson.Value --- toJSON = Aeson.toJSON . toAvro diff --git a/src/Data/Avro/Schema/Decimal.hs b/src/Data/Avro/Schema/Decimal.hs index f826f3d..3331b24 100644 --- a/src/Data/Avro/Schema/Decimal.hs +++ b/src/Data/Avro/Schema/Decimal.hs @@ -6,10 +6,11 @@ module Data.Avro.Schema.Decimal where import qualified Data.BigDecimal as D +import Data.Scientific as S import Data.Proxy import GHC.TypeLits -newtype Decimal (p :: Nat) (s :: Nat) +newtype Decimal (precision :: Nat) (scale :: Nat) = Decimal { unDecimal :: D.BigDecimal } deriving (Eq, Ord, Show, Read, Num, Fractional, Real) @@ -31,3 +32,17 @@ underlyingValue (Decimal d) in if D.precision new > pp then Nothing else Just $ fromInteger $ D.getValue new + +clamp :: forall p s. (KnownNat p, KnownNat s) => Decimal p s -> Decimal p s +clamp (Decimal d) + = let ss = natVal (Proxy :: Proxy s) + pp = natVal (Proxy :: Proxy p) + new = if ss > D.getScale d + then D.BigDecimal (D.getValue d * 10 ^ (ss - D.getScale d)) ss + else D.roundBD d (D.halfUp ss) + in Decimal new + +toScientific :: (KnownNat p, KnownNat s) => Decimal p s -> Scientific +toScientific d + = let (Decimal d') = clamp d + in S.scientific (D.getValue d') (negate $ fromIntegral $ D.getScale d') From 1d07b6841571d56e7fcc05f1cf5e3bf9e0f29132 Mon Sep 17 00:00:00 2001 From: Ian Duncan Date: Sat, 23 Jul 2022 09:35:06 -0500 Subject: [PATCH 2/3] Clean up a few spots where there were incomplete pattern matches --- src/Data/Avro/EitherN.hs | 8 ++++++++ src/Data/Avro/Schema/Schema.hs | 5 ++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Data/Avro/EitherN.hs b/src/Data/Avro/EitherN.hs index d7f2a21..17e8953 100644 --- a/src/Data/Avro/EitherN.hs +++ b/src/Data/Avro/EitherN.hs @@ -443,6 +443,7 @@ instance (FromAvro a, FromAvro b, FromAvro c) => FromAvro (Either3 a b c) where fromAvro (AV.Union _ 1 b) = E3_2 <$> fromAvro b fromAvro (AV.Union _ 2 c) = E3_3 <$> fromAvro c fromAvro (AV.Union _ n _) = Left ("Unable to decode Either3 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either3 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d) => FromAvro (Either4 a b c d) where fromAvro (AV.Union _ 0 a) = E4_1 <$> fromAvro a @@ -450,6 +451,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d) => FromAvro (Either4 a fromAvro (AV.Union _ 2 c) = E4_3 <$> fromAvro c fromAvro (AV.Union _ 3 d) = E4_4 <$> fromAvro d fromAvro (AV.Union _ n _) = Left ("Unable to decode Either4 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either4 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e) => FromAvro (Either5 a b c d e) where fromAvro (AV.Union _ 0 a) = E5_1 <$> fromAvro a @@ -458,6 +460,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e) => FromAvr fromAvro (AV.Union _ 3 d) = E5_4 <$> fromAvro d fromAvro (AV.Union _ 4 e) = E5_5 <$> fromAvro e fromAvro (AV.Union _ n _) = Left ("Unable to decode Either5 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either5 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f) => FromAvro (Either6 a b c d e f) where fromAvro (AV.Union _ 0 a) = E6_1 <$> fromAvro a @@ -467,6 +470,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 4 e) = E6_5 <$> fromAvro e fromAvro (AV.Union _ 5 f) = E6_6 <$> fromAvro f fromAvro (AV.Union _ n _) = Left ("Unable to decode Either6 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either6 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g) => FromAvro (Either7 a b c d e f g) where fromAvro (AV.Union _ 0 a) = E7_1 <$> fromAvro a @@ -477,6 +481,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 5 f) = E7_6 <$> fromAvro f fromAvro (AV.Union _ 6 g) = E7_7 <$> fromAvro g fromAvro (AV.Union _ n _) = Left ("Unable to decode Either7 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either7 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g, FromAvro h) => FromAvro (Either8 a b c d e f g h) where fromAvro (AV.Union _ 0 a) = E8_1 <$> fromAvro a @@ -488,6 +493,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 6 g) = E8_7 <$> fromAvro g fromAvro (AV.Union _ 7 h) = E8_8 <$> fromAvro h fromAvro (AV.Union _ n _) = Left ("Unable to decode Either8 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either8 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g, FromAvro h, FromAvro i) => FromAvro (Either9 a b c d e f g h i) where fromAvro (AV.Union _ 0 a) = E9_1 <$> fromAvro a @@ -500,6 +506,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 7 h) = E9_8 <$> fromAvro h fromAvro (AV.Union _ 8 i) = E9_9 <$> fromAvro i fromAvro (AV.Union _ n _) = Left ("Unable to decode Either9 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either9 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g, FromAvro h, FromAvro i, FromAvro j) => FromAvro (Either10 a b c d e f g h i j) where fromAvro (AV.Union _ 0 a) = E10_1 <$> fromAvro a @@ -513,6 +520,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 8 i) = E10_9 <$> fromAvro i fromAvro (AV.Union _ 9 j) = E10_10 <$> fromAvro j fromAvro (AV.Union _ n _) = Left ("Unable to decode Either10 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either10 from a non-union" putIndexedValue :: ToAvro a => Int -> V.Vector Schema -> a -> Builder putIndexedValue i opts x = putI i <> toAvro (V.unsafeIndex opts i) x diff --git a/src/Data/Avro/Schema/Schema.hs b/src/Data/Avro/Schema/Schema.hs index c6af7c4..de7236f 100644 --- a/src/Data/Avro/Schema/Schema.hs +++ b/src/Data/Avro/Schema/Schema.hs @@ -1055,11 +1055,10 @@ overlay input supplement = overlayType input overlayType m@Map{..} = m { values = overlayType values } overlayType r@Record{..} = r { fields = map overlayField fields } overlayType u@Union{..} = Union (fmap overlayType options) - overlayType nt@(NamedType _) = rebind nt + overlayType (NamedType tn) = HashMap.lookupDefault (NamedType tn) tn bindings overlayType other = other - rebind (NamedType tn) = HashMap.lookupDefault (NamedType tn) tn bindings - bindings = extractBindings supplement + bindings = extractBindings supplement -- | Extract the named inner type definition as its own schema. subdefinition :: Schema -> Text -> Maybe Schema From 7e5dc9012803c338c42a8a35f3511b632dddb577 Mon Sep 17 00:00:00 2001 From: Ian Duncan Date: Sun, 24 Jul 2022 11:25:49 -0500 Subject: [PATCH 3/3] fix handling of ignored fields in deconflicted schemas --- README.md | 2 +- src/Data/Avro/Encoding/FromAvro.hs | 1 + src/Data/Avro/Encoding/ToAvro.hs | 85 ++++++++++------ src/Data/Avro/Internal/Time.hs | 2 - src/Data/Avro/JSON.hs | 151 ++++++++++++++++++++++++++--- src/Data/Avro/Schema/Deconflict.hs | 7 +- src/Data/Avro/Schema/ReadSchema.hs | 77 ++++++++++++++- src/Data/Avro/Schema/Schema.hs | 1 + test/Avro/JSONSpec.hs | 102 +++++++++++++++++++ 9 files changed, 373 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 4e19ab2..af4fd6c 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ When working with `FromAvro` directly it is important to understand the differen `Schema` (as in the example above) is just a regular data schema for an Avro type. -`ReadSchema` is a similar type, but it is capable of captuting and resolving differences between "_writer_ schema" and "_reader_ schema". See [Specification](https://avro.apache.org/docs/current/spec.html#Schema+Resolution) to learn more about schema resolution and de-conflicting. +`ReadSchema` is a similar type, but it is capable of capturing and resolving differences between "_writer_ schema" and "_reader_ schema". See [Specification](https://avro.apache.org/docs/current/spec.html#Schema+Resolution) to learn more about schema resolution and de-conflicting. `FromAvro` class requires `ReaderSchema` because with Avro it is possible to read data with a different schema compared to the schema that was used for writing this data. diff --git a/src/Data/Avro/Encoding/FromAvro.hs b/src/Data/Avro/Encoding/FromAvro.hs index 47ee80f..123bdf4 100644 --- a/src/Data/Avro/Encoding/FromAvro.hs +++ b/src/Data/Avro/Encoding/FromAvro.hs @@ -8,6 +8,7 @@ module Data.Avro.Encoding.FromAvro -- ** For internal use , Value(..) , getValue +, convertValue ) where diff --git a/src/Data/Avro/Encoding/ToAvro.hs b/src/Data/Avro/Encoding/ToAvro.hs index 2c15a57..a4177e0 100644 --- a/src/Data/Avro/Encoding/ToAvro.hs +++ b/src/Data/Avro/Encoding/ToAvro.hs @@ -10,6 +10,7 @@ module Data.Avro.Encoding.ToAvro where +import Control.Exception (Exception(..), throw) import Control.Monad.Identity (Identity (..)) import qualified Data.Array as Ar import Data.Avro.Internal.EncodeRaw @@ -36,11 +37,13 @@ import qualified Data.Text.Foreign as T #endif import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL +import Data.Typeable (Typeable(..), TypeRep, typeOf) import qualified Data.Time as Time import qualified Data.UUID as UUID import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import Data.Word +import GHC.Stack import GHC.TypeLits {- HLINT ignore "Use section" -} @@ -65,6 +68,26 @@ record (S.Record _ _ _ fs) vs = mapField :: HashMap Text Encoder -> S.Field -> Builder mapField env fld = maybe (failField fld) (flip runEncoder (S.fldType fld)) (HashMap.lookup (S.fldName fld) env) +record s _ = error $ "Expected record, got: " <> show s + +data AvroEncodingFailure = AvroEncodingFailure + { failedSchema :: Schema + , failedType :: Maybe TypeRep + , renderedValue :: Maybe String + , failure :: Maybe String + } + deriving (Show, Typeable) + +failEncoding :: (HasCallStack, Typeable a, Show a) => Schema -> a -> b +failEncoding s x = throw $ AvroEncodingFailure s (Just $ typeOf x) (Just $ show x) Nothing + +failEncodingWithMessage :: (HasCallStack, Typeable a, Show a) => Schema -> a -> String -> b +failEncodingWithMessage s x msg = throw $ AvroEncodingFailure s (Just $ typeOf x) (Just $ show x) $ Just msg + +failEncodingWithoutTypeInfo :: (HasCallStack) => Schema -> String -> b +failEncodingWithoutTypeInfo s msg = throw $ AvroEncodingFailure s Nothing Nothing $ Just msg + +instance Exception AvroEncodingFailure -- | Describes how to encode Haskell data types into Avro bytes class ToAvro a where @@ -73,7 +96,7 @@ class ToAvro a where instance ToAvro Int where toAvro (S.Long _) i = encodeRaw @Int64 (fromIntegral i) toAvro (S.Int _) i = encodeRaw @Int32 (fromIntegral i) - toAvro s _ = error ("Unable to encode Int as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Int32 where @@ -81,14 +104,14 @@ instance ToAvro Int32 where toAvro (S.Int _) i = encodeRaw @Int32 i toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Int32 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Int64 where toAvro (S.Long _) i = encodeRaw @Int64 i toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Int64 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word8 where @@ -96,7 +119,7 @@ instance ToAvro Word8 where toAvro (S.Long _) i = encodeRaw @Word64 (fromIntegral i) toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Word8 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word16 where @@ -104,7 +127,7 @@ instance ToAvro Word16 where toAvro (S.Long _) i = encodeRaw @Word64 (fromIntegral i) toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Word16 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word32 where @@ -112,33 +135,33 @@ instance ToAvro Word32 where toAvro (S.Long _) i = encodeRaw @Word64 (fromIntegral i) toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Word32 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word64 where toAvro (S.Long _) i = encodeRaw @Word64 i toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) - toAvro s _ = error ("Unable to encode Word64 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Double where toAvro S.Double i = word64LE (IEEE.doubleToWord i) - toAvro s _ = error ("Unable to encode Double as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Float where toAvro S.Float i = word32LE (IEEE.floatToWord i) toAvro S.Double i = word64LE (IEEE.doubleToWord $ realToFrac i) - toAvro s _ = error ("Unable to encode Float as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro () where toAvro S.Null () = mempty - toAvro s () = error ("Unable to encode () as: " <> show s) + toAvro s () = failEncoding s () instance ToAvro Bool where toAvro S.Boolean v = word8 $ fromIntegral (fromEnum v) - toAvro s _ = error ("Unable to encode Bool as: " <> show s) + toAvro s v = failEncoding s v {-# INLINE toAvro #-} instance (KnownNat p, KnownNat s) => ToAvro (D.Decimal p s) where @@ -157,28 +180,28 @@ instance ToAvro Time.DiffTime where toAvro s@(S.Long (Just S.TimestampMicros)) = toAvro @Int64 s . fromIntegral . diffTimeToMicros toAvro s@(S.Long (Just S.TimestampMillis)) = toAvro @Int64 s . fromIntegral . diffTimeToMillis toAvro s@(S.Int (Just S.TimeMillis)) = toAvro @Int32 s . fromIntegral . diffTimeToMillis - toAvro s = error ("Unble to encode DiffTime as " <> show s) + toAvro s = failEncoding s instance ToAvro Time.UTCTime where toAvro s@(S.Long (Just S.TimestampMicros)) = toAvro @Int64 s . fromIntegral . utcTimeToMicros toAvro s@(S.Long (Just S.TimestampMillis)) = toAvro @Int64 s . fromIntegral . utcTimeToMillis - toAvro s = error ("Unable to encode UTCTime as " <> show s) + toAvro s = failEncoding s instance ToAvro Time.LocalTime where toAvro s@(S.Long (Just S.LocalTimestampMicros)) = toAvro @Int64 s . fromIntegral . localTimeToMicros toAvro s@(S.Long (Just S.LocalTimestampMillis)) = toAvro @Int64 s . fromIntegral . localTimeToMillis - toAvro s = - error ("Unable to encode LocalTime as " <> show s) + toAvro s = failEncoding s instance ToAvro B.ByteString where toAvro s bs = case s of (S.Bytes _) -> encodeRaw (B.length bs) <> byteString bs (S.String _) -> encodeRaw (B.length bs) <> byteString bs S.Fixed _ _ l _ | l == B.length bs -> byteString bs - S.Fixed _ _ l _ -> error ("Unable to encode ByteString as Fixed(" <> show l <> ") because its length is " <> show (B.length bs)) - _ -> error ("Unable to encode ByteString as: " <> show s) + S.Fixed _ _ l _ -> failEncodingWithMessage s bs + ("Unable to encode ByteString as Fixed(" <> show l <> ") because its length is " <> show (B.length bs)) + _ -> failEncoding s bs {-# INLINE toAvro #-} instance ToAvro BL.ByteString where @@ -194,7 +217,7 @@ instance ToAvro Text where in case s of (S.Bytes _) -> res (S.String _) -> res - _ -> error ("Unable to encode Text as: " <> show s) + _ -> failEncoding s v #else let bs = T.encodeUtf8 v @@ -202,7 +225,7 @@ instance ToAvro Text where in case s of (S.Bytes _) -> res (S.String _) -> res - _ -> error ("Unable to encode Text as: " <> show s) + _ -> failEncoding s v #endif {-# INLINE toAvro #-} @@ -213,49 +236,49 @@ instance ToAvro TL.Text where instance ToAvro a => ToAvro [a] where toAvro (S.Array s) as = if DL.null as then long0 else encodeRaw (F.length as) <> foldMap (toAvro s) as <> long0 - toAvro s _ = error ("Unable to encode Haskell list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Haskell list" instance ToAvro a => ToAvro (V.Vector a) where toAvro (S.Array s) as = if V.null as then long0 else encodeRaw (V.length as) <> foldMap (toAvro s) as <> long0 - toAvro s _ = error ("Unable to encode Vector list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Vector" instance (Ix i, ToAvro a) => ToAvro (Ar.Array i a) where toAvro (S.Array s) as = if F.length as == 0 then long0 else encodeRaw (F.length as) <> foldMap (toAvro s) as <> long0 - toAvro s _ = error ("Unable to encode indexed Array list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Array" instance (U.Unbox a, ToAvro a) => ToAvro (U.Vector a) where toAvro (S.Array s) as = if U.null as then long0 else encodeRaw (U.length as) <> foldMap (toAvro s) (U.toList as) <> long0 - toAvro s _ = error ("Unable to encode Vector list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Unboxed Vector" instance ToAvro a => ToAvro (Map.Map Text a) where toAvro (S.Map s) hm = if Map.null hm then long0 else putI (F.length hm) <> foldMap putKV (Map.toList hm) <> long0 where putKV (k,v) = toAvro S.String' k <> toAvro s v - toAvro s _ = error ("Unable to encode Map as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Map" instance ToAvro a => ToAvro (HashMap Text a) where toAvro (S.Map s) hm = if HashMap.null hm then long0 else putI (F.length hm) <> foldMap putKV (HashMap.toList hm) <> long0 where putKV (k,v) = toAvro S.String' k <> toAvro s v - toAvro s _ = error ("Unable to encode HashMap as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode HashMap" instance ToAvro a => ToAvro (Maybe a) where toAvro (S.Union opts) v = case F.toList opts of [S.Null, s] -> maybe (putI 0) (\a -> putI 1 <> toAvro s a) v [s, S.Null] -> maybe (putI 1) (\a -> putI 0 <> toAvro s a) v - wrongOpts -> error ("Unable to encode Maybe as " <> show wrongOpts) - toAvro s _ = error ("Unable to encode Maybe as " <> show s) + wrongOpts -> failEncodingWithoutTypeInfo (S.Union opts) "Unable to encode Maybe" + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Maybe" instance (ToAvro a) => ToAvro (Identity a) where toAvro (S.Union opts) e@(Identity a) = if V.length opts == 1 then putI 0 <> toAvro (V.unsafeIndex opts 0) a - else error ("Unable to encode Identity as a single-value union: " <> show opts) - toAvro s _ = error ("Unable to encode Identity value as " <> show s) + else failEncodingWithoutTypeInfo (S.Union opts) ("Unable to encode Identity as a single-value union: " <> show opts) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Identity" instance (ToAvro a, ToAvro b) => ToAvro (Either a b) where toAvro (S.Union opts) v = @@ -263,5 +286,5 @@ instance (ToAvro a, ToAvro b) => ToAvro (Either a b) where then case v of Left a -> putI 0 <> toAvro (V.unsafeIndex opts 0) a Right b -> putI 1 <> toAvro (V.unsafeIndex opts 1) b - else error ("Unable to encode Either as " <> show opts) - toAvro s _ = error ("Unable to encode Either as " <> show s) + else failEncodingWithoutTypeInfo (S.Union opts) "Unable to encode Either as" + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Either" diff --git a/src/Data/Avro/Internal/Time.hs b/src/Data/Avro/Internal/Time.hs index 185fe1f..ba2d5fd 100644 --- a/src/Data/Avro/Internal/Time.hs +++ b/src/Data/Avro/Internal/Time.hs @@ -3,10 +3,8 @@ module Data.Avro.Internal.Time where -- Utility functions to work with times -import Data.Fixed (Fixed (..)) import Data.Maybe (fromJust) import Data.Time -import Data.Time.Clock import Data.Time.Clock.POSIX (posixSecondsToUTCTime) #if MIN_VERSION_time(1,9,0) import Data.Time.Format.Internal diff --git a/src/Data/Avro/JSON.hs b/src/Data/Avro/JSON.hs index 419fc4a..e177b5d 100644 --- a/src/Data/Avro/JSON.hs +++ b/src/Data/Avro/JSON.hs @@ -1,7 +1,9 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TupleSections #-} -- | Avro supports a JSON representation of Avro objects alongside the -- Avro binary format. An Avro schema can be used to generate and -- validate JSON representations of Avro objects. @@ -61,6 +63,7 @@ -- @ module Data.Avro.JSON where +import Control.Monad (forM, when) import Control.Monad.Identity (Identity (..)) import qualified Data.Aeson as Aeson import qualified Data.Aeson.Key as K @@ -83,10 +86,13 @@ import qualified Data.Time as Time import qualified Data.Avro.HasAvroSchema as Schema import Data.Avro.EitherN +import Data.Avro.Encoding.FromAvro as FromAvro import Data.Avro.Internal.Time import Data.Avro.Schema.Decimal as D -import Data.Avro.Schema.Schema (DefaultValue (..), Result (..), Schema, parseAvroJSON) +import Data.Avro.Schema.Schema (DefaultValue (..), Schema) import qualified Data.Avro.Schema.Schema as Schema +import Data.Avro.Schema.ReadSchema (ReadSchema) +import qualified Data.Avro.Schema.ReadSchema as ReadSchema import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import Data.Int @@ -94,9 +100,10 @@ import Data.Word import qualified Data.UUID as UUID import GHC.TypeLits -decodeAvroJSON :: Schema -> Aeson.Value -> Result DefaultValue +-- deprecate +decodeAvroJSON :: Schema -> Aeson.Value -> Schema.Result DefaultValue decodeAvroJSON schema json = - parseAvroJSON union env schema json + Schema.parseAvroJSON union env schema json where env = Schema.buildTypeEnvironment missing schema @@ -124,7 +131,7 @@ decodeAvroJSON schema json = HashMap.fromList [(Schema.typeName t, t) | t <- Foldable.toList schemas] in case HashMap.lookup (canonicalize branch) names of Just t -> do - nested <- parseAvroJSON union env t $ case KM.lookup (K.fromText branch) obj of + nested <- Schema.parseAvroJSON union env t $ case KM.lookup (K.fromText branch) obj of Just val -> val Nothing -> error "impossible" return (Schema.DUnion schemas t nested) @@ -291,8 +298,8 @@ instance (ToAvroJSON a) => ToAvroJSON (Identity a) where instance ToAvroJSON a => ToAvroJSON (Maybe a) where toAvroJSON (Schema.Union opts) v = case Foldable.toList opts of - [Schema.Null, s] -> maybe (toAvroJSON Schema.Null ()) (toAvroJSON s) v - [s, Schema.Null] -> maybe (toAvroJSON Schema.Null ()) (toAvroJSON s) v + [Schema.Null, s] -> maybe (toAvroJSON Schema.Null ()) (toAvroUnionJSON s) v + [s, Schema.Null] -> maybe (toAvroJSON Schema.Null ()) (toAvroUnionJSON s) v wrongOpts -> error ("Unable to encode Maybe as " <> show wrongOpts) toAvroJSON s _ = error ("Unable to encode Maybe as " <> show s) @@ -422,7 +429,7 @@ instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h, ToAvroJSON i, ToAvroJSON j) => ToAvroJSON (Either10 a b c d e f g h i j) where toAvroJSON (Schema.Union opts) v = - if V.length opts == 9 + if V.length opts == 10 then case v of E10_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x E10_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x @@ -437,6 +444,9 @@ instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, else error ("Unable to encode Either10 as " <> show opts) toAvroJSON s _ = error ("Unable to encode Either10 as " <> show s) +-- | Unions in Avro JSON serialization are encoded as a map with a single key +-- that is the name of the type. This function can be used to serialize union +-- values properly when writing instances of 'ToAvroJSON' for union fields. toAvroUnionJSON :: ToAvroJSON a => Schema -> a -> Aeson.Value toAvroUnionJSON s x = case s of Schema.Null -> toAvroJSON s x @@ -454,17 +464,54 @@ toEncoding = toAvroEncoding schema where schema = untag (Schema.schema :: Tagged a Schema) -class FromAvroJSON a where - -- | Convert a 'Aeson.Value' into a type that has an Avro schema. The - -- schema is used to validate the JSON and will return an 'Error' if - -- the JSON object is not encoded correctly or does not match the schema. - fromAvroJSON :: Schema -> Aeson.Value -> Result a - -fromJSON :: forall a. (Schema.HasAvroSchema a, FromAvroJSON a) => Aeson.Value -> Result a -fromJSON = fromAvroJSON schema +fromJSON :: (FromAvro a) => ReadSchema -> Aeson.Value -> Either String a +fromJSON schema json = do + intermediateValue <- parseAvroJSON union env schema json + FromAvro.fromAvro intermediateValue where - schema = untag (Schema.schema :: Tagged a Schema) + env = + ReadSchema.buildTypeEnvironment missing schema + missing name = + fail ("Type " <> show name <> " not in schema") + + union :: ReadSchema -> Aeson.Value -> Either String FromAvro.Value + union (ReadSchema.Union schemas) Aeson.Null = + case V.find ((== ReadSchema.Null) . snd) schemas of + Nothing -> Left "Null not in union." + Just (ix, _) -> pure $ FromAvro.Union (ReadSchema.Union schemas) ix FromAvro.Null + + union (ReadSchema.Union schemas) (Aeson.Object obj) + | null obj = + Left "Invalid encoding of union: empty object ({})." + | length obj > 1 = + Left "Invalid encoding of union: object with too many fields." + | otherwise = + let + canonicalize name + | isBuiltIn name = name + | otherwise = Schema.renderFullname $ Schema.parseFullname name + branch = + K.toText $ head (KM.keys obj) + names = + HashMap.fromList [(ReadSchema.typeName t, ixed) | ixed@(_, t) <- Foldable.toList schemas] + in case HashMap.lookup (canonicalize branch) names of + Just (ix, t) -> do + nested <- parseAvroJSON union env t $ case KM.lookup (K.fromText branch) obj of + Just val -> val + Nothing -> error "impossible" + return (FromAvro.Union (ReadSchema.Union schemas) ix nested) + Nothing -> Left ("Type '" <> Text.unpack branch <> "' not in union: " <> show schemas) + union ReadSchema.Union{} _ = + Left "Invalid JSON representation for union: has to be a JSON object with exactly one field." + -- TODO, not sure what to do with val here + union (ReadSchema.FreeUnion ix ty) val = do + nested <- parseAvroJSON union env ty val + pure $ FromAvro.Union (ReadSchema.FreeUnion ix ty) ix nested + union _ _ = + error "Impossible: function given non-union schema." + isBuiltIn name = name `elem` [ "null", "boolean", "int", "long", "float" + , "double", "bytes", "string", "array", "map" ] -- -- | Parse a 'ByteString' as JSON and convert it to a type with an -- -- Avro schema. Will return 'Error' if the input is not valid JSON or -- -- the JSON does not convert with the specified schema. @@ -472,3 +519,75 @@ fromJSON = fromAvroJSON schema -- parseJSON input = case Aeson.eitherDecode input of -- Left msg -> Error msg -- Right value -> fromJSON value +-- | Parse JSON-encoded avro data. +parseAvroJSON :: (ReadSchema -> Aeson.Value -> Either String FromAvro.Value) + -- ^ How to handle unions. The way unions are + -- formatted in JSON depends on whether we're parsing + -- a normal Avro object or we're parsing a default + -- declaration in a schema. + -- + -- This function will only ever be passed 'Union' + -- schemas. It /should/ error out if this is not the + -- case—it represents a bug in this code. + -> (Schema.TypeName -> Maybe ReadSchema) + -> ReadSchema + -> Aeson.Value + -> Either String FromAvro.Value +parseAvroJSON union env (ReadSchema.NamedType name) av = + case env name of + Nothing -> Left $ "Could not resolve type name for " <> Text.unpack (Schema.renderFullname name) + Just t -> parseAvroJSON union env t av +parseAvroJSON union _ u@ReadSchema.Union{} av = u `union` av +parseAvroJSON union env ty av = + case av of + Aeson.String s -> + case ty of + ReadSchema.String _ -> return $ FromAvro.String ty s + ReadSchema.Enum {..} -> + case s `V.elemIndex` symbols of + Just i -> pure $ FromAvro.Enum ty i s + Nothing -> Left $ "JSON string is not one of the expected symbols for enum '" <> show name <> "': " <> Text.unpack s + ReadSchema.Bytes _ -> FromAvro.Bytes ty <$> (Schema.resultToEither $ Schema.parseBytes s) + ReadSchema.Fixed {..} -> do + bytes <- Schema.resultToEither $ Schema.parseBytes s + let len = BS.length bytes + when (len /= size) $ + Left $ "Fixed string wrong size. Expected " <> show size <> " but got " <> show len + return $ FromAvro.Fixed ty bytes + _ -> Left $ "Expected type String, Enum, Bytes, or Fixed, but found (Type,Value)=" + <> show (ty, av) + Aeson.Bool b -> + case ty of + ReadSchema.Boolean -> return $ FromAvro.Boolean b + _ -> avroTypeMismatch ty "boolean" + Aeson.Number i -> + case ty of + ReadSchema.Int _ -> return $ FromAvro.Int ty (floor i) + ReadSchema.Long _ _ -> return $ FromAvro.Long ty (floor i) + ReadSchema.Float _ -> return $ FromAvro.Float ty (realToFrac i) + ReadSchema.Double _ -> return $ FromAvro.Double ty (realToFrac i) + _ -> avroTypeMismatch ty "number" + Aeson.Array vec -> + case ty of + ReadSchema.Array t -> FromAvro.Array <$> V.mapM (parseAvroJSON union env t) vec + _ -> avroTypeMismatch ty "array" + Aeson.Object obj -> + case ty of + ReadSchema.Map mTy -> FromAvro.Map <$> mapM (parseAvroJSON union env mTy) (KM.toHashMapText obj) + ReadSchema.Record {..} -> do + values <- forM (filter ((/= ReadSchema.Ignored) . ReadSchema.fldStatus) fields) $ \ReadSchema.ReadField{..} -> do + case KM.lookup (K.fromText fldName) obj of + Nothing -> + case fldDefault of + Just v -> pure $ FromAvro.convertValue v + Nothing -> Left $ "Decode failure: No record field '" <> Text.unpack fldName <> "' and no default in schema." + Just v -> parseAvroJSON union env fldType v + pure $ FromAvro.Record ty $ V.fromList values + _ -> avroTypeMismatch ty "object" + Aeson.Null -> case ty of + ReadSchema.Null -> return FromAvro.Null + _ -> avroTypeMismatch ty "null" + +avroTypeMismatch :: ReadSchema -> Text.Text -> Either String a +avroTypeMismatch expected actual = + Left $ "Could not resolve type '" <> Text.unpack actual <> "' with expected type: " <> show expected diff --git a/src/Data/Avro/Schema/Deconflict.hs b/src/Data/Avro/Schema/Deconflict.hs index 016f083..69e7927 100644 --- a/src/Data/Avro/Schema/Deconflict.hs +++ b/src/Data/Avro/Schema/Deconflict.hs @@ -30,7 +30,12 @@ import Debug.Trace -- with the writer's schema into the form specified by the reader's schema. -- -- Schema resolution rules are described by the specification: -deconflict :: Schema -> Schema -> Either String ReadSchema +deconflict + :: Schema + -- ^ The writer's schema. + -> Schema + -- ^ The reader's schema. + -> Either String ReadSchema deconflict writerSchema readerSchema | writerSchema == readerSchema = pure (Read.fromSchema readerSchema) deconflict S.Null S.Null = pure Read.Null deconflict S.Boolean S.Boolean = pure Read.Boolean diff --git a/src/Data/Avro/Schema/ReadSchema.hs b/src/Data/Avro/Schema/ReadSchema.hs index bdcbd69..f4e31c8 100644 --- a/src/Data/Avro/Schema/ReadSchema.hs +++ b/src/Data/Avro/Schema/ReadSchema.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE DeriveAnyClass #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Data.Avro.Schema.ReadSchema ( ReadSchema(..), ReadField(..) @@ -9,6 +10,8 @@ module Data.Avro.Schema.ReadSchema , fromSchema, fromField , extractBindings +, buildTypeEnvironment +, typeName , S.Decimal(..) , S.LogicalTypeBytes(..), S.LogicalTypeFixed(..) @@ -201,3 +204,69 @@ extractBindings = \case Map{..} -> extractBindings values FreeUnion {..} -> extractBindings ty _ -> HashMap.empty + +-- | @buildTypeEnvironment schema@ builds a function mapping type names to +-- the types declared in the traversed schema. +-- +-- This mapping includes both the base type names and any aliases they +-- have. Aliases and normal names are not differentiated in any way. +buildTypeEnvironment :: Applicative m + => (TypeName -> m ReadSchema) + -- ^ Callback to handle type names not in the + -- schema. + -> ReadSchema + -- ^ The schema that we're generating a lookup + -- function for. + -> (TypeName -> m ReadSchema) +buildTypeEnvironment failure from forTy = + case HashMap.lookup forTy env of + Nothing -> failure forTy + Just res -> pure res + where + env = extractBindings from + +-- |Get the name of the type. In the case of unions, get the name of the +-- first value in the union schema. +typeName :: ReadSchema -> Text +typeName bt = + case bt of + Null -> "null" + Boolean -> "boolean" + Int Nothing -> "int" + Int (Just (S.DecimalI d)) + -> decimalName d + Int (Just S.Date) -> "date" + Int (Just S.TimeMillis) + -> "time-millis" + Long _ Nothing -> "long" + Long _ (Just (S.DecimalL d)) + -> decimalName d + Long _ (Just S.TimeMicros) + -> "time-micros" + Long _ (Just S.TimestampMillis) + -> "timestamp-millis" + Long _ (Just S.TimestampMicros) + -> "timestamp-micros" + Long _ (Just S.LocalTimestampMillis) + -> "local-timestamp-millis" + Long _ (Just S.LocalTimestampMicros) + -> "local-timestamp-micros" + Float _ -> "float" + Double _ -> "double" + Bytes Nothing -> "bytes" + Bytes (Just (S.DecimalB d)) + -> decimalName d + String Nothing -> "string" + String (Just S.UUID) + -> "uuid" + Array _ -> "array" + Map _ -> "map" + NamedType name -> S.renderFullname name + Union ts -> typeName $ snd $ V.head ts + Fixed _ _ _ (Just (S.DecimalF d)) + -> decimalName d + Fixed _ _ _ (Just S.Duration) + -> "duration" + _ -> S.renderFullname $ name bt + where + decimalName (S.Decimal prec sc) = "decimal(" <> T.pack (show prec) <> "," <> T.pack (show sc) <> ")" diff --git a/src/Data/Avro/Schema/Schema.hs b/src/Data/Avro/Schema/Schema.hs index de7236f..19437a6 100644 --- a/src/Data/Avro/Schema/Schema.hs +++ b/src/Data/Avro/Schema/Schema.hs @@ -849,6 +849,7 @@ parseFieldDefault env schema value = parseAvroJSON defaultUnion env schema value where defaultUnion (Union ts) val = DUnion ts (V.head ts) <$> parseFieldDefault env (V.head ts) val defaultUnion _ _ = error "Impossible: not Union." +-- TODO deprecate this function in favor of JSON module -- | Parse JSON-encoded avro data. parseAvroJSON :: (Schema -> A.Value -> Result DefaultValue) -- ^ How to handle unions. The way unions are diff --git a/test/Avro/JSONSpec.hs b/test/Avro/JSONSpec.hs index 8915d25..fa30608 100644 --- a/test/Avro/JSONSpec.hs +++ b/test/Avro/JSONSpec.hs @@ -1,7 +1,9 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} module Avro.JSONSpec where import Control.Monad (forM_) @@ -10,14 +12,31 @@ import Control.Monad.Identity (Identity (..)) import qualified Data.Aeson as Aeson import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS +import Data.Either (isRight) +import Data.Int import qualified Data.Map as Map +import qualified Data.HashMap.Strict as HM +import Data.Tagged +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TL import Data.Avro.Deriving import Data.Avro.EitherN +import Data.Avro.Encoding.FromAvro import Data.Avro.JSON +import Data.Avro.HasAvroSchema +import Data.Avro.Schema.Deconflict (deconflict) +import qualified Data.Avro.Schema.Schema as S +import Data.Avro.Schema.ReadSchema +import qualified Avro.Data.Deconflict.Read as Read +import qualified Avro.Data.Deconflict.Write as Write import Avro.Data.Endpoint import Avro.Data.Enums +import Avro.Data.FixedTypes +import Avro.Data.Karma import Avro.Data.Reused import Avro.Data.Unions @@ -34,11 +53,94 @@ import System.Environment (setEnv) {- HLINT ignore "Redundant do" -} deriveAvro "test/data/unions-no-namespace.avsc" +deriveAvro "test/data/maybe.avsc" + +roundtripGen :: (Monad m, Show a, Eq a, ToAvroJSON a, FromAvro a) => S.Schema -> Gen a -> PropertyT m () +roundtripGen sch gen = do + value <- forAll gen + tripping value (toAvroJSON sch) (fromJSON (fromSchema sch)) + +roundtripGenKnownSchema :: (Monad m, Show a, Eq a, ToAvroJSON a, FromAvro a, HasAvroSchema a) => Gen a -> PropertyT m () +roundtripGenKnownSchema gen = do + value <- forAll gen + let sch = schemaOf value + tripping value (toAvroJSON sch) (fromJSON (fromSchema sch)) + + spec :: Spec spec = describe "Avro.JSONSpec: JSON serialization/parsing" $ do it "should pass" $ require $ withTests 1 $ property success + describe "ToAvroJSON instances" $ do + specify "Int" $ do + let int = 1 :: Int + toAvroJSON (S.Long Nothing) int `shouldBe` Aeson.Number 1 + toAvroJSON (S.Int Nothing) int `shouldBe` Aeson.Number 1 + specify "Int32" $ do + let int = 1 :: Int32 + toAvroJSON (S.Long Nothing) int `shouldBe` Aeson.Number 1 + toAvroJSON (S.Int Nothing) int `shouldBe` Aeson.Number 1 + + describe "Schema evolution" $ do + it "can read values in older formats from new schemas" $ do + let writeSchema = unTagged $ schema @Write.Foo + readSchema = unTagged $ schema @Read.Foo + (Right deconflicted) = deconflict writeSchema readSchema + require $ property $ do + foo <- forAll Write.genFoo + let jsonAnnotation = TL.unpack $ TL.decodeUtf8 $ Aeson.encode $ toJSON foo + annotate jsonAnnotation + annotate $ show deconflicted + case fromJSON @Read.Foo deconflicted (toJSON foo) of + Left err -> annotate err >> failure + Right _ -> success + describe "Roundtrip" $ do + it "Null roundtrip" $ require $ withTests 1 $ property $ do + roundtripGen S.Null (Gen.constant ()) + it "Bool roundtrip" $ require $ withTests 2 $ property $ do + roundtripGen S.Boolean Gen.bool_ + it "Int roundtrip" $ require $ property $ do + roundtripGen (S.Int Nothing) (Gen.int32 Range.linearBounded) + it "Long roundtrip" $ require $ property $ do + roundtripGen (S.Long Nothing) (Gen.int Range.linearBounded) + roundtripGen (S.Long Nothing) (Gen.int64 Range.linearBounded) + it "Float roundtrip" $ require $ property $ do + roundtripGen S.Float (Gen.float (Range.exponentialFloat (-500) 500)) + it "Double roundtrip" $ require $ property $ do + roundtripGen S.Double (Gen.double (Range.exponentialFloat (-500) 500)) + it "Bytes roundtrip" $ require $ property $ do + roundtripGen (S.Bytes Nothing) (Gen.bytes (Range.linear 0 100)) + it "String roundtrip" $ require $ property $ do + roundtripGen (S.String Nothing) (Gen.text (Range.linear 0 100) Gen.unicodeAll) + it "Array roundtrip" $ require $ property $ do + roundtripGen (S.Array $ S.Long Nothing) (Gen.list (Range.linear 0 100) $ Gen.int Range.linearBounded) + it "Map roundtrip" $ require $ property $ do + let gen = do + items <- Gen.list + (Range.linear 0 100) + (do + k <- Gen.text (Range.linear 1 100) Gen.alphaNum + v <- Gen.int Range.linearBounded + pure (k, v) + ) + pure $ HM.fromList items + roundtripGen (S.Map $ S.Long Nothing) gen + -- TODO named types + -- TODO record types + describe "Record roundtrip" $ do + specify "Endpoint" $ require $ property $ roundtripGenKnownSchema endpointGen + specify "Blessing" $ require $ property $ roundtripGenKnownSchema blessingGen + specify "Curse" $ require $ property $ roundtripGenKnownSchema curseGen + specify "ReuseFixed" $ require $ property $ roundtripGenKnownSchema reuseFixedGen + specify "Unions" $ require $ property $ roundtripGenKnownSchema unionsGen + + it "Enum roundtrip" $ require $ property $ do + roundtripGen (schemaOf EnumReasonBecause) (Gen.enumBounded :: Gen EnumReason) + -- TODO it "Union roundtrip" + it "Fixed roundtrip" $ require $ property $ do + roundtripGen (schemaOf $ FixedTag "wow") (fmap FixedTag $ Gen.bytes $ Range.singleton 3) + -- it "should do roundtrip (enums)" $ require $ property $ do -- msg <- forAll enumWrapperGen -- tripping msg (Aeson.encode . toJSON) parseJSON