diff --git a/README.md b/README.md index 4e19ab2..af4fd6c 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ When working with `FromAvro` directly it is important to understand the differen `Schema` (as in the example above) is just a regular data schema for an Avro type. -`ReadSchema` is a similar type, but it is capable of captuting and resolving differences between "_writer_ schema" and "_reader_ schema". See [Specification](https://avro.apache.org/docs/current/spec.html#Schema+Resolution) to learn more about schema resolution and de-conflicting. +`ReadSchema` is a similar type, but it is capable of capturing and resolving differences between "_writer_ schema" and "_reader_ schema". See [Specification](https://avro.apache.org/docs/current/spec.html#Schema+Resolution) to learn more about schema resolution and de-conflicting. `FromAvro` class requires `ReaderSchema` because with Avro it is possible to read data with a different schema compared to the schema that was used for writing this data. diff --git a/src/Data/Avro/Deriving.hs b/src/Data/Avro/Deriving.hs index 8d1f705..0712875 100644 --- a/src/Data/Avro/Deriving.hs +++ b/src/Data/Avro/Deriving.hs @@ -45,6 +45,7 @@ import qualified Data.Aeson as J import Data.Avro hiding (decode, encode) import Data.Avro.Encoding.ToAvro (ToAvro (..)) import Data.Avro.Internal.EncodeRaw (putI) +import Data.Avro.JSON import Data.Avro.Schema.Schema as S import Data.ByteString (ByteString) import qualified Data.ByteString as B @@ -56,6 +57,7 @@ import qualified Data.List.NonEmpty as NE import Data.Map (Map) import Data.Maybe (fromMaybe) import Data.Semigroup ((<>)) +import Data.String (IsString (..)) import qualified Data.Text as Text import Data.Time (Day, DiffTime, LocalTime, UTCTime) import Data.UUID (UUID) @@ -366,16 +368,27 @@ newNames base n = sequence [newName (base ++ show i) | i <- [1..n]] ------------------------- ToAvro ------------------------------------------------ genToAvro :: DeriveOptions -> Schema -> Q [Dec] -genToAvro opts s@(S.Enum n _ _ _) = - encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) +genToAvro opts s@(S.Enum n _ _ _) = do + baseInstance <- encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) + jsonInstance <- encodeAvroJsonInstance (mkSchemaValueName (namespaceBehavior opts) n) + pure (baseInstance ++ jsonInstance) where encodeAvroInstance sname = [d| instance ToAvro $(conT $ mkDataTypeName (namespaceBehavior opts) n) where toAvro = $([| \_ x -> putI (fromEnum x) |]) |] + encodeAvroJsonInstance sname = + [d| instance ToAvroJSON $(conT $ mkDataTypeName (namespaceBehavior opts) n) where + toAvroJSON (S.Enum _ _ _ vs) x = case vs V.!? fromEnum x of + Nothing -> error "toAvroJson: unable to find equivalent enum value in new schema" + Just _ -> toJSON originalName + toAvroJSON _ _ = error "ToAvroJson for enum only works for Enum schema values" + |] -genToAvro opts s@(S.Record n _ _ fs) = - encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) +genToAvro opts s@(S.Record n _ _ fs) = do + baseInstance <- encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) + jsonInstance <- encodeAvroJsonInstance (mkSchemaValueName (namespaceBehavior opts) n) + pure (baseInstance ++ jsonInstance) where encodeAvroInstance sname = [d| instance ToAvro $(conT $ mkDataTypeName (namespaceBehavior opts) n) where @@ -390,9 +403,26 @@ genToAvro opts s@(S.Record n _ _ fs) = in listE $ build <$> zip fs names ) |] + encodeAvroJsonInstance sname = + [d| instance ToAvroJSON $(conT $ mkDataTypeName (namespaceBehavior opts) n) where + toAvroJSON = $(encodeAvroJsonFieldsExp sname) + |] + encodeAvroJsonFieldsExp sname = do + names <- newNames "p_" (length fs) + wn <- varP <$> newName "_" + let con = conP (mkDataTypeName (namespaceBehavior opts) n) (varP <$> names) + lamE [wn, con] + [| J.object + $( let build (fld, n) = [| fromString $(stringE $ Text.unpack $ fldName fld) J..= toAvroJSON (fldType fld) $(varE n) |] + in listE $ build <$> zip fs names + ) + |] + -genToAvro opts s@(S.Fixed n _ _ _) = - encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) +genToAvro opts s@(S.Fixed n _ _ _) = do + baseInstance <- encodeAvroInstance (mkSchemaValueName (namespaceBehavior opts) n) + jsonInstance <- encodeAvroJsonInstance (mkSchemaValueName (namespaceBehavior opts) n) + pure (baseInstance ++ jsonInstance) where encodeAvroInstance sname = [d| instance ToAvro $(conT $ mkDataTypeName (namespaceBehavior opts) n) where @@ -401,6 +431,13 @@ genToAvro opts s@(S.Fixed n _ _ _) = wc <- newName "_" lamE [varP wc, conP (mkDataTypeName (namespaceBehavior opts) n) [varP x]] [| toAvro $(varE sname) $(varE x) |]) |] + encodeAvroJsonInstance sname = + [d| instance ToAvroJSON $(conT $ mkDataTypeName (namespaceBehavior opts) n) where + toAvroJSON = $(do + x <- newName "x" + wc <- newName "_" + lamE [varP wc, conP (mkDataTypeName (namespaceBehavior opts) n) [varP x]] [| toAvroJSON $(varE sname) $(varE x) |]) + |] genToAvro _ _ = pure [] schemaDef :: Name -> Schema -> Q [Dec] diff --git a/src/Data/Avro/EitherN.hs b/src/Data/Avro/EitherN.hs index d7f2a21..17e8953 100644 --- a/src/Data/Avro/EitherN.hs +++ b/src/Data/Avro/EitherN.hs @@ -443,6 +443,7 @@ instance (FromAvro a, FromAvro b, FromAvro c) => FromAvro (Either3 a b c) where fromAvro (AV.Union _ 1 b) = E3_2 <$> fromAvro b fromAvro (AV.Union _ 2 c) = E3_3 <$> fromAvro c fromAvro (AV.Union _ n _) = Left ("Unable to decode Either3 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either3 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d) => FromAvro (Either4 a b c d) where fromAvro (AV.Union _ 0 a) = E4_1 <$> fromAvro a @@ -450,6 +451,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d) => FromAvro (Either4 a fromAvro (AV.Union _ 2 c) = E4_3 <$> fromAvro c fromAvro (AV.Union _ 3 d) = E4_4 <$> fromAvro d fromAvro (AV.Union _ n _) = Left ("Unable to decode Either4 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either4 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e) => FromAvro (Either5 a b c d e) where fromAvro (AV.Union _ 0 a) = E5_1 <$> fromAvro a @@ -458,6 +460,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e) => FromAvr fromAvro (AV.Union _ 3 d) = E5_4 <$> fromAvro d fromAvro (AV.Union _ 4 e) = E5_5 <$> fromAvro e fromAvro (AV.Union _ n _) = Left ("Unable to decode Either5 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either5 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f) => FromAvro (Either6 a b c d e f) where fromAvro (AV.Union _ 0 a) = E6_1 <$> fromAvro a @@ -467,6 +470,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 4 e) = E6_5 <$> fromAvro e fromAvro (AV.Union _ 5 f) = E6_6 <$> fromAvro f fromAvro (AV.Union _ n _) = Left ("Unable to decode Either6 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either6 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g) => FromAvro (Either7 a b c d e f g) where fromAvro (AV.Union _ 0 a) = E7_1 <$> fromAvro a @@ -477,6 +481,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 5 f) = E7_6 <$> fromAvro f fromAvro (AV.Union _ 6 g) = E7_7 <$> fromAvro g fromAvro (AV.Union _ n _) = Left ("Unable to decode Either7 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either7 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g, FromAvro h) => FromAvro (Either8 a b c d e f g h) where fromAvro (AV.Union _ 0 a) = E8_1 <$> fromAvro a @@ -488,6 +493,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 6 g) = E8_7 <$> fromAvro g fromAvro (AV.Union _ 7 h) = E8_8 <$> fromAvro h fromAvro (AV.Union _ n _) = Left ("Unable to decode Either8 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either8 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g, FromAvro h, FromAvro i) => FromAvro (Either9 a b c d e f g h i) where fromAvro (AV.Union _ 0 a) = E9_1 <$> fromAvro a @@ -500,6 +506,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 7 h) = E9_8 <$> fromAvro h fromAvro (AV.Union _ 8 i) = E9_9 <$> fromAvro i fromAvro (AV.Union _ n _) = Left ("Unable to decode Either9 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either9 from a non-union" instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f, FromAvro g, FromAvro h, FromAvro i, FromAvro j) => FromAvro (Either10 a b c d e f g h i j) where fromAvro (AV.Union _ 0 a) = E10_1 <$> fromAvro a @@ -513,6 +520,7 @@ instance (FromAvro a, FromAvro b, FromAvro c, FromAvro d, FromAvro e, FromAvro f fromAvro (AV.Union _ 8 i) = E10_9 <$> fromAvro i fromAvro (AV.Union _ 9 j) = E10_10 <$> fromAvro j fromAvro (AV.Union _ n _) = Left ("Unable to decode Either10 from a position #" <> show n) + fromAvro _ = Left "Unable to decode Either10 from a non-union" putIndexedValue :: ToAvro a => Int -> V.Vector Schema -> a -> Builder putIndexedValue i opts x = putI i <> toAvro (V.unsafeIndex opts i) x diff --git a/src/Data/Avro/Encoding/FromAvro.hs b/src/Data/Avro/Encoding/FromAvro.hs index 47ee80f..123bdf4 100644 --- a/src/Data/Avro/Encoding/FromAvro.hs +++ b/src/Data/Avro/Encoding/FromAvro.hs @@ -8,6 +8,7 @@ module Data.Avro.Encoding.FromAvro -- ** For internal use , Value(..) , getValue +, convertValue ) where diff --git a/src/Data/Avro/Encoding/ToAvro.hs b/src/Data/Avro/Encoding/ToAvro.hs index acc301b..a4177e0 100644 --- a/src/Data/Avro/Encoding/ToAvro.hs +++ b/src/Data/Avro/Encoding/ToAvro.hs @@ -10,6 +10,7 @@ module Data.Avro.Encoding.ToAvro where +import Control.Exception (Exception(..), throw) import Control.Monad.Identity (Identity (..)) import qualified Data.Array as Ar import Data.Avro.Internal.EncodeRaw @@ -36,11 +37,13 @@ import qualified Data.Text.Foreign as T #endif import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL +import Data.Typeable (Typeable(..), TypeRep, typeOf) import qualified Data.Time as Time import qualified Data.UUID as UUID import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import Data.Word +import GHC.Stack import GHC.TypeLits {- HLINT ignore "Use section" -} @@ -65,6 +68,26 @@ record (S.Record _ _ _ fs) vs = mapField :: HashMap Text Encoder -> S.Field -> Builder mapField env fld = maybe (failField fld) (flip runEncoder (S.fldType fld)) (HashMap.lookup (S.fldName fld) env) +record s _ = error $ "Expected record, got: " <> show s + +data AvroEncodingFailure = AvroEncodingFailure + { failedSchema :: Schema + , failedType :: Maybe TypeRep + , renderedValue :: Maybe String + , failure :: Maybe String + } + deriving (Show, Typeable) + +failEncoding :: (HasCallStack, Typeable a, Show a) => Schema -> a -> b +failEncoding s x = throw $ AvroEncodingFailure s (Just $ typeOf x) (Just $ show x) Nothing + +failEncodingWithMessage :: (HasCallStack, Typeable a, Show a) => Schema -> a -> String -> b +failEncodingWithMessage s x msg = throw $ AvroEncodingFailure s (Just $ typeOf x) (Just $ show x) $ Just msg + +failEncodingWithoutTypeInfo :: (HasCallStack) => Schema -> String -> b +failEncodingWithoutTypeInfo s msg = throw $ AvroEncodingFailure s Nothing Nothing $ Just msg + +instance Exception AvroEncodingFailure -- | Describes how to encode Haskell data types into Avro bytes class ToAvro a where @@ -73,7 +96,7 @@ class ToAvro a where instance ToAvro Int where toAvro (S.Long _) i = encodeRaw @Int64 (fromIntegral i) toAvro (S.Int _) i = encodeRaw @Int32 (fromIntegral i) - toAvro s _ = error ("Unable to encode Int as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Int32 where @@ -81,14 +104,14 @@ instance ToAvro Int32 where toAvro (S.Int _) i = encodeRaw @Int32 i toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Int32 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Int64 where toAvro (S.Long _) i = encodeRaw @Int64 i toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Int64 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word8 where @@ -96,7 +119,7 @@ instance ToAvro Word8 where toAvro (S.Long _) i = encodeRaw @Word64 (fromIntegral i) toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Word8 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word16 where @@ -104,7 +127,7 @@ instance ToAvro Word16 where toAvro (S.Long _) i = encodeRaw @Word64 (fromIntegral i) toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Word16 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word32 where @@ -112,33 +135,33 @@ instance ToAvro Word32 where toAvro (S.Long _) i = encodeRaw @Word64 (fromIntegral i) toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) toAvro S.Float i = toAvro @Float S.Float (fromIntegral i) - toAvro s _ = error ("Unable to encode Word32 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Word64 where toAvro (S.Long _) i = encodeRaw @Word64 i toAvro S.Double i = toAvro @Double S.Double (fromIntegral i) - toAvro s _ = error ("Unable to encode Word64 as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Double where toAvro S.Double i = word64LE (IEEE.doubleToWord i) - toAvro s _ = error ("Unable to encode Double as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro Float where toAvro S.Float i = word32LE (IEEE.floatToWord i) toAvro S.Double i = word64LE (IEEE.doubleToWord $ realToFrac i) - toAvro s _ = error ("Unable to encode Float as: " <> show s) + toAvro s i = failEncoding s i {-# INLINE toAvro #-} instance ToAvro () where toAvro S.Null () = mempty - toAvro s () = error ("Unable to encode () as: " <> show s) + toAvro s () = failEncoding s () instance ToAvro Bool where toAvro S.Boolean v = word8 $ fromIntegral (fromEnum v) - toAvro s _ = error ("Unable to encode Bool as: " <> show s) + toAvro s v = failEncoding s v {-# INLINE toAvro #-} instance (KnownNat p, KnownNat s) => ToAvro (D.Decimal p s) where @@ -157,28 +180,28 @@ instance ToAvro Time.DiffTime where toAvro s@(S.Long (Just S.TimestampMicros)) = toAvro @Int64 s . fromIntegral . diffTimeToMicros toAvro s@(S.Long (Just S.TimestampMillis)) = toAvro @Int64 s . fromIntegral . diffTimeToMillis toAvro s@(S.Int (Just S.TimeMillis)) = toAvro @Int32 s . fromIntegral . diffTimeToMillis - toAvro s = error ("Unble to encode DiffTime as " <> show s) + toAvro s = failEncoding s instance ToAvro Time.UTCTime where toAvro s@(S.Long (Just S.TimestampMicros)) = toAvro @Int64 s . fromIntegral . utcTimeToMicros toAvro s@(S.Long (Just S.TimestampMillis)) = toAvro @Int64 s . fromIntegral . utcTimeToMillis - toAvro s = error ("Unable to encode UTCTime as " <> show s) + toAvro s = failEncoding s instance ToAvro Time.LocalTime where toAvro s@(S.Long (Just S.LocalTimestampMicros)) = toAvro @Int64 s . fromIntegral . localTimeToMicros toAvro s@(S.Long (Just S.LocalTimestampMillis)) = toAvro @Int64 s . fromIntegral . localTimeToMillis - toAvro s = - error ("Unable to encode LocalTime as " <> show s) + toAvro s = failEncoding s instance ToAvro B.ByteString where toAvro s bs = case s of (S.Bytes _) -> encodeRaw (B.length bs) <> byteString bs (S.String _) -> encodeRaw (B.length bs) <> byteString bs S.Fixed _ _ l _ | l == B.length bs -> byteString bs - S.Fixed _ _ l _ -> error ("Unable to encode ByteString as Fixed(" <> show l <> ") because its length is " <> show (B.length bs)) - _ -> error ("Unable to encode ByteString as: " <> show s) + S.Fixed _ _ l _ -> failEncodingWithMessage s bs + ("Unable to encode ByteString as Fixed(" <> show l <> ") because its length is " <> show (B.length bs)) + _ -> failEncoding s bs {-# INLINE toAvro #-} instance ToAvro BL.ByteString where @@ -194,7 +217,7 @@ instance ToAvro Text where in case s of (S.Bytes _) -> res (S.String _) -> res - _ -> error ("Unable to encode Text as: " <> show s) + _ -> failEncoding s v #else let bs = T.encodeUtf8 v @@ -202,7 +225,7 @@ instance ToAvro Text where in case s of (S.Bytes _) -> res (S.String _) -> res - _ -> error ("Unable to encode Text as: " <> show s) + _ -> failEncoding s v #endif {-# INLINE toAvro #-} @@ -213,49 +236,49 @@ instance ToAvro TL.Text where instance ToAvro a => ToAvro [a] where toAvro (S.Array s) as = if DL.null as then long0 else encodeRaw (F.length as) <> foldMap (toAvro s) as <> long0 - toAvro s _ = error ("Unable to encode Haskell list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Haskell list" instance ToAvro a => ToAvro (V.Vector a) where toAvro (S.Array s) as = if V.null as then long0 else encodeRaw (V.length as) <> foldMap (toAvro s) as <> long0 - toAvro s _ = error ("Unable to encode Vector list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Vector" instance (Ix i, ToAvro a) => ToAvro (Ar.Array i a) where toAvro (S.Array s) as = if F.length as == 0 then long0 else encodeRaw (F.length as) <> foldMap (toAvro s) as <> long0 - toAvro s _ = error ("Unable to encode indexed Array list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Array" instance (U.Unbox a, ToAvro a) => ToAvro (U.Vector a) where toAvro (S.Array s) as = if U.null as then long0 else encodeRaw (U.length as) <> foldMap (toAvro s) (U.toList as) <> long0 - toAvro s _ = error ("Unable to encode Vector list as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Unboxed Vector" instance ToAvro a => ToAvro (Map.Map Text a) where toAvro (S.Map s) hm = if Map.null hm then long0 else putI (F.length hm) <> foldMap putKV (Map.toList hm) <> long0 where putKV (k,v) = toAvro S.String' k <> toAvro s v - toAvro s _ = error ("Unable to encode HashMap as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Map" instance ToAvro a => ToAvro (HashMap Text a) where toAvro (S.Map s) hm = if HashMap.null hm then long0 else putI (F.length hm) <> foldMap putKV (HashMap.toList hm) <> long0 where putKV (k,v) = toAvro S.String' k <> toAvro s v - toAvro s _ = error ("Unable to encode HashMap as: " <> show s) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode HashMap" instance ToAvro a => ToAvro (Maybe a) where toAvro (S.Union opts) v = case F.toList opts of [S.Null, s] -> maybe (putI 0) (\a -> putI 1 <> toAvro s a) v [s, S.Null] -> maybe (putI 1) (\a -> putI 0 <> toAvro s a) v - wrongOpts -> error ("Unable to encode Maybe as " <> show wrongOpts) - toAvro s _ = error ("Unable to encode Maybe as " <> show s) + wrongOpts -> failEncodingWithoutTypeInfo (S.Union opts) "Unable to encode Maybe" + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Maybe" instance (ToAvro a) => ToAvro (Identity a) where toAvro (S.Union opts) e@(Identity a) = if V.length opts == 1 then putI 0 <> toAvro (V.unsafeIndex opts 0) a - else error ("Unable to encode Identity as a single-value union: " <> show opts) - toAvro s _ = error ("Unable to encode Identity value as " <> show s) + else failEncodingWithoutTypeInfo (S.Union opts) ("Unable to encode Identity as a single-value union: " <> show opts) + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Identity" instance (ToAvro a, ToAvro b) => ToAvro (Either a b) where toAvro (S.Union opts) v = @@ -263,5 +286,5 @@ instance (ToAvro a, ToAvro b) => ToAvro (Either a b) where then case v of Left a -> putI 0 <> toAvro (V.unsafeIndex opts 0) a Right b -> putI 1 <> toAvro (V.unsafeIndex opts 1) b - else error ("Unable to encode Either as " <> show opts) - toAvro s _ = error ("Unable to encode Either as " <> show s) + else failEncodingWithoutTypeInfo (S.Union opts) "Unable to encode Either as" + toAvro s _ = failEncodingWithoutTypeInfo s "Unable to encode Either" diff --git a/src/Data/Avro/Internal/Time.hs b/src/Data/Avro/Internal/Time.hs index 185fe1f..ba2d5fd 100644 --- a/src/Data/Avro/Internal/Time.hs +++ b/src/Data/Avro/Internal/Time.hs @@ -3,10 +3,8 @@ module Data.Avro.Internal.Time where -- Utility functions to work with times -import Data.Fixed (Fixed (..)) import Data.Maybe (fromJust) import Data.Time -import Data.Time.Clock import Data.Time.Clock.POSIX (posixSecondsToUTCTime) #if MIN_VERSION_time(1,9,0) import Data.Time.Format.Internal diff --git a/src/Data/Avro/JSON.hs b/src/Data/Avro/JSON.hs index dfa1b1c..e177b5d 100644 --- a/src/Data/Avro/JSON.hs +++ b/src/Data/Avro/JSON.hs @@ -1,5 +1,9 @@ +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TupleSections #-} -- | Avro supports a JSON representation of Avro objects alongside the -- Avro binary format. An Avro schema can be used to generate and -- validate JSON representations of Avro objects. @@ -59,26 +63,47 @@ -- @ module Data.Avro.JSON where +import Control.Monad (forM, when) +import Control.Monad.Identity (Identity (..)) import qualified Data.Aeson as Aeson import qualified Data.Aeson.Key as K import qualified Data.Aeson.KeyMap as KM -import Data.ByteString.Lazy (ByteString) +import qualified Data.Array as Ar +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as BS import qualified Data.Foldable as Foldable import Data.HashMap.Strict ((!)) import qualified Data.HashMap.Strict as HashMap import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as NE +import qualified Data.Map as Map +import Data.Maybe (fromJust) import Data.Tagged import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text +import qualified Data.Text.Lazy as LazyText +import qualified Data.Time as Time import qualified Data.Avro.HasAvroSchema as Schema -import Data.Avro.Schema.Schema (DefaultValue (..), Result (..), Schema, parseAvroJSON) -import qualified Data.Avro.Schema.Schema as Schema -import qualified Data.Vector as V +import Data.Avro.EitherN +import Data.Avro.Encoding.FromAvro as FromAvro +import Data.Avro.Internal.Time +import Data.Avro.Schema.Decimal as D +import Data.Avro.Schema.Schema (DefaultValue (..), Schema) +import qualified Data.Avro.Schema.Schema as Schema +import Data.Avro.Schema.ReadSchema (ReadSchema) +import qualified Data.Avro.Schema.ReadSchema as ReadSchema +import qualified Data.Vector as V +import qualified Data.Vector.Unboxed as U +import Data.Int +import Data.Word +import qualified Data.UUID as UUID +import GHC.TypeLits -decodeAvroJSON :: Schema -> Aeson.Value -> Result DefaultValue +-- deprecate +decodeAvroJSON :: Schema -> Aeson.Value -> Schema.Result DefaultValue decodeAvroJSON schema json = - parseAvroJSON union env schema json + Schema.parseAvroJSON union env schema json where env = Schema.buildTypeEnvironment missing schema @@ -106,7 +131,7 @@ decodeAvroJSON schema json = HashMap.fromList [(Schema.typeName t, t) | t <- Foldable.toList schemas] in case HashMap.lookup (canonicalize branch) names of Just t -> do - nested <- parseAvroJSON union env t $ case KM.lookup (K.fromText branch) obj of + nested <- Schema.parseAvroJSON union env t $ case KM.lookup (K.fromText branch) obj of Just val -> val Nothing -> error "impossible" return (Schema.DUnion schemas t nested) @@ -119,16 +144,374 @@ decodeAvroJSON schema json = isBuiltIn name = name `elem` [ "null", "boolean", "int", "long", "float" , "double", "bytes", "string", "array", "map" ] --- -- | Convert a 'Aeson.Value' into a type that has an Avro schema. The --- -- schema is used to validate the JSON and will return an 'Error' if --- -- the JSON object is not encoded correctly or does not match the schema. --- fromJSON :: forall a. (FromAvro a) => Aeson.Value -> Result a --- fromJSON json = do --- value <- decodeAvroJSON schema json --- fromAvro value --- where --- schema = untag (Schema.schema :: Tagged a Schema) +class ToAvroJSON a where + -- | Convert an object with an Avro schema to JSON using that schema. + -- + -- We always need the schema to /encode/ to JSON because representing + -- unions requires using the names of named types. + toAvroJSON :: Schema -> a -> Aeson.Value + toAvroEncoding :: Schema -> a -> Aeson.Encoding + toAvroEncoding s = Aeson.toEncoding . toAvroJSON s + +instance ToAvroJSON Int where + -- It might seem surprising that we use fromIntegral here, but it's + -- used to ensure that overflow is handled correctly. + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Int64 (fromIntegral i) + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Int32 (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Int as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Int32 where + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Int64 (fromIntegral i) + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Int32 i + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Int32 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Int64 where + toAvroJSON (Schema.Long _) i = Aeson.toJSON i + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Int64 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word8 where + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Word8 i + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 (fromIntegral i) + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word8 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word16 where + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Word16 i + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 (fromIntegral i) + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word16 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word32 where + toAvroJSON (Schema.Int _) i = Aeson.toJSON @Word32 i + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 (fromIntegral i) + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON Schema.Float i = toAvroJSON @Float Schema.Float (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word32 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Word64 where + toAvroJSON (Schema.Long _) i = Aeson.toJSON @Word64 i + toAvroJSON Schema.Double i = toAvroJSON @Double Schema.Double (fromIntegral i) + toAvroJSON s _ = error ("Unable to encode Word64 as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Double where + toAvroJSON Schema.Double i = Aeson.toJSON @Double i + toAvroJSON s _ = error ("Unable to encode Double as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Float where + toAvroJSON Schema.Float i = Aeson.toJSON @Float i + toAvroJSON Schema.Double i = Aeson.toJSON @Double $ realToFrac i + toAvroJSON s _ = error ("Unable to encode Float as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON () where + toAvroJSON Schema.Null () = Aeson.Null + toAvroJSON s () = error ("Unable to encode () as: " <> show s) + {-# INLINE toAvroJSON #-} + + +instance ToAvroJSON Bool where + toAvroJSON Schema.Boolean v = Aeson.toJSON v + toAvroJSON s _ = error ("Unable to encode Bool as: " <> show s) + {-# INLINE toAvroJSON #-} + toAvroEncoding Schema.Boolean v = Aeson.toEncoding v + toAvroEncoding s _ = error ("Unable to encode Bool as: " <> show s) + {-# INLINE toAvroEncoding #-} + +instance ToAvroJSON Text.Text where + toAvroJSON (Schema.Bytes _) v = Aeson.toJSON $ Schema.serializeBytes $ Text.encodeUtf8 v + toAvroJSON (Schema.String _) v = Aeson.toJSON v + toAvroJSON s _ = error ("Unable to encode Text as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON LazyText.Text where + toAvroJSON (Schema.Bytes _) v = Aeson.toJSON $ Schema.serializeBytes $ Text.encodeUtf8 $ LazyText.toStrict v + toAvroJSON (Schema.String _) v = Aeson.toJSON v + toAvroJSON s _ = error ("Unable to encode Text as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON BS.ByteString where + toAvroJSON s bs = case s of + (Schema.Bytes _) -> Aeson.toJSON $ Schema.serializeBytes bs + (Schema.String _) -> Aeson.toJSON $ Text.decodeUtf8 bs + Schema.Fixed _ _ l _ | l == BS.length bs -> Aeson.toJSON $ Schema.serializeBytes bs + Schema.Fixed _ _ l _ -> error ("Unable to encode ByteString as Fixed(" <> show l <> ") because its length is " <> show (BS.length bs)) + _ -> error ("Unable to encode ByteString as: " <> show s) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON BL.ByteString where + toAvroJSON s bs = toAvroJSON s (BL.toStrict bs) + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON Time.UTCTime where + toAvroJSON s@(Schema.Long (Just Schema.TimestampMicros)) = toAvroJSON @Int64 s . fromIntegral . utcTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.TimestampMillis)) = toAvroJSON @Int64 s . fromIntegral . utcTimeToMillis + toAvroJSON s = error ("Unable to encode UTCTime as " <> show s) + +instance ToAvroJSON Time.LocalTime where + toAvroJSON s@(Schema.Long (Just Schema.LocalTimestampMicros)) = + toAvroJSON @Int64 s . fromIntegral . localTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.LocalTimestampMillis)) = + toAvroJSON @Int64 s . fromIntegral . localTimeToMillis + toAvroJSON s = + error ("Unable to encode LocalTime as " <> show s) + +instance ToAvroJSON Time.DiffTime where + toAvroJSON s@(Schema.Long (Just Schema.TimeMicros)) = toAvroJSON @Int64 s . fromIntegral . diffTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.TimestampMicros)) = toAvroJSON @Int64 s . fromIntegral . diffTimeToMicros + toAvroJSON s@(Schema.Long (Just Schema.TimestampMillis)) = toAvroJSON @Int64 s . fromIntegral . diffTimeToMillis + toAvroJSON s@(Schema.Int (Just Schema.TimeMillis)) = toAvroJSON @Int32 s . fromIntegral . diffTimeToMillis + toAvroJSON s = error ("Unble to encode DiffTime as " <> show s) + +instance ToAvroJSON Time.Day where + toAvroJSON s = toAvroJSON @Int32 s . fromIntegral . daysSinceEpoch + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON UUID.UUID where + toAvroJSON s = toAvroJSON s . UUID.toText + {-# INLINE toAvroJSON #-} + +instance ToAvroJSON a => ToAvroJSON [a] where + toAvroJSON (Schema.Array s) as = Aeson.toJSON $ fmap (toAvroJSON s) as + toAvroJSON s _ = error ("Unable to encode Haskell list as: " <> show s) + +instance (ToAvroJSON a) => ToAvroJSON (Identity a) where + toAvroJSON (Schema.Union opts) e@(Identity a) = + if V.length opts == 1 + then toAvroUnionJSON (V.unsafeIndex opts 0) a + else error ("Unable to encode Identity as a single-value union: " <> show opts) + toAvroJSON s _ = error ("Unable to encode Identity value as " <> show s) + +instance ToAvroJSON a => ToAvroJSON (Maybe a) where + toAvroJSON (Schema.Union opts) v = + case Foldable.toList opts of + [Schema.Null, s] -> maybe (toAvroJSON Schema.Null ()) (toAvroUnionJSON s) v + [s, Schema.Null] -> maybe (toAvroJSON Schema.Null ()) (toAvroUnionJSON s) v + wrongOpts -> error ("Unable to encode Maybe as " <> show wrongOpts) + toAvroJSON s _ = error ("Unable to encode Maybe as " <> show s) + +instance (U.Unbox a, ToAvroJSON a) => ToAvroJSON (U.Vector a) where + toAvroJSON (Schema.Array s) as = Aeson.toJSON @(V.Vector Aeson.Value) $ fmap (toAvroJSON s) $ U.convert as + toAvroJSON s _ = error ("Unable to encode Vector list as: " <> show s) + +instance (ToAvroJSON a) => ToAvroJSON (V.Vector a) where + toAvroJSON (Schema.Array s) as = Aeson.toJSON $ fmap (toAvroJSON s) as + toAvroJSON s _ = error ("Unable to encode Vector list as: " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b) => ToAvroJSON (Either a b) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 2 + then case v of + Left a -> toAvroUnionJSON (V.unsafeIndex opts 0) a + Right b -> toAvroUnionJSON (V.unsafeIndex opts 1) b + else error ("Unable to encode Either as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either as " <> show s) + +instance ToAvroJSON a => ToAvroJSON (Map.Map Text.Text a) where + toAvroJSON (Schema.Map s) m = Aeson.toJSON $ fmap (toAvroJSON s) m + toAvroJSON s _ = error ("Unable to encode Map as: " <> show s) + +instance ToAvroJSON a => ToAvroJSON (HashMap.HashMap Text.Text a) where + toAvroJSON (Schema.Map s) hm = Aeson.toJSON $ fmap (toAvroJSON s) hm + toAvroJSON s _ = error ("Unable to encode HashMap as: " <> show s) + +instance (Ar.Ix i, ToAvroJSON a) => ToAvroJSON (Ar.Array i a) where + toAvroJSON (Schema.Array s) a = Aeson.toJSON $ fmap (toAvroJSON s) $ Foldable.toList a + toAvroJSON s _ = error ("Unable to encode indexed Array list as: " <> show s) + +-- TODO, I don't know if this is the right way to do this. +instance (KnownNat p, KnownNat s) => ToAvroJSON (Decimal p s) where + toAvroJSON s = toAvroJSON @Int64 s . fromIntegral . fromJust . D.underlyingValue + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c) => ToAvroJSON (Either3 a b c) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 3 + then case v of + E3_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E3_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E3_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + else error ("Unable to encode Either3 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either3 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d) => ToAvroJSON (Either4 a b c d) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 4 + then case v of + E4_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E4_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E4_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E4_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + else error ("Unable to encode Either4 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either4 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e) => ToAvroJSON (Either5 a b c d e) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 5 + then case v of + E5_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E5_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E5_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E5_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E5_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + else error ("Unable to encode Either5 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either5 as " <> show s) +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f) => ToAvroJSON (Either6 a b c d e f) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 6 + then case v of + E6_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E6_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E6_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E6_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E6_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E6_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + else error ("Unable to encode Either6 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either6 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g) => ToAvroJSON (Either7 a b c d e f g) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 7 + then case v of + E7_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E7_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E7_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E7_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E7_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E7_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E7_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + else error ("Unable to encode Either7 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either7 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h) => ToAvroJSON (Either8 a b c d e f g h) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 8 + then case v of + E8_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E8_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E8_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E8_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E8_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E8_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E8_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + E8_8 x -> toAvroUnionJSON (V.unsafeIndex opts 7) x + else error ("Unable to encode Either8 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either8 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h, ToAvroJSON i) => ToAvroJSON (Either9 a b c d e f g h i) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 9 + then case v of + E9_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E9_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E9_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E9_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E9_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E9_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E9_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + E9_8 x -> toAvroUnionJSON (V.unsafeIndex opts 7) x + E9_9 x -> toAvroUnionJSON (V.unsafeIndex opts 8) x + else error ("Unable to encode Either9 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either9 as " <> show s) + +instance (ToAvroJSON a, ToAvroJSON b, ToAvroJSON c, ToAvroJSON d, ToAvroJSON e, ToAvroJSON f, ToAvroJSON g, ToAvroJSON h, ToAvroJSON i, ToAvroJSON j) => ToAvroJSON (Either10 a b c d e f g h i j) where + toAvroJSON (Schema.Union opts) v = + if V.length opts == 10 + then case v of + E10_1 x -> toAvroUnionJSON (V.unsafeIndex opts 0) x + E10_2 x -> toAvroUnionJSON (V.unsafeIndex opts 1) x + E10_3 x -> toAvroUnionJSON (V.unsafeIndex opts 2) x + E10_4 x -> toAvroUnionJSON (V.unsafeIndex opts 3) x + E10_5 x -> toAvroUnionJSON (V.unsafeIndex opts 4) x + E10_6 x -> toAvroUnionJSON (V.unsafeIndex opts 5) x + E10_7 x -> toAvroUnionJSON (V.unsafeIndex opts 6) x + E10_8 x -> toAvroUnionJSON (V.unsafeIndex opts 7) x + E10_9 x -> toAvroUnionJSON (V.unsafeIndex opts 8) x + E10_10 x -> toAvroUnionJSON (V.unsafeIndex opts 9) x + else error ("Unable to encode Either10 as " <> show opts) + toAvroJSON s _ = error ("Unable to encode Either10 as " <> show s) + +-- | Unions in Avro JSON serialization are encoded as a map with a single key +-- that is the name of the type. This function can be used to serialize union +-- values properly when writing instances of 'ToAvroJSON' for union fields. +toAvroUnionJSON :: ToAvroJSON a => Schema -> a -> Aeson.Value +toAvroUnionJSON s x = case s of + Schema.Null -> toAvroJSON s x + -- TODO, not sure if this is supposed to use logical types, need to + -- compare with other language implementations + _ -> Aeson.toJSON $ HashMap.singleton (Schema.typeName s) (toAvroJSON s x) + +toJSON :: forall a. (Schema.HasAvroSchema a, ToAvroJSON a) => a -> Aeson.Value +toJSON = toAvroJSON schema + where + schema = untag (Schema.schema :: Tagged a Schema) + +toEncoding :: forall a. (Schema.HasAvroSchema a, ToAvroJSON a) => a -> Aeson.Encoding +toEncoding = toAvroEncoding schema + where + schema = untag (Schema.schema :: Tagged a Schema) + +fromJSON :: (FromAvro a) => ReadSchema -> Aeson.Value -> Either String a +fromJSON schema json = do + intermediateValue <- parseAvroJSON union env schema json + FromAvro.fromAvro intermediateValue + where + env = + ReadSchema.buildTypeEnvironment missing schema + missing name = + fail ("Type " <> show name <> " not in schema") + + union :: ReadSchema -> Aeson.Value -> Either String FromAvro.Value + union (ReadSchema.Union schemas) Aeson.Null = + case V.find ((== ReadSchema.Null) . snd) schemas of + Nothing -> Left "Null not in union." + Just (ix, _) -> pure $ FromAvro.Union (ReadSchema.Union schemas) ix FromAvro.Null + + union (ReadSchema.Union schemas) (Aeson.Object obj) + | null obj = + Left "Invalid encoding of union: empty object ({})." + | length obj > 1 = + Left "Invalid encoding of union: object with too many fields." + | otherwise = + let + canonicalize name + | isBuiltIn name = name + | otherwise = Schema.renderFullname $ Schema.parseFullname name + branch = + K.toText $ head (KM.keys obj) + names = + HashMap.fromList [(ReadSchema.typeName t, ixed) | ixed@(_, t) <- Foldable.toList schemas] + in case HashMap.lookup (canonicalize branch) names of + Just (ix, t) -> do + nested <- parseAvroJSON union env t $ case KM.lookup (K.fromText branch) obj of + Just val -> val + Nothing -> error "impossible" + return (FromAvro.Union (ReadSchema.Union schemas) ix nested) + Nothing -> Left ("Type '" <> Text.unpack branch <> "' not in union: " <> show schemas) + union ReadSchema.Union{} _ = + Left "Invalid JSON representation for union: has to be a JSON object with exactly one field." + -- TODO, not sure what to do with val here + union (ReadSchema.FreeUnion ix ty) val = do + nested <- parseAvroJSON union env ty val + pure $ FromAvro.Union (ReadSchema.FreeUnion ix ty) ix nested + union _ _ = + error "Impossible: function given non-union schema." + + isBuiltIn name = name `elem` [ "null", "boolean", "int", "long", "float" + , "double", "bytes", "string", "array", "map" ] -- -- | Parse a 'ByteString' as JSON and convert it to a type with an -- -- Avro schema. Will return 'Error' if the input is not valid JSON or -- -- the JSON does not convert with the specified schema. @@ -136,10 +519,75 @@ decodeAvroJSON schema json = -- parseJSON input = case Aeson.eitherDecode input of -- Left msg -> Error msg -- Right value -> fromJSON value +-- | Parse JSON-encoded avro data. +parseAvroJSON :: (ReadSchema -> Aeson.Value -> Either String FromAvro.Value) + -- ^ How to handle unions. The way unions are + -- formatted in JSON depends on whether we're parsing + -- a normal Avro object or we're parsing a default + -- declaration in a schema. + -- + -- This function will only ever be passed 'Union' + -- schemas. It /should/ error out if this is not the + -- case—it represents a bug in this code. + -> (Schema.TypeName -> Maybe ReadSchema) + -> ReadSchema + -> Aeson.Value + -> Either String FromAvro.Value +parseAvroJSON union env (ReadSchema.NamedType name) av = + case env name of + Nothing -> Left $ "Could not resolve type name for " <> Text.unpack (Schema.renderFullname name) + Just t -> parseAvroJSON union env t av +parseAvroJSON union _ u@ReadSchema.Union{} av = u `union` av +parseAvroJSON union env ty av = + case av of + Aeson.String s -> + case ty of + ReadSchema.String _ -> return $ FromAvro.String ty s + ReadSchema.Enum {..} -> + case s `V.elemIndex` symbols of + Just i -> pure $ FromAvro.Enum ty i s + Nothing -> Left $ "JSON string is not one of the expected symbols for enum '" <> show name <> "': " <> Text.unpack s + ReadSchema.Bytes _ -> FromAvro.Bytes ty <$> (Schema.resultToEither $ Schema.parseBytes s) + ReadSchema.Fixed {..} -> do + bytes <- Schema.resultToEither $ Schema.parseBytes s + let len = BS.length bytes + when (len /= size) $ + Left $ "Fixed string wrong size. Expected " <> show size <> " but got " <> show len + return $ FromAvro.Fixed ty bytes + _ -> Left $ "Expected type String, Enum, Bytes, or Fixed, but found (Type,Value)=" + <> show (ty, av) + Aeson.Bool b -> + case ty of + ReadSchema.Boolean -> return $ FromAvro.Boolean b + _ -> avroTypeMismatch ty "boolean" + Aeson.Number i -> + case ty of + ReadSchema.Int _ -> return $ FromAvro.Int ty (floor i) + ReadSchema.Long _ _ -> return $ FromAvro.Long ty (floor i) + ReadSchema.Float _ -> return $ FromAvro.Float ty (realToFrac i) + ReadSchema.Double _ -> return $ FromAvro.Double ty (realToFrac i) + _ -> avroTypeMismatch ty "number" + Aeson.Array vec -> + case ty of + ReadSchema.Array t -> FromAvro.Array <$> V.mapM (parseAvroJSON union env t) vec + _ -> avroTypeMismatch ty "array" + Aeson.Object obj -> + case ty of + ReadSchema.Map mTy -> FromAvro.Map <$> mapM (parseAvroJSON union env mTy) (KM.toHashMapText obj) + ReadSchema.Record {..} -> do + values <- forM (filter ((/= ReadSchema.Ignored) . ReadSchema.fldStatus) fields) $ \ReadSchema.ReadField{..} -> do + case KM.lookup (K.fromText fldName) obj of + Nothing -> + case fldDefault of + Just v -> pure $ FromAvro.convertValue v + Nothing -> Left $ "Decode failure: No record field '" <> Text.unpack fldName <> "' and no default in schema." + Just v -> parseAvroJSON union env fldType v + pure $ FromAvro.Record ty $ V.fromList values + _ -> avroTypeMismatch ty "object" + Aeson.Null -> case ty of + ReadSchema.Null -> return FromAvro.Null + _ -> avroTypeMismatch ty "null" --- -- | Convert an object with an Avro schema to JSON using that schema. --- -- --- -- We always need the schema to /encode/ to JSON because representing --- -- unions requires using the names of named types. --- toJSON :: forall a. (ToAvro a) => a -> Aeson.Value --- toJSON = Aeson.toJSON . toAvro +avroTypeMismatch :: ReadSchema -> Text.Text -> Either String a +avroTypeMismatch expected actual = + Left $ "Could not resolve type '" <> Text.unpack actual <> "' with expected type: " <> show expected diff --git a/src/Data/Avro/Schema/Decimal.hs b/src/Data/Avro/Schema/Decimal.hs index f826f3d..3331b24 100644 --- a/src/Data/Avro/Schema/Decimal.hs +++ b/src/Data/Avro/Schema/Decimal.hs @@ -6,10 +6,11 @@ module Data.Avro.Schema.Decimal where import qualified Data.BigDecimal as D +import Data.Scientific as S import Data.Proxy import GHC.TypeLits -newtype Decimal (p :: Nat) (s :: Nat) +newtype Decimal (precision :: Nat) (scale :: Nat) = Decimal { unDecimal :: D.BigDecimal } deriving (Eq, Ord, Show, Read, Num, Fractional, Real) @@ -31,3 +32,17 @@ underlyingValue (Decimal d) in if D.precision new > pp then Nothing else Just $ fromInteger $ D.getValue new + +clamp :: forall p s. (KnownNat p, KnownNat s) => Decimal p s -> Decimal p s +clamp (Decimal d) + = let ss = natVal (Proxy :: Proxy s) + pp = natVal (Proxy :: Proxy p) + new = if ss > D.getScale d + then D.BigDecimal (D.getValue d * 10 ^ (ss - D.getScale d)) ss + else D.roundBD d (D.halfUp ss) + in Decimal new + +toScientific :: (KnownNat p, KnownNat s) => Decimal p s -> Scientific +toScientific d + = let (Decimal d') = clamp d + in S.scientific (D.getValue d') (negate $ fromIntegral $ D.getScale d') diff --git a/src/Data/Avro/Schema/Deconflict.hs b/src/Data/Avro/Schema/Deconflict.hs index 016f083..69e7927 100644 --- a/src/Data/Avro/Schema/Deconflict.hs +++ b/src/Data/Avro/Schema/Deconflict.hs @@ -30,7 +30,12 @@ import Debug.Trace -- with the writer's schema into the form specified by the reader's schema. -- -- Schema resolution rules are described by the specification: -deconflict :: Schema -> Schema -> Either String ReadSchema +deconflict + :: Schema + -- ^ The writer's schema. + -> Schema + -- ^ The reader's schema. + -> Either String ReadSchema deconflict writerSchema readerSchema | writerSchema == readerSchema = pure (Read.fromSchema readerSchema) deconflict S.Null S.Null = pure Read.Null deconflict S.Boolean S.Boolean = pure Read.Boolean diff --git a/src/Data/Avro/Schema/ReadSchema.hs b/src/Data/Avro/Schema/ReadSchema.hs index bdcbd69..f4e31c8 100644 --- a/src/Data/Avro/Schema/ReadSchema.hs +++ b/src/Data/Avro/Schema/ReadSchema.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE DeriveAnyClass #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} module Data.Avro.Schema.ReadSchema ( ReadSchema(..), ReadField(..) @@ -9,6 +10,8 @@ module Data.Avro.Schema.ReadSchema , fromSchema, fromField , extractBindings +, buildTypeEnvironment +, typeName , S.Decimal(..) , S.LogicalTypeBytes(..), S.LogicalTypeFixed(..) @@ -201,3 +204,69 @@ extractBindings = \case Map{..} -> extractBindings values FreeUnion {..} -> extractBindings ty _ -> HashMap.empty + +-- | @buildTypeEnvironment schema@ builds a function mapping type names to +-- the types declared in the traversed schema. +-- +-- This mapping includes both the base type names and any aliases they +-- have. Aliases and normal names are not differentiated in any way. +buildTypeEnvironment :: Applicative m + => (TypeName -> m ReadSchema) + -- ^ Callback to handle type names not in the + -- schema. + -> ReadSchema + -- ^ The schema that we're generating a lookup + -- function for. + -> (TypeName -> m ReadSchema) +buildTypeEnvironment failure from forTy = + case HashMap.lookup forTy env of + Nothing -> failure forTy + Just res -> pure res + where + env = extractBindings from + +-- |Get the name of the type. In the case of unions, get the name of the +-- first value in the union schema. +typeName :: ReadSchema -> Text +typeName bt = + case bt of + Null -> "null" + Boolean -> "boolean" + Int Nothing -> "int" + Int (Just (S.DecimalI d)) + -> decimalName d + Int (Just S.Date) -> "date" + Int (Just S.TimeMillis) + -> "time-millis" + Long _ Nothing -> "long" + Long _ (Just (S.DecimalL d)) + -> decimalName d + Long _ (Just S.TimeMicros) + -> "time-micros" + Long _ (Just S.TimestampMillis) + -> "timestamp-millis" + Long _ (Just S.TimestampMicros) + -> "timestamp-micros" + Long _ (Just S.LocalTimestampMillis) + -> "local-timestamp-millis" + Long _ (Just S.LocalTimestampMicros) + -> "local-timestamp-micros" + Float _ -> "float" + Double _ -> "double" + Bytes Nothing -> "bytes" + Bytes (Just (S.DecimalB d)) + -> decimalName d + String Nothing -> "string" + String (Just S.UUID) + -> "uuid" + Array _ -> "array" + Map _ -> "map" + NamedType name -> S.renderFullname name + Union ts -> typeName $ snd $ V.head ts + Fixed _ _ _ (Just (S.DecimalF d)) + -> decimalName d + Fixed _ _ _ (Just S.Duration) + -> "duration" + _ -> S.renderFullname $ name bt + where + decimalName (S.Decimal prec sc) = "decimal(" <> T.pack (show prec) <> "," <> T.pack (show sc) <> ")" diff --git a/src/Data/Avro/Schema/Schema.hs b/src/Data/Avro/Schema/Schema.hs index c6af7c4..19437a6 100644 --- a/src/Data/Avro/Schema/Schema.hs +++ b/src/Data/Avro/Schema/Schema.hs @@ -849,6 +849,7 @@ parseFieldDefault env schema value = parseAvroJSON defaultUnion env schema value where defaultUnion (Union ts) val = DUnion ts (V.head ts) <$> parseFieldDefault env (V.head ts) val defaultUnion _ _ = error "Impossible: not Union." +-- TODO deprecate this function in favor of JSON module -- | Parse JSON-encoded avro data. parseAvroJSON :: (Schema -> A.Value -> Result DefaultValue) -- ^ How to handle unions. The way unions are @@ -1055,11 +1056,10 @@ overlay input supplement = overlayType input overlayType m@Map{..} = m { values = overlayType values } overlayType r@Record{..} = r { fields = map overlayField fields } overlayType u@Union{..} = Union (fmap overlayType options) - overlayType nt@(NamedType _) = rebind nt + overlayType (NamedType tn) = HashMap.lookupDefault (NamedType tn) tn bindings overlayType other = other - rebind (NamedType tn) = HashMap.lookupDefault (NamedType tn) tn bindings - bindings = extractBindings supplement + bindings = extractBindings supplement -- | Extract the named inner type definition as its own schema. subdefinition :: Schema -> Text -> Maybe Schema diff --git a/test/Avro/JSONSpec.hs b/test/Avro/JSONSpec.hs index 8915d25..fa30608 100644 --- a/test/Avro/JSONSpec.hs +++ b/test/Avro/JSONSpec.hs @@ -1,7 +1,9 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} module Avro.JSONSpec where import Control.Monad (forM_) @@ -10,14 +12,31 @@ import Control.Monad.Identity (Identity (..)) import qualified Data.Aeson as Aeson import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS +import Data.Either (isRight) +import Data.Int import qualified Data.Map as Map +import qualified Data.HashMap.Strict as HM +import Data.Tagged +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TL import Data.Avro.Deriving import Data.Avro.EitherN +import Data.Avro.Encoding.FromAvro import Data.Avro.JSON +import Data.Avro.HasAvroSchema +import Data.Avro.Schema.Deconflict (deconflict) +import qualified Data.Avro.Schema.Schema as S +import Data.Avro.Schema.ReadSchema +import qualified Avro.Data.Deconflict.Read as Read +import qualified Avro.Data.Deconflict.Write as Write import Avro.Data.Endpoint import Avro.Data.Enums +import Avro.Data.FixedTypes +import Avro.Data.Karma import Avro.Data.Reused import Avro.Data.Unions @@ -34,11 +53,94 @@ import System.Environment (setEnv) {- HLINT ignore "Redundant do" -} deriveAvro "test/data/unions-no-namespace.avsc" +deriveAvro "test/data/maybe.avsc" + +roundtripGen :: (Monad m, Show a, Eq a, ToAvroJSON a, FromAvro a) => S.Schema -> Gen a -> PropertyT m () +roundtripGen sch gen = do + value <- forAll gen + tripping value (toAvroJSON sch) (fromJSON (fromSchema sch)) + +roundtripGenKnownSchema :: (Monad m, Show a, Eq a, ToAvroJSON a, FromAvro a, HasAvroSchema a) => Gen a -> PropertyT m () +roundtripGenKnownSchema gen = do + value <- forAll gen + let sch = schemaOf value + tripping value (toAvroJSON sch) (fromJSON (fromSchema sch)) + + spec :: Spec spec = describe "Avro.JSONSpec: JSON serialization/parsing" $ do it "should pass" $ require $ withTests 1 $ property success + describe "ToAvroJSON instances" $ do + specify "Int" $ do + let int = 1 :: Int + toAvroJSON (S.Long Nothing) int `shouldBe` Aeson.Number 1 + toAvroJSON (S.Int Nothing) int `shouldBe` Aeson.Number 1 + specify "Int32" $ do + let int = 1 :: Int32 + toAvroJSON (S.Long Nothing) int `shouldBe` Aeson.Number 1 + toAvroJSON (S.Int Nothing) int `shouldBe` Aeson.Number 1 + + describe "Schema evolution" $ do + it "can read values in older formats from new schemas" $ do + let writeSchema = unTagged $ schema @Write.Foo + readSchema = unTagged $ schema @Read.Foo + (Right deconflicted) = deconflict writeSchema readSchema + require $ property $ do + foo <- forAll Write.genFoo + let jsonAnnotation = TL.unpack $ TL.decodeUtf8 $ Aeson.encode $ toJSON foo + annotate jsonAnnotation + annotate $ show deconflicted + case fromJSON @Read.Foo deconflicted (toJSON foo) of + Left err -> annotate err >> failure + Right _ -> success + describe "Roundtrip" $ do + it "Null roundtrip" $ require $ withTests 1 $ property $ do + roundtripGen S.Null (Gen.constant ()) + it "Bool roundtrip" $ require $ withTests 2 $ property $ do + roundtripGen S.Boolean Gen.bool_ + it "Int roundtrip" $ require $ property $ do + roundtripGen (S.Int Nothing) (Gen.int32 Range.linearBounded) + it "Long roundtrip" $ require $ property $ do + roundtripGen (S.Long Nothing) (Gen.int Range.linearBounded) + roundtripGen (S.Long Nothing) (Gen.int64 Range.linearBounded) + it "Float roundtrip" $ require $ property $ do + roundtripGen S.Float (Gen.float (Range.exponentialFloat (-500) 500)) + it "Double roundtrip" $ require $ property $ do + roundtripGen S.Double (Gen.double (Range.exponentialFloat (-500) 500)) + it "Bytes roundtrip" $ require $ property $ do + roundtripGen (S.Bytes Nothing) (Gen.bytes (Range.linear 0 100)) + it "String roundtrip" $ require $ property $ do + roundtripGen (S.String Nothing) (Gen.text (Range.linear 0 100) Gen.unicodeAll) + it "Array roundtrip" $ require $ property $ do + roundtripGen (S.Array $ S.Long Nothing) (Gen.list (Range.linear 0 100) $ Gen.int Range.linearBounded) + it "Map roundtrip" $ require $ property $ do + let gen = do + items <- Gen.list + (Range.linear 0 100) + (do + k <- Gen.text (Range.linear 1 100) Gen.alphaNum + v <- Gen.int Range.linearBounded + pure (k, v) + ) + pure $ HM.fromList items + roundtripGen (S.Map $ S.Long Nothing) gen + -- TODO named types + -- TODO record types + describe "Record roundtrip" $ do + specify "Endpoint" $ require $ property $ roundtripGenKnownSchema endpointGen + specify "Blessing" $ require $ property $ roundtripGenKnownSchema blessingGen + specify "Curse" $ require $ property $ roundtripGenKnownSchema curseGen + specify "ReuseFixed" $ require $ property $ roundtripGenKnownSchema reuseFixedGen + specify "Unions" $ require $ property $ roundtripGenKnownSchema unionsGen + + it "Enum roundtrip" $ require $ property $ do + roundtripGen (schemaOf EnumReasonBecause) (Gen.enumBounded :: Gen EnumReason) + -- TODO it "Union roundtrip" + it "Fixed roundtrip" $ require $ property $ do + roundtripGen (schemaOf $ FixedTag "wow") (fmap FixedTag $ Gen.bytes $ Range.singleton 3) + -- it "should do roundtrip (enums)" $ require $ property $ do -- msg <- forAll enumWrapperGen -- tripping msg (Aeson.encode . toJSON) parseJSON