Skip to content
Merged
178 changes: 154 additions & 24 deletions edison-core/src/Data/Edison/Assoc/TernaryTrie.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
-- Stability : stable
-- Portability : GHC, Hugs (MPTC and FD)
--
-- Finite maps implemented as ternary search tries
-- Finite maps indexed by lists or strings @[k]@, implemented as ternary
-- search tries

module Data.Edison.Assoc.TernaryTrie (
-- * Type of ternary search tries
Expand Down Expand Up @@ -60,12 +61,13 @@ import qualified Data.Edison.Seq as S
import qualified Data.List as L
import qualified Control.Monad.Fail as Fail
import Control.Monad
import Data.Coerce (coerce)
import Data.Monoid
import Data.Semigroup as SG
import Data.Maybe (isNothing)
import Data.Maybe (isJust, isNothing)

import Data.Edison.Assoc.Defaults
import Test.QuickCheck (Arbitrary(..), CoArbitrary(..), Gen(), variant)
import Test.QuickCheck (Arbitrary(..), CoArbitrary(..), Gen(), NonNegative(..), variant, sized, resize, choose, oneof)


-- signatures for exported functions
Expand Down Expand Up @@ -162,15 +164,43 @@ moduleName = "Data.Edison.Assoc.TernaryTrie"
data FM k a
= FM !(Maybe a) !(FMB k a)

-- | This is isomorphic to an iteration of binary trees with keys @k@.
--
-- @
-- data BT k v = E | I k v (BT k v) (BT k v)
-- data Layer k v x = Layer (Maybe v) (BT k x)
--
-- FMB k v = Fix (Layer k v)
-- @
--
-- The trees are weight-balanced trees, ensuring that the sizes of the
-- two subtrees of any node are bounded by each other up to a constant factor.
--
-- @
-- size l + size r <= 1
--
-- -- or --
--
-- size l <= 6 * size r
-- size r <= 6 * size l
-- @
--
-- Source: <https://yoichihirai.com/bst.pdf Balancing Weight-Balanced Trees>
-- by Hirai and Yamamoto, 2011 (Section 4)
data FMB k v
= E
| I !Int !k !(Maybe v) !(FMB k v) !(FMB' k v) !(FMB k v)
deriving Show

newtype FMB' k v
= FMB' (FMB k v)
deriving Show

-- | The balance factor must be either 3 or 4.
-- With other factors, the invariant gets broken by delete, minViewWithKey and maxViewWithKey.
-- (cf. Section 4 of the paper linked above)
balance :: Int
balance = 6
balance = 4

sizeFMB :: FMB k v -> Int
sizeFMB E = 0
Expand All @@ -191,21 +221,27 @@ lookupFMB nk@(x:xs) (I _ k v l (FMB' fmbm) r)
GT -> lookupFMB nk r
EQ -> if L.null xs then v else lookupFMB xs fmbm

listToFMB :: [k] -> (Maybe v -> Maybe v) -> FMB k v
listToFMB [x] fv = mkFMB x (fv Nothing) E (FMB' E) E
listToFMB (x:xs) fv = mkFMB x Nothing E (FMB' $ listToFMB xs fv) E
listToFMB :: [k] -> v -> FMB k v
listToFMB [x] v = mkFMB x (Just v) E (FMB' E) E
listToFMB (x:xs) v = mkFMB x Nothing E (FMB' $ listToFMB xs v) E
listToFMB _ _ = error "TernaryTrie.listToFMB: bug!"

addToFMB :: (Ord k) => [k] -> (Maybe v -> Maybe v) -> FMB k v -> FMB k v
addToFMB xs combiner E
= listToFMB xs combiner
= case combiner Nothing of
Just v -> listToFMB xs v
Nothing -> E
addToFMB nk@(x:xs) combiner (I size k v l m@(FMB' fmbm) r)
= case compare x k of
LT -> mkBalancedFMB k v (addToFMB nk combiner l) m r
GT -> mkBalancedFMB k v l m (addToFMB nk combiner r)
EQ -> case xs of
[] -> I size k (combiner v) l m r
_ -> I size k v l (FMB' $ addToFMB xs combiner fmbm) r
[] -> case combiner v of
Nothing | FMB' E <- m -> appendFMB l r
v' -> I size k v' l m r
_ -> case addToFMB xs combiner fmbm of
E | Nothing <- v -> appendFMB l r
m' -> I size k v l (FMB' m') r
addToFMB _ _ _ = error "TernaryTrie.addToFMB: bug!"

addToFM :: (Ord k) => [k] -> (Maybe v -> Maybe v) -> FM k v -> FM k v
Expand All @@ -226,7 +262,9 @@ lookupAndDelFromFMB onFail cont nk@(x:xs) (I size k v l m@(FMB' fmbm) r)
Just w -> case fmbm of
E -> cont w (appendFMB l r)
_ -> cont w (I size k Nothing l m r)
_ -> lookupAndDelFromFMB onFail (\w m' -> cont w (I size k v l (FMB' m') r)) xs fmbm
_ -> lookupAndDelFromFMB onFail (\w m' -> case m' of
E | Nothing <- v -> cont w (appendFMB l r)
_ -> cont w (I size k v l (FMB' m') r)) xs fmbm
lookupAndDelFromFMB _ _ _ _ = error "TernaryTrie.lookupAndDelFromFMB: bug!"

lookupAndDelFromFM :: (Ord k) => z -> (v -> FM k v -> z) -> [k] -> FM k v -> z
Expand All @@ -247,7 +285,9 @@ delFromFMB nk@(x:xs) (I size k v l m@(FMB' fmbm) r)
[] -> case fmbm of
E -> appendFMB l r
_ -> I size k Nothing l m r
_ -> I size k v l (FMB' $ delFromFMB xs fmbm) r
_ -> case delFromFMB xs fmbm of
E | Nothing <- v -> appendFMB l r
m' -> I size k v l (FMB' m') r
delFromFMB _ _ = error "TernaryTrie.delFromFMB: bug!"


Expand Down Expand Up @@ -304,6 +344,8 @@ mkBalancedFMB k v l m r


mkVBalancedFMB :: k -> Maybe v -> FMB k v -> FMB' k v -> FMB k v -> FMB k v
mkVBalancedFMB k Nothing l (FMB' E) r
= appendFMB l r
mkVBalancedFMB k v E m E
= mkFMB k v E m E
mkVBalancedFMB k v l@E m (I _ kr vr rl rm rr)
Expand Down Expand Up @@ -497,7 +539,7 @@ mergeKVFM f (FM vx fmbx) (FM vy fmby)
empty = FM Nothing E

singleton [] v = FM (Just v) E
singleton xs v = FM Nothing (listToFMB xs (\_ -> Just v))
singleton xs v = FM Nothing (listToFMB xs v)

fromSeq = fromSeqUsingInsertSeq

Expand Down Expand Up @@ -858,7 +900,11 @@ intersectionWithKey f

minViewFMB :: Fail.MonadFail m => FMB k a -> (FMB k a -> FM k a) -> m (a, FM k a)
minViewFMB E _ = fail $ moduleName++".minView: empty map"
minViewFMB (I i k (Just v) E m r) f = return (v, f (I i k Nothing E m r))
minViewFMB (I i k (Just v) E m r) f = return (v, f t)
where
t = case m of
FMB' E -> r
_ -> I i k Nothing E m r
minViewFMB (I _ _ Nothing E (FMB' E) _) _ = error $ moduleName++".minView: bug!"
minViewFMB (I _ k Nothing E (FMB' m) r) f = minViewFMB m (\m' -> f (mkVBalancedFMB k Nothing E (FMB' m') r))
minViewFMB (I _ k mv l m r) f = minViewFMB l (\l' -> f (mkVBalancedFMB k mv l' m r))
Expand All @@ -869,7 +915,11 @@ minView (FM Nothing fmb) = minViewFMB fmb (FM Nothing)

minViewWithKeyFMB :: Fail.MonadFail m => FMB k a -> ([k] -> [k]) -> (FMB k a -> FM k a) -> m (([k],a),FM k a)
minViewWithKeyFMB E _ _ = fail $ moduleName++".minView: empty map"
minViewWithKeyFMB (I i k (Just v) E m r) kf f = return ((kf [k],v),f (I i k Nothing E m r))
minViewWithKeyFMB (I i k (Just v) E m r) kf f = return ((kf [k],v),f t)
where
t = case m of
FMB' E -> r
_ -> I i k Nothing E m r
minViewWithKeyFMB (I _ _ Nothing E (FMB' E) _) _ _ = error $ moduleName++".minViewWithKey: bug!"
minViewWithKeyFMB (I _ k Nothing E (FMB' m) r) kf f = minViewWithKeyFMB m (kf . (k:))
(\m' -> f (mkVBalancedFMB k Nothing E (FMB' m') r))
Expand Down Expand Up @@ -912,7 +962,11 @@ maxViewFMB :: Fail.MonadFail m => FMB k a -> (FMB k a -> FM k a) -> m (a, FM k a
maxViewFMB (I _ _ (Just v) l (FMB' E) E) f = return (v, f l)
--maxViewFMB (I i k (Just v) l (FMB' E) E) f = return (v, f (I i k Nothing l (FMB' E) E))
maxViewFMB (I _ _ Nothing _ (FMB' E) E) _ = error $ moduleName++".maxView: bug!"
maxViewFMB (I i k mv l (FMB' m) E) f = maxViewFMB m (\m' -> f (I i k mv l (FMB' m') E))
maxViewFMB (I i k mv l (FMB' m) E) f = maxViewFMB m (\m' -> f (t m'))
where
t m' = case m' of
E | Nothing <- mv -> l
_ -> I i k mv l (FMB' m') E
maxViewFMB (I _ k mv l m r) f = maxViewFMB r (\r' -> f (mkVBalancedFMB k mv l m r'))
maxViewFMB E _ = error $ moduleName++".maxView: bug!"

Expand All @@ -926,7 +980,11 @@ maxViewWithKeyFMB :: Monad m => FMB k a -> ([k] -> [k]) -> (FMB k a -> FM k a) -
maxViewWithKeyFMB (I _ k (Just v) l (FMB' E) E) kf f = return ((kf [k],v),f l)
maxViewWithKeyFMB (I _ _ Nothing _ (FMB' E) E) _ _ = error $ moduleName++".maxViewWithKey: bug!"
maxViewWithKeyFMB (I i k mv l (FMB' m) E) kf f = maxViewWithKeyFMB m (kf . (k:))
(\m' -> f (I i k mv l (FMB' m') E))
(\m' -> f (t m'))
where
t m' = case m' of
E | Nothing <- mv -> l
_ -> I i k mv l (FMB' m') E
maxViewWithKeyFMB (I _ k mv l m r) kf f = maxViewWithKeyFMB r kf
(\r' -> f (mkVBalancedFMB k mv l m r'))
maxViewWithKeyFMB E _ _ = error $ moduleName++".maxViewWithKey: bug!"
Expand Down Expand Up @@ -1125,26 +1183,38 @@ actualSizeFMB (I _ _ _ l _ r) = 1 + actualSizeFMB l + actualSizeFMB r

structuralInvariantFMB :: Ord k => FMB k a -> Bool
structuralInvariantFMB E = True
structuralInvariantFMB fmb@(I size k _ l (FMB' m) r)
structuralInvariantFMB fmb@(I size k v l (FMB' m) r)
= structuralInvariantFMB l
&& structuralInvariantFMB m
&& structuralInvariantFMB r
&& keyInvariantFMB (<k) l
&& keyInvariantFMB (>k) r
&& actualSizeFMB fmb == size
&& (sizel + sizer < 2
|| (sizel <= balance * sizer && sizer <= balance * sizel))
&& isBalanced l r
&& relevantRoot fmb

isBalanced :: FMB k a -> FMB k a -> Bool
isBalanced l r = sizel + sizer <= 1
|| (sizel <= balance * sizer && sizer <= balance * sizel)
where
sizel = sizeFMB l
sizer = sizeFMB r

-- | This invariant is used by minView
relevantRoot :: FMB k a -> Bool
relevantRoot (I _ _ Nothing _ (FMB' E) _) = False
relevantRoot _ = True

structuralInvariant :: Ord k => FM k a -> Bool
structuralInvariant (FM _ fmb) = structuralInvariantFMB fmb


instance (Ord k,Arbitrary k,Arbitrary a) => Arbitrary (FM k a) where
arbitrary = do (xs::[([k],a)]) <- arbitrary
return (Prelude.foldr (uncurry insert) empty xs)
-- | Generate weight-balanced trees either by direct recursion or via
-- 'fromSeq'. The former is much more likely to hit counterexamples to wrong
-- @balance@ coefficients. We keep the latter generator around just in case,
-- because it generates a more realistic distribution.
instance (Integral k, Arbitrary k, Arbitrary a) => Arbitrary (FM k a) where
arbitrary = oneof [genFM, fromSeq <$> (arbitrary :: Gen [([k], a)])]
shrink (FM v m) = [FM v m | (v, FMB' m) <- shrinkTuple shrink shrinkFMB' (v, FMB' m)]

instance (Ord k,CoArbitrary k,CoArbitrary a) => CoArbitrary (FM k a) where
coarbitrary (FM x fmb) = coarbitrary_maybe x . coarbitrary_fmb fmb
Expand All @@ -1168,3 +1238,63 @@ instance Ord k => Monoid (FM k a) where
mappend = (SG.<>)
mconcat = unionSeq

-- Testing

genFM :: (Integral k, Arbitrary a) => Gen (FM k a)
genFM = do
FM <$> arbitrary <*> genFMB_

-- Choose the number of elements in the top layer upfront,
-- and distribute it while recursing down.
genFMB_ :: (Integral k, Arbitrary a) => Gen (FMB k a)
genFMB_ = sized $ \sz -> do
n <- choose (0, sz)
resize (sz - n) (genFMB 0 n)

-- Distribute the size @sz@ to generate the middle children of the nodes in the
-- top layer.
genFMB :: (Integral k, Arbitrary a) => Int -> Int -> Gen (FMB k a)
genFMB i 0 = pure E
genFMB i n = sized $ \sz -> do
let b = if n <= 2 then 0 else (n-1+balance) `div` (balance+1)
l <- choose (b, n-1-b)
z <- choose (0, sz)
m <- resize (min z (sz-z)) genFMB_
v <- case m of E -> Just <$> arbitrary ; _ -> arbitrary
let k = fromIntegral (i+l)
I n k v
<$> resize z (genFMB i l)
<*> pure (FMB' m)
<*> resize (sz - z) (genFMB (i+l+1) (n-l-1))

-- Be careful to preserve balance during shrinking.
shrinkFMB :: Arbitrary a => FMB k a -> [FMB k a]
shrinkFMB E = []
shrinkFMB (I s k v l m r) = E : l : r : do
let (*-) = shrinkTuple ; infixr 3 *-
(v, (l, (m@(FMB' m'), r))) <- (shrinkJust *- shrinkFMB *- shrinkFMB' *- shrinkFMB) (v, (l, (m, r)))
let s = sizeFMB l + sizeFMB r + 1
t = I s k v l m r
guard (isBalanced l r && (isJust v || not (nullFMB' m)))
pure t

nullFMB' :: FMB' k v -> Bool
nullFMB' (FMB' E) = True
nullFMB' _ = False

shrinkFMB' :: Arbitrary a => FMB' k a -> [FMB' k a]
shrinkFMB' (FMB' m) = coerce $
tailsFMB m ++ shrinkFMB m

-- List the middle children of the top layer.
tailsFMB :: FMB k a -> [FMB k a]
tailsFMB E = []
tailsFMB (I _ _ _ l (FMB' m) r) = m : tailsFMB l ++ tailsFMB r

-- Don't remove elements
shrinkJust :: Arbitrary a => Maybe a -> [Maybe a]
shrinkJust Nothing = []
shrinkJust (Just x) = Just <$> shrink x

shrinkTuple :: (a -> [a]) -> (b -> [b]) -> (a, b) -> [(a, b)]
shrinkTuple sa sb (a, b) = [(a', b) | a' <- sa a] ++ [(a, b') | b' <- sb b]
3 changes: 2 additions & 1 deletion test/Edison-test.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ Source-Repository head
Location: https://github.com/robdockins/edison/
Subdir: test

Executable testSuite
test-suite testSuite
type: exitcode-stdio-1.0
Main-Is: Main.hs
Other-modules:
Data.Edison.Test.Bag
Expand Down
Loading