diff --git a/Data/Time/Zones.hs b/Data/Time/Zones.hs index cd2aaed..e4278cc 100644 --- a/Data/Time/Zones.hs +++ b/Data/Time/Zones.hs @@ -31,6 +31,7 @@ module Data.Time.Zones ( loadLocalTZ, -- * Utilities diffForAbbr, + renderPosixTz, ) where import Control.DeepSeq @@ -39,30 +40,72 @@ import Data.Data import Data.Int (Int64) import Data.Time import Data.Time.Zones.Internal +import Data.Time.Zones.Internal.PosixTz (renderPosixTz) import Data.Time.Zones.Read import Data.Time.Zones.Types import qualified Data.Vector as VB import qualified Data.Vector.Unboxed as VU +import qualified Data.ByteString.Char8 as B8 -- | Returns the time difference (in seconds) for TZ at the given -- POSIX time. diffForPOSIX :: TZ -> Int64 -> Int {-# INLINE diffForPOSIX #-} -diffForPOSIX (TZ trans diffs _) t = VU.unsafeIndex diffs $ binarySearch trans t +diffForPOSIX (TZ trans diffs _ mptz) t = + if t < VU.last trans + then useExplicit + else maybe useExplicit (`diffForPOSIXFromRule` t) mptz + where + useExplicit = VU.unsafeIndex diffs $ binarySearch trans t + +-- | Returns a time difference (in seconds) for `PosixTz` at given +-- POSIX time. +diffForPOSIXFromRule :: PosixTz -> Int64 -> Int +{-# INLINE diffForPOSIXFromRule #-} +diffForPOSIXFromRule ptz t = + diffMins * 60 + where + TimeZone diffMins _ _ = timeZoneFromRule ptz t +-- | Returns the `TimeZone` for given index of `TZ` data. +-- +-- /Note/: This ignores POSIX-TZ rules. timeZoneForIx :: TZ -> Int -> TimeZone {-# INLINE timeZoneForIx #-} -timeZoneForIx (TZ _ diffs infos) i = TimeZone diffMins isDst name +timeZoneForIx (TZ _ diffs infos _) i = TimeZone diffMins isDst name where diffMins = VU.unsafeIndex diffs i `div` 60 (isDst, name) = VB.unsafeIndex infos i +-- | Returns the `TimeZone` for the `PosixTz` at given POSIX time. +timeZoneFromRule :: PosixTz -> Int64 -> TimeZone +{-# INLINE timeZoneFromRule #-} +timeZoneFromRule (PosixTz (PosixZone std stdoff) mdst) t = maybe stdtz f mdst + where + toDiffMins x = fromIntegral (-x) `div` 60 + stdtz = TimeZone (toDiffMins stdoff) False (mkname std) + -- 'TimeZone' does not use the angle bracket notation + mkname = B8.unpack . B8.dropWhile (== '<') . B8.dropWhileEnd (== '>') + + f (PosixZone dst dstoff, rbeg, rend) = + let (y, _, _) = toGregorian . localDay $ int64PairToLocalTime t 0 + beg = ruleToSecs rbeg (fromIntegral y) + fromIntegral stdoff + end = ruleToSecs rend (fromIntegral y) + fromIntegral dstoff + isdst = if beg > end + then t < end || t >= beg + else t >= beg && t < end + dsttz = TimeZone (toDiffMins dstoff) True (mkname dst) + in if isdst then dsttz else stdtz + -- | Returns the `TimeZone` for the `TZ` at the given POSIX time. timeZoneForPOSIX :: TZ -> Int64 -> TimeZone {-# INLINABLE timeZoneForPOSIX #-} -timeZoneForPOSIX tz@(TZ trans _ _) t = timeZoneForIx tz i +timeZoneForPOSIX tz@(TZ trans _ _ mptz) t = + if t < VU.last trans + then useExplicit + else maybe useExplicit (`timeZoneFromRule` t) mptz where - i = binarySearch trans t + useExplicit = timeZoneForIx tz (binarySearch trans t) -- | Returns the `TimeZone` for the `TZ` at the given `UTCTime`. timeZoneForUTCTime :: TZ -> UTCTime -> TimeZone @@ -151,7 +194,7 @@ instance NFData FromLocal where -- TODO(klao): check that these assuptions hold. localToPOSIX :: TZ -> Int64 -> FromLocal {-# INLINABLE localToPOSIX #-} -localToPOSIX (TZ trans diffs _) !lTime = res +localToPOSIX (TZ trans diffs _ _) !lTime = res where lBound = lTime - 86400 ix = binarySearch trans lBound @@ -202,7 +245,7 @@ instance NFData LocalToUTCResult where -- TODO(klao): better name localTimeToUTCFull :: TZ -> LocalTime -> LocalToUTCResult -localTimeToUTCFull tz@(TZ _ diffs _) localT = res +localTimeToUTCFull tz@(TZ _ diffs _ _) localT = res where (t,ps) = localTimeToInt64Pair localT addDiff i = int64PairToUTCTime t' ps @@ -239,7 +282,7 @@ localTimeToUTCTZ tz lt = -- on the abbreviation.) diffForAbbr :: TZ -> String -> Maybe Int {-# INLINABLE diffForAbbr #-} -diffForAbbr (TZ _ diffs infos) s = +diffForAbbr (TZ _ diffs infos _) s = case VB.findIndex ((==) s . snd) $ VB.reverse infos of Nothing -> Nothing Just i -> Just $ VU.unsafeIndex diffs (VU.length diffs - 1 - i) diff --git a/Data/Time/Zones/Internal.hs b/Data/Time/Zones/Internal.hs index 05a28e0..a9f6f0c 100644 --- a/Data/Time/Zones/Internal.hs +++ b/Data/Time/Zones/Internal.hs @@ -18,6 +18,11 @@ module Data.Time.Zones.Internal ( localTimeToInt64Pair, int64PairToUTCTime, int64PairToLocalTime, + -- * POSIX-TZ helper functions + ruleToSecs, + yearToSecs, + daysInMonth, + monthToSecs, -- * Low-level \"coercions\" picoToInteger, integerToPico, @@ -25,17 +30,25 @@ module Data.Time.Zones.Internal ( picoToDiffTime, diffTimeToInteger, integerToDiffTime, + -- * Backwards combatibility + getEnvMaybe, ) where +import Data.Bits ( Bits((.&.), shiftR) ) import Data.Fixed import Data.Int import Data.Time +import qualified Data.Vector.Unboxed as VU +import System.Environment ( getEnv ) +import System.IO.Error ( catchIOError, isDoesNotExistError ) #ifdef TZ_TH import Data.Time.Zones.Internal.CoerceTH #else import Unsafe.Coerce #endif +import Data.Time.Zones.Types + utcTimeToInt64Pair :: UTCTime -> (Int64, Int64) utcTimeToInt64Pair (UTCTime (ModifiedJulianDay d) t) = (86400 * (fromIntegral d - unixEpochDay) + s, ps) @@ -79,6 +92,60 @@ utcTimeToInt64 (UTCTime (ModifiedJulianDay d) t) unixEpochDay = 40587 {-# INLINE utcTimeToInt64 #-} +-------------------------------------------------------------------------------- +-- POSIX-TZ helper functions + +-- | Convert 'TzRule' plus year to number of seconds since epoch +-- +-- See musl rule_to_secs() +ruleToSecs :: TzRule -> Int64 -> Int64 +ruleToSecs (TzRule ty m n d t) y = + ys + ms + fromIntegral t + where + secsperday = 86400 + isleap = isLeapYear (fromIntegral y) + ys = yearToSecs y + ms = case ty of + TzRuleJ -> fromIntegral (if not isleap || d < 60 then d - 1 else d) * secsperday + TzRuleN -> fromIntegral d * secsperday + TzRuleM -> + let + -- s1 = seconds until start of the month + s1 = fromIntegral $ monthToSecs isleap (m - 1) + t0 = ys + s1 + wday = ((t0 + 4*secsperday) `mod` (7*secsperday)) `div` secsperday + d1 = fromIntegral d - wday + d2 = if d1 < 0 then d1 + 7 else d1 + n1 = fromIntegral $ if n == 5 && d2+28 >= fromIntegral (daysInMonth isleap m) + then 4 + else n + s2 = secsperday * (d2 + 7*(n1-1)) + in s1 + s2 +{-# INLINE ruleToSecs #-} + +-- | Number of seconds since epoch for year +yearToSecs :: Int64 -> Int64 +yearToSecs y64 = + utcTimeToInt64 $ UTCTime (fromGregorian (fromIntegral y64) 1 1) 0 +{-# INLINE yearToSecs #-} + +-- | Number of days in month +daysInMonth :: Bool -> Int -> Int +daysInMonth isleap 2 = 28 + if isleap then 1 else 0 +daysInMonth _ m = 30 + ((0xad5 `shiftR` (m - 1)) .&. 1) +{-# INLINE daysInMonth #-} + +-- | Number of seconds between start of year and end of month (1-12) +monthToSecs :: Bool -> Int -> Int +monthToSecs isleap m = + d * 86400 + where + d = VU.unsafeIndex sumdays m + if isleap && m >= 2 then 1 else 0 + + sumdays :: VU.Vector Int + sumdays = VU.fromList [ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ] +{-# INLINE monthToSecs #-} + -------------------------------------------------------------------------------- -- Low-level zero-overhead conversions. -- Basically we could have used 'coerce' if the constructors were exported. @@ -138,3 +205,13 @@ integerToDiffTime = unsafeCoerce {-# INLINE integerToDiffTime #-} #endif + +-------------------------------------------------------------------------------- +-- Backwards compatibility + +-- | This is equivalent to 'lookupEnv', defined for compatibility with +-- base < 4.6.0.0 +getEnvMaybe :: String -> IO (Maybe String) +getEnvMaybe var = + fmap Just (getEnv var) `catchIOError` + (\e -> if isDoesNotExistError e then return Nothing else ioError e) diff --git a/Data/Time/Zones/Internal/PosixTz.hs b/Data/Time/Zones/Internal/PosixTz.hs new file mode 100644 index 0000000..c5b64b3 --- /dev/null +++ b/Data/Time/Zones/Internal/PosixTz.hs @@ -0,0 +1,338 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE Rank2Types #-} + +-- | Parsing and rendering of POSIX-TZ style rules +module Data.Time.Zones.Internal.PosixTz where + +import Prelude hiding (succ) +import Control.Applicative ( Alternative(..) ) +import Control.Monad (unless) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 + +#if !(MIN_VERSION_base(4,13,0)) +import qualified Control.Monad.Fail as Fail +#endif + +import Data.Time.Zones.Types + +-- | Default time for DST rules +-- +-- "The time fields specify when, in the local time currently in effect, the change to the other time occurs. +-- If omitted, the default is 02:00:00." +defaultDstRuleTime :: Int +defaultDstRuleTime = 7200 + + +-- | Parse POSIX-TZ +-- +-- >>> parsePosixTz "CET-1CEST,M3.5.0,M10.5.0" +-- Right (PosixTz {_posixTzStd = PosixZone {_pzName = "CET", _pzOffset = -3600}, _posixTzDst = Just (PosixZone {_pzName = "CEST", _pzOffset = -7200},TzRule {_tzrType = TzRuleM, _tzrMon = 3, _tzrNum = 5, _tzrDay = 0, _tzrTime = 7200},TzRule {_tzrType = TzRuleM, _tzrMon = 10, _tzrNum = 5, _tzrDay = 0, _tzrTime = 7200})}) +-- >>> parsePosixTz "CET-1CEST,25/26:00:00,J75" +parsePosixTz :: ByteString -> Either String PosixTz +parsePosixTz = parse (posixTz <* endOfInput) +{-# INLINE parsePosixTz #-} + +posixTz :: Parser PosixTz +posixTz = do + std <- zone + stdoff <- offset + mdst <- (Nothing <$ endOfInput) <|> (Just <$> dstspec stdoff) + pure $ PosixTz (PosixZone std stdoff) mdst + +zone :: Parser ByteString +zone = do + c <- lookupChar + if c == '<' + then anglezone + else simplezone + where + anglezone = do + _ <- char '<' + s <- takeWhile1n 10 (\c -> c /= '>' && c /= '\NUL') + _ <- char '>' + pure $ B8.cons '<' $ B8.snoc s '>' + simplezone = takeWhile1n 6 isSimpleChar + isSimpleChar c = + c /= ':' && c /= ',' && c /= '-' && c /= '+' -- is not explicitly banned + && (c < '0' || c > '9') -- is not a digit + && c /= '\NUL' + +-- | Zone offset +-- zone offset hour part must be in range [-24,24] +-- note docs say [0, 24] but they don't treat the sign as part of the number +offset :: Parser Int +offset = hms (\h -> h >= -24 && h <= 24) + +-- | TZ rule time +-- as an extension to POSIX time hour part can be in range [-167, 167] +time :: Parser Int +time = hms (\h -> h >= -167 && h <= 167) + +hms :: (Int -> Bool) -> Parser Int +hms hpred = do + h <- signed 3 + unless (hpred h) $ fail "hours out of range" + m <- option 0 (char ':' *> decimal 2) + unless (m >= 0 && m <= 59) $ fail "minutes out of range [0,59]" + s <- option 0 (char ':' *> decimal 2) + unless (s >= 0 && s <= 59) $ fail "seconds out of range [0,59]" + pure $ 3600*h + (if h > 0 then 1 else (-1)) * (60*m + s) + +-- | DST spec +dstspec + :: Int -- std offset, needed to calculate default dst offset, if not explicitly given + -> Parser (PosixZone, TzRule, TzRule) +dstspec stdoff = do + dst <- zone + -- If no offset follows dst, summer time is assumed to be one hour ahead of standard time. + dstoff <- option (stdoff - 3600) offset + _ <- char ',' + start <- tzrule + _ <- char ',' + end <- tzrule + pure (PosixZone dst dstoff, start, end) + +-- | TZ rule +tzrule :: Parser TzRule +tzrule = do + r <- jnrule <|> nrule <|> mrule + t <- option defaultDstRuleTime $ char '/' *> time + pure $ r { _tzrTime = t } + +-- | Jn +jnrule :: Parser TzRule +jnrule = do + _ <- char 'J' + n <- decimal 3 + unless (n >= 1 && n <= 365) $ fail "date Jn: Julian day out of range [1,365]" + pure $ TzRule TzRuleJ 0 0 n 0 + +-- | n +nrule :: Parser TzRule +nrule = do + n <- decimal 3 + unless (n >= 0 && n <= 365) $ fail "date n: Julian day out of range [0,365]" + pure $ TzRule TzRuleN 0 0 n 0 + +-- | M.m.n.d +mrule :: Parser TzRule +mrule = do + _ <- char 'M' + m <- decimal 2 + unless (m >= 1 && m <= 12) $ fail "month number out of range [1,12]" + _ <- char '.' + n <- decimal 1 + unless (n >= 1 && n <= 5) $ fail "week number out of range [1,5]" + _ <- char '.' + d <- decimal 1 + unless (d >= 0 && d <= 6) $ fail "day number out of range [0,6]" + pure $ TzRule TzRuleM m n d 0 + +----------------------------------------------------------------- +-- Parser + +-- | Parser +-- +-- This is a distilled version of attoparsec's Parser as we are not allowed to +-- depend on attoparsec itself. +newtype Parser a = Parser { runParser :: forall r. ByteString + -> Int + -> Failure r + -> Success a r + -> Result r + } + +type Result r = Either String r +type Failure a = ByteString -> Int -> String -> Result a +type Success a r = ByteString -> Int -> a -> Result r + +instance Functor Parser where + fmap f (Parser p) = Parser $ \s pos lose succ -> + let succ' s' pos' a = succ s' pos' (f a) + in p s pos lose succ' + {-# INLINE fmap #-} + +instance Applicative Parser where + pure x = Parser $ \s pos _lose succ -> succ s pos x + {-# INLINE pure #-} + + ff <*> fa = ff >>= (<$> fa) + {-# INLINE (<*>) #-} + +instance Monad Parser where +#if !(MIN_VERSION_base(4,13,0)) + fail = Fail.fail + {-# INLINE fail #-} +#endif + m >>= k = Parser $ \s pos lose succ -> + let succ' s' pos' a = runParser (k a) s' pos' lose succ + in runParser m s pos lose succ' + {-# INLINE (>>=) #-} + +instance MonadFail Parser where + fail err = Parser $ \s pos lose _succ -> lose s pos err + {-# INLINE fail #-} + +instance Alternative Parser where + empty = fail "empty" + {-# INLINE empty #-} + + a <|> b = Parser $ \s pos lose succ -> + let lose' s' pos' _msg = runParser b s' pos' lose succ + in runParser a s pos lose' succ + {-# INLINE (<|>) #-} + +successK :: Success a a +successK _s _pos = Right +{-# INLINE successK #-} + +failureK :: Failure a +failureK _s _pos = Left +{-# INLINE failureK #-} + +parse :: Parser a -> ByteString -> Either String a +parse p s = runParser p s 0 failureK successK +{-# INLINE parse #-} + + +--------------------------------------------------------------- +-- Utilities + +option :: a -> Parser a -> Parser a +option a p = p <|> pure a +{-# INLINE option #-} + +decimal :: Int -> Parser Int +decimal n = B.foldl' step 0 `fmap` takeWhile1n n isDigit + where + step a w = a * 10 + fromIntegral (w - 48) + isDigit c = c >= '0' && c <= '9' +{-# INLINE decimal #-} + +signed :: Int -> Parser Int +signed n = (negate <$> (char '-' *> decimal n)) + <|> (char '+' *> decimal n) + <|> decimal n +{-# INLINE signed #-} + +endOfInput :: Parser () +endOfInput = Parser $ \s pos lose succ -> + if pos >= B.length s + then succ s pos () + else lose s pos "endOfInput: expected end of input" +{-# INLINE endOfInput #-} + +lookupChar :: Parser Char +lookupChar = do + ensurebytes 1 + Parser $ \s pos _lose succ -> + succ s pos $ B8.index s pos +{-# INLINE lookupChar #-} + +ensurebytes :: Int -> Parser () +ensurebytes n = Parser $ \s pos lose succ -> + if pos + n <= B.length s + then succ s pos () + else lose s pos "unexpected end of input" +{-# INLINE ensurebytes #-} + +char :: Char -> Parser Char +char w = do + ensurebytes 1 + Parser $ \s pos lose succ -> + let x = B8.index s pos + in if x == w + then succ s (pos + 1) x + else lose s pos "char: no match" +{-# INLINE char #-} + +takeWhile1n :: Int -> (Char -> Bool) -> Parser ByteString +takeWhile1n n0 predicate = Parser $ \s pos lose succ -> go n0 [] s pos lose succ + where + go n rs s pos lose succ = + if n > 0 && pos < B.length s + then + let c = B8.index s pos + in if predicate c + then go (n - 1) (c:rs) s (pos + 1) lose succ + else finish + else finish + where + finish = case rs of + [] -> lose s pos "takeWhile1n: predicate not matched" + _ -> succ s pos $ B8.pack $ reverse rs +{-# INLINE takeWhile1n #-} + +-- | Render 'PosixTz' +-- +-- >>> renderPosixTz utcPosixTz +-- "UTC0" +-- +-- Europe/Berlin CET-1CEST,M3.5.0,M10.5.0/3 +-- >>> renderPosixTz (PosixTz (PosixZone "CET" (-3600)) (Just (PosixZone "CEST" (-2*3600), TzRule TzRuleM 3 0 5 7200, TzRule TzRuleM 10 0 5 (3*3600)))) +-- "CET-1CEST,M3.0.5,M10.0.5/3" +-- +-- >>> renderPosixTz (PosixTz (PosixZone "EST" (5*3600)) (Just (PosixZone "EDT" (4*3600), TzRule TzRuleM 3 2 0 (2*3600), TzRule TzRuleM 11 1 0 (2*3600)))) +-- "EST5EDT,M3.2.0,M11.1.0" +-- +-- Australia/Lord_Howe <+1030>-10:30<+11>-11,M10.1.0,M4.1.0 +-- >>> renderPosixTz (PosixTz (PosixZone "<+1030>" (-10*3600 - 1800)) (Just (PosixZone "<+11>" (-11*3600), TzRule TzRuleM 10 1 0 (2*3600), TzRule TzRuleM 4 1 0 (2*3600)))) +-- "<+1030>-10:30<+11>-11,M10.1.0,M4.1.0" +renderPosixTz :: PosixTz -> ByteString +renderPosixTz (PosixTz (PosixZone std stdoff) mdst) = + mconcat + [ std + , renderOffset stdoff + , maybe "" renderDst mdst + ] + where + renderOffset x | x >= 0 = renderHms x + renderOffset x = "-" <> renderHms (-x) + + renderTime x | x >= 0 = renderHms x + renderTime x = "-" <> renderHms (-x) + + renderHms x = + let (h, m, s) = tohms x + in mconcat + [ bshow h + , if m /= 0 || s /= 0 + then ":" <> bshow m + else "" + , if s /= 0 + then ":" <> bshow s + else "" + ] + + tohms x = + let (h, r) = x `divMod` 3600 + (m, s) = r `divMod` 60 + in (h, m, s) + + renderDst (PosixZone dst dstoff, start, end) = + mconcat + [ dst + , if dstoff /= (stdoff - 3600) + then renderOffset dstoff + else "" + , "," + , renderTzRule start + , "," + , renderTzRule end + ] + + renderTzRule (TzRule ty m n d t) = + mconcat + [ case ty of + TzRuleJ -> "J" <> bshow d + TzRuleN -> bshow d + TzRuleM -> "M" <> bshow m <> "." <> bshow n <> "." <> bshow d + , if t /= defaultDstRuleTime then "/" <> renderTime t else "" + ] + + bshow :: (Show a) => a -> ByteString + bshow = B8.pack . show +{-# INLINE renderPosixTz #-} \ No newline at end of file diff --git a/Data/Time/Zones/Read.hs b/Data/Time/Zones/Read.hs index 0294426..95e0a6a 100644 --- a/Data/Time/Zones/Read.hs +++ b/Data/Time/Zones/Read.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} + {- | Module : Data.Time.Zones.Read Copyright : (C) 2014 Mihaly Barasz @@ -5,9 +8,6 @@ License : Apache-2.0, see LICENSE Maintainer : Mihaly Barasz Stability : experimental -} - -{-# LANGUAGE OverloadedStrings #-} - module Data.Time.Zones.Read ( -- * Various ways of loading `TZ` loadTZFromFile, @@ -24,17 +24,18 @@ import Control.Applicative import Control.Exception (assert) import Control.Monad import Data.Binary -import Data.Binary.Get (getByteString, getWord32be, getWord64be, runGet, skip) +import Data.Binary.Get (getByteString, getWord32be, getWord64be, runGet, skip, lookAhead) import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BL +import qualified Data.ByteString.Builder as BB import Data.Maybe import qualified Data.Vector.Unboxed as VU import qualified Data.Vector as VB import Data.Int import Data.Time.Zones.Files +import Data.Time.Zones.Internal.PosixTz ( parsePosixTz ) import Data.Time.Zones.Types -import System.Environment -import System.IO.Error +import Data.Time.Zones.Internal (getEnvMaybe) -- Suppress 'redundant imports' warning import Prelude @@ -85,11 +86,6 @@ loadLocalTZ = do Just "" -> loadSystemTZ "UTC" Just z -> loadSystemTZ z -getEnvMaybe :: String -> IO (Maybe String) -getEnvMaybe var = - fmap Just (getEnv var) `catchIOError` - (\e -> if isDoesNotExistError e then return Nothing else ioError e) - -- | Reads the corresponding file from the time zone database shipped -- with this package. loadTZFromDB :: String -> IO TZ @@ -106,8 +102,9 @@ olsonGet = do () | version == '\0' -> olsonGetWith 4 getTime32 () | version `elem` ['2', '3'] -> do skipOlson0 >> void olsonHeader - olsonGetWith 8 getTime64 - -- TODO(klao): read the rule string + o <- olsonGetWith 8 getTime64 + ptz <- getPosixTz + pure $ o { _tzPosixTz = Just ptz } _ -> fail $ "olsonGet: invalid version character: " ++ show version parseOlson :: BL.ByteString -> TZ @@ -166,7 +163,41 @@ olsonGetWith szTime getTime = do first = head $ filter (not . isDst) lInfos ++ lInfos diffs = VU.map gmtOff eInfos tzInfos = VB.fromListN (VU.length eInfos) $ map isDstName $ VU.toList eInfos - return $ TZ eTransitions diffs tzInfos + return $ TZ eTransitions diffs tzInfos Nothing + +getPosixTz :: Get PosixTz +getPosixTz = do + nlOrFail + bs <- getWhile maxTzBytes (/= 10) + nlOrFail + case parsePosixTz bs of + Right x -> pure x + Left err -> fail err + where + -- Arbitrary limit, we do not expect TZ strings to be larger than this, + -- if an unusually large string is encountered we want to fail early instead + -- of trying to parse it. + maxTzBytes = 512 + +nlOrFail :: Get () +nlOrFail = + getWord8 >>= \case + 10 -> pure () + _ -> fail "expected a newline" + +-- TODO: find a better way? +getWhile :: Int -> (Word8 -> Bool) -> Get BS.ByteString +getWhile maxlen f = + BL.toStrict . BB.toLazyByteString <$> go maxlen + where + go 0 = fail "input too large" + go n = do + w <- lookAhead getWord8 + if f w + then do + x <- BB.word8 <$> getWord8 + (x <>) <$> go (n - 1) + else pure mempty abbrForInd :: Int -> BS.ByteString -> String abbrForInd i = BS.unpack . BS.takeWhile (/= '\0') . BS.drop i diff --git a/Data/Time/Zones/Types.hs b/Data/Time/Zones/Types.hs index 8624e5a..aac0941 100644 --- a/Data/Time/Zones/Types.hs +++ b/Data/Time/Zones/Types.hs @@ -7,18 +7,27 @@ Stability : experimental -} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE MagicHash #-} module Data.Time.Zones.Types ( TZ(..), utcTZ, + PosixTz(..), + utcPosixTz, + PosixZone(..), + TzRule(..), + TzRuleTy(..), ) where import Control.DeepSeq import Data.Data import Data.Default -import Data.Int +import Data.Int ( Int64 ) import qualified Data.Vector as VB import qualified Data.Vector.Unboxed as VU +import qualified Data.ByteString as B + data TZ = TZ { _tzTrans :: !(VU.Vector Int64), @@ -26,7 +35,8 @@ data TZ = TZ { -- TODO(klao): maybe we should store it as a vector of indices and a -- (short) vector of expanded 'TimeZone's, similarly to how it's -- stored? - _tzInfos :: !(VB.Vector (Bool, String)) -- (summer, name) + _tzInfos :: !(VB.Vector (Bool, String)), -- (summer, name) + _tzPosixTz :: !(Maybe PosixTz) -- v1 tzfiles do not contain POSIX-TZ rules } deriving (Eq, Show, Typeable, Data, Read) instance NFData TZ where @@ -34,7 +44,54 @@ instance NFData TZ where -- | The `TZ` definition for UTC. utcTZ :: TZ -utcTZ = TZ (VU.singleton minBound) (VU.singleton 0) (VB.singleton (False, "UTC")) +utcTZ = TZ (VU.singleton minBound) (VU.singleton 0) (VB.singleton (False, "UTC")) (Just utcPosixTz) instance Default TZ where def = utcTZ + +-- | POSIX-TZ variable-type-string data +-- +-- POSIX-TZ-environment-variable-style string is used for handling instants after the last +-- transition time stored in the file or for all instants if the file has no transitions +-- +-- See: +-- http://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html +-- +-- Manual pages: tzfile(5) newtzset(3) +data PosixTz = PosixTz + { _posixTzStd :: !PosixZone + -- ^ std name and offset + , _posixTzDst :: !(Maybe (PosixZone, TzRule, TzRule)) + -- ^ dst name and offset, dst start rule, dst end rule + } deriving (Eq, Show, Data, Read) + +-- | Designation for the standard (std) or summer (dst) time zone together with offset from UTC +data PosixZone = PosixZone + { _pzName :: !B.ByteString + , _pzOffset :: !Int + -- ^ offset in seconds + -- + -- The offset specifies the time value you must add to the local time to get + -- a Coordinated Universal Time value. + } deriving (Eq, Show, Data, Read) + +-- | Indicates when to change to or back from summer time +-- +-- Field interpretation depends on type. +data TzRule = TzRule + { _tzrType :: !TzRuleTy + , _tzrMon :: {-# UNPACK #-} !Int + , _tzrNum :: {-# UNPACK #-} !Int + , _tzrDay :: {-# UNPACK #-} !Int + , _tzrTime :: {-# UNPACK #-} !Int + } deriving (Eq, Show, Data, Read) + +data TzRuleTy + = TzRuleJ -- Jn + | TzRuleN -- n + | TzRuleM -- M.m.w.d + deriving (Eq, Show, Data, Read) + +-- | POSIX-TZ value for UTC (zero offset, no DST) +utcPosixTz :: PosixTz +utcPosixTz = PosixTz (PosixZone "UTC" 0) Nothing diff --git a/tests/testTZ.hs b/tests/testTZ.hs index f28636d..7d93694 100644 --- a/tests/testTZ.hs +++ b/tests/testTZ.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-missing-signatures #-} @@ -5,6 +6,7 @@ module Main (main) where import Data.Time import Data.Time.Zones +import Data.Time.Zones.Types import Test.Framework.Providers.HUnit import Test.Framework.TH import Test.HUnit hiding (Test, assert) @@ -70,5 +72,53 @@ case_Paris_diffForAbbr = do diffForAbbr tz "LMT" @?= Just 561 diffForAbbr tz "XYZ" @?= Nothing +case_Berlin_hasTzRule = do + tz <- loadTZFromDB "Europe/Berlin" + _tzPosixTz tz @?= Just ptz + where + h = 3600 + ptz = PosixTz + { _posixTzStd = PosixZone "CET" (-h) + , _posixTzDst = Just ( PosixZone "CEST" (-2*h) + , TzRule TzRuleM 3 5 0 (2*h) + , TzRule TzRuleM 10 5 0 (3*h) + ) + } + +case_LosAngeles_tzrule = do + tz <- loadTZFromDB "America/Los_Angeles" + + -- pst -> pdt + pst @?= timeZoneForUTCTime tz (mkUTC 2077 3 14 9 59 59) + pdt @?= timeZoneForUTCTime tz (mkUTC 2077 3 14 10 0 0) + + -- pdt -> pst + pdt @?= timeZoneForUTCTime tz (mkUTC 2077 11 7 8 59 59) + pst @?= timeZoneForUTCTime tz (mkUTC 2077 11 7 9 0 0) + where + pst = TimeZone (-8*60) False "PST" + pdt = TimeZone (-7*60) True "PDT" + +case_Sydney_tzrule = do + tz <- loadTZFromDB "Australia/Sydney" + + -- aedt -> aest + aedt @?= timeZoneForUTCTime tz (mkUTC 2077 4 3 15 59 59) + aest @?= timeZoneForUTCTime tz (mkUTC 2077 4 3 16 0 0) + + -- aest -> aedt + aest @?= timeZoneForUTCTime tz (mkUTC 2077 10 2 15 59 59) + aedt @?= timeZoneForUTCTime tz (mkUTC 2077 10 2 16 0 0) + where + aest = TimeZone (10*60) False "AEST" + aedt = TimeZone (11*60) True "AEDT" + +-- produced IDT due to bug in monthToSecs +-- rule: IST-2IDT,M3.4.4/26,M10.5.0 +case_ptz_regression_1 = do + tz <- loadSystemTZ "Asia/Jerusalem" + timeZoneForPOSIX tz 252399456000 @?= TimeZone 120 False "IST" -- 9968-03-22 00:00:00 UTC + + main :: IO () main = $defaultMainGenerator diff --git a/tests/testTZSys.hs b/tests/testTZSys.hs index 5bda7a0..36ee280 100644 --- a/tests/testTZSys.hs +++ b/tests/testTZSys.hs @@ -3,14 +3,19 @@ module Main (main) where -import Data.Bits import Data.IORef import Data.Int +import Data.Foldable (for_) +import Data.Maybe (isJust) import Data.Time import Data.Time.Clock.POSIX import Data.Time.Zones +import Data.Time.Zones.Read +import Data.Time.Zones.Types import System.Environment +import System.FilePath.Find import System.IO.Unsafe +import Test.Framework.Providers.HUnit import Test.Framework.Providers.QuickCheck2 import Test.Framework.TH import Test.HUnit hiding (Test, assert) @@ -43,21 +48,32 @@ onceIO op = opWrap -- On the Int32 range of POSIX times we should replicate the behavior -- perfectly. -- --- * After year 2038 we normally run into a range where the --- envvar-like "rule" part of the TZif should be interpreted, which we --- don't do yet. +-- * After year 2038 we run into a range where the envvar-like "rule" +-- part of the TZif is interpreted. However, above around 2^47 +-- glibc starts producing wrong results (tested on a system with +-- version 2.32): +-- +-- % env TZ=America/Los_Angeles date -d '5881580-7-1 00:00 UTC' +-- Mon Jun 30 05:00:00 PM PDT 5881580 +-- % env TZ=America/Los_Angeles date -d '5881581-7-1 00:00 UTC' +-- Tue Jun 30 04:00:00 PM PST 5881581 +-- +-- > logBase 2 $ fromIntegral $ utcTimeToInt64 $ UTCTime (fromGregorian 5881581 7 1) 0 +-- 47.398743929757934 -- -- * And below around -2^55 the localtime_r C function starts failing -- with "value too large". checkTimeZone64 :: String -> Property -checkTimeZone64 zoneName = prop +checkTimeZone64 zoneName = withMaxSuccess 1000 prop where setup = onceIO $ setupTZ zoneName - two31 = 2147483647 + two31 = 2^(31 :: Int) - 1 + two47 = 2^(47 :: Int) + two55 = 2^(55 :: Int) prop = monadicIO $ do - tz <- run $ setup + tz <- run setup ut <- pick $ oneof [arbitrary, choose (-two31, two31)] - pre $ ut < two31 && ut > -(1 `shiftL` 55) + pre $ ut < two47 && ut > -two55 -- This is important. On 32 bit machines we want to limit -- testing to the Int range. pre $ ut > fromIntegral (minBound :: Int) @@ -67,9 +83,9 @@ checkTimeZone64 zoneName = prop -- On the Int32 range of POSIX times we should mostly replicate the -- behavior. -- --- * After year 2038 we normally run into a range where the --- envvar-like "rule" part of the TZif should be interpreted, which we --- don't do yet. +-- * After year 2038 we run into a range where the envvar-like "rule" +-- part of the TZif is interpreted but above around 2^47 glibc starts +-- returning wrong results. -- -- * And the very first time diff in most of the TZif files is usually -- the "Local Mean Time", which is generally a fractional number of @@ -78,19 +94,47 @@ checkTimeZone64 zoneName = prop -- around 1900, which happens to be less than -2^31 POSIX time. But -- in some locations this transition falls within the Int32 range -- (eg. China), so we can supply another lower bound. -checkLocalTime :: String -> Maybe Int32 -> Int32 -> Property -checkLocalTime zoneName mLower = prop +-- +-- Warning: seems to leak memory, be careful running for more than +-- 100k successes. +checkLocalTime :: String -> Maybe Int32 -> Int64 -> Property +checkLocalTime zoneName mLower = withMaxSuccess 1000 . prop where + two31 = 2^(31 :: Int) + two47 = 2^(47 :: Int) setup = onceIO $ setupTZ zoneName prop ut = monadicIO $ do + pre $ ut < two47 case mLower of - Nothing -> return () - Just lower -> pre $ ut > lower - tz <- run $ setup + Nothing -> pre $ ut > -two31 + Just lower -> pre $ ut > fromIntegral lower + tz <- run setup let utcTime = posixSecondsToUTCTime $ fromIntegral ut timeZone <- run $ getTimeZone utcTime run $ utcToLocalTimeTZ tz utcTime @?= utcToLocalTime timeZone utcTime +-- | Check if all zone files in system directory parse +-- +-- This is useful to verify parsing but overkill to run on every +-- test or install so first parameter says if we want to run or not. +-- +-- This can fail if any of the files in listed dirs is in v1 format +-- which does not have POSIX-TZ strings. +checkAllParse :: IO () +checkAllParse = do + tzdir <- pathForSystemTZ "" + for_ dirs $ \dir -> do + let curdir = tzdir <> "/" <> dir + xs <- find always (fileType ==? RegularFile) curdir + for_ xs $ \p -> do + tz <- loadTZFromFile p + assertBool ("missing POSIX-TZ (v1 file) or parsing failure: " <> p) (isJust $ _tzPosixTz tz) + where + dirs = [ "Africa", "America", "Antarctica", "Arctic" + , "Asia", "Atlantic", "Australia", "Brazil" + , "Canada", "Chile", "Etc", "Europe", "Indian" + , "Mexico", "Pacific", "US" + ] prop_Budapest_correct_TimeZone = checkTimeZone64 "Europe/Budapest" prop_New_York_correct_TimeZone = checkTimeZone64 "America/New_York" @@ -108,5 +152,8 @@ prop_Jerusalem_correct_LocalTime = checkLocalTime "Asia/Jerusalem" $ Just (-1641 prop_Antarctica_Palmer_correct_LocalTime = checkLocalTime "Antarctica/Palmer" Nothing prop_Melbourne_correct_LocalTime = checkLocalTime "Australia/Melbourne" Nothing +-- disabled by default +-- case_All_parse = checkAllParse True + main :: IO () main = $defaultMainGenerator diff --git a/tz.cabal b/tz.cabal index ca30f29..0780e7b 100644 --- a/tz.cabal +++ b/tz.cabal @@ -46,6 +46,7 @@ Library Data.Time.Zones.Read, Data.Time.Zones.All, Data.Time.Zones.Internal + Data.Time.Zones.Internal.PosixTz Default-Language: Haskell2010 GHC-Options: -Wall Build-Depends: @@ -93,9 +94,11 @@ Test-Suite testsSys Build-Depends: tz, base >= 4 && < 5, + filemanip, HUnit >= 1.2 && < 1.7, QuickCheck >= 2.4 && < 3, test-framework >= 0.4 && < 1, + test-framework-hunit >= 0.2 && < 0.4, test-framework-quickcheck2 >= 0.2 && < 0.4, test-framework-th >= 0.2 && < 0.4, time >= 1.2 && < 1.10