2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -169,7 +169,7 @@ jobs:
run: echo CABAL_CONFIG_FLAGS="$CABAL_CONFIG_FLAGS --index-state=${{ matrix.index-state }}" >>"$GITHUB_ENV"

- name: Install hsthrift and Glean dependencies
run: apt-get install -y pkg-config rsync libgmock-dev libpcre3-dev libtinfo-dev libxxhash-dev
run: apt-get install -y pkg-config rsync libgmock-dev libpcre3-dev libtinfo-dev libxxhash-dev squashfs-tools

- name: Build hsthrift and Glean
run: make
4 changes: 4 additions & 0 deletions cabal.project
@@ -12,6 +12,7 @@ packages:
glean.cabal
glean/lang/clang/glean-clang.cabal
glean/lsp
glean/lmdb-clib

optional-packages:
hsthrift/folly-clib/folly-clib.cabal
@@ -52,6 +53,9 @@ package thrift-http
package glean
flags: -opt

package lmdb-clib
flags: -opt

package fb-util
flags: +folly

19 changes: 19 additions & 0 deletions glean.cabal.in
@@ -422,6 +422,23 @@ library rocksdb
glean:storage,
glean:rts,

library lmdb
import: fb-haskell, fb-cpp, deps, folly
visibility: private
CXX_LIB_glean_cpp_lmdb
include-dirs: .
install-includes:
glean/lmdb/container-impl.h
glean/lmdb/database-impl.h
glean/lmdb/ffi.h
glean/lmdb/glean_lmdb.h
glean/lmdb/util.h
pkgconfig-depends: fmt
build-depends:
glean:storage,
glean:rts,
lmdb-clib,

library util
import: fb-haskell, fb-cpp, deps, thrift-client
visibility: public
@@ -606,6 +623,7 @@ library db
Glean.Database.Storage.Memory
Glean.Database.Storage.DB
Glean.Database.Storage.RocksDB
Glean.Database.Storage.LMDB
Glean.Database.Trace
Glean.Database.Types
Glean.Database.Validate
@@ -660,6 +678,7 @@ library db
glean:rts,
glean:stubs,
glean:rocksdb,
glean:lmdb,

-- Backend API, and a few things built on top
library backend-api
24 changes: 14 additions & 10 deletions glean/db/Glean/Database/Backup.hs
@@ -60,6 +60,7 @@ import Glean.Database.Open
import Glean.Database.Trace
import Glean.Database.Types
import Glean.Database.Schema
import Glean.Database.Storage
import Glean.Logger
import Glean.RTS.Foreign.Ownership (getOwnershipStats, showOwnershipStats)
import Glean.ServerConfig.Types (DatabaseBackupPolicy(..))
@@ -241,16 +242,17 @@ doBackup env@Env{..} repo prefix site =
hasExcludeProperty repo (metaProperties meta) config_retention
atomically $ notify envListener $ BackupStarted repo
say logInfo "starting"
withOpenDatabaseStorage env repo $ \_storage OpenDB{..} -> do
withOpenDatabase env repo $ \OpenDB{..} -> do
say logInfo "packing"
stats <- mapMaybe
(\(pid,stats) -> (,stats) . predicateRef <$> lookupPid pid odbSchema)
<$> Storage.predicateStats odbHandle
ownershipStats <- do
maybeOwnership <- readTVarIO odbOwnership
mapM getOwnershipStats maybeOwnership
withStorageFor env repo meta $ \storage -> do

Backend.Data{..} <- withScratchDirectory envStorage repo $ \scratch ->
Backend.Data{..} <- withScratchDirectory storage repo $ \scratch ->
Storage.backup odbHandle scratch $ \path Data{dataSize} -> do
say logInfo "uploading"
let policy = ServerConfig.databaseBackupPolicy_repos config_backup
@@ -313,7 +315,8 @@ doRestore env@Env{..} repo meta
case config_restore_timeout of
Just seconds -> void . timeout (fromIntegral seconds * 1000000)
Nothing -> id
maybeTimeout $ restore site size `catch` handler
withStorageFor env repo meta $ \storage ->
maybeTimeout $ restore site storage size `catch` handler storage

-- NOTE: No point in adding the repo to the sinbin if there was
-- an exception, the handler removed it from the list of known DBs
@@ -330,10 +333,10 @@
where
say log s = log $ inRepo repo $ "restore: " ++ s

restore :: Site s => s -> Maybe Int64 -> IO ()
restore site bytes = traceMsg envTracer (GleanTraceDownload repo) $ do
restore :: (Storage st, Site s) => s -> st -> Maybe Int64 -> IO ()
restore site storage bytes = traceMsg envTracer (GleanTraceDownload repo) $ do
atomically $ notify envListener $ RestoreStarted repo
mbFreeBytes <- (Just <$> Storage.getFreeCapacity envStorage)
mbFreeBytes <- (Just <$> Storage.getFreeCapacity storage)
`catch` \(_ :: IOException) -> return Nothing
case (mbFreeBytes, bytes) of
(Just freeBytes, Just size) -> do
@@ -345,7 +348,7 @@ doRestore env@Env{..} repo meta
neededBytes freeBytes
_ -> return ()

withScratchDirectory envStorage repo $ \scratch -> do
withScratchDirectory storage repo $ \scratch -> do
say logInfo "starting"
say logInfo "downloading"
let scratch_restore = scratch </> "restore"
@@ -356,14 +359,15 @@ doRestore env@Env{..} repo meta
say logInfo "restoring"
createDirectoryIfMissing True scratch_restore
traceMsg envTracer GleanTraceStorageRestore $
Storage.restore envStorage repo scratch_restore scratch_file
Storage.restore storage repo scratch_restore scratch_file
say logInfo "adding"
traceMsg envTracer GleanTraceFinishRestore $
Catalog.finishRestoring envCatalog repo
atomically $ notify envListener $ RestoreFinished repo
say logInfo "finished"

handler exc = do
handler :: Storage s => s -> SomeException -> IO ()
handler storage exc = do
failed <- atomically $ do
failed <- Catalog.exists envCatalog [Restoring] repo
when failed $ do
@@ -372,7 +376,7 @@ doRestore env@Env{..} repo meta
return failed
when failed $ do
say logError $ "failed: " ++ show exc
swallow $ Storage.safeRemoveForcibly envStorage repo
swallow $ Storage.safeRemoveForcibly storage repo
rethrowAsync exc


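The backup and restore paths above stop using the single envStorage field and instead resolve a backend per database with withStorageFor, whose definition is not part of this diff. The self-contained sketch below shows the kind of lookup it presumably performs, taking the backend map directly rather than the server Env; Storage, StorageName, SomeStorage and Meta here are illustrative stand-ins, not Glean's real types.

    {-# LANGUAGE ExistentialQuantification #-}
    {-# LANGUAGE RankNTypes #-}
    module StorageLookupSketch where

    import qualified Data.Map.Strict as Map

    -- Illustrative stand-ins; the real Storage class, StorageName and Meta
    -- live elsewhere in the tree and carry much more structure.
    class Storage s where
      backendName :: s -> String

    newtype StorageName = StorageName String
      deriving (Eq, Ord, Show)

    data SomeStorage = forall s. Storage s => SomeStorage s

    newtype Meta = Meta { metaStorage :: StorageName }

    -- Resolve the backend recorded in a DB's metadata against the map of
    -- configured backends and hand it to the continuation.
    withStorageFor
      :: Map.Map StorageName SomeStorage
      -> Meta
      -> (forall s. Storage s => s -> IO a)
      -> IO a
    withStorageFor storages meta k =
      case Map.lookup (metaStorage meta) storages of
        Just (SomeStorage s) -> k s
        Nothing -> ioError $ userError $
          "no storage backend named " ++ show (metaStorage meta)

Because the backend is chosen from the DB's own metadata, a server configured with both RocksDB and LMDB can, in principle, back up and restore databases created under either backend.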
6 changes: 3 additions & 3 deletions glean/db/Glean/Database/Close.hs
@@ -39,7 +39,7 @@ closeDatabases env@Env{..} = do
dbs <- readTVarIO envActive
forM_ (HashMap.keys dbs) $ closeDatabase env

isIdle :: (TimePoint -> Bool) -> DB s -> OpenDB s -> STM Bool
isIdle :: (TimePoint -> Bool) -> DB -> OpenDB -> STM Bool
isIdle long_enough db odb = and <$> sequence
[ (== 1) <$> readTVar (dbUsers db) -- we are the only user
, long_enough <$> readTVar (odbIdleSince odb)
@@ -52,7 +52,7 @@ isIdle long_enough db odb = and <$> sequence
]

closeIf
:: (forall s . DB s -> DBState s -> STM (Maybe (OpenDB s)))
:: (DB -> DBState -> STM (Maybe OpenDB))
-> Env
-> Repo
-> IO ()
@@ -132,7 +132,7 @@ exportOpenDBStats Env{..} = do
forM_ (HashMap.toList repoOpenCounts) $ \(repoNm,count) -> do
setCounter ("glean.db." <> Text.encodeUtf8 repoNm <> ".open") count

closeOpenDB :: Storage.Storage s => Env -> OpenDB s -> IO ()
closeOpenDB :: Env -> OpenDB -> IO ()
closeOpenDB env OpenDB{..} = do
case odbWriting of
Just Writing{..} -> do
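DB and OpenDB lose their storage type parameter in this file because the concrete backend is now hidden behind an existential wrapper, matching the Some Storage values that Config.hs builds below. A minimal sketch of that erasure pattern, again with illustrative toy types rather than the real DB and OpenDB records:

    {-# LANGUAGE ExistentialQuantification #-}
    module ErasedStorageSketch where

    -- Toy Storage class: just enough to show why callers such as
    -- closeOpenDB no longer need a type parameter.
    class Storage s where
      closeStorage :: s -> IO ()

    data SomeStorage = forall s. Storage s => SomeStorage s

    -- Before: OpenDB s carried the backend type; after: it is erased.
    newtype OpenDB = OpenDB { odbHandle :: SomeStorage }

    closeOpenDB :: OpenDB -> IO ()
    closeOpenDB (OpenDB (SomeStorage h)) = closeStorage h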
62 changes: 45 additions & 17 deletions glean/db/Glean/Database/Config.hs
@@ -10,9 +10,12 @@
module Glean.Database.Config (
-- * DataStore
DataStore(..),
StorageName,
fileDataStore,
tmpDataStore,
memoryDataStore,
rocksdbName,
lmdbName,

-- * Config, and options parser
options,
@@ -82,8 +85,10 @@ import Glean.Database.Schema.ComputeIds
import Glean.Database.Storage
import qualified Glean.Database.Storage.Memory as Memory
import qualified Glean.Database.Storage.RocksDB as RocksDB
import qualified Glean.Database.Storage.LMDB as LMDB
import Glean.Database.Trace
import qualified Glean.Internal.Types as Internal
import Glean.Internal.Types (StorageName(..))
import Glean.DefaultConfigs
import Glean.Logger.Database
import Glean.Logger.Server
@@ -104,36 +109,54 @@ import Paths_glean
#endif

data DataStore = DataStore
{ withDataStore
:: forall a. ServerConfig.Config
-> (forall c s. (Catalog.Store c, Storage s) => c -> s -> IO a)
-> IO a
{ withStorage ::
-- setup the storage backend. Must be scoped, because it might
-- involve creating temporary resources.
forall a.
ServerConfig.Config ->
((HashMap StorageName (Some Storage), Some Catalog.Store) -> IO a) ->
IO a
, defaultStorage :: StorageName
, dataStoreTag :: String
}

rocksdbName, lmdbName, memoryName :: StorageName
rocksdbName = StorageName "rocksdb"
lmdbName = StorageName "lmdb"
memoryName = StorageName "memory"

fileDataStore :: FilePath -> DataStore
fileDataStore path = DataStore
{ withDataStore = \scfg f -> do
{ withStorage = \scfg f -> do
rocksdb <- RocksDB.newStorage path scfg
f (Catalog.fileCatalog path) rocksdb
, dataStoreTag = "rocksdb:" <> path
lmdb <- LMDB.newStorage path scfg
f (
HashMap.fromList
[ (rocksdbName, Some rocksdb),
(lmdbName, Some lmdb)
],
Some (Catalog.fileCatalog path)
)
, defaultStorage = rocksdbName
, dataStoreTag = "db:" <> path
}

tmpDataStore :: DataStore
tmpDataStore = DataStore
{ withDataStore = \scfg f -> withSystemTempDirectory "glean" $ \tmp -> do
{ withStorage = \scfg f -> withSystemTempDirectory "glean" $ \tmp -> do
logInfo $ "Storing temporary DBs in " <> tmp
rocksdb <- RocksDB.newStorage tmp scfg
f (Catalog.fileCatalog tmp) rocksdb
, dataStoreTag = "rocksdb:{TMP}"
withStorage (fileDataStore tmp) scfg f
, defaultStorage = rocksdbName
, dataStoreTag = dataStoreTag (fileDataStore "<tmp>")
}

memoryDataStore :: DataStore
memoryDataStore = DataStore
{ withDataStore = \_ f -> do
{ withStorage = \_ f -> do
cat <- Catalog.memoryCatalog
mem <- Memory.newStorage
f cat mem
f (HashMap.fromList [(memoryName, Some mem)], Some cat)
, defaultStorage = memoryName
, dataStoreTag = "memory"
}

@@ -488,10 +511,15 @@ schemaLocationOption = option (eitherReader schemaLocationParser)
options :: Parser Config
options = do
let
dbRoot = fileDataStore <$> strOption (
long "db-root" <>
metavar "DIR" <>
help "Directory containing databases")
dbRoot = do
path <- strOption (
long "db-root" <>
metavar "DIR" <>
help "Directory containing databases")
lmdb <- switch (long "lmdb")
pure $
(if lmdb then \s -> s { defaultStorage = lmdbName } else id) $
fileDataStore path
dbTmp = tmpDataStore <$ flag' () (
long "db-tmp" <>
help "Store databases in a temporary directory (default)")
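The reshaped DataStore record is the core of the change: withStorage now hands back every configured backend keyed by StorageName, and defaultStorage names the backend newly created DBs should use (RocksDB unless --lmdb is passed). The reduced sketch below shows that shape and one way a caller might consume it; the catalog half of the real pair and all ServerConfig details are dropped, and every type here is an illustrative stand-in (Data.Map in place of the HashMap used above).

    {-# LANGUAGE ExistentialQuantification #-}
    {-# LANGUAGE RankNTypes #-}
    module DataStoreSketch where

    import qualified Data.Map.Strict as Map

    class Storage s
    data SomeStorage = forall s. Storage s => SomeStorage s
    newtype StorageName = StorageName String deriving (Eq, Ord, Show)
    data Config = Config

    -- One scoped setup action yielding all backends by name, plus the
    -- name that newly created DBs default to.
    data DataStore = DataStore
      { withStorage
          :: forall a. Config
          -> (Map.Map StorageName SomeStorage -> IO a)
          -> IO a
      , defaultStorage :: StorageName
      }

    -- A server start-up might look up the default backend like this.
    checkDefault :: DataStore -> Config -> IO ()
    checkDefault ds cfg =
      withStorage ds cfg $ \storages ->
        case Map.lookup (defaultStorage ds) storages of
          Just (SomeStorage _) -> putStrLn "default backend configured"
          Nothing -> putStrLn "default backend missing"

Keeping the setup in continuation-passing style matters for tmpDataStore, which has to tear down its temporary directory when the scope ends.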
21 changes: 10 additions & 11 deletions glean/db/Glean/Database/Create.hs
@@ -115,29 +115,28 @@ kickOffDatabase env@Env{..} kickOff@Thrift.KickOff{..}
handle
(\Catalog.EntryAlreadyExists{} ->
return $ Thrift.KickOffResponse True) $
mask $ \unmask ->
mask $ \unmask -> do
-- FIXME: There is a tiny race here where we might fail in a weird way
-- if kick off a DB that is being deleted after it got removed from
-- the Catalog but before it got removed from the storage. The entire
-- concept of deleting DBs will change with the new metadata handling so
-- it's not worth fixing at this point, especially since we aren't
-- supposed to be kicking off DBs we've previously deleted.
let meta = newMeta envDefaultStorage version time
(Incomplete def) allProps
(lightDeps kickOff_dependencies')
bracket_
(Catalog.create
envCatalog
kickOff_repo
(newMeta version time (Incomplete def) allProps
(lightDeps kickOff_dependencies')) $ do
modifyTVar' envActive $ HashMap.insert kickOff_repo db
writeTVar (dbState db) Opening
acquireDB db)
(Catalog.create envCatalog kickOff_repo meta $ do
modifyTVar' envActive $ HashMap.insert kickOff_repo db
writeTVar (dbState db) Opening
acquireDB db)
(atomically $ releaseDB envCatalog envActive db) $
do
withStorageFor env kickOff_repo meta $ \storage -> do
-- Open the new db in Create mode which will create the
-- physical storage. This might fail - in that case, we
-- mark the db as failed. NB. pass the full dependencies
-- here, not lightDeps.
opener <- asyncOpenDB env envStorage db version mode
opener <- asyncOpenDB env storage db version mode
kickOff_dependencies'
(do
logInfo $ inRepo kickOff_repo "created")
14 changes: 7 additions & 7 deletions glean/db/Glean/Database/Data.hs
@@ -22,7 +22,7 @@ import Data.ByteString.Lazy (toStrict, fromStrict)
import Thrift.Protocol.Compact

import Glean.Database.Exception
import Glean.Database.Storage (Storage, Database)
import Glean.Database.Storage (DatabaseOps)
import qualified Glean.Database.Storage as Storage
import Glean.RTS.Foreign.Ownership
import Glean.Types (Repo)
@@ -39,21 +39,21 @@
sLICES_KEY :: ByteString
sLICES_KEY = "slices"

storeSchema :: Storage s => Database s -> StoredSchema -> IO ()
storeSchema :: DatabaseOps db => db -> StoredSchema -> IO ()
storeSchema db = Storage.store db sCHEMA_KEY . serializeCompact

retrieveSchema :: Storage s => Repo -> Database s -> IO (Maybe StoredSchema)
retrieveSchema :: DatabaseOps db => Repo -> db -> IO (Maybe StoredSchema)
retrieveSchema repo db = do
value <- Storage.retrieve db sCHEMA_KEY
case deserializeCompact <$> value of
Just (Right info) -> return $ Just info
Just (Left msg) -> dbError repo $ "invalid schema: " ++ msg
Nothing -> return Nothing

storeUnits :: Storage s => Database s -> [ByteString] -> IO ()
storeUnits :: DatabaseOps db => db -> [ByteString] -> IO ()
storeUnits db = Storage.store db uNITS_KEY . toStrict . encode

retrieveUnits :: Storage s => Repo -> Database s -> IO (Maybe [ByteString])
retrieveUnits :: DatabaseOps db => Repo -> db -> IO (Maybe [ByteString])
retrieveUnits repo db = do
value <- Storage.retrieve db uNITS_KEY
case decodeOrFail . fromStrict <$> value of
@@ -64,12 +64,12 @@ retrieveUnits repo db = do
-- Slices are each serialized using the RTS Slice::serialize(), and then
-- the list of serialized slices :: [ByteString] is serialized with
-- the Haskell Binary encoder.
storeSlices :: Storage s => Database s -> [Slice] -> IO ()
storeSlices :: DatabaseOps db => db -> [Slice] -> IO ()
storeSlices db slices = do
bytestrings <- mapM serializeSlice slices
Storage.store db sLICES_KEY $ toStrict $ encode bytestrings

retrieveSlices :: Storage s => Repo -> Database s -> IO (Maybe [Slice])
retrieveSlices :: DatabaseOps db => Repo -> db -> IO (Maybe [Slice])
retrieveSlices repo db = do
value <- Storage.retrieve db sLICES_KEY
case decodeOrFail . fromStrict <$> value of
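The helpers in this file only need a small key-value surface, which is why their constraints shrink from the Storage/Database pair to a DatabaseOps class. That class is defined outside this diff; the sketch below captures the interface these helpers appear to rely on, with a toy in-memory instance for illustration (names and signatures are assumptions, not the real API).

    module DatabaseOpsSketch where

    import Data.ByteString (ByteString)
    import Data.IORef
    import qualified Data.Map.Strict as Map

    -- Minimal key-value operations that storeSchema, retrieveSchema,
    -- storeUnits and storeSlices need from a database handle.
    class DatabaseOps db where
      store    :: db -> ByteString -> ByteString -> IO ()
      retrieve :: db -> ByteString -> IO (Maybe ByteString)

    -- Toy in-memory instance, enough to exercise the helpers in a test.
    newtype MemDB = MemDB (IORef (Map.Map ByteString ByteString))

    newMemDB :: IO MemDB
    newMemDB = MemDB <$> newIORef Map.empty

    instance DatabaseOps MemDB where
      store (MemDB ref) key val = modifyIORef' ref (Map.insert key val)
      retrieve (MemDB ref) key  = Map.lookup key <$> readIORef ref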