Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ jobs:
run: echo CABAL_CONFIG_FLAGS="$CABAL_CONFIG_FLAGS --index-state=${{ matrix.index-state }}" >>"$GITHUB_ENV"

- name: Install hsthrift and Glean dependencies
run: apt-get install -y pkg-config rsync libgmock-dev libpcre3-dev libtinfo-dev libxxhash-dev
run: apt-get install -y pkg-config rsync libgmock-dev libpcre3-dev libtinfo-dev libxxhash-dev squashfs-tools

- name: Build hsthrift and Glean
run: make
Expand Down
4 changes: 4 additions & 0 deletions cabal.project
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ packages:
glean.cabal
glean/lang/clang/glean-clang.cabal
glean/lsp
glean/lmdb-clib

optional-packages:
hsthrift/folly-clib/folly-clib.cabal
Expand Down Expand Up @@ -52,6 +53,9 @@ package thrift-http
package glean
flags: -opt

package lmdb-clib
flags: -opt

package fb-util
flags: +folly

Expand Down
20 changes: 20 additions & 0 deletions glean.cabal.in
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,23 @@ library rocksdb
glean:storage,
glean:rts,

library lmdb
import: fb-haskell, fb-cpp, deps, folly
visibility: private
CXX_LIB_glean_cpp_lmdb
include-dirs: .
install-includes:
glean/lmdb/container-impl.h
glean/lmdb/database-impl.h
glean/lmdb/ffi.h
glean/lmdb/glean_lmdb.h
glean/lmdb/util.h
pkgconfig-depends: fmt
build-depends:
glean:storage,
glean:rts,
lmdb-clib,

library util
import: fb-haskell, fb-cpp, deps, thrift-client
visibility: public
Expand Down Expand Up @@ -611,6 +628,7 @@ library db
Glean.Database.Storage.Memory
Glean.Database.Storage.DB
Glean.Database.Storage.RocksDB
Glean.Database.Storage.LMDB
Glean.Database.Trace
Glean.Database.Types
Glean.Database.Validate
Expand Down Expand Up @@ -665,6 +683,7 @@ library db
glean:rts,
glean:stubs,
glean:rocksdb,
glean:lmdb,

-- Backend API, and a few things built on top
library backend-api
Expand Down Expand Up @@ -1878,6 +1897,7 @@ library bench-lib
BenchDB
build-depends:
glean:backend-api,
glean:bench-util,
glean:client-hs,
glean:core,
glean:db,
Expand Down
17 changes: 14 additions & 3 deletions glean/bench/BenchDB.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import qualified Data.Text as Text

import Glean
import Glean.Backend.Types (loadPredicates)
import Glean.Database.Test (withEmptyTestDB, completeTestDB)
import Glean.Database.Test
import Glean.Database.Open
import Glean.Database.Write.Batch
import qualified Glean.Schema.Sys.Types as Sys
Expand All @@ -26,9 +26,20 @@ import qualified Glean.Schema.Sys as Sys
import qualified Glean.Schema.Cxx1 as Cxx
import qualified Glean.Schema.GleanTest as Glean.Test
import Glean.Typed
import Glean.Util.Benchmark

withBenchDB :: Int -> (forall b . Backend b => b -> Repo -> IO a) -> IO a
withBenchDB num act = withEmptyTestDB [] $ \env repo -> do
withBenchDB
:: GleanBenchConfig
-> Int
-> (forall b . Backend b => b -> Repo -> IO a)
-> IO a
withBenchDB conf num act = do
let
settings
| useLMDB conf = [setLMDBStorage]
| otherwise = []

withEmptyTestDB (enableRocksDBCache : settings) $ \env repo -> do
withOpenDatabase env repo $ \odb ->
void $ return odb

Expand Down
2 changes: 1 addition & 1 deletion glean/bench/MakeFactBench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import Glean.Util.Benchmark
import Glean.Typed

main :: IO ()
main = benchmarkMain $ \run -> do
main = benchmarkMain $ \_conf run -> do
withEmptyTestDB [] $ \env repo -> do
predicates <- Backend.loadPredicates env repo
[ Cxx.allPredicates
Expand Down
3 changes: 2 additions & 1 deletion glean/bench/QueryBench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ import Glean.Util.Benchmark
import BenchDB

main :: IO ()
main = benchmarkMain $ \run -> withBenchDB 10000 $ \env repo -> do
main = benchmarkMain $ \conf run ->
withBenchDB conf 10000 $ \env repo -> do
let
nestedAngle :: Query Cxx.FunctionName
nestedAngle = angle "cxx1.FunctionName { name = \"x1\" }"
Expand Down
2 changes: 1 addition & 1 deletion glean/bench/RenameBench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import Glean.Util.Benchmark
import TestBatch

main :: IO ()
main = benchmarkMain $ \run ->
main = benchmarkMain $ \_conf run ->
withEmptyTestDB [] $ \env repo -> do
schema <- loadDbSchema env repo
batch <- testBatch 500000 env repo
Expand Down
22 changes: 19 additions & 3 deletions glean/bench/lib/Glean/Util/Benchmark.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,31 @@
module Glean.Util.Benchmark
( BenchmarkRunner
, benchmarkMain
, GleanBenchConfig(..)
) where

import Criterion.Main
import Criterion.Main.Options
import Data.Default
import Options.Applicative

import Glean.Init

type BenchmarkRunner = [Benchmark] -> IO ()

benchmarkMain :: (BenchmarkRunner -> IO ()) -> IO ()
benchmarkMain exec = withOptions (describe defaultConfig) $
exec . runMode
-- | Shared entry point for the Glean benchmarks.
--
-- The command line is parsed (via 'withOptions') into a pair: the
-- Glean-specific 'GleanBenchConfig' produced by 'opts', and the Criterion
-- mode produced by @parseWith defaultConfig@. The supplied action receives
-- the config together with a 'BenchmarkRunner' that runs benchmarks in the
-- parsed Criterion mode.
benchmarkMain :: (GleanBenchConfig -> BenchmarkRunner -> IO ()) -> IO ()
benchmarkMain exec = withOptions cmdLine dispatch
  where
    -- Glean options first, then Criterion's own options.
    cmdLine = describeWith ((,) <$> opts <*> parseWith defaultConfig)

    -- Give the caller its config plus a runner bound to the Criterion mode.
    dispatch (conf, crit) = exec conf (runMode crit)

-- | Glean-specific benchmark settings, parsed from the command line by
-- 'opts' alongside Criterion's own options.
data GleanBenchConfig = GleanBenchConfig
  { useLMDB :: Bool
    -- ^ Set by the @--lmdb@ switch: use LMDB (otherwise use RocksDB).
  }

-- | The default configuration leaves LMDB switched off.
instance Default GleanBenchConfig where
  def = GleanBenchConfig False

-- | Command-line parser for 'GleanBenchConfig'.
--
-- Currently a single switch, @--lmdb@, which populates 'useLMDB'.
opts :: Parser GleanBenchConfig
opts = fmap GleanBenchConfig lmdbSwitch
  where
    lmdbSwitch =
      switch (long "lmdb" <> help "use LMDB (otherwise use RocksDB)")
10 changes: 10 additions & 0 deletions glean/config/server/server_config.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,16 @@ struct Config {
// How much of total memory capacity to use for rocksdb cache.
// Overrides db_rocksdb_cache_mb if set.
38: optional float db_rocksdb_cache_to_mem_ratio;

// Args to pass to mksquashfs when creating a backup of an LMDB database
39: list<string> db_lmdb_mksquashfs_args =
[ "-comp", "zstd", "-Xcompression-level", "8" ];

// LMDB: unpack a restored DB. If false, the DB will be mounted using
// squashfs instead, which is more space-efficient but incurs a small
// runtime cost to decompress the DB on demand. Mounting requires
// squashfs-tools and squashfuse to be installed on Linux.
40: bool db_lmdb_restore_unpack = true;
}

// The following were automatically generated and may benefit from renaming.
Expand Down
21 changes: 14 additions & 7 deletions glean/db/Glean/Database/Backup.hs
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ doBackup env@Env{..} repo prefix site =
say log s = log $ inRepo repo $ "backup: " ++ s

backup = loggingAction (runLogRepo "backup" env repo) (const mempty) $ do
ServerConfig.Config{..} <- Observed.get envServerConfig
cfg@ServerConfig.Config{..} <- Observed.get envServerConfig
meta <- atomically $ Catalog.readMeta envCatalog repo
let excluded =
hasExcludeProperty repo (metaProperties meta) config_retention
Expand All @@ -253,7 +253,7 @@ doBackup env@Env{..} repo prefix site =
withStorageFor env repo meta $ \storage -> do

Backend.Data{..} <- withScratchDirectory storage repo $ \scratch ->
Storage.backup odbHandle scratch $ \path Data{dataSize} -> do
Storage.backup odbHandle cfg scratch $ \path Data{dataSize} -> do
say logInfo "uploading"
let policy = ServerConfig.databaseBackupPolicy_repos config_backup
ttl = case Map.lookup (repo_name repo) policy of
Expand Down Expand Up @@ -310,13 +310,13 @@ doRestore env@Env{..} repo meta
, Just (_, Some site, r_repo) <- fromRepoLocator envBackupBackends loc
, r_repo == repo =
loggingAction (runLogRepo "restore" env repo) (const mempty) $ do
ServerConfig.Config{..} <- Observed.get envServerConfig
cfg@ServerConfig.Config{..} <- Observed.get envServerConfig
let maybeTimeout =
case config_restore_timeout of
Just seconds -> void . timeout (fromIntegral seconds * 1000000)
Nothing -> id
withStorageFor env repo meta $ \storage ->
maybeTimeout $ restore site storage size `catch` handler storage
maybeTimeout $ restore site storage cfg size `catch` handler storage

-- NOTE: No point in adding the repo to the sinbin if there was
-- an exception, the handler removed it from the list of known DBs
Expand All @@ -333,8 +333,15 @@ doRestore env@Env{..} repo meta
where
say log s = log $ inRepo repo $ "restore: " ++ s

restore :: (Storage st, Site s) => s -> st -> Maybe Int64 -> IO ()
restore site storage bytes = traceMsg envTracer (GleanTraceDownload repo) $ do
restore
:: (Storage st, Site s)
=> s
-> st
-> ServerConfig.Config
-> Maybe Int64
-> IO ()
restore site storage cfg bytes =
traceMsg envTracer (GleanTraceDownload repo) $ do
atomically $ notify envListener $ RestoreStarted repo
mbFreeBytes <- (Just <$> Storage.getFreeCapacity storage)
`catch` \(_ :: IOException) -> return Nothing
Expand All @@ -359,7 +366,7 @@ doRestore env@Env{..} repo meta
say logInfo "restoring"
createDirectoryIfMissing True scratch_restore
traceMsg envTracer GleanTraceStorageRestore $
Storage.restore storage repo scratch_restore scratch_file
Storage.restore storage cfg repo scratch_restore scratch_file
say logInfo "adding"
traceMsg envTracer GleanTraceFinishRestore $
Catalog.finishRestoring envCatalog repo
Expand Down
14 changes: 11 additions & 3 deletions glean/db/Glean/Database/Config.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module Glean.Database.Config (
tmpDataStore,
memoryDataStore,
rocksdbName,
lmdbName,

-- * Config, and options parser
options,
Expand Down Expand Up @@ -84,6 +85,7 @@ import Glean.Database.Schema.ComputeIds
import Glean.Database.Storage
import qualified Glean.Database.Storage.Memory as Memory
import qualified Glean.Database.Storage.RocksDB as RocksDB
import qualified Glean.Database.Storage.LMDB as LMDB
import Glean.Database.Trace
import qualified Glean.Internal.Types as Internal
import Glean.Internal.Types (StorageName(..))
Expand Down Expand Up @@ -118,17 +120,20 @@ data DataStore = DataStore
, dataStoreTag :: String
}

rocksdbName, memoryName :: StorageName
-- | Names identifying the available storage backends. 'rocksdbName' and
-- 'lmdbName' are the keys under which 'fileDataStore' registers its RocksDB
-- and LMDB storage.
rocksdbName, lmdbName, memoryName :: StorageName
rocksdbName = StorageName "rocksdb"
lmdbName = StorageName "lmdb"
memoryName = StorageName "memory"

fileDataStore :: FilePath -> DataStore
fileDataStore path = DataStore
{ withStorage = \scfg f -> do
rocksdb <- RocksDB.newStorage path scfg
lmdb <- LMDB.newStorage path scfg
f (
HashMap.fromList
[ (rocksdbName, Some rocksdb)
[ (rocksdbName, Some rocksdb),
(lmdbName, Some lmdb)
],
Some (Catalog.fileCatalog path)
)
Expand Down Expand Up @@ -511,7 +516,10 @@ options = do
long "db-root" <>
metavar "DIR" <>
help "Directory containing databases")
pure $ fileDataStore path
lmdb <- switch (long "lmdb")
pure $
(if lmdb then \s -> s { defaultStorage = lmdbName } else id) $
fileDataStore path
dbTmp = tmpDataStore <$ flag' () (
long "db-tmp" <>
help "Store databases in a temporary directory (default)")
Expand Down
3 changes: 3 additions & 0 deletions glean/db/Glean/Database/Storage.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import Glean.RTS.Foreign.Lookup (CanLookup(..), Lookup)
import Glean.RTS.Foreign.Ownership hiding (computeDerivedOwnership)
import Glean.RTS.Types (Fid, Pid)
import Glean.ServerConfig.Types (DBVersion(..))
import qualified Glean.ServerConfig.Types as ServerConfig
import Glean.Types (PredicateStats, Repo, SchemaId)
import Glean.Util.Some

Expand Down Expand Up @@ -120,6 +121,7 @@ class DatabaseOps (Database s) => Storage s where
-- to reduce the number of copies of the DB on disk during a restore.
restore
:: s -- ^ storage
-> ServerConfig.Config -- ^ server config
-> Repo -- ^ repo
-> FilePath -- ^ scratch directory
-> FilePath -- ^ file containing the serialised database (produced by 'backup')
Expand Down Expand Up @@ -202,6 +204,7 @@ class CanLookup db => DatabaseOps db where
-- the operation completes.
backup
:: db -- ^ database
-> ServerConfig.Config -- ^ server config
-> FilePath -- ^ scratch directory
-> (FilePath -> Data -> IO a)
-- ^ function which expects the serialised database
Expand Down
7 changes: 4 additions & 3 deletions glean/db/Glean/Database/Storage/DB.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import Foreign.ForeignPtr
import Foreign.Marshal.Array
import Foreign.Ptr
import Foreign.Storable
import System.Directory
import System.FilePath

import Util.FFI
Expand Down Expand Up @@ -156,12 +157,12 @@ instance DatabaseOps DB where
unsafeWithForeignPtr (dbPtr db) $ \db_ptr ->
invoke $ glean_rocksdb_prepare_fact_owner_cache db_ptr

backup db scratch process = do
backup db _ scratch process = do
let path = scratch </> "backup"
createDirectoryIfMissing True path
withContainer db $ \s_ptr ->
withCString path $ invoke . glean_rocksdb_container_backup s_ptr
process path (Data 0)
where
path = scratch </> "backup"

newtype Container = Container (Ptr Container)
deriving(Storable)
Expand Down
Loading
Loading