-
Notifications
You must be signed in to change notification settings - Fork 11
feat: new WAL file for meta #334
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
834e2ed
14d3d01
8302b7a
6706435
792a7fa
9779ff6
65ef2b9
4375be9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ package frac | |
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "io" | ||
| "math" | ||
| "os" | ||
|
|
@@ -54,7 +55,8 @@ type Active struct { | |
| sortCache *cache.Cache[[]byte] | ||
|
|
||
| metaFile *os.File | ||
| metaReader storage.DocBlocksReader | ||
| metaReader *storage.DocBlocksReader | ||
| walReader *storage.WalReader | ||
|
|
||
| writer *ActiveWriter | ||
| indexer *ActiveIndexer | ||
|
|
@@ -79,7 +81,8 @@ func NewActive( | |
| cfg *Config, | ||
| ) *Active { | ||
| docsFile, docsStats := mustOpenFile(baseFileName+consts.DocsFileSuffix, config.SkipFsync) | ||
| metaFile, metaStats := mustOpenFile(baseFileName+consts.MetaFileSuffix, config.SkipFsync) | ||
|
|
||
| metaFile, writer, metaReader, walReader, metaSize := mustOpenMetaWriter(baseFileName, readLimiter, docsFile, docsStats) | ||
|
|
||
| f := &Active{ | ||
| TokenList: NewActiveTokenList(config.IndexWorkers), | ||
|
|
@@ -95,13 +98,14 @@ func NewActive( | |
| sortReader: storage.NewDocsReader(readLimiter, docsFile, sortCache), | ||
|
|
||
| metaFile: metaFile, | ||
| metaReader: storage.NewDocBlocksReader(readLimiter, metaFile), | ||
| metaReader: metaReader, | ||
| walReader: walReader, | ||
|
|
||
| indexer: activeIndexer, | ||
| writer: NewActiveWriter(docsFile, metaFile, docsStats.Size(), metaStats.Size(), config.SkipFsync), | ||
| writer: writer, | ||
|
|
||
| BaseFileName: baseFileName, | ||
| info: common.NewInfo(baseFileName, uint64(docsStats.Size()), uint64(metaStats.Size())), | ||
| info: common.NewInfo(baseFileName, uint64(docsStats.Size()), metaSize), | ||
| Config: cfg, | ||
| } | ||
|
|
||
|
|
@@ -114,6 +118,35 @@ func NewActive( | |
| return f | ||
| } | ||
|
|
||
| func mustOpenMetaWriter( | ||
| baseFileName string, | ||
| readLimiter *storage.ReadLimiter, | ||
| docsFile *os.File, | ||
| docsStats os.FileInfo) (*os.File, *ActiveWriter, *storage.DocBlocksReader, *storage.WalReader, uint64) { | ||
| legacyMetaFileName := baseFileName + consts.MetaFileSuffix | ||
|
|
||
| if _, err := os.Stat(legacyMetaFileName); err == nil { | ||
| // .meta file exists | ||
| metaFile, metaStats := mustOpenFile(legacyMetaFileName, config.SkipFsync) | ||
| metaSize := uint64(metaStats.Size()) | ||
| metaReader := storage.NewDocBlocksReader(readLimiter, metaFile) | ||
| writer := NewActiveWriterLegacy(docsFile, metaFile, docsStats.Size(), metaStats.Size(), config.SkipFsync) | ||
| logger.Info("using legacy meta file format", zap.String("fraction", baseFileName)) | ||
| return metaFile, writer, &metaReader, nil, metaSize | ||
| } else { | ||
| logger.Info("using new WAL format", zap.String("fraction", baseFileName)) | ||
| walFileName := baseFileName + consts.WalFileSuffix | ||
| metaFile, metaStats := mustOpenFile(walFileName, config.SkipFsync) | ||
| metaSize := uint64(metaStats.Size()) | ||
| writer := NewActiveWriter(docsFile, metaFile, docsStats.Size(), metaStats.Size(), config.SkipFsync) | ||
| walReader, err := storage.NewWalReader(readLimiter, metaFile, baseFileName) | ||
| if err != nil { | ||
| logger.Fatal("failed to initialize WAL reader", zap.String("fraction", baseFileName), zap.Error(err)) | ||
| } | ||
| return metaFile, writer, nil, walReader, metaSize | ||
| } | ||
| } | ||
|
|
||
| func mustOpenFile(name string, skipFsync bool) (*os.File, os.FileInfo) { | ||
| file, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o776) | ||
| if err != nil { | ||
|
|
@@ -133,6 +166,81 @@ func mustOpenFile(name string, skipFsync bool) (*os.File, os.FileInfo) { | |
| } | ||
|
|
||
| func (f *Active) Replay(ctx context.Context) error { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: we already check file existence during initialization in mustOpenMetaWriter, so we can simplify the check in Replay func (f *Active) Replay(ctx context.Context) error {
if f.walReader != nil {
return f.replayWalFile(ctx)
}
if f.metaReader != nil {
return f.replayMetaFileLegacy(ctx)
}
logger.Info("neither wal nor legacy meta file was found, skipping replay", zap.String("fraction", f.BaseFileName))
return nil
}Since we always open either a meta file or a wal file during initialization, Replay could be even simpler: func (f *Active) Replay(ctx context.Context) error {
if f.metaReader != nil {
return f.replayMetaFileLegacy(ctx)
}
return f.replayWalFile(ctx)
} |
||
| walFileName := f.BaseFileName + consts.WalFileSuffix | ||
| if _, err := os.Stat(walFileName); err == nil { | ||
| return f.replayWalFile(ctx) | ||
| } | ||
|
|
||
| metaFileName := f.BaseFileName + consts.MetaFileSuffix | ||
| if _, err := os.Stat(metaFileName); err == nil { | ||
| return f.replayMetaFileLegacy(ctx) | ||
| } | ||
|
Comment on lines
+169
to
+177
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: If crash happens and both If we change order, first read |
||
|
|
||
| logger.Info("neither wal nor legacy meta file was found, skipping replay", zap.String("fraction", f.BaseFileName)) | ||
| return nil | ||
| } | ||
|
|
||
| func (f *Active) replayWalFile(ctx context.Context) error { | ||
| if f.walReader == nil { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this is a private method and its calls are under our control, so we don't need this check - we expect walReader to be initialized) |
||
| return fmt.Errorf("WAL reader not initialized") | ||
| } | ||
|
|
||
| logger.Info("start replaying WAL file...", zap.String("name", f.info.Name())) | ||
|
|
||
| t := time.Now() | ||
|
|
||
| step := f.info.MetaOnDisk / 10 | ||
| next := step | ||
|
|
||
| sw := stopwatch.New() | ||
| wg := sync.WaitGroup{} | ||
|
|
||
| for entry := range f.walReader.Iter() { | ||
| // Check for context cancellation | ||
| select { | ||
| case <-ctx.Done(): | ||
| return ctx.Err() | ||
| default: | ||
| } | ||
|
|
||
| if entry.Err != nil { | ||
| return entry.Err | ||
| } | ||
|
|
||
| if uint64(entry.Offset) > next { | ||
| next += step | ||
| progress := float64(uint64(entry.Offset)) / float64(f.info.MetaOnDisk) * 100 | ||
| logger.Info("replaying batch, meta", | ||
| zap.String("name", f.info.Name()), | ||
| zap.Int64("from", entry.Offset), | ||
| zap.Int64("to", entry.Offset+entry.Size), | ||
| zap.Uint64("target", f.info.MetaOnDisk), | ||
| util.ZapFloat64WithPrec("progress_percentage", progress, 2), | ||
| ) | ||
| } | ||
|
|
||
| wg.Add(1) | ||
| f.indexer.Index(f, entry.Data, &wg, sw) | ||
| } | ||
|
|
||
| wg.Wait() | ||
|
|
||
| tookSeconds := util.DurationToUnit(time.Since(t), "s") | ||
| throughputRaw := util.SizeToUnit(f.info.DocsRaw, "mb") / tookSeconds | ||
| throughputMeta := util.SizeToUnit(f.info.MetaOnDisk, "mb") / tookSeconds | ||
| logger.Info("active fraction replayed", | ||
| zap.String("name", f.info.Name()), | ||
| zap.Uint32("docs_total", f.info.DocsTotal), | ||
| util.ZapUint64AsSizeStr("docs_size", f.info.DocsOnDisk), | ||
| util.ZapFloat64WithPrec("took_s", tookSeconds, 1), | ||
| util.ZapFloat64WithPrec("throughput_raw_mb_sec", throughputRaw, 1), | ||
| util.ZapFloat64WithPrec("throughput_meta_mb_sec", throughputMeta, 1), | ||
| ) | ||
| return nil | ||
| } | ||
|
|
||
| // replayMetaFileLegacy replays legacy *.meta files. Only basic corruption detection support is implemented | ||
| func (f *Active) replayMetaFileLegacy(ctx context.Context) error { | ||
| logger.Info("start replaying...", zap.String("name", f.info.Name())) | ||
|
|
||
| t := time.Now() | ||
|
|
@@ -175,7 +283,9 @@ out: | |
| offset += metaSize | ||
|
|
||
| wg.Add(1) | ||
| f.indexer.Index(f, meta, &wg, sw) | ||
|
|
||
| walBlock := storage.PackDocBlockToWalBlock(meta) | ||
| f.indexer.Index(f, walBlock, &wg, sw) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -204,7 +314,7 @@ var bulkStagesSeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{ | |
| }, []string{"stage"}) | ||
|
|
||
| // Append causes data to be written on disk and sends metas to index workers | ||
| func (f *Active) Append(docs, metas []byte, wg *sync.WaitGroup) (err error) { | ||
| func (f *Active) Append(docs storage.DocBlock, metas storage.WalBlock, wg *sync.WaitGroup) (err error) { | ||
| sw := stopwatch.New() | ||
| m := sw.Start("append") | ||
| if err = f.writer.Write(docs, metas, sw); err != nil { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,8 +13,10 @@ import ( | |
| "time" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
|
|
||
| "github.com/ozontech/seq-db/metric/stopwatch" | ||
| "github.com/ozontech/seq-db/storage" | ||
| ) | ||
|
|
||
| type testWriterSyncer struct { | ||
|
|
@@ -62,7 +64,7 @@ func (ws *testWriterSyncer) Check(val []byte) bool { | |
|
|
||
| func TestFileWriter(t *testing.T) { | ||
| ws := &testWriterSyncer{out: map[string]struct{}{}, pause: time.Millisecond} | ||
| fw := NewFileWriter(ws, 0, false) | ||
| fw := storage.NewFileWriter(ws, 0, false) | ||
|
|
||
| wg := sync.WaitGroup{} | ||
| for range 100 { | ||
|
|
@@ -85,7 +87,7 @@ func TestFileWriter(t *testing.T) { | |
|
|
||
| func TestFileWriterNoSync(t *testing.T) { | ||
| ws := &testWriterSyncer{out: map[string]struct{}{}, pause: time.Millisecond} | ||
| fw := NewFileWriter(ws, 0, true) | ||
| fw := storage.NewFileWriter(ws, 0, true) | ||
|
|
||
| wg := sync.WaitGroup{} | ||
| for range 100 { | ||
|
|
@@ -108,7 +110,7 @@ func TestFileWriterNoSync(t *testing.T) { | |
|
|
||
| func TestFileWriterError(t *testing.T) { | ||
| ws := &testWriterSyncer{out: map[string]struct{}{}, pause: time.Millisecond, err: true} | ||
| fw := NewFileWriter(ws, 0, false) | ||
| fw := storage.NewFileWriter(ws, 0, false) | ||
|
|
||
| wg := sync.WaitGroup{} | ||
| for range 100 { | ||
|
|
@@ -149,7 +151,7 @@ func TestConcurrentFileWriting(t *testing.T) { | |
|
|
||
| defer f.Close() | ||
|
|
||
| fw := NewFileWriter(&testRandPauseWriterAt{f: f}, 0, true) | ||
| fw := storage.NewFileWriter(&testRandPauseWriterAt{f: f}, 0, true) | ||
|
|
||
| const ( | ||
| writersCount = 100 | ||
|
|
@@ -262,3 +264,40 @@ func TestSparseWrite(t *testing.T) { | |
| e = os.Remove(rf.Name()) | ||
| assert.NoError(t, e) | ||
| } | ||
|
|
||
| func TestLegacyMetaWriterConvertsWalBlockToDocBlock(t *testing.T) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. all remaining file except this function duplicates storage/file_writer_test.go |
||
| f, err := os.Create(t.TempDir() + "/test_wal_block.txt") | ||
| require.NoError(t, err) | ||
| defer f.Close() | ||
|
|
||
| meta := NewLegacyMetaWriter(storage.NewFileWriter(f, 0, false)) | ||
|
|
||
| originalPayload := []byte("test payload for WalBlock to DocBlock conversion") | ||
| walBlock := storage.CompressWalBlock(originalPayload, nil, 3) | ||
| walBlock.SetDocsOffset(12345) | ||
| walBlock.SetVersion(1) | ||
|
|
||
| sw := stopwatch.New() | ||
| offset, err := meta.Write(walBlock, sw) | ||
| require.NoError(t, err) | ||
|
|
||
| meta.Stop() | ||
|
|
||
| docBlockSize := storage.DocBlockHeaderLen + walBlock.Len() | ||
|
|
||
| readBuf := make([]byte, docBlockSize) | ||
| bytesRead, err := f.ReadAt(readBuf, offset) | ||
| require.NoError(t, err) | ||
| require.Equal(t, int(docBlockSize), bytesRead) | ||
| readBuf = readBuf[:bytesRead] | ||
|
|
||
| docBlock := storage.DocBlock(readBuf) | ||
|
|
||
| assert.Equal(t, storage.CodecZSTD, docBlock.Codec()) | ||
| assert.Equal(t, uint64(len(originalPayload)), docBlock.RawLen()) | ||
| assert.Equal(t, uint64(12345), docBlock.GetExt2()) | ||
|
|
||
| decompressed, err := docBlock.DecompressTo(nil) | ||
| require.NoError(t, err) | ||
| assert.Equal(t, originalPayload, decompressed) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: the `else` branch after `return` is unnecessary.