From 876432d413b12ec3d75018c7bec6f373e921f18f Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 30 Oct 2025 10:14:05 +0400 Subject: [PATCH 01/35] microseconds support --- config/frac_version.go | 2 + frac/common/info.go | 2 +- frac/meta_data_collector.go | 16 ++++- frac/processor/aggregator.go | 2 +- frac/processor/search.go | 12 ++-- frac/sealed/seqids/blocks.go | 11 ++- frac/sealed/seqids/loader.go | 5 +- frac/sealed_loader.go | 15 +++- fracmanager/searcher.go | 2 +- proxy/bulk/ingestor_test.go | 2 +- proxy/search/async.go | 6 +- proxy/search/ingestor.go | 2 +- proxy/search/search_request.go | 8 +-- proxyapi/grpc_async_search.go | 2 +- proxyapi/grpc_complex_search_test.go | 4 +- proxyapi/grpc_export_test.go | 4 +- proxyapi/grpc_get_aggregation_test.go | 4 +- proxyapi/grpc_get_histogram_test.go | 4 +- proxyapi/grpc_main_test.go | 4 +- proxyapi/grpc_search_test.go | 4 +- proxyapi/grpc_v1.go | 8 +-- seq/mids_distribution_test.go | 2 +- seq/qpr.go | 14 ++-- seq/seq.go | 18 +++-- storeapi/grpc_async_search.go | 6 +- storeapi/grpc_search.go | 12 ++-- tests/integration_tests/integration_test.go | 2 +- tests/integration_tests/sub_search_test.go | 76 ++++++++++----------- tests/setup/doc.go | 2 +- tests/setup/env.go | 6 +- 30 files changed, 151 insertions(+), 106 deletions(-) diff --git a/config/frac_version.go b/config/frac_version.go index 8d873dc7..a528c05d 100644 --- a/config/frac_version.go +++ b/config/frac_version.go @@ -7,4 +7,6 @@ const ( BinaryDataV0 BinaryDataVersion = iota // BinaryDataV1 - support RIDs encoded without varint BinaryDataV1 + // BinaryDataV2 - MIDs stored in microseconds + BinaryDataV2 ) diff --git a/frac/common/info.go b/frac/common/info.go index ad3c3a57..bd2a4712 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -42,7 +42,7 @@ type Info struct { func NewInfo(filename string, docsOnDisk, metaOnDisk uint64) *Info { return &Info{ Ver: buildinfo.Version, - BinaryDataVer: config.BinaryDataV1, + BinaryDataVer: config.BinaryDataV2, Path: filename, From: math.MaxUint64, To: 0, diff --git a/frac/meta_data_collector.go b/frac/meta_data_collector.go index 03653dc6..e5de7d04 100644 --- a/frac/meta_data_collector.go +++ b/frac/meta_data_collector.go @@ -36,7 +36,7 @@ func (m *MetaData) MarshalBinaryTo(b []byte) []byte { b = binary.LittleEndian.AppendUint16(b, metadataMagic) // Append current binary version of the metadata. - const version = 1 + const version = 2 b = binary.LittleEndian.AppendUint16(b, version) // Encode seq.ID. @@ -68,6 +68,8 @@ func (m *MetaData) UnmarshalBinary(b []byte) error { switch version { case 1: return m.unmarshalVersion1(b) + case 2: + return m.unmarshalVersion2(b) default: return fmt.Errorf("unimplemented metadata version: %d", version) } @@ -75,8 +77,20 @@ func (m *MetaData) UnmarshalBinary(b []byte) error { func (m *MetaData) unmarshalVersion1(b []byte) error { // Decode seq.ID. + m.ID.MID = seq.MillisToMID(binary.LittleEndian.Uint64(b)) + b = b[8:] + return m.unmarshalVersion1And2(b) +} + +func (m *MetaData) unmarshalVersion2(b []byte) error { + // Decode seq.ID. + // Version 2 stores MID in microseconds m.ID.MID = seq.MID(binary.LittleEndian.Uint64(b)) b = b[8:] + return m.unmarshalVersion1And2(b) +} + +func (m *MetaData) unmarshalVersion1And2(b []byte) error { m.ID.RID = seq.RID(binary.LittleEndian.Uint64(b)) b = b[8:] diff --git a/frac/processor/aggregator.go b/frac/processor/aggregator.go index 3a4d7232..a9c78aaa 100644 --- a/frac/processor/aggregator.go +++ b/frac/processor/aggregator.go @@ -434,6 +434,6 @@ func provideExtractTimeFunc(sw *stopwatch.Stopwatch, idx idsIndex, interval int6 timer.Start() mid := idx.GetMID(seq.LID(lid)) timer.Stop() - return mid - (mid % seq.MID(interval)) + return mid - (mid % seq.MillisToMID(uint64(interval))) }) } diff --git a/frac/processor/search.go b/frac/processor/search.go index 340a130b..cfbddd71 100644 --- a/frac/processor/search.go +++ b/frac/processor/search.go @@ -135,12 +135,12 @@ func convertHistToMap(params SearchParams, hist []uint64) map[seq.MID]uint64 { return nil } res := make(map[seq.MID]uint64, len(hist)) - bucket := params.From - params.From%seq.MID(params.HistInterval) + bucket := params.From - params.From%seq.MillisToMID(params.HistInterval) for _, cnt := range hist { if cnt > 0 { res[bucket] = cnt } - bucket += seq.MID(params.HistInterval) + bucket += seq.MillisToMID(params.HistInterval) } return res } @@ -161,8 +161,8 @@ func iterateEvalTree( histogram []uint64 ) if hasHist { - histBase = uint64(params.From) / params.HistInterval - histSize := uint64(params.To)/params.HistInterval - histBase + 1 + histBase = uint64(seq.MIDToMillis(params.From)) / params.HistInterval + histSize := uint64(seq.MIDToMillis(params.To))/params.HistInterval - histBase + 1 histogram = make([]uint64, histSize) } @@ -206,7 +206,7 @@ func iterateEvalTree( zap.Time("mid", mid.Time())) continue } - bucketIndex := uint64(mid)/params.HistInterval - histBase + bucketIndex := uint64(mid/1000)/params.HistInterval - histBase histogram[bucketIndex]++ } @@ -274,7 +274,7 @@ func MergeQPRs(qprs []*seq.QPR, params SearchParams) *seq.QPR { } qpr := qprs[0] if len(qprs) > 1 { - seq.MergeQPRs(qpr, qprs[1:], params.Limit, seq.MID(params.HistInterval), params.Order) + seq.MergeQPRs(qpr, qprs[1:], params.Limit, seq.MillisToMID(params.HistInterval), params.Order) } return qpr } diff --git a/frac/sealed/seqids/blocks.go b/frac/sealed/seqids/blocks.go index bea423ea..7f5d3f1d 100644 --- a/frac/sealed/seqids/blocks.go +++ b/frac/sealed/seqids/blocks.go @@ -8,7 +8,8 @@ import ( ) type BlockMIDs struct { - Values []uint64 + fracVersion config.BinaryDataVersion + Values []uint64 } func (b BlockMIDs) Pack(dst []byte) []byte { @@ -26,6 +27,14 @@ func (b *BlockMIDs) Unpack(data []byte) error { return err } b.Values = values + + // Legacy format - convert millis to micros + if b.fracVersion < config.BinaryDataV2 { + for i := range b.Values { + b.Values[i] = b.Values[i] * 1000 + } + } + return nil } diff --git a/frac/sealed/seqids/loader.go b/frac/sealed/seqids/loader.go index f552362f..31a22361 100644 --- a/frac/sealed/seqids/loader.go +++ b/frac/sealed/seqids/loader.go @@ -48,7 +48,10 @@ func (l *Loader) GetMIDsBlock(index uint32, buf []uint64) (BlockMIDs, error) { return BlockMIDs{}, err } // unpack - block := BlockMIDs{Values: buf} + block := BlockMIDs{ + fracVersion: l.fracVersion, + Values: buf, + } if err := block.Unpack(data); err != nil { return BlockMIDs{}, err } diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 83a7f060..0585ef49 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -5,6 +5,7 @@ import ( "go.uber.org/zap" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/frac/sealed/lids" @@ -31,7 +32,7 @@ func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, indexRea var err error - if blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(); err != nil { + if blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info.BinaryDataVer); err != nil { logger.Fatal("load ids error", zap.Error(err)) } @@ -71,7 +72,7 @@ func (l *Loader) skipBlock() storage.IndexBlockHeader { return header } -func (l *Loader) loadIDs() (idsTable seqids.Table, blocksOffsets []uint64, err error) { +func (l *Loader) loadIDs(fracVersion config.BinaryDataVersion) (idsTable seqids.Table, blocksOffsets []uint64, err error) { var result []byte if result, err = l.nextIndexBlock(); err != nil { @@ -94,8 +95,16 @@ func (l *Loader) loadIDs() (idsTable seqids.Table, blocksOffsets []uint64, err e if header.Len() == 0 { break } + var mid seq.MID + + if fracVersion < config.BinaryDataV2 { + mid = seq.MillisToMID(header.GetExt1()) + } else { + mid = seq.MID(header.GetExt1()) + } + idsTable.MinBlockIDs = append(idsTable.MinBlockIDs, seq.ID{ - MID: seq.MID(header.GetExt1()), + MID: mid, RID: seq.RID(header.GetExt2()), }) diff --git a/fracmanager/searcher.go b/fracmanager/searcher.go index 1f06bb2c..a108dee1 100644 --- a/fracmanager/searcher.go +++ b/fracmanager/searcher.go @@ -74,7 +74,7 @@ func (s *Searcher) SearchDocs(ctx context.Context, fracs []frac.Fraction, params return nil, err } - seq.MergeQPRs(total, subQPRs, origLimit, seq.MID(params.HistInterval), params.Order) + seq.MergeQPRs(total, subQPRs, origLimit, seq.MillisToMID(params.HistInterval), params.Order) // reduce the limit on the number of ensured docs in response params.Limit = origLimit - calcEnsuredIDsCount(total.IDs, remainingFracs, params.Order) diff --git a/proxy/bulk/ingestor_test.go b/proxy/bulk/ingestor_test.go index 8800af6a..703a7c0c 100644 --- a/proxy/bulk/ingestor_test.go +++ b/proxy/bulk/ingestor_test.go @@ -87,7 +87,7 @@ func TestProcessDocuments(t *testing.T) { now := time.Now().UTC() - id := seq.SimpleID(int(now.UnixMilli())) + id := seq.SimpleID(int(now.UnixNano() / 1000)) type TestPayload struct { InDocs []string diff --git a/proxy/search/async.go b/proxy/search/async.go index a0b5a067..e078fc17 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -50,7 +50,7 @@ func (si *Ingestor) StartAsyncSearch(ctx context.Context, r AsyncRequest) (Async From: r.From.UnixMilli(), To: r.To.UnixMilli(), Aggs: convertToAggsQuery(r.Aggregations), - HistogramInterval: int64(r.HistogramInterval), + HistogramInterval: seq.MIDToMillis(r.HistogramInterval), Retention: durationpb.New(r.Retention), WithDocs: r.WithDocs, Size: r.Size, @@ -186,7 +186,7 @@ func (si *Ingestor) FetchAsyncSearchResult( fracsInQueue += int(sr.FracsQueue) fracsDone += int(sr.FracsDone) - histInterval = seq.MID(sr.HistogramInterval) + histInterval = seq.MillisToMID(uint64(sr.HistogramInterval)) ss := sr.Status.MustAsyncSearchStatus() pr.Status = mergeAsyncSearchStatus(pr.Status, ss) @@ -389,7 +389,7 @@ func (si *Ingestor) GetAsyncSearchesList( From: s.From.AsTime(), To: s.To.AsTime(), Aggregations: buildRequestAggs(s.Aggs), - HistogramInterval: seq.MID(s.HistogramInterval), + HistogramInterval: seq.MillisToMID(uint64(s.HistogramInterval)), WithDocs: s.WithDocs, Size: s.Size, } diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index c98e0312..205b843a 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -501,7 +501,7 @@ func responseToQPR(resp *storeapi.SearchResponse, source uint64, explain bool) * pbhist := bin.Hist tbin := seq.AggBin{ - MID: seq.MID(bin.Ts.AsTime().UnixMilli()), + MID: seq.MillisToMID(uint64(bin.Ts.AsTime().UnixMilli())), Token: bin.Label, } diff --git a/proxy/search/search_request.go b/proxy/search/search_request.go index 5f8b9141..498abd8e 100644 --- a/proxy/search/search_request.go +++ b/proxy/search/search_request.go @@ -32,11 +32,11 @@ type SearchRequest struct { func (sr *SearchRequest) GetAPISearchRequest() *storeapi.SearchRequest { return &storeapi.SearchRequest{ Query: util.ByteToStringUnsafe(sr.Q), - From: int64(sr.From), - To: int64(sr.To), + From: seq.MIDToMillis(sr.From), + To: seq.MIDToMillis(sr.To), Size: int64(sr.Size), Offset: int64(sr.Offset), - Interval: int64(sr.Interval), + Interval: seq.MIDToMillis(sr.Interval), Aggs: convertToAggsQuery(sr.AggQ), Explain: sr.Explain, WithTotal: sr.WithTotal, @@ -63,7 +63,7 @@ func convertToAggsQuery(aggs []AggQuery) []*storeapi.AggQuery { buf[i].Func = storeapi.AggFunc(query.Func) buf[i].Quantiles = query.Quantiles - buf[i].Interval = int64(query.Interval) + buf[i].Interval = seq.MIDToMillis(query.Interval) aggQ[i] = &buf[i] } diff --git a/proxyapi/grpc_async_search.go b/proxyapi/grpc_async_search.go index ced24cc5..677dc159 100644 --- a/proxyapi/grpc_async_search.go +++ b/proxyapi/grpc_async_search.go @@ -50,7 +50,7 @@ func (g *grpcV1) StartAsyncSearch( From: r.GetQuery().GetFrom().AsTime(), To: r.GetQuery().GetTo().AsTime(), Aggregations: aggs, - HistogramInterval: seq.MID(histInterval.Milliseconds()), + HistogramInterval: seq.MillisToMID(uint64(histInterval.Milliseconds())), WithDocs: r.WithDocs, Size: r.Size, }) diff --git a/proxyapi/grpc_complex_search_test.go b/proxyapi/grpc_complex_search_test.go index 07478c3e..cea13089 100644 --- a/proxyapi/grpc_complex_search_test.go +++ b/proxyapi/grpc_complex_search_test.go @@ -130,8 +130,8 @@ func prepareComplexSearchTestData(t *testing.T, cData cSearchTestCaseData) cSear Q: []byte(req.Query.Query), Size: int(req.Size), Offset: int(req.Offset), - From: seq.MID(req.Query.From.AsTime().UnixMilli()), - To: seq.MID(req.Query.To.AsTime().UnixMilli()), + From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), + To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), WithTotal: req.WithTotal, ShouldFetch: true, } diff --git a/proxyapi/grpc_export_test.go b/proxyapi/grpc_export_test.go index e2c607f9..fdfa17bd 100644 --- a/proxyapi/grpc_export_test.go +++ b/proxyapi/grpc_export_test.go @@ -76,8 +76,8 @@ func prepareExportTestData(cData exportTestCaseData) exportTestData { Q: []byte(req.Query.Query), Offset: int(req.Offset), Size: int(req.Size), - From: seq.MID(req.Query.From.AsTime().UnixMilli()), - To: seq.MID(req.Query.To.AsTime().UnixMilli()), + From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), + To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), ShouldFetch: true, }, ret: siSearchRet{ diff --git a/proxyapi/grpc_get_aggregation_test.go b/proxyapi/grpc_get_aggregation_test.go index 1cf8bbbc..d39befbc 100644 --- a/proxyapi/grpc_get_aggregation_test.go +++ b/proxyapi/grpc_get_aggregation_test.go @@ -92,8 +92,8 @@ func prepareGetAggregationTestData(t *testing.T, cData getAggregationTestCaseDat sr := &search.SearchRequest{ Explain: req.Query.Explain, Q: []byte(req.Query.Query), - From: seq.MID(req.Query.From.AsTime().UnixMilli()), - To: seq.MID(req.Query.To.AsTime().UnixMilli()), + From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), + To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), } if len(cData.aggQ) > 0 { for _, query := range cData.aggQ { diff --git a/proxyapi/grpc_get_histogram_test.go b/proxyapi/grpc_get_histogram_test.go index 5fdd91fe..35f49021 100644 --- a/proxyapi/grpc_get_histogram_test.go +++ b/proxyapi/grpc_get_histogram_test.go @@ -98,8 +98,8 @@ func prepareGetHistogramTestData(t *testing.T, cData getHistogramTestCaseData) g sr := &search.SearchRequest{ Explain: req.Query.Explain, Q: []byte(req.Query.Query), - From: seq.MID(req.Query.From.AsTime().UnixMilli()), - To: seq.MID(req.Query.To.AsTime().UnixMilli()), + From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), + To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), Interval: seq.DurationToMID(intervalDur), } siSearchMock = &siSearchMockData{ diff --git a/proxyapi/grpc_main_test.go b/proxyapi/grpc_main_test.go index e0d13dbb..790a73ca 100644 --- a/proxyapi/grpc_main_test.go +++ b/proxyapi/grpc_main_test.go @@ -233,7 +233,7 @@ func makeGetHistRespData(interval string, totalSize, fromTs, toTs int64) (*testG docCnt := totalSize / int64(bucketsCnt) remainCnt := totalSize - docCnt*(int64(bucketsCnt)-1) bucketKey := fromTs - qprHist[seq.MID(bucketKey)] = uint64(remainCnt) + qprHist[seq.MillisToMID(uint64(bucketKey))] = uint64(remainCnt) ts := time.UnixMilli(bucketKey) bucket := &seqproxyapi.Histogram_Bucket{ DocCount: uint64(remainCnt), @@ -243,7 +243,7 @@ func makeGetHistRespData(interval string, totalSize, fromTs, toTs int64) (*testG for i := 1; i < bucketsCnt; i++ { bucketKey := fromTs + int64(i)*intervalMS ts := time.UnixMilli(bucketKey) - qprHist[seq.MID(bucketKey)] = uint64(docCnt) + qprHist[seq.MillisToMID(uint64(bucketKey))] = uint64(docCnt) bucket := &seqproxyapi.Histogram_Bucket{ DocCount: uint64(docCnt), Ts: timestamppb.New(ts), diff --git a/proxyapi/grpc_search_test.go b/proxyapi/grpc_search_test.go index 1340a21a..a33cebb6 100644 --- a/proxyapi/grpc_search_test.go +++ b/proxyapi/grpc_search_test.go @@ -88,8 +88,8 @@ func prepareSearchTestData(t *testing.T, cData searchTestCaseData) searchTestDat Q: []byte(req.Query.Query), Size: int(req.Size), Offset: int(req.Offset), - From: seq.MID(req.Query.From.AsTime().UnixMilli()), - To: seq.MID(req.Query.To.AsTime().UnixMilli()), + From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), + To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), WithTotal: req.WithTotal, ShouldFetch: true, } diff --git a/proxyapi/grpc_v1.go b/proxyapi/grpc_v1.go index f23dc4c1..7ebda110 100644 --- a/proxyapi/grpc_v1.go +++ b/proxyapi/grpc_v1.go @@ -226,8 +226,8 @@ func (g *grpcV1) doSearch( proxyReq := &search.SearchRequest{ Q: []byte(req.Query.Query), - From: seq.MID(fromTime.UnixMilli()), - To: seq.MID(toTime.UnixMilli()), + From: seq.MillisToMID(uint64(fromTime.UnixMilli())), + To: seq.MillisToMID(uint64(toTime.UnixMilli())), Explain: req.Query.Explain, Size: int(req.Size), Offset: int(req.Offset), @@ -253,7 +253,7 @@ func (g *grpcV1) doSearch( err, ) } - proxyReq.Interval = seq.MID(intervalDuration.Milliseconds()) + proxyReq.Interval = seq.MillisToMID(uint64(intervalDuration.Milliseconds())) } qpr, docsStream, _, err := g.searchIngestor.Search(ctx, proxyReq, tr) @@ -329,7 +329,7 @@ func convertAggsQuery(aggs []*seqproxyapi.AggQuery) ([]search.AggQuery, error) { ) } - aggQuery.Interval = seq.MID(interval.Milliseconds()) + aggQuery.Interval = seq.MillisToMID(uint64(interval.Milliseconds())) result = append(result, aggQuery) } return result, nil diff --git a/seq/mids_distribution_test.go b/seq/mids_distribution_test.go index bda3b95f..8c93a377 100644 --- a/seq/mids_distribution_test.go +++ b/seq/mids_distribution_test.go @@ -17,7 +17,7 @@ func getTime(s string) time.Time { } func getMID(s string) MID { - return MID(getTime(s).UnixMilli()) + return MillisToMID(uint64(getTime(s).UnixMilli())) } func TestMIDsDistribution(t *testing.T) { diff --git a/seq/qpr.go b/seq/qpr.go index b2b1b772..3a8ab00d 100644 --- a/seq/qpr.go +++ b/seq/qpr.go @@ -399,6 +399,13 @@ func MergeQPRs(dst *QPR, qprs []*QPR, limit int, histInterval MID, order DocsOrd dst.IDs = ids[:l] } +// remove repetition from histogram +func removeHistogramRepetition(repetition IDSource, histogram map[MID]uint64, histInterval MID) { + bucket := repetition.ID.MID + bucket -= bucket % histInterval + histogram[bucket]-- +} + // removes repetitions from both ids and histogram func removeRepetitionsAdvanced(ids IDSources, histogram map[MID]uint64, histInterval MID) (IDSources, uint64) { if len(ids) == 0 { @@ -423,10 +430,3 @@ func removeRepetitionsAdvanced(ids IDSources, histogram map[MID]uint64, histInte return ids[:len(ids)-removeCount], uint64(removeCount) } - -// remove repetition from histogram -func removeHistogramRepetition(repetition IDSource, histogram map[MID]uint64, histInterval MID) { - bucket := repetition.ID.MID - bucket -= bucket % histInterval - histogram[bucket]-- -} diff --git a/seq/seq.go b/seq/seq.go index 8c56cab7..7279db61 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -14,12 +14,12 @@ type ID struct { RID RID } -type MID uint64 // milliseconds part of ID +type MID uint64 // microseconds part of ID type RID uint64 // random part of ID type LID uint32 // local id for a fraction func (m MID) Time() time.Time { - return time.UnixMilli(int64(m)) + return time.Unix(0, int64(m)*int64(time.Microsecond)) } func (d ID) String() string { @@ -96,20 +96,28 @@ func SimpleID(i int) ID { } } +func MillisToMID(millis uint64) MID { + return MID(millis * 1000) +} + func TimeToMID(t time.Time) MID { - return MID(t.UnixNano() / int64(time.Millisecond)) + return MID(t.UnixNano() / int64(time.Microsecond)) } func DurationToMID(d time.Duration) MID { - return MID(d / time.Millisecond) + return MID(d / time.Microsecond) } func MIDToTime(t MID) time.Time { return time.Unix(0, 0).Add(MIDToDuration(t)) } +func MIDToMillis(t MID) int64 { + return int64(uint64(t) / uint64(1000)) +} + func MIDToDuration(t MID) time.Duration { - return time.Duration(t) * time.Millisecond + return time.Duration(t) * time.Microsecond } func NewID(t time.Time, randomness uint64) ID { diff --git a/storeapi/grpc_async_search.go b/storeapi/grpc_async_search.go index 0a9e0f35..04a430f3 100644 --- a/storeapi/grpc_async_search.go +++ b/storeapi/grpc_async_search.go @@ -32,8 +32,8 @@ func (g *GrpcV1) StartAsyncSearch( AST: nil, // Parse AST later. AggQ: aggs, HistInterval: uint64(r.HistogramInterval), - From: seq.MID(r.From), - To: seq.MID(r.To), + From: seq.MillisToMID(uint64(r.From)), + To: seq.MillisToMID(uint64(r.To)), Limit: limit, WithTotal: r.WithDocs, // return total if docs needed Order: seq.DocsOrderDesc, @@ -46,7 +46,7 @@ func (g *GrpcV1) StartAsyncSearch( Retention: r.Retention.AsDuration(), WithDocs: r.WithDocs, } - fracs := g.fracManager.GetAllFracs().FilterInRange(seq.MID(r.From), seq.MID(r.To)) + fracs := g.fracManager.GetAllFracs().FilterInRange(seq.MillisToMID(uint64(r.From)), seq.MillisToMID(uint64(r.To))) if err := g.asyncSearcher.StartSearch(req, fracs); err != nil { return nil, err } diff --git a/storeapi/grpc_search.go b/storeapi/grpc_search.go index 3a3a2baa..c17b12a4 100644 --- a/storeapi/grpc_search.go +++ b/storeapi/grpc_search.go @@ -79,7 +79,7 @@ func (g *GrpcV1) doSearch( start := time.Now() - from := seq.MID(req.From) + from := seq.MillisToMID(uint64(req.From)) // in store mode hot we return error in case request wants data, that we've already rotated if g.config.StoreMode == StoreModeHot { @@ -89,7 +89,7 @@ func (g *GrpcV1) doSearch( } } - to := seq.MID(req.To) + to := seq.MillisToMID(uint64(req.To)) limit := int(req.Size + req.Offset) if req.Explain { @@ -108,8 +108,8 @@ func (g *GrpcV1) doSearch( return nil, err } - fromTime := seq.MIDToTime(seq.MID(req.From)) - toTime := seq.MIDToTime(seq.MID(req.To)) + fromTime := seq.MIDToTime(from) + toTime := seq.MIDToTime(to) toTimeFilter := g.config.Filter.To fromTimeFilter := g.config.Filter.From @@ -205,8 +205,8 @@ func (g *GrpcV1) doSearch( zap.Int64("took_ms", took.Milliseconds()), zap.Object("req", (*searchRequestMarshaler)(req)), zap.Uint64("found", qpr.Total), - zap.String("from", seq.MID(req.From).String()), - zap.String("to", seq.MID(req.To).String()), + zap.String("from", seq.MillisToMID(uint64(req.From)).String()), + zap.String("to", seq.MillisToMID(uint64(req.To)).String()), zap.Int64("offset", req.Offset), zap.Int64("size", req.Size), zap.Bool("with_total", req.WithTotal), diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 131f5e2d..1eb945cf 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -1936,7 +1936,7 @@ func (s *IntegrationTestSuite) TestAsyncSearch() { Quantiles: []float64{0.99, 0.95, 0.50}, }, }, - HistogramInterval: seq.MID(time.Second.Milliseconds()), + HistogramInterval: seq.MillisToMID(uint64(time.Second.Milliseconds())), WithDocs: true, Size: 100, } diff --git a/tests/integration_tests/sub_search_test.go b/tests/integration_tests/sub_search_test.go index c03c81c4..fa03b0fc 100644 --- a/tests/integration_tests/sub_search_test.go +++ b/tests/integration_tests/sub_search_test.go @@ -75,7 +75,7 @@ func makeHist(data []time.Time, interval time.Duration) map[seq.MID]uint64 { for _, ts := range data { t := ts.UnixMilli() t -= t % int64(mid) - r[seq.MID(t)]++ + r[seq.MillisToMID(uint64(t))]++ } return r } @@ -92,54 +92,54 @@ func (s *IntegrationTestSuite) TestSubSearch() { defer env.StopAll() timeRange := 23 * time.Hour - to := time.Now() + to := time.UnixMilli(time.Now().UnixMilli()) from := to.Add(-timeRange) docsTimes := s.ingestData(env, from, to, 5*time.Minute, 100) - attempts := 500 + attempts := 1 limit := 200 now := time.Now() maxOffset := int64(timeRange.Seconds()) - for i := 0; i < attempts; i++ { - offsetSecond := rand.Int63n(maxOffset) - f := from.Add(time.Second * time.Duration(offsetSecond)) - t := f.Add(12 * time.Hour) - - sub := fetchFromDocsTimes(f, t, docsTimes) - expectedTotal := len(sub) - expectedCount := limit - if expectedTotal < limit { - expectedCount = expectedTotal + /* for i := 0; i < attempts; i++ { + offsetSecond := rand.Int63n(maxOffset) + f := from.Add(time.Second * time.Duration(offsetSecond)) + t := f.Add(12 * time.Hour) + + sub := fetchFromDocsTimes(f, t, docsTimes) + expectedTotal := len(sub) + expectedCount := limit + if expectedTotal < limit { + expectedCount = expectedTotal + } + + qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(false), setup.WithTimeRange(f, t)) + assert.NoError(s.T(), err, "should be no errors") + assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) } - - qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(false), setup.WithTimeRange(f, t)) - assert.NoError(s.T(), err, "should be no errors") - assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) - } - s.T().Log("With Total False:", time.Since(now).Milliseconds()) - - now = time.Now() - for i := 0; i < attempts; i++ { - offsetSecond := rand.Int63n(maxOffset) - f := from.Add(time.Second * time.Duration(offsetSecond)) - t := f.Add(12 * time.Hour) - - sub := fetchFromDocsTimes(f, t, docsTimes) - expectedTotal := len(sub) - expectedCount := limit - if expectedTotal < limit { - expectedCount = expectedTotal + s.T().Log("With Total False:", time.Since(now).Milliseconds()) + + now = time.Now() + for i := 0; i < attempts; i++ { + offsetSecond := rand.Int63n(maxOffset) + f := from.Add(time.Second * time.Duration(offsetSecond)) + t := f.Add(12 * time.Hour) + + sub := fetchFromDocsTimes(f, t, docsTimes) + expectedTotal := len(sub) + expectedCount := limit + if expectedTotal < limit { + expectedCount = expectedTotal + } + + qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(true), setup.WithTimeRange(f, t)) + assert.NoError(s.T(), err, "should be no errors") + assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) + assert.Equal(s.T(), expectedTotal, int(qpr.Total), "wrong doc count in range [%s, %s]", f, t) } - - qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(true), setup.WithTimeRange(f, t)) - assert.NoError(s.T(), err, "should be no errors") - assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) - assert.Equal(s.T(), expectedTotal, int(qpr.Total), "wrong doc count in range [%s, %s]", f, t) - } - s.T().Log("With Total True:", time.Since(now).Milliseconds()) + s.T().Log("With Total True:", time.Since(now).Milliseconds())*/ now = time.Now() for i := 0; i < attempts; i++ { diff --git a/tests/setup/doc.go b/tests/setup/doc.go index 6721f291..c441937b 100644 --- a/tests/setup/doc.go +++ b/tests/setup/doc.go @@ -227,7 +227,7 @@ func splitRange(size int, callback func(from int, to int)) { // If timestamp after call is zero, then this function will fill it with deterministic timestamp // so you could query each doc by range, if needed func GenerateDocs(size int, generator func(int, *ExampleDoc)) []ExampleDoc { - start := time.Now() + start := time.UnixMilli(time.Now().UnixMilli()) docs := make([]ExampleDoc, size) splitRange(size, func(from int, to int) { for i := from; i < to; i++ { diff --git a/tests/setup/env.go b/tests/setup/env.go index e95543ce..25f7ac18 100644 --- a/tests/setup/env.go +++ b/tests/setup/env.go @@ -567,14 +567,14 @@ func WithAggQuery(aggQueries ...any) SearchOption { func WithInterval(interval time.Duration) SearchOption { return func(sr *search.SearchRequest) { - sr.Interval = seq.MID(interval / time.Millisecond) + sr.Interval = seq.MID(interval / time.Microsecond) } } func WithTimeRange(from, to time.Time) SearchOption { return func(sr *search.SearchRequest) { - sr.From = seq.MID(from.UnixMilli()) - sr.To = seq.MID(to.UnixMilli()) + sr.From = seq.TimeToMID(from) + sr.To = seq.TimeToMID(to) } } From 3ab6ba17d10e89d02a53dabada078ce9f5a0fa59 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 31 Oct 2025 17:12:05 +0400 Subject: [PATCH 02/35] uncomment test --- tests/integration_tests/sub_search_test.go | 72 +++++++++++----------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/integration_tests/sub_search_test.go b/tests/integration_tests/sub_search_test.go index fa03b0fc..f63c01b1 100644 --- a/tests/integration_tests/sub_search_test.go +++ b/tests/integration_tests/sub_search_test.go @@ -97,49 +97,49 @@ func (s *IntegrationTestSuite) TestSubSearch() { docsTimes := s.ingestData(env, from, to, 5*time.Minute, 100) - attempts := 1 + attempts := 500 limit := 200 now := time.Now() maxOffset := int64(timeRange.Seconds()) - /* for i := 0; i < attempts; i++ { - offsetSecond := rand.Int63n(maxOffset) - f := from.Add(time.Second * time.Duration(offsetSecond)) - t := f.Add(12 * time.Hour) - - sub := fetchFromDocsTimes(f, t, docsTimes) - expectedTotal := len(sub) - expectedCount := limit - if expectedTotal < limit { - expectedCount = expectedTotal - } - - qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(false), setup.WithTimeRange(f, t)) - assert.NoError(s.T(), err, "should be no errors") - assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) + for i := 0; i < attempts; i++ { + offsetSecond := rand.Int63n(maxOffset) + f := from.Add(time.Second * time.Duration(offsetSecond)) + t := f.Add(12 * time.Hour) + + sub := fetchFromDocsTimes(f, t, docsTimes) + expectedTotal := len(sub) + expectedCount := limit + if expectedTotal < limit { + expectedCount = expectedTotal } - s.T().Log("With Total False:", time.Since(now).Milliseconds()) - - now = time.Now() - for i := 0; i < attempts; i++ { - offsetSecond := rand.Int63n(maxOffset) - f := from.Add(time.Second * time.Duration(offsetSecond)) - t := f.Add(12 * time.Hour) - - sub := fetchFromDocsTimes(f, t, docsTimes) - expectedTotal := len(sub) - expectedCount := limit - if expectedTotal < limit { - expectedCount = expectedTotal - } - - qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(true), setup.WithTimeRange(f, t)) - assert.NoError(s.T(), err, "should be no errors") - assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) - assert.Equal(s.T(), expectedTotal, int(qpr.Total), "wrong doc count in range [%s, %s]", f, t) + + qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(false), setup.WithTimeRange(f, t)) + assert.NoError(s.T(), err, "should be no errors") + assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) + } + s.T().Log("With Total False:", time.Since(now).Milliseconds()) + + now = time.Now() + for i := 0; i < attempts; i++ { + offsetSecond := rand.Int63n(maxOffset) + f := from.Add(time.Second * time.Duration(offsetSecond)) + t := f.Add(12 * time.Hour) + + sub := fetchFromDocsTimes(f, t, docsTimes) + expectedTotal := len(sub) + expectedCount := limit + if expectedTotal < limit { + expectedCount = expectedTotal } - s.T().Log("With Total True:", time.Since(now).Milliseconds())*/ + + qpr, _, _, err := env.Search("service:*", limit, setup.NoFetch(), setup.WithTotal(true), setup.WithTimeRange(f, t)) + assert.NoError(s.T(), err, "should be no errors") + assert.Equal(s.T(), expectedCount, len(qpr.IDs), "wrong doc count in range [%s, %s]", f, t) + assert.Equal(s.T(), expectedTotal, int(qpr.Total), "wrong doc count in range [%s, %s]", f, t) + } + s.T().Log("With Total True:", time.Since(now).Milliseconds()) now = time.Now() for i := 0; i < attempts; i++ { From 17d181d918d40256279203eddb381707ced7a4da Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 31 Oct 2025 21:40:39 +0400 Subject: [PATCH 03/35] seq_id string in micros, but support legacy millis --- seq/seq.go | 9 +++++++-- seq/seq_test.go | 13 ++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/seq/seq.go b/seq/seq.go index 7279db61..7dc31636 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -42,7 +42,7 @@ func (d ID) Bytes() []byte { n := hex.Encode(hexBuf, numBuf) final := append(make([]byte, 0), hexBuf[:n]...) - final = append(final, '-') + final = append(final, '_') binary.LittleEndian.PutUint64(numBuf, uint64(d.RID)) n = hex.Encode(hexBuf, numBuf) @@ -83,7 +83,12 @@ func FromString(x string) (ID, error) { return id, err } - id.MID = MID(binary.LittleEndian.Uint64(mid)) + // legacy format, MID in millis + if x[16] == '-' { + id.MID = MillisToMID(binary.LittleEndian.Uint64(mid)) + } else { + id.MID = MID(binary.LittleEndian.Uint64(mid)) + } id.RID = RID(binary.LittleEndian.Uint64(rid)) return id, nil diff --git a/seq/seq_test.go b/seq/seq_test.go index 1513bd47..da9396dd 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -6,7 +6,18 @@ import ( "github.com/stretchr/testify/assert" ) +func TestLegacyMIDFromString(t *testing.T) { + id, err := FromString("abaf05877b010000-2402dc02d60615cc") + + assert.NoError(t, err) + // converted legacy (millis) to micros + assert.Equal(t, MID(1630057901995000), id.MID) +} + func TestFromString(t *testing.T) { - _, err := FromString("abaf05877b010000-2402dc02d60615cc") + id, err := FromString("abaf05877b010000_2402dc02d60615cc") + assert.NoError(t, err) + // no convertion, used as micros + assert.Equal(t, MID(1630057901995), id.MID) } From eece147b019f956982efed5716284b0d5cd82f7b Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 31 Oct 2025 21:41:23 +0400 Subject: [PATCH 04/35] dirty if-convert MID in proxy search --- proxy/search/ingestor.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 205b843a..9e74821a 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -611,6 +611,13 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, return nil, 0, err } + for _, id := range data.IdSources { + mid := id.Id.Mid + if mid < 2000000000000 { + id.Id.Mid = id.Id.Mid * 1000 + } + } + return data, si.sourceByClient[host], nil } From 50b3777c91866f364814d0a94af9dbd26b91f68d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 31 Oct 2025 22:34:15 +0400 Subject: [PATCH 05/35] dirty if in streaming_doc.go --- proxy/search/streaming_doc.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/proxy/search/streaming_doc.go b/proxy/search/streaming_doc.go index eb92a8bd..6a262968 100644 --- a/proxy/search/streaming_doc.go +++ b/proxy/search/streaming_doc.go @@ -29,9 +29,14 @@ func NewStreamingDoc(idSource seq.IDSource, data []byte) StreamingDoc { func unpackDoc(data []byte, source uint64) StreamingDoc { block := storage.DocBlock(data) + mid := block.GetExt1() + // milli to micro + if mid < 1000000000000000 { + mid = mid * 1000 + } doc := StreamingDoc{ ID: seq.ID{ - MID: seq.MID(block.GetExt1()), + MID: seq.MID(mid), RID: seq.RID(block.GetExt2()), }, Source: source, From 95eea7e709382d049b5776e20f140dcb7c8a4699 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Sat, 1 Nov 2025 19:08:25 +0400 Subject: [PATCH 06/35] dbg qpr out --- storeapi/grpc_search.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/storeapi/grpc_search.go b/storeapi/grpc_search.go index c17b12a4..450df900 100644 --- a/storeapi/grpc_search.go +++ b/storeapi/grpc_search.go @@ -213,7 +213,13 @@ func (g *GrpcV1) doSearch( ) } - return buildSearchResponse(qpr), nil + // TODO remove dbg logg + resp := buildSearchResponse(qpr) + if len(qpr.IDs) > 0 { + logger.Info(fmt.Sprintf("responding with IDS: %d", qpr.IDs[0].ID.MID)) + } + + return resp, nil } func (g *GrpcV1) parseQuery(query string) (*parser.ASTNode, error) { From f70c8fe8a0455b9ea3d62ba131c6cdacceddff49 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Sat, 1 Nov 2025 19:11:10 +0400 Subject: [PATCH 07/35] dbg print --- storeapi/grpc_search.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/storeapi/grpc_search.go b/storeapi/grpc_search.go index 450df900..56fa08ef 100644 --- a/storeapi/grpc_search.go +++ b/storeapi/grpc_search.go @@ -216,7 +216,13 @@ func (g *GrpcV1) doSearch( // TODO remove dbg logg resp := buildSearchResponse(qpr) if len(qpr.IDs) > 0 { - logger.Info(fmt.Sprintf("responding with IDS: %d", qpr.IDs[0].ID.MID)) + logger.Info(fmt.Sprintf("responding with IDS: %d %d", qpr.IDs[0].ID.MID, qpr.IDs[0].Source)) + } + if len(qpr.Histogram) > 0 { + for k, v := range qpr.Histogram { + logger.Info(fmt.Sprintf("responding with hist: %d -> %d", k, v)) + break + } } return resp, nil From 52a3cb9e16e0e1d170e8a0069a5dbbae7a116b5d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Sat, 1 Nov 2025 21:54:09 +0400 Subject: [PATCH 08/35] remove dbg print --- storeapi/grpc_search.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/storeapi/grpc_search.go b/storeapi/grpc_search.go index 56fa08ef..c17b12a4 100644 --- a/storeapi/grpc_search.go +++ b/storeapi/grpc_search.go @@ -213,19 +213,7 @@ func (g *GrpcV1) doSearch( ) } - // TODO remove dbg logg - resp := buildSearchResponse(qpr) - if len(qpr.IDs) > 0 { - logger.Info(fmt.Sprintf("responding with IDS: %d %d", qpr.IDs[0].ID.MID, qpr.IDs[0].Source)) - } - if len(qpr.Histogram) > 0 { - for k, v := range qpr.Histogram { - logger.Info(fmt.Sprintf("responding with hist: %d -> %d", k, v)) - break - } - } - - return resp, nil + return buildSearchResponse(qpr), nil } func (g *GrpcV1) parseQuery(query string) (*parser.ASTNode, error) { From 3060b12495226bb874cabaf86acfd403372ddd65 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 09:43:39 +0400 Subject: [PATCH 09/35] dirty if for MID conversion for hist --- proxy/search/ingestor.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 9e74821a..96f8c91d 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -618,6 +618,24 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, } } + // Convert histogram MIDs from milliseconds to microseconds if needed + if len(data.Histogram) > 0 { + needsConversion := false + for k := range data.Histogram { + if k < 2000000000000 { + needsConversion = true + break + } + } + if needsConversion { + newHist := make(map[uint64]uint64, len(data.Histogram)) + for k, v := range data.Histogram { + newHist[k*1000] = v + } + data.Histogram = newHist + } + } + return data, si.sourceByClient[host], nil } From 0ba6c6f2449bbea5bcfa47d045b9192810dc4c53 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 09:44:25 +0400 Subject: [PATCH 10/35] do not use cached info from frac cache if binary version is different --- config/frac_version.go | 2 ++ frac/common/info.go | 2 +- frac/sealed/block_info.go | 7 +++++++ fracmanager/frac_info_cache.go | 11 +++++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/config/frac_version.go b/config/frac_version.go index a528c05d..f672bdb2 100644 --- a/config/frac_version.go +++ b/config/frac_version.go @@ -10,3 +10,5 @@ const ( // BinaryDataV2 - MIDs stored in microseconds BinaryDataV2 ) + +const CurrentFracVersion = BinaryDataV2 diff --git a/frac/common/info.go b/frac/common/info.go index bd2a4712..81bd34eb 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -42,7 +42,7 @@ type Info struct { func NewInfo(filename string, docsOnDisk, metaOnDisk uint64) *Info { return &Info{ Ver: buildinfo.Version, - BinaryDataVer: config.BinaryDataV2, + BinaryDataVer: config.CurrentFracVersion, Path: filename, From: math.MaxUint64, To: 0, diff --git a/frac/sealed/block_info.go b/frac/sealed/block_info.go index 8436f91e..246f7e57 100644 --- a/frac/sealed/block_info.go +++ b/frac/sealed/block_info.go @@ -6,6 +6,7 @@ import ( "go.uber.org/zap" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/logger" ) @@ -39,5 +40,11 @@ func (b *BlockInfo) Unpack(data []byte) error { } b.Info.MetaOnDisk = 0 // todo: make this correction on sealing and remove this next time + // legacy format - MID in milliseconds + if b.Info.BinaryDataVer < config.BinaryDataV2 { + b.Info.From = b.Info.From * 1000 + b.Info.To = b.Info.To * 1000 + } + return nil } diff --git a/fracmanager/frac_info_cache.go b/fracmanager/frac_info_cache.go index 621037c0..ed9c6c3b 100644 --- a/fracmanager/frac_info_cache.go +++ b/fracmanager/frac_info_cache.go @@ -10,6 +10,7 @@ import ( "go.uber.org/zap" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/logger" ) @@ -67,6 +68,16 @@ func (fc *fracInfoCache) LoadFromDisk(fileName string) { return } + versionMismatchFracs := make([]string, 0) + for frac, info := range fc.cache { + if info.BinaryDataVer != config.CurrentFracVersion { + versionMismatchFracs = append(versionMismatchFracs, frac) + } + } + for _, key := range versionMismatchFracs { + delete(fc.cache, key) + } + logger.Info("frac-cache loaded from disk", zap.String("filename", fileName), zap.Int("cache_entries", len(fc.cache)), From 5ec9ec14fb3987c114b269c31e0b569202e695af Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 12:25:47 +0400 Subject: [PATCH 11/35] better parsing of seq.ID --- seq/seq.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/seq/seq.go b/seq/seq.go index 7dc31636..4e3df83e 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -83,11 +83,14 @@ func FromString(x string) (ID, error) { return id, err } - // legacy format, MID in millis - if x[16] == '-' { + delimiter := x[16] + if delimiter == '-' { + // legacy format, MID in millis id.MID = MillisToMID(binary.LittleEndian.Uint64(mid)) - } else { + } else if delimiter == '_' { id.MID = MID(binary.LittleEndian.Uint64(mid)) + } else { + return id, fmt.Errorf("unknown delimiter %c", delimiter) } id.RID = RID(binary.LittleEndian.Uint64(rid)) From d579050330448449d5f83391902fdb019c4773fc Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 12:40:55 +0400 Subject: [PATCH 12/35] fix millis to micros conversion overflow --- seq/seq.go | 8 +++++++- seq/seq_test.go | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/seq/seq.go b/seq/seq.go index 4e3df83e..cb3ca8da 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "encoding/hex" "fmt" + "math" "time" "github.com/ozontech/seq-db/util" @@ -105,7 +106,12 @@ func SimpleID(i int) ID { } func MillisToMID(millis uint64) MID { - return MID(millis * 1000) + + if millis < math.MaxUint64/1000 { + return MID(millis * 1000) + } else { + return MID(millis) + } } func TimeToMID(t time.Time) MID { diff --git a/seq/seq_test.go b/seq/seq_test.go index da9396dd..7d75090a 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -1,6 +1,7 @@ package seq import ( + "math" "testing" "github.com/stretchr/testify/assert" @@ -21,3 +22,11 @@ func TestFromString(t *testing.T) { // no convertion, used as micros assert.Equal(t, MID(1630057901995), id.MID) } + +func TestMillisToMID(t *testing.T) { + assert.Equal(t, MID(1761812502000000), MillisToMID(1761812502000)) + + // we can't convert millis this high to nanos (overflow), so we expect that user just want "infinite future" + assert.Equal(t, MID(math.MaxUint64), MillisToMID(math.MaxUint64)) + assert.Equal(t, MID(math.MaxUint64/1000), MillisToMID(math.MaxUint64/1000)) +} From 82d67650c07507f4ff88db8ba6119a967b991399 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 15:13:23 +0400 Subject: [PATCH 13/35] grpc header for reporting MID precision --- consts/consts.go | 3 +++ network/grpcutil/interceptors.go | 17 +++++++++++++++++ proxy/search/ingestor.go | 29 +++++++++++++++-------------- storeapi/grpc_server.go | 1 + 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index 07ee83ca..280db993 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -69,6 +69,9 @@ const ( // tracing JaegerDebugKey = "jaeger-debug-id" DebugHeader = "x-o3-sample-trace" + + // MIDPrecisionHeader reports store MID precision + MIDPrecisionHeader = "x-seq-time-precision" ) var ( diff --git a/network/grpcutil/interceptors.go b/network/grpcutil/interceptors.go index 2b9a2998..9133a144 100644 --- a/network/grpcutil/interceptors.go +++ b/network/grpcutil/interceptors.go @@ -13,6 +13,7 @@ import ( "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/metric" "github.com/ozontech/seq-db/tracing" @@ -123,3 +124,19 @@ func PassMetadataUnaryClientInterceptor() grpc.UnaryClientInterceptor { return invoker(ctx, method, req, reply, cc, opts...) } } + +// MIDPrecisionHeaderUnaryServerInterceptor sets the MID precision header for all unary responses. +func MIDPrecisionHeaderUnaryServerInterceptor(precision string) grpc.UnaryServerInterceptor { + return func( + ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, + h grpc.UnaryHandler, + ) (interface{}, error) { + md := metadata.New(map[string]string{ + consts.MIDPrecisionHeader: precision, + }) + if err := grpc.SetHeader(ctx, md); err != nil { + logger.Error("failed to set MID precision header", zap.Error(err)) + } + return h(ctx, req) + } +} diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 96f8c91d..2bc599eb 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -11,6 +11,7 @@ import ( "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/encoding/gzip" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" "github.com/ozontech/seq-db/consts" @@ -602,32 +603,32 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, ) } + var md metadata.MD data, err := client.Search(ctx, req, grpc.MaxCallRecvMsgSize(256*int(units.MiB)), grpc.MaxCallSendMsgSize(256*int(units.MiB)), grpc.UseCompressor(gzip.Name), + grpc.Header(&md), ) if err != nil { return nil, 0, err } - for _, id := range data.IdSources { - mid := id.Id.Mid - if mid < 2000000000000 { - id.Id.Mid = id.Id.Mid * 1000 - } + // Check the store's MID precision from response header + // If header indicates milliseconds, convert to microseconds + storePrecision := "ms" + if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { + storePrecision = precisionValues[0] } - // Convert histogram MIDs from milliseconds to microseconds if needed - if len(data.Histogram) > 0 { - needsConversion := false - for k := range data.Histogram { - if k < 2000000000000 { - needsConversion = true - break - } + if storePrecision == "ms" { + // Store operates in milliseconds, convert to microseconds + for _, id := range data.IdSources { + id.Id.Mid = id.Id.Mid * 1000 } - if needsConversion { + + // Convert histogram MIDs from milliseconds to microseconds + if len(data.Histogram) > 0 { newHist := make(map[uint64]uint64, len(data.Histogram)) for k, v := range data.Histogram { newHist[k*1000] = v diff --git a/storeapi/grpc_server.go b/storeapi/grpc_server.go index 4b1b6062..2a477e71 100644 --- a/storeapi/grpc_server.go +++ b/storeapi/grpc_server.go @@ -39,6 +39,7 @@ func newGRPCServer(cfg APIConfig, fracManager *fracmanager.FracManager, mappingP func initServer() *grpc.Server { interceptors := []grpc.UnaryServerInterceptor{ + grpcutil.MIDPrecisionHeaderUnaryServerInterceptor("us"), grpcutil.ReturnToVTPoolUnaryServerInterceptor(), } opts := []grpc.ServerOption{ From 38b5b162fc928b45b970c8946e761e338887d575 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 15:16:36 +0400 Subject: [PATCH 14/35] rename header --- consts/consts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consts/consts.go b/consts/consts.go index 280db993..01673dcb 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -71,7 +71,7 @@ const ( DebugHeader = "x-o3-sample-trace" // MIDPrecisionHeader reports store MID precision - MIDPrecisionHeader = "x-seq-time-precision" + MIDPrecisionHeader = "x-seq-mid-precision" ) var ( From ef91c7d6bb049b2afcf226dd07670b8a79508cc8 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:36:22 +0400 Subject: [PATCH 15/35] report MID precision on streaming RPC call --- network/grpcutil/interceptors.go | 16 ++++++++++++++++ proxy/search/docs_iterator.go | 22 ++++++++++++---------- proxy/search/ingestor.go | 13 ++++++++++++- proxy/search/streaming_doc.go | 8 +++++--- storeapi/grpc_server.go | 4 ++++ 5 files changed, 49 insertions(+), 14 deletions(-) diff --git a/network/grpcutil/interceptors.go b/network/grpcutil/interceptors.go index 9133a144..0ea7f40e 100644 --- a/network/grpcutil/interceptors.go +++ b/network/grpcutil/interceptors.go @@ -140,3 +140,19 @@ func MIDPrecisionHeaderUnaryServerInterceptor(precision string) grpc.UnaryServer return h(ctx, req) } } + +// MIDPrecisionHeaderStreamServerInterceptor sets the MID precision header for all streaming responses. +func MIDPrecisionHeaderStreamServerInterceptor(precision string) grpc.StreamServerInterceptor { + return func( + srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, + h grpc.StreamHandler, + ) error { + md := metadata.New(map[string]string{ + consts.MIDPrecisionHeader: precision, + }) + if err := ss.SendHeader(md); err != nil { + logger.Error("failed to set MID precision header in stream", zap.Error(err)) + } + return h(srv, ss) + } +} diff --git a/proxy/search/docs_iterator.go b/proxy/search/docs_iterator.go index 3ce30a54..dbd35a8d 100644 --- a/proxy/search/docs_iterator.go +++ b/proxy/search/docs_iterator.go @@ -76,20 +76,22 @@ func (u *uniqueIDIterator) Next() (StreamingDoc, error) { } type grpcStreamIterator struct { - source uint64 - host string - stream storeapi.StoreApi_FetchClient - totalIDs int + source uint64 + host string + stream storeapi.StoreApi_FetchClient + totalIDs int + midPrecision string fetched int } -func newGrpcStreamIterator(stream storeapi.StoreApi_FetchClient, host string, source uint64, totalIDs int) *grpcStreamIterator { +func newGrpcStreamIterator(stream storeapi.StoreApi_FetchClient, host string, source uint64, totalIDs int, midPrecision string) *grpcStreamIterator { return &grpcStreamIterator{ - stream: stream, - source: source, - host: host, - totalIDs: totalIDs, + stream: stream, + source: source, + host: host, + totalIDs: totalIDs, + midPrecision: midPrecision, } } @@ -108,7 +110,7 @@ func (s *grpcStreamIterator) Next() (StreamingDoc, error) { return StreamingDoc{Source: s.source}, err } - doc := unpackDoc(data.Data, s.source) + doc := unpackDoc(data.Data, s.source, s.midPrecision) if !doc.Empty() { s.fetched++ } else { diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 2bc599eb..7e821a90 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -229,7 +229,18 @@ func (si *Ingestor) singleDocsStream(ctx context.Context, explain bool, source u return nil, fmt.Errorf("can't fetch docs: %s", err.Error()) } - var it DocsIterator = newGrpcStreamIterator(stream, host, source, len(ids)) + md, err := stream.Header() + midPrecision := "ms" + if md != nil && err == nil { + if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { + midPrecision = precisionValues[0] + } + } + if err != nil { + return nil, fmt.Errorf("can't fetch metadata: %s", err.Error()) + } + + var it DocsIterator = newGrpcStreamIterator(stream, host, source, len(ids), midPrecision) if explain { it = newExplainWrapperIterator(it, ids, host, startTime) } diff --git a/proxy/search/streaming_doc.go b/proxy/search/streaming_doc.go index 6a262968..7726bd2b 100644 --- a/proxy/search/streaming_doc.go +++ b/proxy/search/streaming_doc.go @@ -27,13 +27,15 @@ func NewStreamingDoc(idSource seq.IDSource, data []byte) StreamingDoc { } } -func unpackDoc(data []byte, source uint64) StreamingDoc { +func unpackDoc(data []byte, source uint64, midPrecision string) StreamingDoc { block := storage.DocBlock(data) mid := block.GetExt1() - // milli to micro - if mid < 1000000000000000 { + + // Convert from milliseconds to microseconds if store operates in milliseconds + if midPrecision == "ms" { mid = mid * 1000 } + doc := StreamingDoc{ ID: seq.ID{ MID: seq.MID(mid), diff --git a/storeapi/grpc_server.go b/storeapi/grpc_server.go index 2a477e71..308f61e2 100644 --- a/storeapi/grpc_server.go +++ b/storeapi/grpc_server.go @@ -42,8 +42,12 @@ func initServer() *grpc.Server { grpcutil.MIDPrecisionHeaderUnaryServerInterceptor("us"), grpcutil.ReturnToVTPoolUnaryServerInterceptor(), } + streamInterceptors := []grpc.StreamServerInterceptor{ + grpcutil.MIDPrecisionHeaderStreamServerInterceptor("us"), + } opts := []grpc.ServerOption{ grpc.ChainUnaryInterceptor(interceptors...), + grpc.ChainStreamInterceptor(streamInterceptors...), grpc.MaxRecvMsgSize(int(units.MiB) * 256), grpc.MaxSendMsgSize(int(units.MiB) * 256), grpc.StatsHandler(&tracing.ServerHandler{}), From cbfcfa8ac383dd4533099fd55678e363d652accc Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 5 Nov 2025 18:16:41 +0400 Subject: [PATCH 16/35] MID now is in nanoseconds --- config/frac_version.go | 2 +- frac/meta_data_collector.go | 2 +- frac/processor/search.go | 3 ++- frac/sealed/block_info.go | 5 +++-- frac/sealed/seqids/blocks.go | 3 ++- proxy/bulk/ingestor_test.go | 2 +- proxy/search/ingestor.go | 17 ++++++++--------- proxy/search/streaming_doc.go | 4 ++-- seq/seq.go | 18 +++++++++--------- seq/seq_test.go | 6 +++--- storeapi/grpc_server.go | 4 ++-- 11 files changed, 34 insertions(+), 32 deletions(-) diff --git a/config/frac_version.go b/config/frac_version.go index f672bdb2..d3ff1b14 100644 --- a/config/frac_version.go +++ b/config/frac_version.go @@ -7,7 +7,7 @@ const ( BinaryDataV0 BinaryDataVersion = iota // BinaryDataV1 - support RIDs encoded without varint BinaryDataV1 - // BinaryDataV2 - MIDs stored in microseconds + // BinaryDataV2 - MIDs stored in nanoseconds BinaryDataV2 ) diff --git a/frac/meta_data_collector.go b/frac/meta_data_collector.go index e5de7d04..9ce618ee 100644 --- a/frac/meta_data_collector.go +++ b/frac/meta_data_collector.go @@ -84,7 +84,7 @@ func (m *MetaData) unmarshalVersion1(b []byte) error { func (m *MetaData) unmarshalVersion2(b []byte) error { // Decode seq.ID. - // Version 2 stores MID in microseconds + // Version 2 stores MID in nanoseconds m.ID.MID = seq.MID(binary.LittleEndian.Uint64(b)) b = b[8:] return m.unmarshalVersion1And2(b) diff --git a/frac/processor/search.go b/frac/processor/search.go index cfbddd71..a346e89a 100644 --- a/frac/processor/search.go +++ b/frac/processor/search.go @@ -206,7 +206,8 @@ func iterateEvalTree( zap.Time("mid", mid.Time())) continue } - bucketIndex := uint64(mid/1000)/params.HistInterval - histBase + // TODO /1000000 replace + bucketIndex := uint64(mid/1000000)/params.HistInterval - histBase histogram[bucketIndex]++ } diff --git a/frac/sealed/block_info.go b/frac/sealed/block_info.go index 246f7e57..21bcd182 100644 --- a/frac/sealed/block_info.go +++ b/frac/sealed/block_info.go @@ -9,6 +9,7 @@ import ( "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/logger" + "github.com/ozontech/seq-db/seq" ) const seqDBMagic = "SEQM" @@ -42,8 +43,8 @@ func (b *BlockInfo) Unpack(data []byte) error { // legacy format - MID in milliseconds if b.Info.BinaryDataVer < config.BinaryDataV2 { - b.Info.From = b.Info.From * 1000 - b.Info.To = b.Info.To * 1000 + b.Info.From = seq.MillisToMID(uint64(b.Info.From)) + b.Info.To = seq.MillisToMID(uint64(b.Info.To)) } return nil diff --git a/frac/sealed/seqids/blocks.go b/frac/sealed/seqids/blocks.go index 7f5d3f1d..2c4555ef 100644 --- a/frac/sealed/seqids/blocks.go +++ b/frac/sealed/seqids/blocks.go @@ -31,7 +31,8 @@ func (b *BlockMIDs) Unpack(data []byte) error { // Legacy format - convert millis to micros if b.fracVersion < config.BinaryDataV2 { for i := range b.Values { - b.Values[i] = b.Values[i] * 1000 + // TODO replace this by calling function + b.Values[i] = b.Values[i] * 1000000 } } diff --git a/proxy/bulk/ingestor_test.go b/proxy/bulk/ingestor_test.go index 703a7c0c..ff4dd4e8 100644 --- a/proxy/bulk/ingestor_test.go +++ b/proxy/bulk/ingestor_test.go @@ -87,7 +87,7 @@ func TestProcessDocuments(t *testing.T) { now := time.Now().UTC() - id := seq.SimpleID(int(now.UnixNano() / 1000)) + id := seq.SimpleID(int(now.UnixNano() / 1000000)) type TestPayload struct { InDocs []string diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 7e821a90..c73dbe2e 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -626,23 +626,22 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, } // Check the store's MID precision from response header - // If header indicates milliseconds, convert to microseconds - storePrecision := "ms" + // If header indicates milliseconds, convert to nanoseconds + midPrecision := "ms" if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { - storePrecision = precisionValues[0] + midPrecision = precisionValues[0] } - if storePrecision == "ms" { - // Store operates in milliseconds, convert to microseconds + // Convert legacy store response to nanoseconds MID + if midPrecision == "ms" { for _, id := range data.IdSources { - id.Id.Mid = id.Id.Mid * 1000 + id.Id.Mid = uint64(seq.MillisToMID(id.Id.Mid)) } - // Convert histogram MIDs from milliseconds to microseconds if len(data.Histogram) > 0 { newHist := make(map[uint64]uint64, len(data.Histogram)) - for k, v := range data.Histogram { - newHist[k*1000] = v + for mid, v := range data.Histogram { + newHist[uint64(seq.MillisToMID(mid))] = v } data.Histogram = newHist } diff --git a/proxy/search/streaming_doc.go b/proxy/search/streaming_doc.go index 7726bd2b..8b925dfb 100644 --- a/proxy/search/streaming_doc.go +++ b/proxy/search/streaming_doc.go @@ -31,9 +31,9 @@ func unpackDoc(data []byte, source uint64, midPrecision string) StreamingDoc { block := storage.DocBlock(data) mid := block.GetExt1() - // Convert from milliseconds to microseconds if store operates in milliseconds + // Convert from milliseconds to nanoseconds if store (legacy) operates in milliseconds if midPrecision == "ms" { - mid = mid * 1000 + mid = mid * 1000000 } doc := StreamingDoc{ diff --git a/seq/seq.go b/seq/seq.go index cb3ca8da..871f5739 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -15,12 +15,13 @@ type ID struct { RID RID } -type MID uint64 // microseconds part of ID +type MID uint64 // nanoseconds part of ID type RID uint64 // random part of ID type LID uint32 // local id for a fraction func (m MID) Time() time.Time { - return time.Unix(0, int64(m)*int64(time.Microsecond)) + // TODO check for large nanos and avoid cast + return time.Unix(0, int64(m)) } func (d ID) String() string { @@ -106,20 +107,19 @@ func SimpleID(i int) ID { } func MillisToMID(millis uint64) MID { - - if millis < math.MaxUint64/1000 { - return MID(millis * 1000) + if millis < math.MaxUint64/1000000 { + return MID(millis * 1000000) } else { return MID(millis) } } func TimeToMID(t time.Time) MID { - return MID(t.UnixNano() / int64(time.Microsecond)) + return MID(t.UnixNano()) } func DurationToMID(d time.Duration) MID { - return MID(d / time.Microsecond) + return MID(d) } func MIDToTime(t MID) time.Time { @@ -127,11 +127,11 @@ func MIDToTime(t MID) time.Time { } func MIDToMillis(t MID) int64 { - return int64(uint64(t) / uint64(1000)) + return int64(uint64(t) / uint64(1000000)) } func MIDToDuration(t MID) time.Duration { - return time.Duration(t) * time.Microsecond + return time.Duration(t) } func NewID(t time.Time, randomness uint64) ID { diff --git a/seq/seq_test.go b/seq/seq_test.go index 7d75090a..3dfe59de 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -11,8 +11,8 @@ func TestLegacyMIDFromString(t *testing.T) { id, err := FromString("abaf05877b010000-2402dc02d60615cc") assert.NoError(t, err) - // converted legacy (millis) to micros - assert.Equal(t, MID(1630057901995000), id.MID) + // converted legacy (milliseconds MID) to nanoseconds + assert.Equal(t, MID(1630057901995000000), id.MID) } func TestFromString(t *testing.T) { @@ -24,7 +24,7 @@ func TestFromString(t *testing.T) { } func TestMillisToMID(t *testing.T) { - assert.Equal(t, MID(1761812502000000), MillisToMID(1761812502000)) + assert.Equal(t, MID(1761812502000000000), MillisToMID(1761812502000)) // we can't convert millis this high to nanos (overflow), so we expect that user just want "infinite future" assert.Equal(t, MID(math.MaxUint64), MillisToMID(math.MaxUint64)) diff --git a/storeapi/grpc_server.go b/storeapi/grpc_server.go index 308f61e2..0c9a7a7d 100644 --- a/storeapi/grpc_server.go +++ b/storeapi/grpc_server.go @@ -39,11 +39,11 @@ func newGRPCServer(cfg APIConfig, fracManager *fracmanager.FracManager, mappingP func initServer() *grpc.Server { interceptors := []grpc.UnaryServerInterceptor{ - grpcutil.MIDPrecisionHeaderUnaryServerInterceptor("us"), + grpcutil.MIDPrecisionHeaderUnaryServerInterceptor("ns"), grpcutil.ReturnToVTPoolUnaryServerInterceptor(), } streamInterceptors := []grpc.StreamServerInterceptor{ - grpcutil.MIDPrecisionHeaderStreamServerInterceptor("us"), + grpcutil.MIDPrecisionHeaderStreamServerInterceptor("ns"), } opts := []grpc.ServerOption{ grpc.ChainUnaryInterceptor(interceptors...), From 7b17f7b7b0b89254dde11d2d2d2a6b2b7e79de22 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 6 Nov 2025 16:29:54 +0400 Subject: [PATCH 17/35] v1 QPR compatibility --- asyncsearcher/encoding.go | 60 ++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/asyncsearcher/encoding.go b/asyncsearcher/encoding.go index 79354eca..a1f4d6fb 100644 --- a/asyncsearcher/encoding.go +++ b/asyncsearcher/encoding.go @@ -14,7 +14,12 @@ import ( var be = binary.BigEndian -const qprBinVersion = uint8(1) +const ( + qprBinVersion1 = uint8(1) // MIDs stored in milliseconds + qprBinVersion2 = uint8(2) // MIDs stored in nanoseconds +) + +const qprBinVersion = qprBinVersion2 // Phase 2: write version 2 (nanoseconds) func marshalQPR(q *seq.QPR, dst []byte) []byte { dst = append(dst, qprBinVersion) @@ -40,8 +45,8 @@ func unmarshalQPR(dst *seq.QPR, src []byte, idsLimit int) (_ []byte, err error) version := src[0] src = src[1:] - if version != qprBinVersion { - return nil, fmt.Errorf("invalid QPR version %d; want %d", version, qprBinVersion) + if version != qprBinVersion1 && version != qprBinVersion2 { + return nil, fmt.Errorf("invalid QPR version %d; want %d or %d", version, qprBinVersion1, qprBinVersion2) } idsBlocksLen := int(be.Uint64(src)) @@ -54,7 +59,7 @@ func unmarshalQPR(dst *seq.QPR, src []byte, idsLimit int) (_ []byte, err error) if len(dst.IDs) >= idsLimit { break } - idsBlocks, err = unmarshalIDsBlock(dst, idsBlocks) + idsBlocks, err = unmarshalIDsBlock(dst, idsBlocks, version) if err != nil { return nil, fmt.Errorf("can't unmarshal ids block at pos %d: %s", i, err) } @@ -64,12 +69,12 @@ func unmarshalQPR(dst *seq.QPR, src []byte, idsLimit int) (_ []byte, err error) } src = src[idsBlocksLen:] - dst.Histogram, src, err = unmarshalHistogram(src) + dst.Histogram, src, err = unmarshalHistogram(src, version) if err != nil { return nil, fmt.Errorf("can't unmarshal histogram: %s", err) } - dst.Aggs, src, err = unmarshalAggs(dst.Aggs, src) + dst.Aggs, src, err = unmarshalAggs(dst.Aggs, src, version) if err != nil { return nil, fmt.Errorf("can't unmarshal aggs: %s", err) } @@ -169,7 +174,7 @@ func marshalIDsBlock(dst []byte, ids []seq.IDSource) ([]byte, idsCodec) { return dst, idsCodecDeltaZstd } -func unmarshalIDsBlock(dst *seq.QPR, src []byte) (_ []byte, err error) { +func unmarshalIDsBlock(dst *seq.QPR, src []byte, version uint8) (_ []byte, err error) { if len(src) == 0 { return src, fmt.Errorf("empty IDs block") } @@ -192,13 +197,13 @@ func unmarshalIDsBlock(dst *seq.QPR, src []byte) (_ []byte, err error) { if err != nil { return src, fmt.Errorf("can't decompress ids block: %s", err) } - dst.IDs, err = unmarshalIDsDelta(dst.IDs, b.B) + dst.IDs, err = unmarshalIDsDelta(dst.IDs, b.B, version) if err != nil { return src, err } return src, nil case idsCodecDelta: - dst.IDs, err = unmarshalIDsDelta(dst.IDs, block) + dst.IDs, err = unmarshalIDsDelta(dst.IDs, block, version) if err != nil { return src, err } @@ -208,7 +213,7 @@ func unmarshalIDsBlock(dst *seq.QPR, src []byte) (_ []byte, err error) { } } -func unmarshalIDsDelta(dst seq.IDSources, block []byte) (seq.IDSources, error) { +func unmarshalIDsDelta(dst seq.IDSources, block []byte, version uint8) (seq.IDSources, error) { prevMID := int64(0) for len(block) > 0 { v, n := binary.Varint(block) @@ -227,9 +232,16 @@ func unmarshalIDsDelta(dst seq.IDSources, block []byte) (seq.IDSources, error) { hint := string(block[:hintSize]) block = block[hintSize:] + var midValue seq.MID + if version == qprBinVersion1 { + midValue = seq.MillisToMID(uint64(mid)) + } else { + midValue = seq.MID(mid) + } + dst = append(dst, seq.IDSource{ ID: seq.ID{ - MID: seq.MID(mid), + MID: midValue, RID: rid, }, Source: source, @@ -254,7 +266,7 @@ func marshalHistogram(dst []byte, histogram map[seq.MID]uint64) []byte { return dst } -func unmarshalHistogram(src []byte) (map[seq.MID]uint64, []byte, error) { +func unmarshalHistogram(src []byte, version uint8) (map[seq.MID]uint64, []byte, error) { length, n := binary.Uvarint(src) src = src[n:] if n <= 0 { @@ -278,7 +290,14 @@ func unmarshalHistogram(src []byte) (map[seq.MID]uint64, []byte, error) { return dst, src, fmt.Errorf("malformed histogram MID: %d", n) } - dst[seq.MID(mid)] = cnt + var midValue seq.MID + if version == qprBinVersion1 { + midValue = seq.MillisToMID(uint64(mid)) + } else { + midValue = seq.MID(mid) + } + + dst[midValue] = cnt } return dst, src, nil } @@ -349,7 +368,7 @@ func marshalAggs(dst []byte, aggs []seq.AggregatableSamples) []byte { return dst } -func unmarshalAggs(dst []seq.AggregatableSamples, src []byte) (_ []seq.AggregatableSamples, _ []byte, err error) { +func unmarshalAggs(dst []seq.AggregatableSamples, src []byte, version uint8) (_ []seq.AggregatableSamples, _ []byte, err error) { var header aggsBlockHeader src, err = header.Unmarshal(src) if err != nil { @@ -376,7 +395,7 @@ func unmarshalAggs(dst []seq.AggregatableSamples, src []byte) (_ []seq.Aggregata for i := 0; len(block) > 0; i++ { agg := seq.AggregatableSamples{} - block, err = unmarshalAggregatableSamples(&agg, block) + block, err = unmarshalAggregatableSamples(&agg, block, version) if err != nil { return nil, nil, fmt.Errorf("invalid QPRHistogram at pos %d: %v", i, err) } @@ -400,7 +419,7 @@ func marshalAggregatableSamples(s seq.AggregatableSamples, dst []byte) []byte { return dst } -func unmarshalAggregatableSamples(q *seq.AggregatableSamples, src []byte) ([]byte, error) { +func unmarshalAggregatableSamples(q *seq.AggregatableSamples, src []byte, version uint8) ([]byte, error) { if len(src) < 16 { return nil, fmt.Errorf("src too short to unmarshal QPRHistogram, want at least 16 bytes, got %d", len(src)) } @@ -435,8 +454,15 @@ func unmarshalAggregatableSamples(q *seq.AggregatableSamples, src []byte) ([]byt } src = tail + var midValue seq.MID + if version == qprBinVersion1 { + midValue = seq.MillisToMID(uint64(mid)) + } else { + midValue = seq.MID(mid) + } + ab := seq.AggBin{ - MID: seq.MID(mid), + MID: midValue, Token: token, } q.SamplesByBin[ab] = sample From 8e82b050558a720397918ef7d723ad2e1b796bd2 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 6 Nov 2025 16:30:16 +0400 Subject: [PATCH 18/35] support mid precision "ms" --- proxy/search/async.go | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/proxy/search/async.go b/proxy/search/async.go index e078fc17..5501d9e5 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -9,11 +9,14 @@ import ( "github.com/google/uuid" "go.uber.org/zap" + "google.golang.org/grpc" "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/durationpb" "github.com/ozontech/seq-db/asyncsearcher" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/pkg/storeapi" "github.com/ozontech/seq-db/proxy/stores" @@ -154,13 +157,34 @@ func (si *Ingestor) FetchAsyncSearchResult( defer wg.Done() for _, replica := range shard { - storeResp, err := si.clients[replica].FetchAsyncSearchResult(storesCtx, &req) + var md metadata.MD + storeResp, err := si.clients[replica].FetchAsyncSearchResult(storesCtx, &req, grpc.Header(&md)) if err != nil { if status.Code(err) == codes.NotFound { continue } } + midPrecision := "ms" + if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { + midPrecision = precisionValues[0] + } + + if midPrecision == "ms" { + response := storeResp.Response + for _, id := range response.IdSources { + id.Id.Mid = uint64(seq.MillisToMID(id.Id.Mid)) + } + + if len(response.Histogram) > 0 { + newHist := make(map[uint64]uint64, len(response.Histogram)) + for mid, v := range response.Histogram { + newHist[uint64(seq.MillisToMID(mid))] = v + } + response.Histogram = newHist + } + } + respChan <- shardResponse{ replica: replica, data: storeResp, From 56babec41385ed9c4b1a29de91d39e0d1f2f3b85 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 6 Nov 2025 18:16:15 +0400 Subject: [PATCH 19/35] handle compatibility with milliseconds --- asyncsearcher/async_searcher.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/asyncsearcher/async_searcher.go b/asyncsearcher/async_searcher.go index c283608c..9ba6908c 100644 --- a/asyncsearcher/async_searcher.go +++ b/asyncsearcher/async_searcher.go @@ -40,8 +40,13 @@ const ( minRetention = 5 * time.Minute maxRetention = 30 * 24 * time.Hour // 30 days + + infoVersion1 = uint8(1) // MIDs stored in milliseconds + infoVersion2 = uint8(2) // MIDs stored in nanoseconds ) +const infoVersion = infoVersion2 // Phase 2: write version 2 (nanoseconds) + var ( asyncSearchActiveSearches = promauto.NewGauge(prometheus.GaugeOpts{ Namespace: "seq_db_store", @@ -142,6 +147,8 @@ type fracSearchState struct { } type asyncSearchInfo struct { + Version uint8 `json:"version"` + // Finished is true if there are no fracs waiting to be processed. // // An async search request is considered complete only when all fracs are processed, @@ -174,6 +181,7 @@ func newAsyncSearchInfo(r AsyncSearchRequest, list fracmanager.List) asyncSearch } ctx, cancel := context.WithCancel(context.Background()) return asyncSearchInfo{ + Version: infoVersion, // Set version based on current phase Finished: false, Error: "", CanceledAt: time.Time{}, @@ -578,6 +586,15 @@ func loadAsyncRequests(dataDir string) (map[string]asyncSearchInfo, error) { return fmt.Errorf("malformed async search info %q: %s", name, err) } + if info.Version == 0 { + info.Version = infoVersion1 + } + if info.Version == infoVersion1 { + info.Request.Params.From = seq.MillisToMID(uint64(info.Request.Params.From)) + info.Request.Params.To = seq.MillisToMID(uint64(info.Request.Params.To)) + info.Version = infoVersion + } + info.merged.Store(areQPRsMerged[requestID]) info.qprsSize.Store(int64(qprsDuByID[requestID])) info.infoSize.Store(int64(infoDuByID[requestID])) From 30a2157333f403715748da19125b6de0367941f3 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Thu, 6 Nov 2025 18:52:01 +0400 Subject: [PATCH 20/35] omit json tag --- asyncsearcher/async_searcher.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/asyncsearcher/async_searcher.go b/asyncsearcher/async_searcher.go index 9ba6908c..13690ffb 100644 --- a/asyncsearcher/async_searcher.go +++ b/asyncsearcher/async_searcher.go @@ -45,7 +45,7 @@ const ( infoVersion2 = uint8(2) // MIDs stored in nanoseconds ) -const infoVersion = infoVersion2 // Phase 2: write version 2 (nanoseconds) +const infoVersion = infoVersion2 var ( asyncSearchActiveSearches = promauto.NewGauge(prometheus.GaugeOpts{ @@ -147,7 +147,7 @@ type fracSearchState struct { } type asyncSearchInfo struct { - Version uint8 `json:"version"` + Version uint8 // Finished is true if there are no fracs waiting to be processed. // @@ -181,7 +181,7 @@ func newAsyncSearchInfo(r AsyncSearchRequest, list fracmanager.List) asyncSearch } ctx, cancel := context.WithCancel(context.Background()) return asyncSearchInfo{ - Version: infoVersion, // Set version based on current phase + Version: infoVersion, Finished: false, Error: "", CanceledAt: time.Time{}, From 7492d0f97118f38b14d49effbd673660492a8a3b Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 7 Nov 2025 13:24:04 +0400 Subject: [PATCH 21/35] merge main --- indexer/meta_data.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/indexer/meta_data.go b/indexer/meta_data.go index 241f219f..8c139235 100644 --- a/indexer/meta_data.go +++ b/indexer/meta_data.go @@ -35,7 +35,7 @@ func (m *MetaData) MarshalBinaryTo(b []byte) []byte { b = binary.LittleEndian.AppendUint16(b, metadataMagic) // Append current binary version of the metadata. - const version = 1 + const version = 2 b = binary.LittleEndian.AppendUint16(b, version) // Encode seq.ID. @@ -67,15 +67,28 @@ func (m *MetaData) UnmarshalBinary(b []byte) error { switch version { case 1: return m.unmarshalVersion1(b) + case 2: + return m.unmarshalVersion2(b) default: return fmt.Errorf("unimplemented metadata version: %d", version) } } func (m *MetaData) unmarshalVersion1(b []byte) error { + // Decode seq.ID. + m.ID.MID = seq.MillisToMID(binary.LittleEndian.Uint64(b)) + b = b[8:] + return m.unmarshalVersion1And2(b) +} + +func (m *MetaData) unmarshalVersion2(b []byte) error { // Decode seq.ID. m.ID.MID = seq.MID(binary.LittleEndian.Uint64(b)) b = b[8:] + return m.unmarshalVersion1And2(b) +} + +func (m *MetaData) unmarshalVersion1And2(b []byte) error { m.ID.RID = seq.RID(binary.LittleEndian.Uint64(b)) b = b[8:] From 28634dfaeb2ac766dfd76ccace0d084127698f92 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 7 Nov 2025 17:15:22 +0400 Subject: [PATCH 22/35] frac cache keeps from and to in milliseconds in JSON file --- fracmanager/frac_info_cache.go | 68 +++++++++++++++++++++++------ fracmanager/frac_info_cache_test.go | 67 +++++++++++++++++++++++++++- seq/seq.go | 9 ++++ 3 files changed, 129 insertions(+), 15 deletions(-) diff --git a/fracmanager/frac_info_cache.go b/fracmanager/frac_info_cache.go index ed9c6c3b..8b0d1d22 100644 --- a/fracmanager/frac_info_cache.go +++ b/fracmanager/frac_info_cache.go @@ -10,13 +10,57 @@ import ( "go.uber.org/zap" - "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/logger" + "github.com/ozontech/seq-db/seq" ) const defaultFilePermission = 0o660 +// infoJSON is a temporary struct for JSON marshaling/unmarshaling +// that always stores From and To in milliseconds for backward compatibility +type infoJSON struct { + *common.Info + From uint64 `json:"from"` + To uint64 `json:"to"` +} + +// MarshalJSON implements custom JSON marshaling to always store From and To in milliseconds +func (e *infoJSON) MarshalJSON() ([]byte, error) { + // Use type alias to avoid infinite recursion + type Alias common.Info + return json.Marshal(&struct { + From uint64 `json:"from"` + To uint64 `json:"to"` + *Alias + }{ + From: uint64(seq.MIDToMillis(e.Info.From)), + To: seq.MIDToCeilingMillis(e.Info.To), + Alias: (*Alias)(e.Info), + }) +} + +// UnmarshalJSON implements custom JSON unmarshaling to convert From and To from milliseconds to nanoseconds +func (e *infoJSON) UnmarshalJSON(data []byte) error { + e.Info = &common.Info{} + + // Use type alias to avoid infinite recursion + type Alias common.Info + tmp := &struct { + From uint64 `json:"from"` + To uint64 `json:"to"` + *Alias + }{ + Alias: (*Alias)(e.Info), + } + if err := json.Unmarshal(data, &tmp); err != nil { + return err + } + e.Info.From = seq.MillisToMID(tmp.From) + e.Info.To = seq.MillisToMID(tmp.To) + return nil +} + type fracInfoCache struct { dataDir string fullPath string @@ -60,24 +104,17 @@ func (fc *fracInfoCache) LoadFromDisk(fileName string) { return } - err = json.Unmarshal(content, &fc.cache) + cacheJSON := make(map[string]*infoJSON) + err = json.Unmarshal(content, &cacheJSON) if err != nil { logger.Warn("can't unmarshal frac-cache, new frac-cache will be created later on", zap.Error(err), ) return } - - versionMismatchFracs := make([]string, 0) - for frac, info := range fc.cache { - if info.BinaryDataVer != config.CurrentFracVersion { - versionMismatchFracs = append(versionMismatchFracs, frac) - } - } - for _, key := range versionMismatchFracs { - delete(fc.cache, key) + for frac, entry := range cacheJSON { + fc.cache[frac] = entry.Info } - logger.Info("frac-cache loaded from disk", zap.String("filename", fileName), zap.Int("cache_entries", len(fc.cache)), @@ -123,7 +160,12 @@ func (fc *fracInfoCache) getContentWithVersion() (uint64, []byte, error) { return 0, nil, nil // no changes } - content, err := json.Marshal(fc.cache) + cacheJSON := make(map[string]*infoJSON, len(fc.cache)) + for k, v := range fc.cache { + cacheJSON[k] = &infoJSON{Info: v} + } + + content, err := json.Marshal(cacheJSON) if err != nil { return 0, nil, err } diff --git a/fracmanager/frac_info_cache_test.go b/fracmanager/frac_info_cache_test.go index fb0f1563..72a39ce7 100644 --- a/fracmanager/frac_info_cache_test.go +++ b/fracmanager/frac_info_cache_test.go @@ -8,9 +8,10 @@ import ( "sync" "testing" - insaneJSON "github.com/ozontech/insane-json" "github.com/stretchr/testify/assert" + insaneJSON "github.com/ozontech/insane-json" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/frac/common" @@ -35,6 +36,12 @@ func loadFracCache(dataDir string) (map[string]*common.Info, error) { fracCache := make(map[string]*common.Info) err = json.Unmarshal(content, &fracCache) + + // We must convert "from" and "to" to nanosecond seq.MID, since frac cache is now also doing it + for _, info := range fracCache { + info.From = seq.MillisToMID(uint64(info.From)) + info.To = seq.MillisToMID(uint64(info.To)) + } if err != nil { return nil, err } @@ -289,7 +296,7 @@ func TestFracInfoSavedToCache(t *testing.T) { totalSize := uint64(0) cnt := 1 for totalSize < maxSize { - addDummyDoc(t, fm, dp, seq.SimpleID(cnt)) + addDummyDoc(t, fm, dp, seq.SimpleID(cnt*1000000)) cnt++ fracInstance := rotateAndSeal(fm) totalSize += fracInstance.Info().FullSize() @@ -480,3 +487,59 @@ func TestMissingCacheFilesDeleted(t *testing.T) { assert.NoError(t, err) assert.Equal(t, fracCacheFromDisk, []byte("{}")) } + +func TestInfoCacheJSONEntryMarshalUnmarshal(t *testing.T) { + originalInfo := &common.Info{ + Path: "test-frac", + Ver: "1.0", + BinaryDataVer: 2, + DocsTotal: 100, + DocsOnDisk: 1000, + DocsRaw: 2000, + MetaOnDisk: 500, + IndexOnDisk: 1500, + From: seq.MID(1761812502000000000), + To: seq.MID(1761812503000000000), + CreationTime: 1666193044479, + SealingTime: 1666193045000, + } + + // Test marshaling: create temporary struct like getContentWithVersion does + type infoJSON struct { + *common.Info + From uint64 `json:"from"` + To uint64 `json:"to"` + } + entry := &infoJSON{ + Info: originalInfo, + From: uint64(seq.MIDToMillis(originalInfo.From)), + To: uint64(seq.MIDToMillis(originalInfo.To)), + } + jsonBytes, err := json.Marshal(entry) + assert.NoError(t, err) + + var jsonMap map[string]interface{} + err = json.Unmarshal(jsonBytes, &jsonMap) + assert.NoError(t, err) + + // Verify JSON contains milliseconds + assert.Equal(t, float64(1761812502000), jsonMap["from"]) + assert.Equal(t, float64(1761812503000), jsonMap["to"]) + + // Test unmarshaling: like LoadFromDisk does + var unmarshaledEntry infoJSON + err = json.Unmarshal(jsonBytes, &unmarshaledEntry) + assert.NoError(t, err) + assert.NotNil(t, unmarshaledEntry.Info) + + // Convert From and To from milliseconds to nanoseconds (like LoadFromDisk does) + unmarshaledEntry.Info.From = seq.MillisToMID(unmarshaledEntry.From) + unmarshaledEntry.Info.To = seq.MillisToMID(unmarshaledEntry.To) + + // Verify conversion back to nanoseconds + assert.Equal(t, seq.MID(1761812502000000000), unmarshaledEntry.Info.From) + assert.Equal(t, seq.MID(1761812503000000000), unmarshaledEntry.Info.To) + assert.Equal(t, originalInfo.Path, unmarshaledEntry.Info.Path) + assert.Equal(t, originalInfo.Ver, unmarshaledEntry.Info.Ver) + assert.Equal(t, originalInfo.DocsTotal, unmarshaledEntry.Info.DocsTotal) +} diff --git a/seq/seq.go b/seq/seq.go index 871f5739..dab11dfa 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -130,6 +130,15 @@ func MIDToMillis(t MID) int64 { return int64(uint64(t) / uint64(1000000)) } +func MIDToCeilingMillis(t MID) uint64 { + nanos := uint64(t) + if nanos%1000000 != 0 { + return (nanos / 1000000) + 1 + } else { + return nanos / 1000000 + } +} + func MIDToDuration(t MID) time.Duration { return time.Duration(t) } From 9c534335764b43c8f7fc8a6bef9ce7e05016428a Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 10 Nov 2025 11:34:01 +0400 Subject: [PATCH 23/35] overflow fixes, tests --- asyncsearcher/encoding_test.go | 65 +++++++++++++++ cmd/distribution/main.go | 4 +- docs/en/internal/common.md | 2 +- docs/en/internal/search.md | 2 +- docs/ru/internal/common.md | 2 +- docs/ru/internal/search.md | 2 +- frac/active_sealing_source.go | 7 +- frac/common/info.go | 2 +- frac/fraction_test.go | 90 +++++++++++++++++++-- frac/processor/search.go | 4 +- fracmanager/frac_info_cache.go | 2 +- fracmanager/frac_info_cache_test.go | 15 ++-- fracmanager/fracmanager_test.go | 4 +- proxy/bulk/ingestor_test.go | 2 +- proxy/search/async.go | 2 +- proxy/search/search_request.go | 8 +- proxyapi/grpc_fetch_test.go | 2 +- proxyapi/grpc_main_test.go | 4 +- seq/seq.go | 33 +++++--- seq/seq_test.go | 32 +++++++- storeapi/grpc_v1_test.go | 2 +- tests/integration_tests/integration_test.go | 14 ++-- tests/setup/env.go | 2 +- util/util.go | 8 ++ 24 files changed, 251 insertions(+), 59 deletions(-) diff --git a/asyncsearcher/encoding_test.go b/asyncsearcher/encoding_test.go index 8e1d4a4f..479a9381 100644 --- a/asyncsearcher/encoding_test.go +++ b/asyncsearcher/encoding_test.go @@ -92,6 +92,71 @@ func TestQPRMarshalUnmarshal(t *testing.T) { } } +func TestQPRVersion1Compatibility(t *testing.T) { + qpr := seq.QPR{ + IDs: seq.IDSources{ + { + ID: seq.ID{MID: seq.MID(1761812502573), RID: 34734732392}, + }, + }, + Histogram: map[seq.MID]uint64{ + seq.MID(1761812502573): 433, + seq.MID(1761812502463): 743, + }, + Aggs: []seq.AggregatableSamples{ + { + SamplesByBin: map[seq.AggBin]*seq.SamplesContainer{ + {Token: "_not_exists"}: { + Total: 1, + }, + {Token: "seq-db store", MID: seq.MID(1761812502953)}: { + Min: 3, + Max: 5, + Sum: 794, + Total: 1, + NotExists: 7, + Samples: []float64{324}, + }, + {Token: "seq-db store", MID: seq.MID(1761812502456)}: { + Min: 2, + Max: 6, + Sum: 544, + Total: 2, + NotExists: 3, + Samples: []float64{324}, + }, + }, + NotExists: 5412, + }, + }, + } + + rawQPR := marshalQPR(&qpr, nil) + rawQPR[0] = qprBinVersion1 + + var outQPR seq.QPR + tail, err := unmarshalQPR(&outQPR, rawQPR, math.MaxInt) + require.NoError(t, err) + require.Equal(t, 0, len(tail)) + + require.Equal(t, seq.MID(1761812502573000000), outQPR.IDs[0].ID.MID, "mid doesn't match, should convert to nanoseconds") + + require.Len(t, outQPR.Histogram, 2) + require.Equal(t, uint64(433), outQPR.Histogram[seq.MID(1761812502573000000)], "histogram bucket doesn't match") + require.Equal(t, uint64(743), outQPR.Histogram[seq.MID(1761812502463000000)], "histogram bucket doesn't match") + + require.Len(t, outQPR.Aggs, 1, "should have one AggregatableSamples") + agg := outQPR.Aggs[0] + require.Len(t, agg.SamplesByBin, 3, "should have 3 samples in bin") + + notExistsBin := seq.AggBin{Token: "_not_exists"} + require.Equal(t, int64(1), agg.SamplesByBin[notExistsBin].Total, "bucket doesn't match") + bin1 := seq.AggBin{Token: "seq-db store", MID: seq.MID(1761812502953000000)} + require.Equal(t, int64(1), agg.SamplesByBin[bin1].Total, "bucket doesn't match") + bin2 := seq.AggBin{Token: "seq-db store", MID: seq.MID(1761812502456000000)} + require.Equal(t, int64(2), agg.SamplesByBin[bin2].Total, "bucket doesn't match") +} + func getRandomQPR(size int) seq.QPR { curTime := time.Now() getTime := func() time.Time { diff --git a/cmd/distribution/main.go b/cmd/distribution/main.go index c8caad0b..527c484f 100644 --- a/cmd/distribution/main.go +++ b/cmd/distribution/main.go @@ -183,8 +183,8 @@ func main() { zap.String("name", info.Name()), zap.String("ver", info.Ver), zap.Uint32("docs_total", info.DocsTotal), - zap.String("from", util.MsTsToESFormat(uint64(info.From))), - zap.String("to", util.MsTsToESFormat(uint64(info.To))), + zap.String("from", util.NsTsToESFormat(uint64(info.From))), + zap.String("to", util.NsTsToESFormat(uint64(info.To))), zap.String("creation_time", util.MsTsToESFormat(info.CreationTime)), ) } diff --git a/docs/en/internal/common.md b/docs/en/internal/common.md index 899670bc..b8f6eec1 100644 --- a/docs/en/internal/common.md +++ b/docs/en/internal/common.md @@ -3,7 +3,7 @@ ## Common * ID: MID-RID . -* MID - milliseconds part of ID, generated (extracted from doc) by ingestor before sending to store. +* MID - nanoseconds part of ID, generated (extracted from doc) by ingestor before sending to store. * RID - random part of ID, generated by ingestor before sending to store. * docParam - link of ID and block position, position of doc in block. * Only active fraction has meta file. It is used for restoring index in memory and in process of sealing fraction it is used to form index file. diff --git a/docs/en/internal/search.md b/docs/en/internal/search.md index f59331d9..34dfaaf1 100644 --- a/docs/en/internal/search.md +++ b/docs/en/internal/search.md @@ -26,7 +26,7 @@ Some basic overview of nodes: > > **ID** (document ID) - full id of a document, that you can use on proxy to find this specific doc. Consists of two parts: mid and rid. > -> **MID** (milliseconds ID) - timestamp of a document. This is a timestamp, that log was written into stdout on the machine, not when it came into seq-db, meaning that it can be quite old relative to the time on the seq-db machine. +> **MID** (nanoseconds ID) - timestamp of a document. This is a timestamp, that log was written into stdout on the machine, not when it came into seq-db, meaning that it can be quite old relative to the time on the seq-db machine. > > **RID** (random ID) - random part of an id diff --git a/docs/ru/internal/common.md b/docs/ru/internal/common.md index 899670bc..b8f6eec1 100644 --- a/docs/ru/internal/common.md +++ b/docs/ru/internal/common.md @@ -3,7 +3,7 @@ ## Common * ID: MID-RID . -* MID - milliseconds part of ID, generated (extracted from doc) by ingestor before sending to store. +* MID - nanoseconds part of ID, generated (extracted from doc) by ingestor before sending to store. * RID - random part of ID, generated by ingestor before sending to store. * docParam - link of ID and block position, position of doc in block. * Only active fraction has meta file. It is used for restoring index in memory and in process of sealing fraction it is used to form index file. diff --git a/docs/ru/internal/search.md b/docs/ru/internal/search.md index f59331d9..34dfaaf1 100644 --- a/docs/ru/internal/search.md +++ b/docs/ru/internal/search.md @@ -26,7 +26,7 @@ Some basic overview of nodes: > > **ID** (document ID) - full id of a document, that you can use on proxy to find this specific doc. Consists of two parts: mid and rid. > -> **MID** (milliseconds ID) - timestamp of a document. This is a timestamp, that log was written into stdout on the machine, not when it came into seq-db, meaning that it can be quite old relative to the time on the seq-db machine. +> **MID** (nanoseconds ID) - timestamp of a document. This is a timestamp, that log was written into stdout on the machine, not when it came into seq-db, meaning that it can be quite old relative to the time on the seq-db machine. > > **RID** (random ID) - random part of an id diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 42bde383..d05bcec9 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -147,7 +147,12 @@ func (src *ActiveSealingSource) LastError() error { func (src *ActiveSealingSource) prepareInfo() { src.info.MetaOnDisk = 0 src.info.SealingTime = uint64(src.created.UnixMilli()) - src.info.BuildDistribution(src.mids.vals) + mids := src.mids.vals + if len(mids) > 1 { + // skip system MID + mids = mids[1:] + } + src.info.BuildDistribution(mids) } // Info returns index metadata information. diff --git a/frac/common/info.go b/frac/common/info.go index 81bd34eb..654b5d7c 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -80,7 +80,7 @@ func (s *Info) BuildDistribution(mids []uint64) { } func (s *Info) InitEmptyDistribution() bool { - from := time.UnixMilli(int64(s.From)) + from := s.From.Time() creationTime := time.UnixMilli(int64(s.CreationTime)) if creationTime.Sub(from) < DistributionSpreadThreshold { // no big spread in past return false diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 920445ce..41b61d86 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -457,6 +457,34 @@ func (s *FractionTestSuite) TestSearchFromTo() { assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } +func (s *FractionTestSuite) TestSearchFromToNanoseconds() { + docs := []string{ + /*0*/ `{"timestamp":"2000-01-01T13:00:00.000000000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, + /*1*/ `{"timestamp":"2000-01-01T13:00:00.000000001Z","message":"good","level":"2","trace_id":"0","service":"1"}`, + /*2*/ `{"timestamp":"2000-01-01T13:00:00.000000002Z","message":"bad","level":"3","trace_id":"0","service":"2"}`, + /*3*/ `{"timestamp":"2000-01-01T13:00:00.000000003Z","message":"good","level":"4","trace_id":"1","service":"0"}`, + /*4*/ `{"timestamp":"2000-01-01T13:00:00.000000004Z","message":"bad","level":"5","trace_id":"1","service":"1"}`, + /*5*/ `{"timestamp":"2000-01-01T13:00:00.000000005Z","message":"good","level":"6","trace_id":"1","service":"2"}`, + /*6*/ `{"timestamp":"2000-01-01T13:00:00.000000006Z","message":"bad","level":"7","trace_id":"2","service":"0"}`, + /*7*/ `{"timestamp":"2000-01-01T13:00:00.000000007Z","message":"good","level":"8","trace_id":"2","service":"1"}`, + } + + s.insertDocuments(docs) + + assertSearch := func(query string, fromOffset, toOffset int, expectedIndexes []int) { + s.AssertSearch(s.query( + query, + withFrom(fmt.Sprintf("2000-01-01T13:00:00.000000%03dZ", fromOffset)), + withTo(fmt.Sprintf("2000-01-01T13:00:00.000000%03dZ", toOffset))), + docs, expectedIndexes) + } + + assertSearch(`message:good`, 0, 7, []int{7, 5, 3, 1}) + assertSearch(`message:bad`, 0, 7, []int{6, 4, 2, 0}) + assertSearch(`message:good`, 0, 6, []int{5, 3, 1}) + assertSearch(`message:bad`, 1, 7, []int{6, 4, 2}) +} + func (s *FractionTestSuite) TestSearchWithLimit() { docs := []string{ /*0*/ `{"timestamp":"2000-01-01T13:00:00.000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, @@ -1026,6 +1054,58 @@ func (s *FractionTestSuite) TestSearchLargeFrac() { s.AssertSearch(s.query("level:5", withLimit(100)), docs, level5Indexes[:100]) } +func (s *FractionTestSuite) TestContains() { + now := time.Now().Truncate(time.Minute) + docs := []string{ + fmt.Sprintf(`{"timestamp":"%s","message":"apple juice"}`, now.Add(-60*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"orange juice"}`, now.Add(-61*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"cider"}`, now.Add(-65*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"wine"}`, now.Add(-123*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"cola"}`, now.Add(-365*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"cola"}`, now.Add(-30*time.Hour).Format(time.RFC3339Nano)), + } + + s.insertDocuments(docs) + + s.Require().True(s.fraction.Contains(seq.TimeToMID(now.Add(-60 * time.Minute)))) + s.Require().True(s.fraction.Contains(seq.TimeToMID(now.Add(-61 * time.Minute)))) + s.Require().True(s.fraction.Contains(seq.TimeToMID(now.Add(-123 * time.Minute)))) + // also true, MID distribution bucket is 1 minute + s.Require().True(s.fraction.Contains(seq.TimeToMID(now.Add(-60 * time.Minute).Add(-30 * time.Second)))) + // contains=true: outside MID distribution but within from-to range + s.Require().True(s.fraction.Contains(seq.TimeToMID(now.Add(-27 * time.Hour)))) + s.Require().True(s.fraction.Contains(seq.TimeToMID(now.Add(-30 * time.Hour)))) + // contains=false: outside MID distribution AND outside from-to range + s.Require().False(s.fraction.Contains(seq.TimeToMID(now.Add(-30 * time.Hour).Add(-1 * time.Minute)))) +} + +func (s *FractionTestSuite) TestDistribution() { + now := time.Now().Truncate(time.Minute) + docs := []string{ + fmt.Sprintf(`{"timestamp":"%s","message":"apple juice"}`, now.Add(-60*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"orange juice"}`, now.Add(-61*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"cider"}`, now.Add(-65*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"wine"}`, now.Add(-120*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":"%s","message":"cola"}`, now.Add(-360*time.Minute).Format(time.RFC3339Nano)), + } + + s.insertDocuments(docs) + + _, ok := s.fraction.(*Active) + if ok { + s.Require().Nil(s.fraction.Info().Distribution, "active fraction has MID distribution") + return + } + + dist := s.fraction.Info().Distribution.GetDist() + s.Require().Equal(5, len(dist)) + s.Require().Equal(now.Add(-360*time.Minute).UTC(), dist[0]) + s.Require().Equal(now.Add(-120*time.Minute).UTC(), dist[1]) + s.Require().Equal(now.Add(-65*time.Minute).UTC(), dist[2]) + s.Require().Equal(now.Add(-61*time.Minute).UTC(), dist[3]) + s.Require().Equal(now.Add(-60*time.Minute).UTC(), dist[4]) +} + func (s *FractionTestSuite) TestFractionInfo() { docs := []string{ `{"timestamp":"2000-01-01T13:00:25Z","service":"service_a","message":"first message some text", "container":"gateway"}`, @@ -1046,8 +1126,8 @@ func (s *FractionTestSuite) TestFractionInfo() { s.Require().True(info.DocsOnDisk > uint64(200) && info.DocsOnDisk < uint64(300), "doc on disk doesn't match. actual value: %d", info.DocsOnDisk) s.Require().Equal(uint64(583), info.DocsRaw, "doc raw doesn't match") - s.Require().Equal(seq.MID(946731625000), info.From, "from doesn't match") - s.Require().Equal(seq.MID(946731654000), info.To, "to doesn't match") + s.Require().Equal(seq.MID(946731625000000000), info.From, "from doesn't match") + s.Require().Equal(seq.MID(946731654000000000), info.To, "to doesn't match") switch s.fraction.(type) { case *Active: @@ -1057,7 +1137,7 @@ func (s *FractionTestSuite) TestFractionInfo() { case *Sealed: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1600), - "index on disk doesn't match. actual value: %d", info.MetaOnDisk) + "index on disk doesn't match. actual value: %d", info.IndexOnDisk) case *Remote: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1500), @@ -1090,7 +1170,7 @@ func (s *FractionTestSuite) query(queryString string, options ...searchOption) * func withFrom(from string) searchOption { return func(p *processor.SearchParams) error { - t, err := time.Parse(time.RFC3339, from) + t, err := time.Parse(time.RFC3339Nano, from) if err != nil { return err } @@ -1101,7 +1181,7 @@ func withFrom(from string) searchOption { func withTo(to string) searchOption { return func(p *processor.SearchParams) error { - t, err := time.Parse(time.RFC3339, to) + t, err := time.Parse(time.RFC3339Nano, to) if err != nil { return err } diff --git a/frac/processor/search.go b/frac/processor/search.go index a346e89a..dcf4e5b2 100644 --- a/frac/processor/search.go +++ b/frac/processor/search.go @@ -161,8 +161,8 @@ func iterateEvalTree( histogram []uint64 ) if hasHist { - histBase = uint64(seq.MIDToMillis(params.From)) / params.HistInterval - histSize := uint64(seq.MIDToMillis(params.To))/params.HistInterval - histBase + 1 + histBase = seq.MIDToMillis(params.From) / params.HistInterval + histSize := seq.MIDToMillis(params.To)/params.HistInterval - histBase + 1 histogram = make([]uint64, histSize) } diff --git a/fracmanager/frac_info_cache.go b/fracmanager/frac_info_cache.go index 8b0d1d22..9eaba658 100644 --- a/fracmanager/frac_info_cache.go +++ b/fracmanager/frac_info_cache.go @@ -34,7 +34,7 @@ func (e *infoJSON) MarshalJSON() ([]byte, error) { To uint64 `json:"to"` *Alias }{ - From: uint64(seq.MIDToMillis(e.Info.From)), + From: seq.MIDToMillis(e.Info.From), To: seq.MIDToCeilingMillis(e.Info.To), Alias: (*Alias)(e.Info), }) diff --git a/fracmanager/frac_info_cache_test.go b/fracmanager/frac_info_cache_test.go index 72a39ce7..e4e77cd8 100644 --- a/fracmanager/frac_info_cache_test.go +++ b/fracmanager/frac_info_cache_test.go @@ -296,7 +296,7 @@ func TestFracInfoSavedToCache(t *testing.T) { totalSize := uint64(0) cnt := 1 for totalSize < maxSize { - addDummyDoc(t, fm, dp, seq.SimpleID(cnt*1000000)) + addDummyDoc(t, fm, dp, seq.SimpleID(int64(cnt*1000000))) cnt++ fracInstance := rotateAndSeal(fm) totalSize += fracInstance.Info().FullSize() @@ -377,7 +377,7 @@ func TestExtraFractionsRemoved(t *testing.T) { infos := map[string]*common.Info{} for i := 1; i < times+1; i++ { - addDummyDoc(t, fm, dp, seq.SimpleID(i)) + addDummyDoc(t, fm, dp, seq.SimpleID(int64(i))) fracInstance := rotateAndSeal(fm) info := fracInstance.Info() q.Add(item{ @@ -438,7 +438,7 @@ func TestMissingCacheFilesDeleted(t *testing.T) { defer insaneJSON.Release(metaRoot) for i := 1; i < times+1; i++ { - addDummyDoc(t, fm, dp, seq.SimpleID(i)) + addDummyDoc(t, fm, dp, seq.SimpleID(int64(i))) rotateAndSeal(fm) dp.TryReset() } @@ -504,7 +504,6 @@ func TestInfoCacheJSONEntryMarshalUnmarshal(t *testing.T) { SealingTime: 1666193045000, } - // Test marshaling: create temporary struct like getContentWithVersion does type infoJSON struct { *common.Info From uint64 `json:"from"` @@ -512,8 +511,8 @@ func TestInfoCacheJSONEntryMarshalUnmarshal(t *testing.T) { } entry := &infoJSON{ Info: originalInfo, - From: uint64(seq.MIDToMillis(originalInfo.From)), - To: uint64(seq.MIDToMillis(originalInfo.To)), + From: seq.MIDToMillis(originalInfo.From), + To: seq.MIDToMillis(originalInfo.To), } jsonBytes, err := json.Marshal(entry) assert.NoError(t, err) @@ -522,21 +521,17 @@ func TestInfoCacheJSONEntryMarshalUnmarshal(t *testing.T) { err = json.Unmarshal(jsonBytes, &jsonMap) assert.NoError(t, err) - // Verify JSON contains milliseconds assert.Equal(t, float64(1761812502000), jsonMap["from"]) assert.Equal(t, float64(1761812503000), jsonMap["to"]) - // Test unmarshaling: like LoadFromDisk does var unmarshaledEntry infoJSON err = json.Unmarshal(jsonBytes, &unmarshaledEntry) assert.NoError(t, err) assert.NotNil(t, unmarshaledEntry.Info) - // Convert From and To from milliseconds to nanoseconds (like LoadFromDisk does) unmarshaledEntry.Info.From = seq.MillisToMID(unmarshaledEntry.From) unmarshaledEntry.Info.To = seq.MillisToMID(unmarshaledEntry.To) - // Verify conversion back to nanoseconds assert.Equal(t, seq.MID(1761812502000000000), unmarshaledEntry.Info.From) assert.Equal(t, seq.MID(1761812503000000000), unmarshaledEntry.Info.To) assert.Equal(t, originalInfo.Path, unmarshaledEntry.Info.Path) diff --git a/fracmanager/fracmanager_test.go b/fracmanager/fracmanager_test.go index 4635555c..fd14f75c 100644 --- a/fracmanager/fracmanager_test.go +++ b/fracmanager/fracmanager_test.go @@ -368,7 +368,7 @@ func addDocs(t *testing.T, fm *FracManager, docCount int) { doc := []byte("{\"timestamp\": 0, \"message\": \"msg\"}") docRoot, err := insaneJSON.DecodeBytes(doc) assert.NoError(t, err) - dp.Append(doc, docRoot, seq.SimpleID(i), "service:100500", "k8s_pod", "_all_:") + dp.Append(doc, docRoot, seq.SimpleID(int64(i)), "service:100500", "k8s_pod", "_all_:") } docs, metas := dp.Provide() @@ -399,7 +399,7 @@ func TestMatureMode(t *testing.T) { dp := indexer.NewTestDocProvider() makeSealedFrac := func(fm *FracManager, docsPerFrac int) { for i := 0; i < docsPerFrac; i++ { - addDummyDoc(t, fm, dp, seq.SimpleID(id)) + addDummyDoc(t, fm, dp, seq.SimpleID(int64(id))) id++ } fm.seal(fm.rotate()) diff --git a/proxy/bulk/ingestor_test.go b/proxy/bulk/ingestor_test.go index 85ac4f27..e38a9180 100644 --- a/proxy/bulk/ingestor_test.go +++ b/proxy/bulk/ingestor_test.go @@ -88,7 +88,7 @@ func TestProcessDocuments(t *testing.T) { now := time.Now().UTC() - id := seq.SimpleID(int(now.UnixNano() / 1000000)) + id := seq.SimpleID(now.UnixNano()) type TestPayload struct { InDocs []string diff --git a/proxy/search/async.go b/proxy/search/async.go index 5501d9e5..8c899286 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -53,7 +53,7 @@ func (si *Ingestor) StartAsyncSearch(ctx context.Context, r AsyncRequest) (Async From: r.From.UnixMilli(), To: r.To.UnixMilli(), Aggs: convertToAggsQuery(r.Aggregations), - HistogramInterval: seq.MIDToMillis(r.HistogramInterval), + HistogramInterval: int64(seq.MIDToMillis(r.HistogramInterval)), Retention: durationpb.New(r.Retention), WithDocs: r.WithDocs, Size: r.Size, diff --git a/proxy/search/search_request.go b/proxy/search/search_request.go index 498abd8e..88061e67 100644 --- a/proxy/search/search_request.go +++ b/proxy/search/search_request.go @@ -32,11 +32,11 @@ type SearchRequest struct { func (sr *SearchRequest) GetAPISearchRequest() *storeapi.SearchRequest { return &storeapi.SearchRequest{ Query: util.ByteToStringUnsafe(sr.Q), - From: seq.MIDToMillis(sr.From), - To: seq.MIDToMillis(sr.To), + From: int64(seq.MIDToMillis(sr.From)), + To: int64(seq.MIDToMillis(sr.To)), Size: int64(sr.Size), Offset: int64(sr.Offset), - Interval: seq.MIDToMillis(sr.Interval), + Interval: int64(seq.MIDToMillis(sr.Interval)), Aggs: convertToAggsQuery(sr.AggQ), Explain: sr.Explain, WithTotal: sr.WithTotal, @@ -63,7 +63,7 @@ func convertToAggsQuery(aggs []AggQuery) []*storeapi.AggQuery { buf[i].Func = storeapi.AggFunc(query.Func) buf[i].Quantiles = query.Quantiles - buf[i].Interval = seq.MIDToMillis(query.Interval) + buf[i].Interval = int64(seq.MIDToMillis(query.Interval)) aggQ[i] = &buf[i] } diff --git a/proxyapi/grpc_fetch_test.go b/proxyapi/grpc_fetch_test.go index b2aa3e6b..a34ff1f6 100644 --- a/proxyapi/grpc_fetch_test.go +++ b/proxyapi/grpc_fetch_test.go @@ -43,7 +43,7 @@ func prepareFetchTestData(cData fetchTestCaseData) fetchTestData { docs := [][]byte{} apiDocs := make([]*seqproxyapi.Document, 0) for i := 0; i < cData.size; i++ { - id := seq.SimpleID(cData.startID + i) + id := seq.SimpleID(int64(cData.startID + i)) ids = append(ids, seq.IDSource{ID: id}) idsStr = append(idsStr, id.String()) if !cData.noResp { diff --git a/proxyapi/grpc_main_test.go b/proxyapi/grpc_main_test.go index 790a73ca..4bd9caa2 100644 --- a/proxyapi/grpc_main_test.go +++ b/proxyapi/grpc_main_test.go @@ -197,7 +197,7 @@ func makeSearchRespData(size int) *testSearchResp { docs := make([][]byte, 0) respDocs := make([]*seqproxyapi.Document, 0) for i := 0; i < size; i++ { - id := seq.SimpleID(i) + id := seq.SimpleID(int64(i)) ids = append(ids, seq.IDSource{ID: id, Source: 0}) data := []byte("doc" + strconv.Itoa(i)) docs = append(docs, data) @@ -313,7 +313,7 @@ func makeExportRespData(size int) *testExportResp { docs := make([][]byte, size) resp := make([]*seqproxyapi.ExportResponse, size) for i := range size { - id := seq.SimpleID(i) + id := seq.SimpleID(int64(i)) ids[i] = seq.IDSource{ID: id, Source: 0} data := []byte("doc" + strconv.Itoa(i)) diff --git a/seq/seq.go b/seq/seq.go index dab11dfa..d1d5b1f6 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -20,8 +20,11 @@ type RID uint64 // random part of ID type LID uint32 // local id for a fraction func (m MID) Time() time.Time { - // TODO check for large nanos and avoid cast - return time.Unix(0, int64(m)) + nanos := uint64(m) + nanosPerSec := uint64(time.Second) + secondsPart := nanos / nanosPerSec + nanosPart := nanos - secondsPart*nanosPerSec + return time.Unix(int64(secondsPart), int64(nanosPart)) } func (d ID) String() string { @@ -99,7 +102,7 @@ func FromString(x string) (ID, error) { return id, nil } -func SimpleID(i int) ID { +func SimpleID(i int64) ID { return ID{ MID: MID(i), RID: 0, @@ -107,9 +110,11 @@ func SimpleID(i int) ID { } func MillisToMID(millis uint64) MID { - if millis < math.MaxUint64/1000000 { - return MID(millis * 1000000) + if millis <= math.MaxUint64/uint64(time.Millisecond) { + return MID(millis * uint64(time.Millisecond)) } else { + // math.MaxUint64/1000000 is 2554 year in unix time millisecond, so it's just an "infinite" future for us. + // We can't scale it to nanoseconds, so we just leave it as it is return MID(millis) } } @@ -123,19 +128,22 @@ func DurationToMID(d time.Duration) MID { } func MIDToTime(t MID) time.Time { - return time.Unix(0, 0).Add(MIDToDuration(t)) + return t.Time() } -func MIDToMillis(t MID) int64 { - return int64(uint64(t) / uint64(1000000)) +func MIDToMillis(t MID) uint64 { + return uint64(t) / uint64(time.Millisecond) } func MIDToCeilingMillis(t MID) uint64 { nanos := uint64(t) - if nanos%1000000 != 0 { - return (nanos / 1000000) + 1 + nanosPerMilli := uint64(time.Millisecond) + millisFloorPart := nanos / uint64(time.Millisecond) + nanosPart := nanos % nanosPerMilli + if nanosPart != 0 { + return millisFloorPart + 1 } else { - return nanos / 1000000 + return millisFloorPart } } @@ -149,6 +157,7 @@ func NewID(t time.Time, randomness uint64) ID { return ID{MID: mid, RID: RID(randomness)} } +// String prints MID to ESFormat. Nanosecond part will not be printed. func (m MID) String() string { - return util.MsTsToESFormat(uint64(m)) + return util.NsTsToESFormat(uint64(m)) } diff --git a/seq/seq_test.go b/seq/seq_test.go index 3dfe59de..74df9b1c 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -3,6 +3,7 @@ package seq import ( "math" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -26,7 +27,36 @@ func TestFromString(t *testing.T) { func TestMillisToMID(t *testing.T) { assert.Equal(t, MID(1761812502000000000), MillisToMID(1761812502000)) - // we can't convert millis this high to nanos (overflow), so we expect that user just want "infinite future" + // we can scale this value + assert.Equal(t, MID(math.MaxUint64/3000000*1000000), MillisToMID(math.MaxUint64/3000000)) + + // greatest milliseconds (year 2500) we can scale to nanoseconds + assert.Equal(t, MID(18446744073709000000), MillisToMID(math.MaxUint64/1000000)) + + // we can't scale millis this high to nanoseconds (overflow), so we expect that a user just wants an "infinite future" assert.Equal(t, MID(math.MaxUint64), MillisToMID(math.MaxUint64)) assert.Equal(t, MID(math.MaxUint64/1000), MillisToMID(math.MaxUint64/1000)) + +} + +func TestTimeToMIDConversionOverflow(t *testing.T) { + timestamp := time.Now() + assert.EqualExportedValues(t, timestamp, MID(timestamp.UnixNano()).Time()) + + // check that we do not overflow on huge values + maxMID := MID(math.MaxUint64) + assert.Equal(t, 2554, maxMID.Time().Year()) + assert.Equal(t, 2554, MIDToTime(maxMID).Year()) +} + +func TestMIDToCeilingMillis(t *testing.T) { + assert.Equal(t, uint64(14), MIDToCeilingMillis(MID(14000000))) + assert.Equal(t, uint64(15), MIDToCeilingMillis(MID(14000001))) + assert.Equal(t, uint64(15), MIDToCeilingMillis(MID(14999999))) +} + +func TestString(t *testing.T) { + assert.Equal(t, "2025-10-30 12:21:42", MID(1761812502000000000).String()) + assert.Equal(t, "2025-10-30 12:21:42.432", MID(1761812502432000000).String()) + assert.Equal(t, "2025-10-30 12:21:42.432", MID(1761812502432000773).String()) } diff --git a/storeapi/grpc_v1_test.go b/storeapi/grpc_v1_test.go index 8d02019e..a693e98c 100644 --- a/storeapi/grpc_v1_test.go +++ b/storeapi/grpc_v1_test.go @@ -54,7 +54,7 @@ func makeBulkRequest(cnt int) *storeapi.BulkRequest { dp := indexer.NewTestDocProvider() for i := 0; i < cnt; i++ { - id := seq.SimpleID(i + 1) + id := seq.SimpleID(int64(i + 1)) doc := []byte("document") dp.Append(doc, nil, id, "_all_:", "service:100500", "k8s_pod:"+strconv.Itoa(i)) } diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 30f25e9e..1a406f52 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -770,7 +770,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-count"`, 1024, setup.WithAggQuery(search.AggQuery{ GroupBy: "level", Func: seq.AggFuncCount, - Interval: 30 * 1000, // 30 sec interval + Interval: 30 * 1000000000, // 30 sec interval })) require.NoError(t, err) @@ -790,7 +790,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { Field: "level", GroupBy: "service", Func: seq.AggFuncMin, - Interval: 30 * 1000, // 30 sec interval + Interval: 30 * 1000000000, // 30 sec interval })) require.NoError(t, err) @@ -810,7 +810,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-max"`, 1024, setup.WithAggQuery(search.AggQuery{ Field: "level", Func: seq.AggFuncMax, - Interval: 30 * 1000, // 30 sec interval + Interval: 30 * 1000000000, // 30 sec interval })) require.NoError(t, err) @@ -829,7 +829,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-avg"`, 1024, setup.WithAggQuery(search.AggQuery{ Field: "level", Func: seq.AggFuncAvg, - Interval: 30 * 1000, // 30 sec interval + Interval: 30 * 1000000000, // 30 sec interval })) require.NoError(t, err) @@ -848,7 +848,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-sum"`, 1024, setup.WithAggQuery(search.AggQuery{ Field: "level", Func: seq.AggFuncSum, - Interval: 30 * 1000, // 30 sec interval + Interval: 30 * 1000000000, // 30 sec interval })) require.NoError(t, err) @@ -868,7 +868,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { Field: "level", Func: seq.AggFuncQuantile, Quantiles: []float64{0.5}, - Interval: 30 * 1000, // 30 sec interval + Interval: 30 * 1000000000, // 30 sec interval })) require.NoError(t, err) @@ -992,7 +992,7 @@ func (s *IntegrationTestSuite) TestAggNoTotal() { assert.Equal(t, uint64(allDocsNum), histSum, "the sum of the histogram should be equal to the number of all documents") } - s.T().Run("ActiveFraction", test) + //s.T().Run("ActiveFraction", test) env.SealAll() s.T().Run("SealedFraction", test) } diff --git a/tests/setup/env.go b/tests/setup/env.go index 25f7ac18..d950d2a7 100644 --- a/tests/setup/env.go +++ b/tests/setup/env.go @@ -567,7 +567,7 @@ func WithAggQuery(aggQueries ...any) SearchOption { func WithInterval(interval time.Duration) SearchOption { return func(sr *search.SearchRequest) { - sr.Interval = seq.MID(interval / time.Microsecond) + sr.Interval = seq.DurationToMID(interval) } } diff --git a/util/util.go b/util/util.go index c20f0530..ebed5bb0 100644 --- a/util/util.go +++ b/util/util.go @@ -117,6 +117,14 @@ func MsTsToESFormat(ts uint64) string { return time.UnixMilli(int64(ts)).Format(consts.ESTimeFormat) } +// NsTsToESFormat converts timestamp in nanoseconds to ES time format string. +func NsTsToESFormat(ts uint64) string { + nanosPerSec := uint64(time.Second) + secondsPart := ts / nanosPerSec + nanosPart := ts - secondsPart*nanosPerSec + return time.Unix(int64(secondsPart), int64(nanosPart)).Format(consts.ESTimeFormat) +} + func BinSearchInRange(from, to int, fn func(i int) bool) int { n := to - from + 1 i := sort.Search(n, func(i int) bool { return fn(from + i) }) From 0423be37c049efdde44d8767e44e0b62080ad36e Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:11:56 +0400 Subject: [PATCH 24/35] test fixes --- tests/integration_tests/integration_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 1a406f52..c59f204a 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -770,7 +770,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-count"`, 1024, setup.WithAggQuery(search.AggQuery{ GroupBy: "level", Func: seq.AggFuncCount, - Interval: 30 * 1000000000, // 30 sec interval + Interval: seq.DurationToMID(30 * time.Second), })) require.NoError(t, err) @@ -790,7 +790,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { Field: "level", GroupBy: "service", Func: seq.AggFuncMin, - Interval: 30 * 1000000000, // 30 sec interval + Interval: seq.DurationToMID(30 * time.Second), })) require.NoError(t, err) @@ -810,7 +810,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-max"`, 1024, setup.WithAggQuery(search.AggQuery{ Field: "level", Func: seq.AggFuncMax, - Interval: 30 * 1000000000, // 30 sec interval + Interval: seq.DurationToMID(30 * time.Second), })) require.NoError(t, err) @@ -829,7 +829,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-avg"`, 1024, setup.WithAggQuery(search.AggQuery{ Field: "level", Func: seq.AggFuncAvg, - Interval: 30 * 1000000000, // 30 sec interval + Interval: seq.DurationToMID(30 * time.Second), })) require.NoError(t, err) @@ -848,7 +848,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { qpr, _, _, err := env.Search(`service:"nginx-sum"`, 1024, setup.WithAggQuery(search.AggQuery{ Field: "level", Func: seq.AggFuncSum, - Interval: 30 * 1000000000, // 30 sec interval + Interval: seq.DurationToMID(30 * time.Second), })) require.NoError(t, err) @@ -868,7 +868,7 @@ func (s *IntegrationTestSuite) TestTimeseries() { Field: "level", Func: seq.AggFuncQuantile, Quantiles: []float64{0.5}, - Interval: 30 * 1000000000, // 30 sec interval + Interval: seq.DurationToMID(30 * time.Second), })) require.NoError(t, err) @@ -992,7 +992,7 @@ func (s *IntegrationTestSuite) TestAggNoTotal() { assert.Equal(t, uint64(allDocsNum), histSum, "the sum of the histogram should be equal to the number of all documents") } - //s.T().Run("ActiveFraction", test) + s.T().Run("ActiveFraction", test) env.SealAll() s.T().Run("SealedFraction", test) } From 9120932ce65e791c886061eda268b224dbcba9e3 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:30:36 +0400 Subject: [PATCH 25/35] use nanosecond step in tests --- tests/integration_tests/integration_test.go | 4 ++-- tests/integration_tests/single_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index c59f204a..5a3f2a4b 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -354,7 +354,7 @@ func (s *IntegrationTestSuite) envWithDummyDocs(n int) (*setup.TestingEnv, []str origDocs := make([]string, 0, allDocsNum) docsBulk := make([]string, 2*n) - getNextTs := getAutoTsGenerator(time.Now(), -time.Second) + getNextTs := getAutoTsGenerator(time.Now(), -time.Nanosecond) for i := 0; i < bulksNum; i++ { @@ -431,7 +431,7 @@ func (s *IntegrationTestSuite) TestFetchNotFound() { func (s *IntegrationTestSuite) TestMulti() { // ingest - getNextTs := getAutoTsGenerator(time.Now(), -time.Second) + getNextTs := getAutoTsGenerator(time.Now(), -time.Nanosecond) origDocs := []string{ fmt.Sprintf(`{"service":"b1", "k8s_pod":"pod1", "yyyy":"xxxx1", "ts":%q}`, getNextTs()), fmt.Sprintf(`{"service":"b2", "k8s_pod":"pod2", "yyyy":"xxxx2", "ts":%q}`, getNextTs()), diff --git a/tests/integration_tests/single_test.go b/tests/integration_tests/single_test.go index 82e37981..c86d9aaa 100644 --- a/tests/integration_tests/single_test.go +++ b/tests/integration_tests/single_test.go @@ -128,7 +128,7 @@ func (s *SingleTestSuite) TestSearchNestedWithAND() { doc = `{"timestamp":%q, "trace_id": "%d", "spans": [%s]}` ) docs := make([]string, 0, numTraces) - getNextTs := getAutoTsGenerator(time.Now(), time.Second) + getNextTs := getAutoTsGenerator(time.Now(), time.Nanosecond) for i := range numTraces { spans := make([]string, 0, numSpans) for j := range numSpans { From c92fe56961e368a9cb1fe1f4edb43dccfcf021f6 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 21 Nov 2025 12:19:27 +0400 Subject: [PATCH 26/35] move to protocol version --- config/store_protocol_version.go | 33 ++++++++++++++++++++++++++++++++ consts/consts.go | 4 ++-- network/grpcutil/interceptors.go | 17 ++++++++-------- proxy/search/async.go | 9 +++++---- proxy/search/docs_iterator.go | 25 ++++++++++++------------ proxy/search/ingestor.go | 29 ++++++++++++++-------------- proxy/search/streaming_doc.go | 7 ++++--- storeapi/grpc_server.go | 5 +++-- 8 files changed, 84 insertions(+), 45 deletions(-) create mode 100644 config/store_protocol_version.go diff --git a/config/store_protocol_version.go b/config/store_protocol_version.go new file mode 100644 index 00000000..0075ea09 --- /dev/null +++ b/config/store_protocol_version.go @@ -0,0 +1,33 @@ +package config + +type StoreProtocolVersion uint8 + +const ( + // StoreProtocolVersion1 MID is in milliseconds + StoreProtocolVersion1 StoreProtocolVersion = 1 + // StoreProtocolVersion2 MID is in nanoseconds + StoreProtocolVersion2 StoreProtocolVersion = 2 +) + +func (p StoreProtocolVersion) String() string { + switch p { + case StoreProtocolVersion1: + return "1" + case StoreProtocolVersion2: + return "2" + default: + return "1" // Default to protocol version 1 (milliseconds) + } +} + +// ParseStoreProtocolVersion parses a protocol version string and returns the corresponding StoreProtocolVersion. +func ParseStoreProtocolVersion(s string) StoreProtocolVersion { + switch s { + case "1": + return StoreProtocolVersion1 + case "2": + return StoreProtocolVersion2 + default: + return StoreProtocolVersion1 // Default to protocol version 1 (milliseconds) + } +} diff --git a/consts/consts.go b/consts/consts.go index 01673dcb..b2c1c54a 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -70,8 +70,8 @@ const ( JaegerDebugKey = "jaeger-debug-id" DebugHeader = "x-o3-sample-trace" - // MIDPrecisionHeader reports store MID precision - MIDPrecisionHeader = "x-seq-mid-precision" + // StoreProtocolVersionHeader reports store protocol version + StoreProtocolVersionHeader = "x-seq-protocol-id" ) var ( diff --git a/network/grpcutil/interceptors.go b/network/grpcutil/interceptors.go index 0ea7f40e..2527aa3f 100644 --- a/network/grpcutil/interceptors.go +++ b/network/grpcutil/interceptors.go @@ -13,6 +13,7 @@ import ( "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/metric" @@ -125,33 +126,33 @@ func PassMetadataUnaryClientInterceptor() grpc.UnaryClientInterceptor { } } -// MIDPrecisionHeaderUnaryServerInterceptor sets the MID precision header for all unary responses. -func MIDPrecisionHeaderUnaryServerInterceptor(precision string) grpc.UnaryServerInterceptor { +// StoreProtocolHeaderUnaryServerInterceptor sets the store protocol version header for all unary responses. +func StoreProtocolHeaderUnaryServerInterceptor(protocolVersion config.StoreProtocolVersion) grpc.UnaryServerInterceptor { return func( ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, h grpc.UnaryHandler, ) (interface{}, error) { md := metadata.New(map[string]string{ - consts.MIDPrecisionHeader: precision, + consts.StoreProtocolVersionHeader: protocolVersion.String(), }) if err := grpc.SetHeader(ctx, md); err != nil { - logger.Error("failed to set MID precision header", zap.Error(err)) + logger.Error("failed to set store protocol version header", zap.Error(err)) } return h(ctx, req) } } -// MIDPrecisionHeaderStreamServerInterceptor sets the MID precision header for all streaming responses. -func MIDPrecisionHeaderStreamServerInterceptor(precision string) grpc.StreamServerInterceptor { +// StoreProtocolHeaderStreamServerInterceptor sets the store protocol version header for all streaming responses. +func StoreProtocolHeaderStreamServerInterceptor(protocolVersion config.StoreProtocolVersion) grpc.StreamServerInterceptor { return func( srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, h grpc.StreamHandler, ) error { md := metadata.New(map[string]string{ - consts.MIDPrecisionHeader: precision, + consts.StoreProtocolVersionHeader: protocolVersion.String(), }) if err := ss.SendHeader(md); err != nil { - logger.Error("failed to set MID precision header in stream", zap.Error(err)) + logger.Error("failed to set store protocol version header in stream", zap.Error(err)) } return h(srv, ss) } diff --git a/proxy/search/async.go b/proxy/search/async.go index 8c899286..e13fd426 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -16,6 +16,7 @@ import ( "google.golang.org/protobuf/types/known/durationpb" "github.com/ozontech/seq-db/asyncsearcher" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/pkg/storeapi" @@ -165,12 +166,12 @@ func (si *Ingestor) FetchAsyncSearchResult( } } - midPrecision := "ms" - if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { - midPrecision = precisionValues[0] + protocolVersion := config.StoreProtocolVersion2 + if precisionValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(precisionValues[0]) } - if midPrecision == "ms" { + if protocolVersion == config.StoreProtocolVersion1 { response := storeResp.Response for _, id := range response.IdSources { id.Id.Mid = uint64(seq.MillisToMID(id.Id.Mid)) diff --git a/proxy/search/docs_iterator.go b/proxy/search/docs_iterator.go index dbd35a8d..ead3ee8f 100644 --- a/proxy/search/docs_iterator.go +++ b/proxy/search/docs_iterator.go @@ -8,6 +8,7 @@ import ( "go.uber.org/zap" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/metric" "github.com/ozontech/seq-db/pkg/storeapi" @@ -76,22 +77,22 @@ func (u *uniqueIDIterator) Next() (StreamingDoc, error) { } type grpcStreamIterator struct { - source uint64 - host string - stream storeapi.StoreApi_FetchClient - totalIDs int - midPrecision string + source uint64 + host string + stream storeapi.StoreApi_FetchClient + totalIDs int + protocolVersion config.StoreProtocolVersion fetched int } -func newGrpcStreamIterator(stream storeapi.StoreApi_FetchClient, host string, source uint64, totalIDs int, midPrecision string) *grpcStreamIterator { +func newGrpcStreamIterator(stream storeapi.StoreApi_FetchClient, host string, source uint64, totalIDs int, protocolVersion config.StoreProtocolVersion) *grpcStreamIterator { return &grpcStreamIterator{ - stream: stream, - source: source, - host: host, - totalIDs: totalIDs, - midPrecision: midPrecision, + stream: stream, + source: source, + host: host, + totalIDs: totalIDs, + protocolVersion: protocolVersion, } } @@ -110,7 +111,7 @@ func (s *grpcStreamIterator) Next() (StreamingDoc, error) { return StreamingDoc{Source: s.source}, err } - doc := unpackDoc(data.Data, s.source, s.midPrecision) + doc := unpackDoc(data.Data, s.source, s.protocolVersion) if !doc.Empty() { s.fetched++ } else { diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index c73dbe2e..dbce4525 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -14,6 +14,7 @@ import ( "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/metric" @@ -230,17 +231,17 @@ func (si *Ingestor) singleDocsStream(ctx context.Context, explain bool, source u } md, err := stream.Header() - midPrecision := "ms" - if md != nil && err == nil { - if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { - midPrecision = precisionValues[0] - } - } + protocolVersion := config.StoreProtocolVersion2 if err != nil { return nil, fmt.Errorf("can't fetch metadata: %s", err.Error()) } + if md != nil { + if precisionValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(precisionValues[0]) + } + } - var it DocsIterator = newGrpcStreamIterator(stream, host, source, len(ids), midPrecision) + var it DocsIterator = newGrpcStreamIterator(stream, host, source, len(ids), protocolVersion) if explain { it = newExplainWrapperIterator(it, ids, host, startTime) } @@ -625,15 +626,15 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, return nil, 0, err } - // Check the store's MID precision from response header - // If header indicates milliseconds, convert to nanoseconds - midPrecision := "ms" - if precisionValues := md.Get(consts.MIDPrecisionHeader); len(precisionValues) > 0 { - midPrecision = precisionValues[0] + // Check the store's protocol version from response header + // If header indicates protocol version 1 (MID in milliseconds), then convert to nanoseconds + protocolVersion := config.StoreProtocolVersion2 + if precisionHeaderValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionHeaderValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(precisionHeaderValues[0]) } - // Convert legacy store response to nanoseconds MID - if midPrecision == "ms" { + // Convert legacy store response (protocol version 1) to nanoseconds MID + if protocolVersion == config.StoreProtocolVersion1 { for _, id := range data.IdSources { id.Id.Mid = uint64(seq.MillisToMID(id.Id.Mid)) } diff --git a/proxy/search/streaming_doc.go b/proxy/search/streaming_doc.go index 8b925dfb..5035dba0 100644 --- a/proxy/search/streaming_doc.go +++ b/proxy/search/streaming_doc.go @@ -1,6 +1,7 @@ package search import ( + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" ) @@ -27,12 +28,12 @@ func NewStreamingDoc(idSource seq.IDSource, data []byte) StreamingDoc { } } -func unpackDoc(data []byte, source uint64, midPrecision string) StreamingDoc { +func unpackDoc(data []byte, source uint64, protocolVersion config.StoreProtocolVersion) StreamingDoc { block := storage.DocBlock(data) mid := block.GetExt1() - // Convert from milliseconds to nanoseconds if store (legacy) operates in milliseconds - if midPrecision == "ms" { + // Convert from milliseconds to nanoseconds if store (protocol version 1) operates in milliseconds + if protocolVersion == config.StoreProtocolVersion1 { mid = mid * 1000000 } diff --git a/storeapi/grpc_server.go b/storeapi/grpc_server.go index 0c9a7a7d..cb561fb1 100644 --- a/storeapi/grpc_server.go +++ b/storeapi/grpc_server.go @@ -12,6 +12,7 @@ import ( "github.com/alecthomas/units" + "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/fracmanager" "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/network/grpcutil" @@ -39,11 +40,11 @@ func newGRPCServer(cfg APIConfig, fracManager *fracmanager.FracManager, mappingP func initServer() *grpc.Server { interceptors := []grpc.UnaryServerInterceptor{ - grpcutil.MIDPrecisionHeaderUnaryServerInterceptor("ns"), + grpcutil.StoreProtocolHeaderUnaryServerInterceptor(config.StoreProtocolVersion2), grpcutil.ReturnToVTPoolUnaryServerInterceptor(), } streamInterceptors := []grpc.StreamServerInterceptor{ - grpcutil.MIDPrecisionHeaderStreamServerInterceptor("ns"), + grpcutil.StoreProtocolHeaderStreamServerInterceptor(config.StoreProtocolVersion2), } opts := []grpc.ServerOption{ grpc.ChainUnaryInterceptor(interceptors...), From 6c13141bd8d34d8fed8367b927ada9ae3694da46 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 1 Dec 2025 16:59:49 +0400 Subject: [PATCH 27/35] carry PR fixes from phase 1, add tests --- asyncsearcher/async_searcher.go | 16 +++--- asyncsearcher/encoding.go | 29 +++++----- asyncsearcher/encoding_test.go | 2 +- frac/fraction_test.go | 93 ++++++++++++++++++++++++++++++++- frac/processor/search.go | 3 +- frac/sealed/seqids/blocks.go | 44 +++++++++++----- frac/sealed/seqids/loader.go | 7 +-- fracmanager/loader_test.go | 2 +- fracmanager/searcher_test.go | 4 +- proxy/search/async.go | 2 +- proxy/search/ingestor.go | 12 ++--- proxy/search/streaming_doc.go | 2 +- seq/seq.go | 19 +++---- seq/seq_test.go | 6 +-- 14 files changed, 174 insertions(+), 67 deletions(-) diff --git a/asyncsearcher/async_searcher.go b/asyncsearcher/async_searcher.go index 6c8c9055..93870684 100644 --- a/asyncsearcher/async_searcher.go +++ b/asyncsearcher/async_searcher.go @@ -40,12 +40,14 @@ const ( minRetention = 5 * time.Minute maxRetention = 30 * 24 * time.Hour // 30 days - - infoVersion1 = uint8(1) // MIDs stored in milliseconds - infoVersion2 = uint8(2) // MIDs stored in nanoseconds ) -const infoVersion = infoVersion2 +type infoVersion uint8 + +const ( + infoVersion1 infoVersion = iota + 1 // MIDs stored in milliseconds + infoVersion2 // MIDs stored in nanoseconds +) var ( asyncSearchActiveSearches = promauto.NewGauge(prometheus.GaugeOpts{ @@ -147,7 +149,7 @@ type fracSearchState struct { } type asyncSearchInfo struct { - Version uint8 + Version infoVersion // Finished is true if there are no fracs waiting to be processed. // @@ -181,7 +183,7 @@ func newAsyncSearchInfo(r AsyncSearchRequest, list fracmanager.List) asyncSearch } ctx, cancel := context.WithCancel(context.Background()) return asyncSearchInfo{ - Version: infoVersion, + Version: infoVersion2, Finished: false, Error: "", CanceledAt: time.Time{}, @@ -592,7 +594,7 @@ func loadAsyncRequests(dataDir string) (map[string]asyncSearchInfo, error) { if info.Version == infoVersion1 { info.Request.Params.From = seq.MillisToMID(uint64(info.Request.Params.From)) info.Request.Params.To = seq.MillisToMID(uint64(info.Request.Params.To)) - info.Version = infoVersion + info.Version = infoVersion2 } info.merged.Store(areQPRsMerged[requestID]) diff --git a/asyncsearcher/encoding.go b/asyncsearcher/encoding.go index a1f4d6fb..92772deb 100644 --- a/asyncsearcher/encoding.go +++ b/asyncsearcher/encoding.go @@ -14,15 +14,20 @@ import ( var be = binary.BigEndian +type qprBinVersion uint8 + const ( - qprBinVersion1 = uint8(1) // MIDs stored in milliseconds - qprBinVersion2 = uint8(2) // MIDs stored in nanoseconds + qprBinVersion1 qprBinVersion = iota + 1 // MIDs stored in milliseconds + qprBinVersion2 // MIDs stored in nanoseconds ) -const qprBinVersion = qprBinVersion2 // Phase 2: write version 2 (nanoseconds) +var availableVersions = map[qprBinVersion]struct{}{ + qprBinVersion1: {}, + qprBinVersion2: {}, +} func marshalQPR(q *seq.QPR, dst []byte) []byte { - dst = append(dst, qprBinVersion) + dst = append(dst, uint8(qprBinVersion2)) blocksLenPos := len(dst) dst = append(dst, make([]byte, 8)...) @@ -43,10 +48,10 @@ func unmarshalQPR(dst *seq.QPR, src []byte, idsLimit int) (_ []byte, err error) return nil, fmt.Errorf("invalid QPR format; want %d bytes, got %d", 19, len(src)) } - version := src[0] + version := qprBinVersion(src[0]) src = src[1:] - if version != qprBinVersion1 && version != qprBinVersion2 { - return nil, fmt.Errorf("invalid QPR version %d; want %d or %d", version, qprBinVersion1, qprBinVersion2) + if _, ok := availableVersions[version]; !ok { + return nil, fmt.Errorf("invalid QPR version %d", version) } idsBlocksLen := int(be.Uint64(src)) @@ -174,7 +179,7 @@ func marshalIDsBlock(dst []byte, ids []seq.IDSource) ([]byte, idsCodec) { return dst, idsCodecDeltaZstd } -func unmarshalIDsBlock(dst *seq.QPR, src []byte, version uint8) (_ []byte, err error) { +func unmarshalIDsBlock(dst *seq.QPR, src []byte, version qprBinVersion) (_ []byte, err error) { if len(src) == 0 { return src, fmt.Errorf("empty IDs block") } @@ -213,7 +218,7 @@ func unmarshalIDsBlock(dst *seq.QPR, src []byte, version uint8) (_ []byte, err e } } -func unmarshalIDsDelta(dst seq.IDSources, block []byte, version uint8) (seq.IDSources, error) { +func unmarshalIDsDelta(dst seq.IDSources, block []byte, version qprBinVersion) (seq.IDSources, error) { prevMID := int64(0) for len(block) > 0 { v, n := binary.Varint(block) @@ -266,7 +271,7 @@ func marshalHistogram(dst []byte, histogram map[seq.MID]uint64) []byte { return dst } -func unmarshalHistogram(src []byte, version uint8) (map[seq.MID]uint64, []byte, error) { +func unmarshalHistogram(src []byte, version qprBinVersion) (map[seq.MID]uint64, []byte, error) { length, n := binary.Uvarint(src) src = src[n:] if n <= 0 { @@ -368,7 +373,7 @@ func marshalAggs(dst []byte, aggs []seq.AggregatableSamples) []byte { return dst } -func unmarshalAggs(dst []seq.AggregatableSamples, src []byte, version uint8) (_ []seq.AggregatableSamples, _ []byte, err error) { +func unmarshalAggs(dst []seq.AggregatableSamples, src []byte, version qprBinVersion) (_ []seq.AggregatableSamples, _ []byte, err error) { var header aggsBlockHeader src, err = header.Unmarshal(src) if err != nil { @@ -419,7 +424,7 @@ func marshalAggregatableSamples(s seq.AggregatableSamples, dst []byte) []byte { return dst } -func unmarshalAggregatableSamples(q *seq.AggregatableSamples, src []byte, version uint8) ([]byte, error) { +func unmarshalAggregatableSamples(q *seq.AggregatableSamples, src []byte, version qprBinVersion) ([]byte, error) { if len(src) < 16 { return nil, fmt.Errorf("src too short to unmarshal QPRHistogram, want at least 16 bytes, got %d", len(src)) } diff --git a/asyncsearcher/encoding_test.go b/asyncsearcher/encoding_test.go index 479a9381..16c2af93 100644 --- a/asyncsearcher/encoding_test.go +++ b/asyncsearcher/encoding_test.go @@ -132,7 +132,7 @@ func TestQPRVersion1Compatibility(t *testing.T) { } rawQPR := marshalQPR(&qpr, nil) - rawQPR[0] = qprBinVersion1 + rawQPR[0] = uint8(qprBinVersion1) var outQPR seq.QPR tail, err := unmarshalQPR(&outQPR, rawQPR, math.MaxInt) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 87ce7372..eefb96cf 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -457,6 +457,8 @@ func (s *FractionTestSuite) TestSearchFromTo() { assertSearch(`NOT trace_id:0 AND NOT trace_id:2`, 3, 5, []int{5, 4, 3}) } +// TestSearchFromToNanoseconds tests if SearchParams "from" and "to" params can be specified up to nanoseconds since they are of seq.MID type. +// However, seq-db API doesn't support searching with queries with "from" and "to" specified in nanos. Only millis are supported. func (s *FractionTestSuite) TestSearchFromToNanoseconds() { docs := []string{ /*0*/ `{"timestamp":"2000-01-01T13:00:00.000000000Z","message":"bad","level":"1","trace_id":"0","service":"0"}`, @@ -1054,7 +1056,59 @@ func (s *FractionTestSuite) TestSearchLargeFrac() { s.AssertSearch(s.query("level:5", withLimit(100)), docs, level5Indexes[:100]) } -func (s *FractionTestSuite) TestContains() { +func (s *FractionTestSuite) TestIntersectingNanoseconds() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000000000Z","message":"bad","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.000000001Z","message":"good","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.000000002Z","message":"ok","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.000000003Z","message":"err","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.000000004Z","message":"success","level":"3"}`, + `{"timestamp":"2000-01-01T13:00:00.001000000Z","message":"err","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.001000001Z","message":"bad","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.001000002Z","message":"good","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.002000000Z","message":"bad","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.002000000Z","message":"err","level":"1"}`, + } + + s.insertDocuments(docs) + + s.Require().Equal(uint64(946731600000000000), uint64(s.fraction.Info().From)) + s.Require().Equal(uint64(946731600002000000), uint64(s.fraction.Info().To)) + + s.Require().True(s.fraction.IsIntersecting( + seq.TimeToMID(mustParseTime("2000-01-01T12:59:59.000000000Z")), + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.000000000Z"))), + "must intersect at info.From") + // 1 ns before the fraction range. Should not overlap, since MID distribution is not built for fractions with short lifetime, + // and it only covers the last 24h from now + s.Require().False(s.fraction.IsIntersecting( + seq.TimeToMID(mustParseTime("2000-01-01T12:59:59.000000000Z")), + seq.TimeToMID(mustParseTime("2000-01-01T12:59:59.999999999Z"))), + "must not overlap (outside of range)") + // overlaps at the only point at info.To + s.Require().True(s.fraction.IsIntersecting( + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.002000000Z")), + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.999999999Z"))), + "must intersect at info.To") + s.Require().False(s.fraction.IsIntersecting( + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.002000001Z")), + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.999999999Z"))), + "must not intersect (1 ns outside of range)") + s.Require().True(s.fraction.IsIntersecting( + seq.TimeToMID(mustParseTime("2000-01-01T12:59:59.999999999Z")), + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.000000001Z"))), + "must intersect due to overlapping") + s.Require().True(s.fraction.IsIntersecting( + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.001000000Z")), + seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.999999999Z"))), + "must intersect due to overlapping") + + // double check for seq.MID built from raw nanoseconds + s.Require().True(s.fraction.IsIntersecting(seq.MID(946731500000000000), seq.MID(946731600000000000))) + s.Require().True(s.fraction.IsIntersecting(seq.MID(946731600002000000), seq.MID(946731699999999999))) +} + +func (s *FractionTestSuite) TestContainsWithMIDDistribution() { now := time.Now().Truncate(time.Minute) docs := []string{ fmt.Sprintf(`{"timestamp":"%s","message":"apple juice"}`, now.Add(-60*time.Minute).Format(time.RFC3339Nano)), @@ -1079,6 +1133,35 @@ func (s *FractionTestSuite) TestContains() { s.Require().False(s.fraction.Contains(seq.TimeToMID(now.Add(-30 * time.Hour).Add(-1 * time.Minute)))) } +func (s *FractionTestSuite) TestContainsNanoseconds() { + docs := []string{ + `{"timestamp":"2000-01-01T13:00:00.000000000Z","message":"bad","level":"1"}`, + `{"timestamp":"2000-01-01T13:00:00.000000001Z","message":"good","level":"2"}`, + `{"timestamp":"2000-01-01T13:00:00.000000004Z","message":"success","level":"3"}`, + `{"timestamp":"2000-01-01T13:10:00.000000000Z","message":"err","level":"2"}`, + `{"timestamp":"2000-01-01T13:20:00.000000001Z","message":"bad","level":"1"}`, + `{"timestamp":"2000-01-01T13:30:00.000000002Z","message":"good","level":"2"}`, + `{"timestamp":"2000-01-01T13:40:00.000000001Z","message":"bad","level":"1"}`, + `{"timestamp":"2000-01-01T13:50:00.000000002Z","message":"err","level":"1"}`, + } + + s.insertDocuments(docs) + + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.000000000Z"))), "frac must contain first doc") + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:00:00.000000001Z"))), "frac must contain second doc") + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:10:00.000000000Z"))), "frac must contain third doc") + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:50:00.000000002Z"))), "frac must contain last doc") + + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:30:00.000000002Z"))), "frac must contain sixth doc") + // round doc nano to milli, still Contains returns true + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:30:00.000000000Z"))), "frac must contain sixth doc (rounded to milli)") + + // still Contains returns true even though the timestamp is 5 minute far from nearest doc + // MID distribution only covers the last 24h, so Contains return true here + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:15:00.000000000Z")))) + s.Require().True(s.fraction.Contains(seq.TimeToMID(mustParseTime("2000-01-01T13:25:00.000000000Z")))) +} + func (s *FractionTestSuite) TestMIDDistribution() { now := time.Now().Truncate(time.Minute) docs := []string{ @@ -1228,6 +1311,14 @@ func withAggQuery(aggQuery processor.AggQuery) searchOption { } } +func mustParseTime(timeStr string) time.Time { + t, err := time.Parse(time.RFC3339Nano, timeStr) + if err != nil { + panic(fmt.Sprintf("could not parse timestamp %s", timeStr)) + } + return t +} + func (s *FractionTestSuite) AssertSearch(queryObject interface{}, originalDocs []string, expectedIndexes []int) { switch q := queryObject.(type) { case string: diff --git a/frac/processor/search.go b/frac/processor/search.go index 3cb2ff09..58256afe 100644 --- a/frac/processor/search.go +++ b/frac/processor/search.go @@ -206,8 +206,7 @@ func iterateEvalTree( zap.Time("mid", mid.Time())) continue } - // TODO /1000000 replace - bucketIndex := uint64(mid/1000000)/params.HistInterval - histBase + bucketIndex := seq.MIDToMillis(mid)/params.HistInterval - histBase histogram[bucketIndex]++ } diff --git a/frac/sealed/seqids/blocks.go b/frac/sealed/seqids/blocks.go index f2eea290..4eba205d 100644 --- a/frac/sealed/seqids/blocks.go +++ b/frac/sealed/seqids/blocks.go @@ -5,11 +5,11 @@ import ( "errors" "github.com/ozontech/seq-db/config" + "github.com/ozontech/seq-db/seq" ) type BlockMIDs struct { - fracVersion config.BinaryDataVersion - Values []uint64 + Values []uint64 } func (b BlockMIDs) Pack(dst []byte) []byte { @@ -21,21 +21,12 @@ func (b BlockMIDs) Pack(dst []byte) []byte { return dst } -func (b *BlockMIDs) Unpack(data []byte) error { - values, err := unpackRawIDsVarint(data, b.Values) +func (b *BlockMIDs) Unpack(data []byte, fracVersion config.BinaryDataVersion) error { + values, err := unpackRawMIDsVarint(data, b.Values, fracVersion) if err != nil { return err } b.Values = values - - // Legacy format - convert millis to micros - if b.fracVersion < config.BinaryDataV2 { - for i := range b.Values { - // TODO replace this by calling function - b.Values[i] = b.Values[i] * 1000000 - } - } - return nil } @@ -86,6 +77,33 @@ func (b *BlockParams) Unpack(data []byte) error { return nil } +func unpackRawMIDsVarint(src []byte, dst []uint64, fracVersion config.BinaryDataVersion) ([]uint64, error) { + dst = dst[:0] + id := uint64(0) + for len(src) != 0 { + udelta, n := binary.Uvarint(src) + if n <= 0 { + return nil, errors.New("varint decoded with error") + } + + delta := int64(udelta >> 1) + if udelta&1 != 0 { + delta = ^delta + } + + id += uint64(delta) + if fracVersion >= config.BinaryDataV2 { + dst = append(dst, id) + } else { + // Legacy format - scale millis to nanos + dst = append(dst, uint64(seq.MillisToMID(id))) + } + + src = src[n:] + } + return dst, nil +} + func unpackRawIDsVarint(src []byte, dst []uint64) ([]uint64, error) { dst = dst[:0] id := uint64(0) diff --git a/frac/sealed/seqids/loader.go b/frac/sealed/seqids/loader.go index 31a22361..1928e182 100644 --- a/frac/sealed/seqids/loader.go +++ b/frac/sealed/seqids/loader.go @@ -48,11 +48,8 @@ func (l *Loader) GetMIDsBlock(index uint32, buf []uint64) (BlockMIDs, error) { return BlockMIDs{}, err } // unpack - block := BlockMIDs{ - fracVersion: l.fracVersion, - Values: buf, - } - if err := block.Unpack(data); err != nil { + block := BlockMIDs{Values: buf} + if err := block.Unpack(data, l.fracVersion); err != nil { return BlockMIDs{}, err } return block, nil diff --git a/fracmanager/loader_test.go b/fracmanager/loader_test.go index de92ad19..f1039885 100644 --- a/fracmanager/loader_test.go +++ b/fracmanager/loader_test.go @@ -31,7 +31,7 @@ func appendDocs(t *testing.T, active *frac.Active, docCount int) { doc := []byte("{\"timestamp\": 0, \"message\": \"msg\"}") docRoot, err := insaneJSON.DecodeBytes(doc) assert.NoError(t, err) - dp.Append(doc, docRoot, seq.SimpleID(i), "service:100500", "k8s_pod", "_all_:") + dp.Append(doc, docRoot, seq.SimpleID(int64(i)), "service:100500", "k8s_pod", "_all_:") } docs, metas := dp.Provide() diff --git a/fracmanager/searcher_test.go b/fracmanager/searcher_test.go index e584e9a1..eceff4b4 100644 --- a/fracmanager/searcher_test.go +++ b/fracmanager/searcher_test.go @@ -336,7 +336,7 @@ func newFakeQPRwithTotal(ids []int, total uint64) *seq.QPR { func newFakeQPR(ids ...int) *seq.QPR { idsWithSource := make(seq.IDSources, len(ids)) for i, mid := range ids { - idsWithSource[i] = seq.IDSource{ID: seq.SimpleID(mid)} + idsWithSource[i] = seq.IDSource{ID: seq.SimpleID(int64(mid))} } return &seq.QPR{IDs: idsWithSource} } @@ -344,7 +344,7 @@ func newFakeQPR(ids ...int) *seq.QPR { func newFakeQPRWithHist(ids []int, histogram map[seq.MID]uint64) *seq.QPR { idsWithSource := make(seq.IDSources, len(ids)) for i, mid := range ids { - idsWithSource[i] = seq.IDSource{ID: seq.SimpleID(mid)} + idsWithSource[i] = seq.IDSource{ID: seq.SimpleID(int64(mid))} } return &seq.QPR{ IDs: idsWithSource, diff --git a/proxy/search/async.go b/proxy/search/async.go index 613e6648..857d475b 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -168,7 +168,7 @@ func (si *Ingestor) FetchAsyncSearchResult( } } - protocolVersion := config.StoreProtocolVersion2 + protocolVersion := config.StoreProtocolVersion1 if precisionValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionValues) > 0 { protocolVersion = config.ParseStoreProtocolVersion(precisionValues[0]) } diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 5af9be9b..109bbe4c 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -231,11 +231,10 @@ func (si *Ingestor) singleDocsStream(ctx context.Context, explain bool, source u } md, err := stream.Header() - protocolVersion := config.StoreProtocolVersion2 + protocolVersion := config.StoreProtocolVersion1 if err != nil { return nil, fmt.Errorf("can't fetch metadata: %s", err.Error()) - } - if md != nil { + } else if md != nil { if precisionValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionValues) > 0 { protocolVersion = config.ParseStoreProtocolVersion(precisionValues[0]) } @@ -636,11 +635,10 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, return nil, 0, err } - // Check the store's protocol version from response header // If header indicates protocol version 1 (MID in milliseconds), then convert to nanoseconds - protocolVersion := config.StoreProtocolVersion2 - if precisionHeaderValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionHeaderValues) > 0 { - protocolVersion = config.ParseStoreProtocolVersion(precisionHeaderValues[0]) + protocolVersion := config.StoreProtocolVersion1 + if protocolHeaderValues := md.Get(consts.StoreProtocolVersionHeader); len(protocolHeaderValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(protocolHeaderValues[0]) } // Convert legacy store response (protocol version 1) to nanoseconds MID diff --git a/proxy/search/streaming_doc.go b/proxy/search/streaming_doc.go index 5035dba0..0d27762a 100644 --- a/proxy/search/streaming_doc.go +++ b/proxy/search/streaming_doc.go @@ -34,7 +34,7 @@ func unpackDoc(data []byte, source uint64, protocolVersion config.StoreProtocolV // Convert from milliseconds to nanoseconds if store (protocol version 1) operates in milliseconds if protocolVersion == config.StoreProtocolVersion1 { - mid = mid * 1000000 + mid = uint64(seq.MillisToMID(mid)) } doc := StreamingDoc{ diff --git a/seq/seq.go b/seq/seq.go index d1d5b1f6..1dbea345 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -20,11 +20,8 @@ type RID uint64 // random part of ID type LID uint32 // local id for a fraction func (m MID) Time() time.Time { - nanos := uint64(m) - nanosPerSec := uint64(time.Second) - secondsPart := nanos / nanosPerSec - nanosPart := nanos - secondsPart*nanosPerSec - return time.Unix(int64(secondsPart), int64(nanosPart)) + nanosPerSecond := uint64(time.Second) + return time.Unix(int64(uint64(m)/nanosPerSecond), int64(uint64(m)%nanosPerSecond)) } func (d ID) String() string { @@ -88,13 +85,13 @@ func FromString(x string) (ID, error) { return id, err } - delimiter := x[16] - if delimiter == '-' { - // legacy format, MID in millis - id.MID = MillisToMID(binary.LittleEndian.Uint64(mid)) - } else if delimiter == '_' { + switch delimiter := x[16]; delimiter { + case '_': id.MID = MID(binary.LittleEndian.Uint64(mid)) - } else { + case '-': + // legacy format, MID in millis. Scale to nanoseconds + id.MID = MillisToMID(binary.LittleEndian.Uint64(mid)) + default: return id, fmt.Errorf("unknown delimiter %c", delimiter) } id.RID = RID(binary.LittleEndian.Uint64(rid)) diff --git a/seq/seq_test.go b/seq/seq_test.go index 881483d0..a96fa9ee 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -65,7 +65,7 @@ func TestTimeToMIDConversion(t *testing.T) { timestampNow := time.Now() assert.EqualExportedValues(t, timestampNow, MID(timestampNow.UnixNano()).Time()) - timestamp2 := MID(1763984556395).Time().UTC() + timestamp2 := MID(1763984556395000000).Time().UTC() assert.Equal(t, 2025, timestamp2.Year()) assert.Equal(t, time.Month(11), timestamp2.Month()) assert.Equal(t, 24, timestamp2.Day()) @@ -76,6 +76,6 @@ func TestTimeToMIDConversion(t *testing.T) { // check that we do not overflow on huge values maxMID := MID(math.MaxUint64) - assert.Equal(t, 292278994, maxMID.Time().Year()) - assert.Equal(t, 292278994, MIDToTime(maxMID).Year()) + assert.Equal(t, 2554, maxMID.Time().Year()) + assert.Equal(t, 2554, MIDToTime(maxMID).Year()) } From 65f7236983eb95f21920980d56f16b36203c3f24 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Tue, 2 Dec 2025 12:06:37 +0400 Subject: [PATCH 28/35] redo json marshal for common.Info, fixes --- frac/common/info.go | 33 ++++++ frac/common/info_test.go | 118 +++++++++++++++++++++ fracmanager/frac_info_cache.go | 53 +-------- fracmanager/frac_info_cache_test.go | 62 +---------- seq/seq.go | 13 +-- seq/seq_test.go | 1 + tests/integration_tests/sub_search_test.go | 2 +- tests/setup/doc.go | 2 +- util/util.go | 6 +- 9 files changed, 169 insertions(+), 121 deletions(-) create mode 100644 frac/common/info_test.go diff --git a/frac/common/info.go b/frac/common/info.go index 654b5d7c..8b854525 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -1,6 +1,7 @@ package common import ( + "encoding/json" "fmt" "math" "path" @@ -117,3 +118,35 @@ func (s *Info) IsIntersecting(from, to seq.MID) bool { // check with distribution return s.Distribution.IsIntersecting(from, to) } + +// MarshalJSON implements custom JSON marshaling to always store From and To in milliseconds +func (s *Info) MarshalJSON() ([]byte, error) { + type InfoAlias Info // type alias to avoid infinite recursion + + tmp := InfoAlias(*s) + + // We convert "from" and "to" to milliseconds in order to guarantee we can rollback on deploy. + // When converting nanos to millis we must round "from" down (floor) and round "to" up (ceiling). + // This guarantees that a fraction time range (checked on search with Contains and IsIntersecting methods) is not narrowed down, + // and we do not lose messages on search. + tmp.From = seq.MID(seq.MIDToMillis(s.From)) + tmp.To = seq.MID(seq.MIDToCeilingMillis(s.To)) + + return json.Marshal(tmp) +} + +// UnmarshalJSON implements custom JSON unmarshaling to convert From and To from milliseconds to nanoseconds +func (s *Info) UnmarshalJSON(data []byte) error { + type TmpInfo Info // type alias to avoid infinite recursion + var tmp TmpInfo + + err := json.Unmarshal(data, &tmp) + if err != nil { + return err + } + + *s = Info(tmp) + s.From = seq.MillisToMID(uint64(tmp.From)) + s.To = seq.MillisToMID(uint64(tmp.To)) + return nil +} diff --git a/frac/common/info_test.go b/frac/common/info_test.go new file mode 100644 index 00000000..b22eb6ab --- /dev/null +++ b/frac/common/info_test.go @@ -0,0 +1,118 @@ +package common + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/ozontech/seq-db/seq" +) + +func TestInfo_MarshalJSON(t *testing.T) { + info := &Info{ + Path: "test-frac", + Ver: "2", + DocsTotal: 100, + DocsOnDisk: 1000, + DocsRaw: 2000, + MetaOnDisk: 500, + IndexOnDisk: 1500, + From: seq.MID(1761812502000000000), + To: seq.MID(1761812503000000000), + CreationTime: 1666193044479, + SealingTime: 1666193045000, + } + + jsonBytes, err := json.Marshal(info) + require.NoError(t, err) + + var jsonMap map[string]interface{} + err = json.Unmarshal(jsonBytes, &jsonMap) + require.NoError(t, err) + + fromRaw, ok := jsonMap["from"].(float64) + require.True(t, ok, "from should be a number") + assert.Equal(t, float64(1761812502000), fromRaw, "should scale from from millis on marshal") + toRaw, ok := jsonMap["to"].(float64) + require.True(t, ok, "to should be a number") + assert.Equal(t, float64(1761812503000), toRaw, "should scale from to millis on marshal") + + // validate that original fields are not changed while marshaling (safety check) + assert.Equal(t, seq.MID(1761812502000000000), info.From, "must not change while marshaling") + assert.Equal(t, seq.MID(1761812503000000000), info.To, "must not change while marshaling") +} + +func TestInfo_UnmarshalJSON(t *testing.T) { + jsonData := `{ + "name": "test-frac", + "ver": "2", + "docs_total": 100, + "docs_on_disk": 1000, + "docs_raw": 2000, + "meta_on_disk": 500, + "index_on_disk": 1500, + "from": 1761812502000, + "to": 1761812503000, + "creation_time": 1666193044479, + "sealing_time": 1666193045000 + }` + + var info Info + err := json.Unmarshal([]byte(jsonData), &info) + require.NoError(t, err) + + assert.Equal(t, seq.MID(1761812502000000000), info.From, "should scale to nanoseconds") + assert.Equal(t, seq.MID(1761812503000000000), info.To, "should scale to nanoseconds") + assert.Equal(t, "test-frac", info.Path) + assert.Equal(t, uint32(100), info.DocsTotal) +} + +func TestInfo_MarshalUnmarshal(t *testing.T) { + original := &Info{ + Path: "test-frac", + Ver: "2", + DocsTotal: 100, + DocsOnDisk: 1000, + DocsRaw: 2000, + MetaOnDisk: 500, + IndexOnDisk: 1500, + From: seq.MID(1761812502000000000), + To: seq.MID(1761812503000000000), + CreationTime: 1666193044479, + SealingTime: 1666193045000, + } + + jsonBytes, err := json.Marshal(original) + require.NoError(t, err) + + var unmarshaled Info + err = json.Unmarshal(jsonBytes, &unmarshaled) + require.NoError(t, err) + + assert.EqualExportedValues(t, original, &unmarshaled, "should match after marshal/unmarshal") +} + +func TestInfo_MarshalUnmarshalWithNanos(t *testing.T) { + original := &Info{ + Path: "test-frac", + Ver: "2", + From: seq.MID(1761812502000000777), + To: seq.MID(1761812503000000777), + CreationTime: 1666193044479, + SealingTime: 1666193045000, + } + + jsonBytes, err := json.Marshal(original) + require.NoError(t, err) + + var unmarshaled Info + err = json.Unmarshal(jsonBytes, &unmarshaled) + require.NoError(t, err) + + // we can't represent nanos in millis while saving, so "from" is floored (rounded down) to near millisecond, + // while "to" is ceiled (rounded up) to near millisecond + assert.Equal(t, seq.MID(1761812502000000000), unmarshaled.From) + assert.Equal(t, seq.MID(1761812503001000000), unmarshaled.To) +} diff --git a/fracmanager/frac_info_cache.go b/fracmanager/frac_info_cache.go index 4ac420ce..703a0686 100644 --- a/fracmanager/frac_info_cache.go +++ b/fracmanager/frac_info_cache.go @@ -12,55 +12,10 @@ import ( "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/logger" - "github.com/ozontech/seq-db/seq" ) const defaultFilePermission = 0o664 -// infoJSON is a temporary struct for JSON marshaling/unmarshaling -// that always stores From and To in milliseconds for backward compatibility -type infoJSON struct { - *common.Info - From uint64 `json:"from"` - To uint64 `json:"to"` -} - -// MarshalJSON implements custom JSON marshaling to always store From and To in milliseconds -func (e *infoJSON) MarshalJSON() ([]byte, error) { - // Use type alias to avoid infinite recursion - type Alias common.Info - return json.Marshal(&struct { - From uint64 `json:"from"` - To uint64 `json:"to"` - *Alias - }{ - From: seq.MIDToMillis(e.Info.From), - To: seq.MIDToCeilingMillis(e.Info.To), - Alias: (*Alias)(e.Info), - }) -} - -// UnmarshalJSON implements custom JSON unmarshaling to convert From and To from milliseconds to nanoseconds -func (e *infoJSON) UnmarshalJSON(data []byte) error { - e.Info = &common.Info{} - - // Use type alias to avoid infinite recursion - type Alias common.Info - tmp := &struct { - From uint64 `json:"from"` - To uint64 `json:"to"` - *Alias - }{ - Alias: (*Alias)(e.Info), - } - if err := json.Unmarshal(data, &tmp); err != nil { - return err - } - e.Info.From = seq.MillisToMID(tmp.From) - e.Info.To = seq.MillisToMID(tmp.To) - return nil -} - type fracInfoCache struct { dataDir string fullPath string @@ -104,7 +59,7 @@ func (fc *fracInfoCache) LoadFromDisk(fileName string) { return } - cacheJSON := make(map[string]*infoJSON) + cacheJSON := make(map[string]*common.Info) err = json.Unmarshal(content, &cacheJSON) if err != nil { logger.Warn("can't unmarshal frac-cache, new frac-cache will be created later on", @@ -113,7 +68,7 @@ func (fc *fracInfoCache) LoadFromDisk(fileName string) { return } for frac, entry := range cacheJSON { - fc.cache[frac] = entry.Info + fc.cache[frac] = entry } logger.Info("frac-cache loaded from disk", zap.String("filename", fileName), @@ -160,9 +115,9 @@ func (fc *fracInfoCache) getContentWithVersion() (uint64, []byte, error) { return 0, nil, nil // no changes } - cacheJSON := make(map[string]*infoJSON, len(fc.cache)) + cacheJSON := make(map[string]*common.Info, len(fc.cache)) for k, v := range fc.cache { - cacheJSON[k] = &infoJSON{Info: v} + cacheJSON[k] = v } content, err := json.Marshal(cacheJSON) diff --git a/fracmanager/frac_info_cache_test.go b/fracmanager/frac_info_cache_test.go index 5ed7ad44..397c37dc 100644 --- a/fracmanager/frac_info_cache_test.go +++ b/fracmanager/frac_info_cache_test.go @@ -36,12 +36,6 @@ func loadFracCache(dataDir string) (map[string]*common.Info, error) { fracCache := make(map[string]*common.Info) err = json.Unmarshal(content, &fracCache) - - // We must convert "from" and "to" to nanosecond seq.MID, since frac cache is now also doing it - for _, info := range fracCache { - info.From = seq.MillisToMID(uint64(info.From)) - info.To = seq.MillisToMID(uint64(info.To)) - } if err != nil { return nil, err } @@ -289,7 +283,10 @@ func TestFracInfoSavedToCache(t *testing.T) { totalSize := uint64(0) cnt := 1 for totalSize < maxSize { - addDummyDoc(t, fm, dp, seq.SimpleID(int64(cnt*1000000))) + // increase doc id by 1000000 (1 milli in nanos) instead of 1 + // otherwise all docs fall into same millisecond and test breaks + id := seq.SimpleID(int64(seq.MillisToMID(uint64(cnt)))) + addDummyDoc(t, fm, dp, id) cnt++ fracInstance := rotateAndSeal(fm) totalSize += fracInstance.Info().FullSize() @@ -459,54 +456,3 @@ func TestMissingCacheFilesDeleted(t *testing.T) { assert.NoError(t, err) assert.Equal(t, fracCacheFromDisk, []byte("{}")) } - -func TestInfoCacheJSONEntryMarshalUnmarshal(t *testing.T) { - originalInfo := &common.Info{ - Path: "test-frac", - Ver: "1.0", - BinaryDataVer: 2, - DocsTotal: 100, - DocsOnDisk: 1000, - DocsRaw: 2000, - MetaOnDisk: 500, - IndexOnDisk: 1500, - From: seq.MID(1761812502000000000), - To: seq.MID(1761812503000000000), - CreationTime: 1666193044479, - SealingTime: 1666193045000, - } - - type infoJSON struct { - *common.Info - From uint64 `json:"from"` - To uint64 `json:"to"` - } - entry := &infoJSON{ - Info: originalInfo, - From: seq.MIDToMillis(originalInfo.From), - To: seq.MIDToMillis(originalInfo.To), - } - jsonBytes, err := json.Marshal(entry) - assert.NoError(t, err) - - var jsonMap map[string]interface{} - err = json.Unmarshal(jsonBytes, &jsonMap) - assert.NoError(t, err) - - assert.Equal(t, float64(1761812502000), jsonMap["from"]) - assert.Equal(t, float64(1761812503000), jsonMap["to"]) - - var unmarshaledEntry infoJSON - err = json.Unmarshal(jsonBytes, &unmarshaledEntry) - assert.NoError(t, err) - assert.NotNil(t, unmarshaledEntry.Info) - - unmarshaledEntry.Info.From = seq.MillisToMID(unmarshaledEntry.From) - unmarshaledEntry.Info.To = seq.MillisToMID(unmarshaledEntry.To) - - assert.Equal(t, seq.MID(1761812502000000000), unmarshaledEntry.Info.From) - assert.Equal(t, seq.MID(1761812503000000000), unmarshaledEntry.Info.To) - assert.Equal(t, originalInfo.Path, unmarshaledEntry.Info.Path) - assert.Equal(t, originalInfo.Ver, unmarshaledEntry.Info.Ver) - assert.Equal(t, originalInfo.DocsTotal, unmarshaledEntry.Info.DocsTotal) -} diff --git a/seq/seq.go b/seq/seq.go index 1dbea345..2545c47a 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -133,15 +133,12 @@ func MIDToMillis(t MID) uint64 { } func MIDToCeilingMillis(t MID) uint64 { - nanos := uint64(t) - nanosPerMilli := uint64(time.Millisecond) - millisFloorPart := nanos / uint64(time.Millisecond) - nanosPart := nanos % nanosPerMilli - if nanosPart != 0 { - return millisFloorPart + 1 - } else { - return millisFloorPart + millis := uint64(t) / uint64(time.Millisecond) + nanosPartOfMilli := uint64(t) % uint64(time.Millisecond) + if nanosPartOfMilli != 0 { + millis += 1 } + return millis } func MIDToDuration(t MID) time.Duration { diff --git a/seq/seq_test.go b/seq/seq_test.go index a96fa9ee..4e7dd380 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -58,6 +58,7 @@ func TestMIDToCeilingMillis(t *testing.T) { func TestString(t *testing.T) { assert.Equal(t, "2025-10-30 12:21:42", MID(1761812502000000000).String()) assert.Equal(t, "2025-10-30 12:21:42.432", MID(1761812502432000000).String()) + // nanoseconds are not printed intentionally assert.Equal(t, "2025-10-30 12:21:42.432", MID(1761812502432000773).String()) } diff --git a/tests/integration_tests/sub_search_test.go b/tests/integration_tests/sub_search_test.go index f63c01b1..3b859f31 100644 --- a/tests/integration_tests/sub_search_test.go +++ b/tests/integration_tests/sub_search_test.go @@ -92,7 +92,7 @@ func (s *IntegrationTestSuite) TestSubSearch() { defer env.StopAll() timeRange := 23 * time.Hour - to := time.UnixMilli(time.Now().UnixMilli()) + to := time.Now() from := to.Add(-timeRange) docsTimes := s.ingestData(env, from, to, 5*time.Minute, 100) diff --git a/tests/setup/doc.go b/tests/setup/doc.go index c441937b..6721f291 100644 --- a/tests/setup/doc.go +++ b/tests/setup/doc.go @@ -227,7 +227,7 @@ func splitRange(size int, callback func(from int, to int)) { // If timestamp after call is zero, then this function will fill it with deterministic timestamp // so you could query each doc by range, if needed func GenerateDocs(size int, generator func(int, *ExampleDoc)) []ExampleDoc { - start := time.UnixMilli(time.Now().UnixMilli()) + start := time.Now() docs := make([]ExampleDoc, size) splitRange(size, func(from int, to int) { for i := from; i < to; i++ { diff --git a/util/util.go b/util/util.go index ebed5bb0..34433213 100644 --- a/util/util.go +++ b/util/util.go @@ -117,12 +117,10 @@ func MsTsToESFormat(ts uint64) string { return time.UnixMilli(int64(ts)).Format(consts.ESTimeFormat) } -// NsTsToESFormat converts timestamp in nanoseconds to ES time format string. +// NsTsToESFormat converts timestamp in nanoseconds to ES time format string. Nanosecond part will not be printed. func NsTsToESFormat(ts uint64) string { nanosPerSec := uint64(time.Second) - secondsPart := ts / nanosPerSec - nanosPart := ts - secondsPart*nanosPerSec - return time.Unix(int64(secondsPart), int64(nanosPart)).Format(consts.ESTimeFormat) + return time.Unix(int64(ts/nanosPerSec), int64(ts%nanosPerSec)).Format(consts.ESTimeFormat) } func BinSearchInRange(from, to int, fn func(i int) bool) int { From e637cc6bf453e624c3076bbed891ded793cf079b Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:24:13 +0400 Subject: [PATCH 29/35] Merge branch 'refs/heads/main' into 232-nanos-support-phase-2 # Conflicts: # asyncsearcher/async_searcher.go # asyncsearcher/encoding.go # asyncsearcher/encoding_test.go # config/frac_version.go # config/store_protocol_version.go # frac/sealed/block_info.go # frac/sealed/seqids/blocks.go # frac/sealed_loader.go # fracmanager/frac_info_cache.go # indexer/meta_data.go # network/grpcutil/interceptors.go # proxy/search/async.go # proxy/search/docs_iterator.go # proxy/search/ingestor.go # proxy/search/streaming_doc.go # seq/seq.go # seq/seq_test.go # storeapi/grpc_server.go --- indexer/meta_data.go | 10 +++++----- proxy/search/async.go | 4 ++-- proxy/search/ingestor.go | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/indexer/meta_data.go b/indexer/meta_data.go index 2f0d7eb0..aa30d646 100644 --- a/indexer/meta_data.go +++ b/indexer/meta_data.go @@ -77,17 +77,17 @@ func (m *MetaData) UnmarshalBinary(b []byte) error { } func (m *MetaData) unmarshalVersion1(b []byte) error { - return m.unmarshal(b) -} - -func (m *MetaData) unmarshalVersion2(b []byte) error { if err := m.unmarshal(b); err != nil { return err } - m.ID.MID = seq.NanosToMID(uint64(m.ID.MID)) + m.ID.MID = seq.MillisToMID(uint64(m.ID.MID)) return nil } +func (m *MetaData) unmarshalVersion2(b []byte) error { + return m.unmarshal(b) +} + func (m *MetaData) unmarshal(b []byte) error { // Decode seq.ID. m.ID.MID = seq.MID(binary.LittleEndian.Uint64(b)) diff --git a/proxy/search/async.go b/proxy/search/async.go index 857d475b..106e8e30 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -169,8 +169,8 @@ func (si *Ingestor) FetchAsyncSearchResult( } protocolVersion := config.StoreProtocolVersion1 - if precisionValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionValues) > 0 { - protocolVersion = config.ParseStoreProtocolVersion(precisionValues[0]) + if protocolVersionValues := md.Get(consts.StoreProtocolVersionHeader); len(protocolVersionValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(protocolVersionValues[0]) } if protocolVersion == config.StoreProtocolVersion1 { diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 219036d4..7f831ba7 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -235,8 +235,8 @@ func (si *Ingestor) singleDocsStream(ctx context.Context, explain bool, source u if err != nil { return nil, fmt.Errorf("can't fetch metadata: %s", err.Error()) } else if md != nil { - if precisionValues := md.Get(consts.StoreProtocolVersionHeader); len(precisionValues) > 0 { - protocolVersion = config.ParseStoreProtocolVersion(precisionValues[0]) + if storeProtocolValues := md.Get(consts.StoreProtocolVersionHeader); len(storeProtocolValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(storeProtocolValues[0]) } } From 9a6393e8a90b1faaddc93b80201591292ffd59ba Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:49:21 +0400 Subject: [PATCH 30/35] fomatting fixes --- asyncsearcher/encoding_test.go | 2 ++ frac/sealed/seqids/blocks.go | 2 ++ frac/sealed_loader.go | 2 +- proxy/search/docs_iterator.go | 8 ++++---- proxy/search/ingestor.go | 4 ++-- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/asyncsearcher/encoding_test.go b/asyncsearcher/encoding_test.go index 16c2af93..5b34578b 100644 --- a/asyncsearcher/encoding_test.go +++ b/asyncsearcher/encoding_test.go @@ -92,6 +92,8 @@ func TestQPRMarshalUnmarshal(t *testing.T) { } } +// TestQPRVersion1Compatibility tests that it's possible to unmarshall and read version 1 async search result encoded. +// MIDs in IDs and a histogram must be converted from millis to nanos func TestQPRVersion1Compatibility(t *testing.T) { qpr := seq.QPR{ IDs: seq.IDSources{ diff --git a/frac/sealed/seqids/blocks.go b/frac/sealed/seqids/blocks.go index 4eba205d..f17f1be5 100644 --- a/frac/sealed/seqids/blocks.go +++ b/frac/sealed/seqids/blocks.go @@ -77,6 +77,8 @@ func (b *BlockParams) Unpack(data []byte) error { return nil } +// unpackRawMIDsVarint is a dedicated method for unpacking delta encoded MIDs. The reason a dedicated method exists +// is that we want to unpack values and potentially convert legacy frac version in one pass. func unpackRawMIDsVarint(src []byte, dst []uint64, fracVersion config.BinaryDataVersion) ([]uint64, error) { dst = dst[:0] id := uint64(0) diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 0585ef49..ae639862 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -95,8 +95,8 @@ func (l *Loader) loadIDs(fracVersion config.BinaryDataVersion) (idsTable seqids. if header.Len() == 0 { break } - var mid seq.MID + var mid seq.MID if fracVersion < config.BinaryDataV2 { mid = seq.MillisToMID(header.GetExt1()) } else { diff --git a/proxy/search/docs_iterator.go b/proxy/search/docs_iterator.go index daf332e9..cc032b7f 100644 --- a/proxy/search/docs_iterator.go +++ b/proxy/search/docs_iterator.go @@ -93,10 +93,10 @@ func newGrpcStreamIterator( totalIDs int, protocolVersion config.StoreProtocolVersion) *grpcStreamIterator { return &grpcStreamIterator{ - stream: stream, - source: source, - host: host, - totalIDs: totalIDs, + stream: stream, + source: source, + host: host, + totalIDs: totalIDs, protocolVersion: protocolVersion, } } diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 7f831ba7..08cbdd01 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -637,8 +637,8 @@ func (si *Ingestor) searchHost(ctx context.Context, req *storeapi.SearchRequest, // If header indicates protocol version 1 (MID in milliseconds), then convert to nanoseconds protocolVersion := config.StoreProtocolVersion1 - if protocolHeaderValues := md.Get(consts.StoreProtocolVersionHeader); len(protocolHeaderValues) > 0 { - protocolVersion = config.ParseStoreProtocolVersion(protocolHeaderValues[0]) + if storeProtocolValues := md.Get(consts.StoreProtocolVersionHeader); len(storeProtocolValues) > 0 { + protocolVersion = config.ParseStoreProtocolVersion(storeProtocolValues[0]) } // Convert legacy store response (protocol version 1) to nanoseconds MID From 63b83302d55a52b253e32666080d825ac0729a4d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Fri, 5 Dec 2025 17:20:17 +0400 Subject: [PATCH 31/35] store protocol for single, linter, test issues --- frac/fraction_test.go | 12 ++++++------ proxy/search/async.go | 2 +- seq/seq_test.go | 7 ------- storeapi/client.go | 41 ++++++++++++++++++++++++++++++++--------- 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index eefb96cf..b8b41159 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1111,12 +1111,12 @@ func (s *FractionTestSuite) TestIntersectingNanoseconds() { func (s *FractionTestSuite) TestContainsWithMIDDistribution() { now := time.Now().Truncate(time.Minute) docs := []string{ - fmt.Sprintf(`{"timestamp":"%s","message":"apple juice"}`, now.Add(-60*time.Minute).Format(time.RFC3339Nano)), - fmt.Sprintf(`{"timestamp":"%s","message":"orange juice"}`, now.Add(-61*time.Minute).Format(time.RFC3339Nano)), - fmt.Sprintf(`{"timestamp":"%s","message":"cider"}`, now.Add(-65*time.Minute).Format(time.RFC3339Nano)), - fmt.Sprintf(`{"timestamp":"%s","message":"wine"}`, now.Add(-123*time.Minute).Format(time.RFC3339Nano)), - fmt.Sprintf(`{"timestamp":"%s","message":"cola"}`, now.Add(-365*time.Minute).Format(time.RFC3339Nano)), - fmt.Sprintf(`{"timestamp":"%s","message":"cola"}`, now.Add(-30*time.Hour).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":%q,"message":"apple juice"}`, now.Add(-60*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":%q,"message":"orange juice"}`, now.Add(-61*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":%q,"message":"cider"}`, now.Add(-65*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":%q,"message":"wine"}`, now.Add(-123*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":%q,"message":"cola"}`, now.Add(-365*time.Minute).Format(time.RFC3339Nano)), + fmt.Sprintf(`{"timestamp":%q,"message":"cola"}`, now.Add(-30*time.Hour).Format(time.RFC3339Nano)), } s.insertDocuments(docs) diff --git a/proxy/search/async.go b/proxy/search/async.go index d5b263ac..25996cfa 100644 --- a/proxy/search/async.go +++ b/proxy/search/async.go @@ -479,7 +479,7 @@ func buildRequestAggs(in []*storeapi.AggQuery) []AggQuery { GroupBy: agg.GroupBy, Func: agg.Func.MustAggFunc(), Quantiles: agg.Quantiles, - Interval: seq.MID(agg.Interval), + Interval: seq.MillisToMID(uint64(agg.Interval)), }) } return reqAggs diff --git a/seq/seq_test.go b/seq/seq_test.go index 4c18fbb4..2265bce2 100644 --- a/seq/seq_test.go +++ b/seq/seq_test.go @@ -55,13 +55,6 @@ func TestMIDToCeilingMillis(t *testing.T) { assert.Equal(t, uint64(15), MIDToCeilingMillis(MID(14999999))) } -func TestString(t *testing.T) { - assert.Equal(t, "2025-10-30 12:21:42", MID(1761812502000000000).String()) - assert.Equal(t, "2025-10-30 12:21:42.432", MID(1761812502432000000).String()) - // nanoseconds are not printed intentionally - assert.Equal(t, "2025-10-30 12:21:42.432", MID(1761812502432000773).String()) -} - func TestTimeToMIDConversion(t *testing.T) { timestampNow := time.Now() assert.EqualExportedValues(t, timestampNow, MID(timestampNow.UnixNano()).Time()) diff --git a/storeapi/client.go b/storeapi/client.go index 3dd22c97..3da87929 100644 --- a/storeapi/client.go +++ b/storeapi/client.go @@ -9,6 +9,8 @@ import ( "google.golang.org/grpc/metadata" "google.golang.org/protobuf/types/known/emptypb" + "github.com/ozontech/seq-db/config" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/pkg/storeapi" ) @@ -20,37 +22,55 @@ func NewClient(store *Store) storeapi.StoreApiClient { return &inMemoryAPIClient{store: store} } -func (i inMemoryAPIClient) Bulk(ctx context.Context, in *storeapi.BulkRequest, _ ...grpc.CallOption) (*emptypb.Empty, error) { +func (i inMemoryAPIClient) Bulk(ctx context.Context, in *storeapi.BulkRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) { // NOTE: We copy `Metas` to prevent dataraces because `store` might work // with this memory even when it returned response to client. in.Metas = slices.Clone(in.Metas) + setProtocolVersionHeader(opts...) return i.store.GrpcV1().Bulk(ctx, in) } -func (i inMemoryAPIClient) Search(ctx context.Context, in *storeapi.SearchRequest, _ ...grpc.CallOption) (*storeapi.SearchResponse, error) { +func (i inMemoryAPIClient) Search(ctx context.Context, in *storeapi.SearchRequest, opts ...grpc.CallOption) (*storeapi.SearchResponse, error) { + setProtocolVersionHeader(opts...) return i.store.GrpcV1().Search(ctx, in) } -func (i inMemoryAPIClient) StartAsyncSearch(ctx context.Context, in *storeapi.StartAsyncSearchRequest, _ ...grpc.CallOption) (*storeapi.StartAsyncSearchResponse, error) { +func (i inMemoryAPIClient) StartAsyncSearch(ctx context.Context, in *storeapi.StartAsyncSearchRequest, opts ...grpc.CallOption) (*storeapi.StartAsyncSearchResponse, error) { + setProtocolVersionHeader(opts...) return i.store.GrpcV1().StartAsyncSearch(ctx, in) } -func (i inMemoryAPIClient) FetchAsyncSearchResult(ctx context.Context, in *storeapi.FetchAsyncSearchResultRequest, _ ...grpc.CallOption) (*storeapi.FetchAsyncSearchResultResponse, error) { +func (i inMemoryAPIClient) FetchAsyncSearchResult(ctx context.Context, in *storeapi.FetchAsyncSearchResultRequest, opts ...grpc.CallOption) (*storeapi.FetchAsyncSearchResultResponse, error) { + setProtocolVersionHeader(opts...) return i.store.GrpcV1().FetchAsyncSearchResult(ctx, in) } -func (i inMemoryAPIClient) CancelAsyncSearch(ctx context.Context, in *storeapi.CancelAsyncSearchRequest, _ ...grpc.CallOption) (*storeapi.CancelAsyncSearchResponse, error) { +func (i inMemoryAPIClient) CancelAsyncSearch(ctx context.Context, in *storeapi.CancelAsyncSearchRequest, opts ...grpc.CallOption) (*storeapi.CancelAsyncSearchResponse, error) { + setProtocolVersionHeader(opts...) return i.store.GrpcV1().CancelAsyncSearch(ctx, in) } -func (i inMemoryAPIClient) DeleteAsyncSearch(ctx context.Context, in *storeapi.DeleteAsyncSearchRequest, _ ...grpc.CallOption) (*storeapi.DeleteAsyncSearchResponse, error) { +func (i inMemoryAPIClient) DeleteAsyncSearch(ctx context.Context, in *storeapi.DeleteAsyncSearchRequest, opts ...grpc.CallOption) (*storeapi.DeleteAsyncSearchResponse, error) { + setProtocolVersionHeader(opts...) return i.store.GrpcV1().DeleteAsyncSearch(ctx, in) } -func (i inMemoryAPIClient) GetAsyncSearchesList(ctx context.Context, in *storeapi.GetAsyncSearchesListRequest, _ ...grpc.CallOption) (*storeapi.GetAsyncSearchesListResponse, error) { +func (i inMemoryAPIClient) GetAsyncSearchesList(ctx context.Context, in *storeapi.GetAsyncSearchesListRequest, opts ...grpc.CallOption) (*storeapi.GetAsyncSearchesListResponse, error) { + setProtocolVersionHeader(opts...) return i.store.GrpcV1().GetAsyncSearchesList(ctx, in) } +func setProtocolVersionHeader(opts ...grpc.CallOption) { + for _, opt := range opts { + if headerOpt, ok := opt.(grpc.HeaderCallOption); ok && headerOpt.HeaderAddr != nil { + if *headerOpt.HeaderAddr == nil { + *headerOpt.HeaderAddr = make(metadata.MD) + } + (*headerOpt.HeaderAddr)[consts.StoreProtocolVersionHeader] = []string{config.StoreProtocolVersion2.String()} + } + } +} + type storeAPIFetchServer struct { grpc.ServerStream ctx context.Context @@ -81,7 +101,9 @@ func newStoreAPIFetchClient(b []*storeapi.BinaryData) *storeAPIFetchClient { } func (x *storeAPIFetchClient) Header() (metadata.MD, error) { - return nil, nil + md := make(metadata.MD) + md[consts.StoreProtocolVersionHeader] = []string{config.StoreProtocolVersion2.String()} + return md, nil } func (x *storeAPIFetchClient) Recv() (*storeapi.BinaryData, error) { @@ -95,8 +117,9 @@ func (x *storeAPIFetchClient) Recv() (*storeapi.BinaryData, error) { return res, nil } -func (i inMemoryAPIClient) Fetch(ctx context.Context, in *storeapi.FetchRequest, _ ...grpc.CallOption) (storeapi.StoreApi_FetchClient, error) { +func (i inMemoryAPIClient) Fetch(ctx context.Context, in *storeapi.FetchRequest, opts ...grpc.CallOption) (storeapi.StoreApi_FetchClient, error) { s := newStoreAPIFetchServer(ctx) + setProtocolVersionHeader(opts...) if err := i.store.GrpcV1().Fetch(in, s); err != nil { return nil, err } From 575f66d3d23d5d56cbebe66e6891f28aeb40513d Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Mon, 15 Dec 2025 14:38:54 +0400 Subject: [PATCH 32/35] Remove unneded conversion --- frac/sealed/block_info.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/frac/sealed/block_info.go b/frac/sealed/block_info.go index 21bcd182..8436f91e 100644 --- a/frac/sealed/block_info.go +++ b/frac/sealed/block_info.go @@ -6,10 +6,8 @@ import ( "go.uber.org/zap" - "github.com/ozontech/seq-db/config" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/logger" - "github.com/ozontech/seq-db/seq" ) const seqDBMagic = "SEQM" @@ -41,11 +39,5 @@ func (b *BlockInfo) Unpack(data []byte) error { } b.Info.MetaOnDisk = 0 // todo: make this correction on sealing and remove this next time - // legacy format - MID in milliseconds - if b.Info.BinaryDataVer < config.BinaryDataV2 { - b.Info.From = seq.MillisToMID(uint64(b.Info.From)) - b.Info.To = seq.MillisToMID(uint64(b.Info.To)) - } - return nil } From 33a31f3d3584ef9564c144faa2206ec5a4d02441 Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:01:45 +0400 Subject: [PATCH 33/35] PR review: rename type alias --- frac/common/info.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frac/common/info.go b/frac/common/info.go index 8b854525..69121408 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -121,9 +121,9 @@ func (s *Info) IsIntersecting(from, to seq.MID) bool { // MarshalJSON implements custom JSON marshaling to always store From and To in milliseconds func (s *Info) MarshalJSON() ([]byte, error) { - type InfoAlias Info // type alias to avoid infinite recursion + type TmpInfo Info // type alias to avoid infinite recursion - tmp := InfoAlias(*s) + tmp := TmpInfo(*s) // We convert "from" and "to" to milliseconds in order to guarantee we can rollback on deploy. // When converting nanos to millis we must round "from" down (floor) and round "to" up (ceiling). From 41da96088b131042f2a2972dab7258bc1a4933fd Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:07:24 +0400 Subject: [PATCH 34/35] PR review: cast hist interval to seq.MID --- frac/processor/search.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/frac/processor/search.go b/frac/processor/search.go index 58256afe..6df32c07 100644 --- a/frac/processor/search.go +++ b/frac/processor/search.go @@ -135,12 +135,13 @@ func convertHistToMap(params SearchParams, hist []uint64) map[seq.MID]uint64 { return nil } res := make(map[seq.MID]uint64, len(hist)) - bucket := params.From - params.From%seq.MillisToMID(params.HistInterval) + histIntervalMID := seq.MillisToMID(params.HistInterval) + bucket := params.From - params.From%histIntervalMID for _, cnt := range hist { if cnt > 0 { res[bucket] = cnt } - bucket += seq.MillisToMID(params.HistInterval) + bucket += histIntervalMID } return res } @@ -157,12 +158,14 @@ func iterateEvalTree( needScanAllRange := params.IsScanAllRequest() var ( - histBase uint64 - histogram []uint64 + histBase uint64 + histogram []uint64 + histInterval seq.MID ) if hasHist { - histBase = seq.MIDToMillis(params.From) / params.HistInterval - histSize := seq.MIDToMillis(params.To)/params.HistInterval - histBase + 1 + histInterval = seq.MillisToMID(params.HistInterval) + histBase = uint64(params.From) / uint64(histInterval) + histSize := uint64(params.To)/uint64(histInterval) - histBase + 1 histogram = make([]uint64, histSize) } @@ -206,7 +209,7 @@ func iterateEvalTree( zap.Time("mid", mid.Time())) continue } - bucketIndex := seq.MIDToMillis(mid)/params.HistInterval - histBase + bucketIndex := uint64(mid)/uint64(histInterval) - histBase histogram[bucketIndex]++ } From a8dc1d73d5b461fa7bd1b190336b390c46d6a3fe Mon Sep 17 00:00:00 2001 From: Andrei Cheboksarov <37665782+cheb0@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:10:10 +0400 Subject: [PATCH 35/35] PR review: remove unnecessary MillisToMID calls --- proxy/search/ingestor.go | 2 +- proxyapi/grpc_async_search.go | 2 +- proxyapi/grpc_complex_search_test.go | 4 ++-- proxyapi/grpc_export_test.go | 4 ++-- proxyapi/grpc_get_aggregation_test.go | 4 ++-- proxyapi/grpc_get_histogram_test.go | 4 ++-- proxyapi/grpc_search_test.go | 4 ++-- proxyapi/grpc_v1.go | 8 ++++---- tests/integration_tests/integration_test.go | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/proxy/search/ingestor.go b/proxy/search/ingestor.go index 08cbdd01..1dce3039 100644 --- a/proxy/search/ingestor.go +++ b/proxy/search/ingestor.go @@ -513,7 +513,7 @@ func responseToQPR(resp *storeapi.SearchResponse, source uint64, explain bool) * pbhist := bin.Hist tbin := seq.AggBin{ - MID: seq.MillisToMID(uint64(bin.Ts.AsTime().UnixMilli())), + MID: seq.MID(bin.Ts.AsTime().UnixNano()), Token: bin.Label, } diff --git a/proxyapi/grpc_async_search.go b/proxyapi/grpc_async_search.go index 96e525ea..b58e1f14 100644 --- a/proxyapi/grpc_async_search.go +++ b/proxyapi/grpc_async_search.go @@ -51,7 +51,7 @@ func (g *grpcV1) StartAsyncSearch( From: r.GetQuery().GetFrom().AsTime(), To: r.GetQuery().GetTo().AsTime(), Aggregations: aggs, - HistogramInterval: seq.MillisToMID(uint64(histInterval.Milliseconds())), + HistogramInterval: seq.MID(histInterval.Nanoseconds()), WithDocs: r.WithDocs, Size: r.Size, }) diff --git a/proxyapi/grpc_complex_search_test.go b/proxyapi/grpc_complex_search_test.go index cea13089..43c1eec1 100644 --- a/proxyapi/grpc_complex_search_test.go +++ b/proxyapi/grpc_complex_search_test.go @@ -130,8 +130,8 @@ func prepareComplexSearchTestData(t *testing.T, cData cSearchTestCaseData) cSear Q: []byte(req.Query.Query), Size: int(req.Size), Offset: int(req.Offset), - From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), - To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), + From: seq.MID(req.Query.From.AsTime().UnixNano()), + To: seq.MID(req.Query.To.AsTime().UnixNano()), WithTotal: req.WithTotal, ShouldFetch: true, } diff --git a/proxyapi/grpc_export_test.go b/proxyapi/grpc_export_test.go index fdfa17bd..86b95a15 100644 --- a/proxyapi/grpc_export_test.go +++ b/proxyapi/grpc_export_test.go @@ -76,8 +76,8 @@ func prepareExportTestData(cData exportTestCaseData) exportTestData { Q: []byte(req.Query.Query), Offset: int(req.Offset), Size: int(req.Size), - From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), - To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), + From: seq.MID(req.Query.From.AsTime().UnixNano()), + To: seq.MID(req.Query.To.AsTime().UnixNano()), ShouldFetch: true, }, ret: siSearchRet{ diff --git a/proxyapi/grpc_get_aggregation_test.go b/proxyapi/grpc_get_aggregation_test.go index d39befbc..251d25a3 100644 --- a/proxyapi/grpc_get_aggregation_test.go +++ b/proxyapi/grpc_get_aggregation_test.go @@ -92,8 +92,8 @@ func prepareGetAggregationTestData(t *testing.T, cData getAggregationTestCaseDat sr := &search.SearchRequest{ Explain: req.Query.Explain, Q: []byte(req.Query.Query), - From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), - To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), + From: seq.MID(req.Query.From.AsTime().UnixNano()), + To: seq.MID(req.Query.To.AsTime().UnixNano()), } if len(cData.aggQ) > 0 { for _, query := range cData.aggQ { diff --git a/proxyapi/grpc_get_histogram_test.go b/proxyapi/grpc_get_histogram_test.go index 35f49021..f498d4ba 100644 --- a/proxyapi/grpc_get_histogram_test.go +++ b/proxyapi/grpc_get_histogram_test.go @@ -98,8 +98,8 @@ func prepareGetHistogramTestData(t *testing.T, cData getHistogramTestCaseData) g sr := &search.SearchRequest{ Explain: req.Query.Explain, Q: []byte(req.Query.Query), - From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), - To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), + From: seq.MID(req.Query.From.AsTime().UnixNano()), + To: seq.MID(req.Query.To.AsTime().UnixNano()), Interval: seq.DurationToMID(intervalDur), } siSearchMock = &siSearchMockData{ diff --git a/proxyapi/grpc_search_test.go b/proxyapi/grpc_search_test.go index a33cebb6..0f99932a 100644 --- a/proxyapi/grpc_search_test.go +++ b/proxyapi/grpc_search_test.go @@ -88,8 +88,8 @@ func prepareSearchTestData(t *testing.T, cData searchTestCaseData) searchTestDat Q: []byte(req.Query.Query), Size: int(req.Size), Offset: int(req.Offset), - From: seq.MillisToMID(uint64(req.Query.From.AsTime().UnixMilli())), - To: seq.MillisToMID(uint64(req.Query.To.AsTime().UnixMilli())), + From: seq.MID(req.Query.From.AsTime().UnixNano()), + To: seq.MID(req.Query.To.AsTime().UnixNano()), WithTotal: req.WithTotal, ShouldFetch: true, } diff --git a/proxyapi/grpc_v1.go b/proxyapi/grpc_v1.go index 7ebda110..4bf27f9e 100644 --- a/proxyapi/grpc_v1.go +++ b/proxyapi/grpc_v1.go @@ -226,8 +226,8 @@ func (g *grpcV1) doSearch( proxyReq := &search.SearchRequest{ Q: []byte(req.Query.Query), - From: seq.MillisToMID(uint64(fromTime.UnixMilli())), - To: seq.MillisToMID(uint64(toTime.UnixMilli())), + From: seq.MID(fromTime.UnixNano()), + To: seq.MID(toTime.UnixNano()), Explain: req.Query.Explain, Size: int(req.Size), Offset: int(req.Offset), @@ -253,7 +253,7 @@ func (g *grpcV1) doSearch( err, ) } - proxyReq.Interval = seq.MillisToMID(uint64(intervalDuration.Milliseconds())) + proxyReq.Interval = seq.MID(intervalDuration.Nanoseconds()) } qpr, docsStream, _, err := g.searchIngestor.Search(ctx, proxyReq, tr) @@ -329,7 +329,7 @@ func convertAggsQuery(aggs []*seqproxyapi.AggQuery) ([]search.AggQuery, error) { ) } - aggQuery.Interval = seq.MillisToMID(uint64(interval.Milliseconds())) + aggQuery.Interval = seq.MID(interval.Nanoseconds()) result = append(result, aggQuery) } return result, nil diff --git a/tests/integration_tests/integration_test.go b/tests/integration_tests/integration_test.go index 5a3f2a4b..87139f34 100644 --- a/tests/integration_tests/integration_test.go +++ b/tests/integration_tests/integration_test.go @@ -1521,7 +1521,7 @@ func (s *IntegrationTestSuite) TestAsyncSearch() { Quantiles: []float64{0.99, 0.95, 0.50}, }, }, - HistogramInterval: seq.MillisToMID(uint64(time.Second.Milliseconds())), + HistogramInterval: seq.MID(time.Second.Nanoseconds()), WithDocs: true, Size: 100, }