From d82e70c124f6b386973615db4b1199f14bd8a044 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Sun, 21 Dec 2025 20:33:35 +0530 Subject: [PATCH 01/16] geo perf 1 --- index/scorch/snapshot_index.go | 21 +++++++----- search/query/boolean.go | 2 ++ search/searcher/search_filter.go | 6 ++++ search/searcher/search_geopointdistance.go | 40 ++++++++++++---------- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 3f2a330c5..6bc55f07a 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -815,19 +815,24 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( // Filter out fields that have been completely deleted or had their // docvalues data deleted from both visitable fields and required fields filterUpdatedFields := func(fields []string) []string { - filteredFields := make([]string, 0, len(fields)) + // fast path: if no updatedFields just return the input + if len(is.updatedFields) == 0 { + return fields + } + n := 0 for _, field := range fields { if info, ok := is.updatedFields[field]; ok && (info.DocValues || info.Deleted) { continue } - filteredFields = append(filteredFields, field) + fields[n] = field + n++ } - return filteredFields + return fields[:n] } - fieldsFiltered := filterUpdatedFields(fields) - vFieldsFiltered := filterUpdatedFields(vFields) + fields = filterUpdatedFields(fields) + vFields = filterUpdatedFields(vFields) var errCh chan error @@ -836,7 +841,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( // if the caller happens to know we're on the same segmentIndex // from a previous invocation if cFields == nil { - cFields = subtractStrings(fieldsFiltered, vFieldsFiltered) + cFields = subtractStrings(fields, vFields) if !ss.cachedDocs.hasFields(cFields) { errCh = make(chan error, 1) @@ -851,8 +856,8 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( } } - if ssvOk && ssv != nil && len(vFieldsFiltered) > 0 { - dvs, err = ssv.VisitDocValues(localDocNum, fieldsFiltered, visitor, dvs) + if ssvOk && ssv != nil && len(vFields) > 0 { + dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) if err != nil { return nil, nil, err } diff --git a/search/query/boolean.go b/search/query/boolean.go index 3bf6f9145..96df4a6d8 100644 --- a/search/query/boolean.go +++ b/search/query/boolean.go @@ -204,6 +204,8 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp // Compare document IDs cmp := refDoc.IndexInternalID.Compare(d.IndexInternalID) if cmp < 0 { + // recycle refDoc now that we do not need it + sctx.DocumentMatchPool.Put(refDoc) // filterSearcher is behind the current document, Advance() it refDoc, err = filterSearcher.Advance(sctx, d.IndexInternalID) if err != nil || refDoc == nil { diff --git a/search/searcher/search_filter.go b/search/searcher/search_filter.go index 97d706b5f..ef070c73f 100644 --- a/search/searcher/search_filter.go +++ b/search/searcher/search_filter.go @@ -60,6 +60,9 @@ func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMat if f.accept(ctx, next) { return next, nil } + // recycle this document match now, since + // we do not need it anymore + ctx.DocumentMatchPool.Put(next) next, err = f.child.Next(ctx) } return nil, err @@ -76,6 +79,9 @@ func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInt if f.accept(ctx, adv) { return adv, nil } + // recycle this document match now, since + // we do not need it anymore + 
ctx.DocumentMatchPool.Put(adv) return f.Next(ctx) } diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index 357ac4de3..3d0e0a6b8 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea // wrap it in a filtering searcher which checks the actual distance return NewFilteringSearcher(ctx, rectSearcher, - buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil + buildDistFilter(ctx, dvReader, centerLon, centerLat, dist)), nil } // boxSearcher builds a searcher for the described bounding box @@ -113,27 +113,29 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader, return boxSearcher, nil } -func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string, +func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, centerLon, centerLat, maxDist float64) FilterFunc { + // reuse the following for each document match that is checked using the filter + var lons, lats []float64 + var found bool + dvVisitor := func(_ string, term []byte) { + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + i64, err := prefixCoded.Int64() + if err == nil { + lons = append(lons, geo.MortonUnhashLon(uint64(i64))) + lats = append(lats, geo.MortonUnhashLat(uint64(i64))) + found = true + } + } + } return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { // check geo matches against all numeric type terms indexed - var lons, lats []float64 - var found bool - - err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { - // only consider the values which are shifted 0 - prefixCoded := numeric.PrefixCoded(term) - shift, err := prefixCoded.Shift() - if err == nil && shift == 0 { - i64, err := prefixCoded.Int64() - if err == nil { - lons = append(lons, geo.MortonUnhashLon(uint64(i64))) - lats = append(lats, geo.MortonUnhashLat(uint64(i64))) - found = true - } - } - }) - if err == nil && found { + lons, lats = lons[:0], lats[:0] + found = false + if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { reportIOStats(ctx, bytes) From c4453c52ad717290fe6d0f55b3a40bc9a6350863 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Sun, 21 Dec 2025 22:13:16 +0530 Subject: [PATCH 02/16] fix 2 --- search/searcher/search_geoboundingbox.go | 47 ++++++----- search/searcher/search_geopointdistance.go | 4 + search/searcher/search_geopolygon.go | 49 ++++++------ search/searcher/search_geoshape.go | 91 +++++++++++----------- 4 files changed, 105 insertions(+), 86 deletions(-) diff --git a/search/searcher/search_geoboundingbox.go b/search/searcher/search_geoboundingbox.go index c2551a871..a146fa654 100644 --- a/search/searcher/search_geoboundingbox.go +++ b/search/searcher/search_geoboundingbox.go @@ -53,7 +53,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade } return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader, - field, minLon, minLat, maxLon, maxLat)), nil + minLon, minLat, maxLon, maxLat)), nil } } @@ -88,7 +88,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade } // add filter to check points near the boundary onBoundarySearcher = NewFilteringSearcher(ctx, 
rawOnBoundarySearcher, - buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat)) + buildRectFilter(ctx, dvReader, minLon, minLat, maxLon, maxLat)) openedSearchers = append(openedSearchers, onBoundarySearcher) } @@ -205,28 +205,35 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel return isIndexed, closeF, err } -func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string, +func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, minLon, minLat, maxLon, maxLat float64, ) FilterFunc { + // reuse the following for each document match that is checked using the filter + var lons, lats []float64 + var found bool + dvVisitor := func(_ string, term []byte) { + if found { + // avoid redundant work if already found + return + } + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + var i64 int64 + i64, err = prefixCoded.Int64() + if err == nil { + lons = append(lons, geo.MortonUnhashLon(uint64(i64))) + lats = append(lats, geo.MortonUnhashLat(uint64(i64))) + found = true + } + } + } return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { // check geo matches against all numeric type terms indexed - var lons, lats []float64 - var found bool - err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { - // only consider the values which are shifted 0 - prefixCoded := numeric.PrefixCoded(term) - shift, err := prefixCoded.Shift() - if err == nil && shift == 0 { - var i64 int64 - i64, err = prefixCoded.Int64() - if err == nil { - lons = append(lons, geo.MortonUnhashLon(uint64(i64))) - lats = append(lats, geo.MortonUnhashLat(uint64(i64))) - found = true - } - } - }) - if err == nil && found { + lons, lats = lons[:0], lats[:0] + found = false + if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { reportIOStats(ctx, bytes) diff --git a/search/searcher/search_geopointdistance.go b/search/searcher/search_geopointdistance.go index 3d0e0a6b8..7591bcc60 100644 --- a/search/searcher/search_geopointdistance.go +++ b/search/searcher/search_geopointdistance.go @@ -119,6 +119,10 @@ func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, var lons, lats []float64 var found bool dvVisitor := func(_ string, term []byte) { + if found { + // avoid redundant work if already found + return + } // only consider the values which are shifted 0 prefixCoded := numeric.PrefixCoded(term) shift, err := prefixCoded.Shift() diff --git a/search/searcher/search_geopolygon.go b/search/searcher/search_geopolygon.go index dc04bb66a..fb6e09be4 100644 --- a/search/searcher/search_geopolygon.go +++ b/search/searcher/search_geopolygon.go @@ -85,28 +85,37 @@ func almostEqual(a, b float64) bool { // here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string, coordinates []geo.Point) FilterFunc { + // reuse the following for each document match that is checked using the filter + var lons, lats []float64 + var found bool + dvVisitor := func(_ string, term []byte) { + if found { + // avoid redundant work if already found + return + } + // only consider the values which are shifted 0 + prefixCoded := numeric.PrefixCoded(term) + shift, err := prefixCoded.Shift() + if err == nil && shift == 0 { + i64, err := prefixCoded.Int64() + if 
err == nil { + lons = append(lons, geo.MortonUnhashLon(uint64(i64))) + lats = append(lats, geo.MortonUnhashLat(uint64(i64))) + found = true + } + } + } + rayIntersectsSegment := func(point, a, b geo.Point) bool { + return (a.Lat > point.Lat) != (b.Lat > point.Lat) && + point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon + } return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { // check geo matches against all numeric type terms indexed - var lons, lats []float64 - var found bool - - err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { - // only consider the values which are shifted 0 - prefixCoded := numeric.PrefixCoded(term) - shift, err := prefixCoded.Shift() - if err == nil && shift == 0 { - i64, err := prefixCoded.Int64() - if err == nil { - lons = append(lons, geo.MortonUnhashLon(uint64(i64))) - lats = append(lats, geo.MortonUnhashLat(uint64(i64))) - found = true - } - } - }) - + lons, lats = lons[:0], lats[:0] + found = false // Note: this approach works for points which are strictly inside // the polygon. ie it might fail for certain points on the polygon boundaries. - if err == nil && found { + if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { reportIOStats(ctx, bytes) @@ -116,10 +125,6 @@ func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, fiel if len(coordinates) < 3 { return false } - rayIntersectsSegment := func(point, a, b geo.Point) bool { - return (a.Lat > point.Lat) != (b.Lat > point.Lat) && - point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon - } for i := range lons { pt := geo.Point{Lon: lons[i], Lat: lats[i]} diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 703693d78..552579c16 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -69,7 +69,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea // this is for accumulating the shape's actual complete value // spread across multiple docvalue visitor callbacks. var dvShapeValue []byte - var startReading, finishReading bool + var startReading, finishReading, found bool var reader *bytes.Reader var bufPool *s2.GeoBufferPool @@ -77,51 +77,54 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea bufPool = bufPoolCallback() } - return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { - var found bool - - err := dvReader.VisitDocValues(d.IndexInternalID, - func(field string, term []byte) { - // only consider the values which are GlueBytes prefixed or - // if it had already started reading the shape bytes from previous callbacks. - if startReading || len(term) > geo.GlueBytesOffset { - - if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) { - startReading = true - - if bytes.Equal(geo.GlueBytes, term[len(term)-geo.GlueBytesOffset:]) { - term = term[:len(term)-geo.GlueBytesOffset] - finishReading = true - } - - dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...) - - } else if startReading && !finishReading { - if len(term) > geo.GlueBytesOffset && - bytes.Equal(geo.GlueBytes, term[len(term)-geo.GlueBytesOffset:]) { - term = term[:len(term)-geo.GlueBytesOffset] - finishReading = true - } - - term = append(termSeparatorSplitSlice, term...) - dvShapeValue = append(dvShapeValue, term...) - } - - // apply the filter once the entire docvalue is finished reading. 
- if finishReading { - v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool) - if err == nil && v { - found = true - } - - dvShapeValue = dvShapeValue[:0] - startReading = false - finishReading = false - } + dvVisitor := func(_ string, term []byte) { + if found { + // avoid redundant work if already found + return + } + tl := len(term) + // only consider the values which are GlueBytes prefixed or + // if it had already started reading the shape bytes from previous callbacks. + if startReading || tl > geo.GlueBytesOffset { + + if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) { + startReading = true + + if bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) { + term = term[:tl-geo.GlueBytesOffset] + finishReading = true + } + + dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...) + + } else if startReading && !finishReading { + if tl > geo.GlueBytesOffset && + bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) { + term = term[:tl-geo.GlueBytesOffset] + finishReading = true + } + + dvShapeValue = append(dvShapeValue, termSeparatorSplitSlice...) + dvShapeValue = append(dvShapeValue, term...) + } + + // apply the filter once the entire docvalue is finished reading. + if finishReading { + v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool) + if err == nil && v { + found = true } - }) - if err == nil && found { + dvShapeValue = dvShapeValue[:0] + startReading = false + finishReading = false + } + } + } + + return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { + found = false + if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { reportIOStats(ctx, bytes) From bcf2bc037bc163f492c039b1e57712e15b75a8d6 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Mon, 22 Dec 2025 11:44:10 +0530 Subject: [PATCH 03/16] code review --- index/scorch/snapshot_index.go | 7 +++---- search/searcher/search_geoshape.go | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6bc55f07a..8473c249a 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -819,16 +819,15 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( if len(is.updatedFields) == 0 { return fields } - n := 0 + filteredFields := make([]string, 0, len(fields)) for _, field := range fields { if info, ok := is.updatedFields[field]; ok && (info.DocValues || info.Deleted) { continue } - fields[n] = field - n++ + filteredFields = append(filteredFields, field) } - return fields[:n] + return filteredFields } fields = filterUpdatedFields(fields) diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 552579c16..4f90808a1 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -123,7 +123,11 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea } return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { + // reset state variables for each document found = false + startReading = false + finishReading = false + dvShapeValue = dvShapeValue[:0] if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { From ce552bf6bd79622fae3f3e8baa07fc33066b214e Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Mon, 22 Dec 2025 12:55:19 +0530 Subject: 
[PATCH 04/16] use readlock --- index/scorch/snapshot_index.go | 16 +++++++--------- index/scorch/snapshot_segment.go | 16 ++++++++-------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 8473c249a..664e6dfe9 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -842,7 +842,7 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( if cFields == nil { cFields = subtractStrings(fields, vFields) - if !ss.cachedDocs.hasFields(cFields) { + if len(cFields) > 0 && !ss.cachedDocs.hasFields(cFields) { errCh = make(chan error, 1) go func() { @@ -984,17 +984,15 @@ func subtractStrings(a, b []string) []string { return a } - // Create a map for O(1) lookups - bMap := make(map[string]struct{}, len(b)) - for _, bs := range b { - bMap[bs] = struct{}{} - } - rv := make([]string, 0, len(a)) +OUTER: for _, as := range a { - if _, exists := bMap[as]; !exists { - rv = append(rv, as) + for _, bs := range b { + if as == bs { + continue OUTER + } } + rv = append(rv, as) } return rv } diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index c6f3584cc..1421d10d0 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -241,7 +241,7 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { type cachedDocs struct { size uint64 - m sync.Mutex // As the cache is asynchronously prepared, need a lock + m sync.RWMutex // As the cache is asynchronously prepared, need a lock cache map[string]*cachedFieldDocs // Keyed by field } @@ -283,14 +283,14 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e // hasFields returns true if the cache has all the given fields func (c *cachedDocs) hasFields(fields []string) bool { - c.m.Lock() + c.m.RLock() for _, field := range fields { if _, exists := c.cache[field]; !exists { - c.m.Unlock() + c.m.RUnlock() return false // found a field not in cache } } - c.m.Unlock() + c.m.RUnlock() return true } @@ -311,13 +311,13 @@ func (c *cachedDocs) updateSizeLOCKED() { func (c *cachedDocs) visitDoc(localDocNum uint64, fields []string, visitor index.DocValueVisitor) { - c.m.Lock() + c.m.RLock() for _, field := range fields { if cachedFieldDocs, exists := c.cache[field]; exists { - c.m.Unlock() + c.m.RUnlock() <-cachedFieldDocs.readyCh - c.m.Lock() + c.m.RLock() if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { for { @@ -332,7 +332,7 @@ func (c *cachedDocs) visitDoc(localDocNum uint64, } } - c.m.Unlock() + c.m.RUnlock() } // the purpose of the cachedMeta is to simply allow the user of this type to record From e39cdcc317a1e77f1075f77b8f906d4ddc1f4371 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Mon, 22 Dec 2025 17:24:55 +0530 Subject: [PATCH 05/16] fix geoSort --- search/sort.go | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/search/sort.go b/search/sort.go index 44e5cd91c..dfe02195e 100644 --- a/search/sort.go +++ b/search/sort.go @@ -683,7 +683,7 @@ type SortGeoDistance struct { Field string Desc bool Unit string - values []string + values [][]byte Lon float64 Lat float64 unitMult float64 @@ -693,19 +693,18 @@ type SortGeoDistance struct { // this field has the specified term func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) { if field == s.Field { - s.values = append(s.values, string(term)) + s.values = append(s.values, term) } } // Value returns the sort value of the 
DocumentMatch -// it also resets the state of this SortField for +// it also resets the state of this SortGeoDistance for // processing the next document func (s *SortGeoDistance) Value(i *DocumentMatch) string { - iTerms := s.filterTermsByType(s.values) - iTerm := s.filterTermsByMode(iTerms) + iTerm := s.findPrefixCodedNumericTerm(s.values) s.values = s.values[:0] - if iTerm == "" { + if iTerm == nil { return maxDistance } @@ -739,25 +738,16 @@ func (s *SortGeoDistance) Descending() bool { return s.Desc } -func (s *SortGeoDistance) filterTermsByMode(terms []string) string { - if len(terms) >= 1 { - return terms[0] - } - - return "" -} - -// filterTermsByType attempts to make one pass on the terms -// return only valid prefix coded numbers with shift of 0 -func (s *SortGeoDistance) filterTermsByType(terms []string) []string { - var termsWithShiftZero []string +// findPrefixCodedNumericTerm looks through the provided terms +// and returns the first valid prefix coded numeric term with shift of 0 +func (s *SortGeoDistance) findPrefixCodedNumericTerm(terms [][]byte) []byte { for _, term := range terms { - valid, shift := numeric.ValidPrefixCodedTerm(term) + valid, shift := numeric.ValidPrefixCodedTermBytes(term) if valid && shift == 0 { - termsWithShiftZero = append(termsWithShiftZero, term) + return term } } - return termsWithShiftZero + return nil } // RequiresDocID says this SearchSort does not require the DocID be loaded From 0ef6f227b637cd3bc13b6ced38f0ee4cff0578f6 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Mon, 22 Dec 2025 17:58:46 +0530 Subject: [PATCH 06/16] use prealloc --- numeric/prefix_coded.go | 8 ++++++++ search/sort.go | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/numeric/prefix_coded.go b/numeric/prefix_coded.go index 29bd0fc5c..03ba043e3 100644 --- a/numeric/prefix_coded.go +++ b/numeric/prefix_coded.go @@ -66,6 +66,14 @@ func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { return rv } +func MustNewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) PrefixCoded { + rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, prealloc) + if err != nil { + panic(err) + } + return rv +} + // Shift returns the number of bits shifted // returns 0 if in uninitialized state func (p PrefixCoded) Shift() (uint, error) { diff --git a/search/sort.go b/search/sort.go index dfe02195e..64230c116 100644 --- a/search/sort.go +++ b/search/sort.go @@ -687,6 +687,7 @@ type SortGeoDistance struct { Lon float64 Lat float64 unitMult float64 + tmp []byte } // UpdateVisitor notifies this sort field that in this document @@ -722,7 +723,8 @@ func (s *SortGeoDistance) Value(i *DocumentMatch) string { dist /= s.unitMult } distInt64 := numeric.Float64ToInt64(dist) - return string(numeric.MustNewPrefixCodedInt64(distInt64, 0)) + s.tmp = numeric.MustNewPrefixCodedInt64Prealloc(distInt64, 0, s.tmp) + return string(s.tmp) } func (s *SortGeoDistance) DecodeValue(value string) string { From 385d9aa97a5c8705d575023d35365cef59d10f25 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 23 Dec 2025 14:03:19 +0530 Subject: [PATCH 07/16] MB-59633: Removed snappy encoding and set chunk mode to 1 --- document/field_geopoint.go | 4 ++++ document/field_geoshape.go | 4 ++++ index_test.go | 12 ++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/document/field_geopoint.go b/document/field_geopoint.go index 5795043f2..5807e8889 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -180,6 +180,10 @@ func NewGeoPointField(name string, 
arrayPositions []uint64, lon, lat float64) *G func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options index.FieldIndexingOptions) *GeoPointField { mhash := geo.MortonHash(lon, lat) prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0) + + options |= index.SkipSnappy + options |= index.SkipChunking + return &GeoPointField{ name: name, arrayPositions: arrayPositions, diff --git a/document/field_geoshape.go b/document/field_geoshape.go index 6282ff12b..e7a60cd5f 100644 --- a/document/field_geoshape.go +++ b/document/field_geoshape.go @@ -181,6 +181,8 @@ func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions [] // docvalues are always enabled for geoshape fields, even if the // indexing options are set to not include docvalues. options |= index.DocValues + options |= index.SkipSnappy + options |= index.SkipChunking return &GeoShapeField{ shape: shape, @@ -233,6 +235,8 @@ func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string, // docvalues are always enabled for geoshape fields, even if the // indexing options are set to not include docvalues. options |= index.DocValues + options |= index.SkipSnappy + options |= index.SkipChunking return &GeoShapeField{ shape: shape, diff --git a/index_test.go b/index_test.go index 7ed27ff86..2022b7387 100644 --- a/index_test.go +++ b/index_test.go @@ -612,9 +612,9 @@ func TestBytesRead(t *testing.T) { stats, _ := idx.StatsMap()["index"].(map[string]interface{}) prevBytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) - expectedBytesRead := uint64(22049) + expectedBytesRead := uint64(21164) if supportForVectorSearch { - expectedBytesRead = 22459 + expectedBytesRead = 21574 } if prevBytesRead != expectedBytesRead && res.Cost == prevBytesRead { @@ -770,9 +770,9 @@ func TestBytesReadStored(t *testing.T) { stats, _ := idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) - expectedBytesRead := uint64(11911) + expectedBytesRead := uint64(11025) if supportForVectorSearch { - expectedBytesRead = 12321 + expectedBytesRead = 11435 } if bytesRead != expectedBytesRead && bytesRead == res.Cost { @@ -847,9 +847,9 @@ func TestBytesReadStored(t *testing.T) { stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - expectedBytesRead = uint64(4097) + expectedBytesRead = uint64(3212) if supportForVectorSearch { - expectedBytesRead = 4507 + expectedBytesRead = 3622 } if bytesRead != expectedBytesRead && bytesRead == res.Cost { From 451586d1708c30440f038efd927dedd6d876f650 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 23 Dec 2025 16:17:35 +0530 Subject: [PATCH 08/16] Minor fix --- document/field_geopoint.go | 1 + 1 file changed, 1 insertion(+) diff --git a/document/field_geopoint.go b/document/field_geopoint.go index 5807e8889..41bff4aaf 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -181,6 +181,7 @@ func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, l mhash := geo.MortonHash(lon, lat) prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0) + options |= index.DocValues options |= index.SkipSnappy options |= index.SkipChunking From db93d77c4c04e8be45bed8358e444daed8bc2993 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 23 Dec 2025 16:22:36 +0530 Subject: [PATCH 09/16] Adding chunking for geo point --- document/field_geopoint.go | 1 - 1 file changed, 1 deletion(-) diff --git 
a/document/field_geopoint.go b/document/field_geopoint.go index 41bff4aaf..9b542502c 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -183,7 +183,6 @@ func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, l options |= index.DocValues options |= index.SkipSnappy - options |= index.SkipChunking return &GeoPointField{ name: name, From 73d46340ef1e331f9e686977c1ba85efaa16c57a Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 23 Dec 2025 20:18:37 +0530 Subject: [PATCH 10/16] More improvements --- document/field_geopoint.go | 1 + index/scorch/snapshot_segment.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/document/field_geopoint.go b/document/field_geopoint.go index 9b542502c..41bff4aaf 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -183,6 +183,7 @@ func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, l options |= index.DocValues options |= index.SkipSnappy + options |= index.SkipChunking return &GeoPointField{ name: name, diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 1421d10d0..196c04a6b 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -321,7 +321,7 @@ func (c *cachedDocs) visitDoc(localDocNum uint64, if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { for { - i := bytes.Index(tlist, TermSeparatorSplitSlice) + i := bytes.IndexByte(tlist, TermSeparator) if i < 0 { break } From d7c52fc1df9ccc8a94763282350dd30cff5bd4f2 Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 24 Dec 2025 14:34:44 +0530 Subject: [PATCH 11/16] Code cleanup --- document/field_geopoint.go | 4 ++++ document/field_geoshape.go | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/document/field_geopoint.go b/document/field_geopoint.go index 41bff4aaf..bf816b420 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -181,6 +181,10 @@ func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, l mhash := geo.MortonHash(lon, lat) prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0) + // docvalues are always enabled for geopoint fields, even if the + // indexing options are set to not include docvalues. + // snappy compression and chunking are always skipped for geopoint + // to avoid mem copies and faster lookups. options |= index.DocValues options |= index.SkipSnappy options |= index.SkipChunking diff --git a/document/field_geoshape.go b/document/field_geoshape.go index e7a60cd5f..93090028c 100644 --- a/document/field_geoshape.go +++ b/document/field_geoshape.go @@ -180,6 +180,8 @@ func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions [] // docvalues are always enabled for geoshape fields, even if the // indexing options are set to not include docvalues. + // snappy compression and chunking are always skipped for geoshape + // to avoid mem copies and faster lookups. options |= index.DocValues options |= index.SkipSnappy options |= index.SkipChunking @@ -234,6 +236,8 @@ func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string, // docvalues are always enabled for geoshape fields, even if the // indexing options are set to not include docvalues. + // snappy compression and chunking are always skipped for geoshape + // to avoid mem copies and faster lookups. 
options |= index.DocValues options |= index.SkipSnappy options |= index.SkipChunking From 3a59d42e998e4d6cf679692132d7fde402640ce3 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Fri, 26 Dec 2025 09:42:52 +0530 Subject: [PATCH 12/16] use DocValueTermSeparator from bleve_index_api --- index/scorch/snapshot_segment.go | 8 ++------ search/searcher/search_geoshape.go | 7 +------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 196c04a6b..624c193ad 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -26,10 +26,6 @@ import ( segment "github.com/blevesearch/scorch_segment_api/v2" ) -var TermSeparator byte = 0xff - -var TermSeparatorSplitSlice = []byte{TermSeparator} - type SegmentSnapshot struct { // this flag is needed to identify whether this // segment was mmaped recently, in which case @@ -220,7 +216,7 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) - cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) + cfd.docs[docNum] = append(cfd.docs[docNum], index.DocValueTermSeparator) cfd.size += uint64(len(next.Term) + 1) // map value nextPosting, err2 = postingsItr.Next() } @@ -321,7 +317,7 @@ func (c *cachedDocs) visitDoc(localDocNum uint64, if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { for { - i := bytes.IndexByte(tlist, TermSeparator) + i := bytes.IndexByte(tlist, index.DocValueTermSeparator) if i < 0 { break } diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 4f90808a1..cd020fafc 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -58,11 +58,6 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha return NewFilteringSearcher(ctx, mSearcher, buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil } -// Using the same term splitter slice used in the doc values in zap. -// TODO: This needs to be revisited whenever we change the zap -// implementation of doc values. -var termSeparatorSplitSlice = []byte{0xff} - func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string, relation string, shape index.GeoJSON, ) FilterFunc { @@ -104,7 +99,7 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea finishReading = true } - dvShapeValue = append(dvShapeValue, termSeparatorSplitSlice...) + dvShapeValue = append(dvShapeValue, index.DocValueTermSeparator) dvShapeValue = append(dvShapeValue, term...) 
}

From 2a5a276dc42fa421e552a5ed0bdef0a2eae3dd71 Mon Sep 17 00:00:00 2001 From: Rahul Rampure Date: Thu, 22 Jan 2026 11:54:52 +0530 Subject: [PATCH 13/16] small fix --- index/scorch/snapshot_index.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 37f761393..3422d9a14 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -796,10 +796,6 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( // Filter out fields that have been completely deleted or had their // docvalues data deleted from both visitable fields and required fields filterUpdatedFields := func(fields []string) []string { - // fast path: if no updatedFields just return the input - if len(is.updatedFields) == 0 { - return fields - } filteredFields := make([]string, 0, len(fields)) for _, field := range fields { if info, ok := is.updatedFields[field]; ok && @@ -811,8 +807,10 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment( return filteredFields } - fields = filterUpdatedFields(fields) - vFields = filterUpdatedFields(vFields) + if len(is.updatedFields) > 0 { + fields = filterUpdatedFields(fields) + vFields = filterUpdatedFields(vFields) + } var errCh chan error

From e849cda5eff0133415d70969f40829727a20aa21 Mon Sep 17 00:00:00 2001 From: Likith B Date: Fri, 30 Jan 2026 16:49:37 +0530 Subject: [PATCH 14/16] Changed docvalues to better accommodate geoshapes --- index/scorch/snapshot_segment.go | 21 +++++-------- search/searcher/search_geoshape.go | 50 +++--------------------------- 2 files changed, 12 insertions(+), 59 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 2348c08ca..1ff19769f 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -15,7 +15,6 @@ package scorch import ( - "bytes" "os" "sync" "sync/atomic" @@ -180,9 +179,9 @@ func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Updat type cachedFieldDocs struct { m sync.Mutex - readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. - err error // Non-nil if there was an error when preparing this cachedFieldDocs. - docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. + readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. + err error // Non-nil if there was an error when preparing this cachedFieldDocs. + docs map[uint64][]string // Keyed by localDocNum, value is a list of terms delimited by 0xFF. size uint64 } @@ -228,8 +227,7 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() - cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) 
- cfd.docs[docNum] = append(cfd.docs[docNum], index.DocValueTermSeparator) + cfd.docs[docNum] = append(cfd.docs[docNum], next.Term) cfd.size += uint64(len(next.Term) + 1) // map value nextPosting, err2 = postingsItr.Next() } @@ -266,7 +264,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e if !exists { c.cache[field] = &cachedFieldDocs{ readyCh: make(chan struct{}), - docs: make(map[uint64][]byte), + docs: make(map[uint64][]string), } go c.cache[field].prepareField(field, ss) @@ -329,13 +327,8 @@ func (c *cachedDocs) visitDoc(localDocNum uint64, c.m.RLock() if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { - for { - i := bytes.IndexByte(tlist, index.DocValueTermSeparator) - if i < 0 { - break - } - visitor(field, tlist[0:i]) - tlist = tlist[i+1:] + for _, term := range tlist { + visitor(field, []byte(term)) } } } diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index cd020fafc..32ebeb390 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -63,9 +63,8 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea ) FilterFunc { // this is for accumulating the shape's actual complete value // spread across multiple docvalue visitor callbacks. - var dvShapeValue []byte - var startReading, finishReading, found bool var reader *bytes.Reader + var found bool var bufPool *s2.GeoBufferPool if bufPoolCallback, ok := ctx.Value(search.GeoBufferPoolCallbackKey).(search.GeoBufferPoolCallbackFunc); ok { @@ -73,46 +72,10 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea } dvVisitor := func(_ string, term []byte) { - if found { - // avoid redundant work if already found - return - } - tl := len(term) - // only consider the values which are GlueBytes prefixed or - // if it had already started reading the shape bytes from previous callbacks. - if startReading || tl > geo.GlueBytesOffset { - - if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) { - startReading = true - - if bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) { - term = term[:tl-geo.GlueBytesOffset] - finishReading = true - } - - dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...) - - } else if startReading && !finishReading { - if tl > geo.GlueBytesOffset && - bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) { - term = term[:tl-geo.GlueBytesOffset] - finishReading = true - } - - dvShapeValue = append(dvShapeValue, index.DocValueTermSeparator) - dvShapeValue = append(dvShapeValue, term...) - } - - // apply the filter once the entire docvalue is finished reading. 
- if finishReading {
- v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool) - if err == nil && v { - found = true - } - - dvShapeValue = dvShapeValue[:0] - startReading = false - finishReading = false + if len(term) > 4 && bytes.Equal(term[0:2], geo.GlueBytes) && bytes.Equal(term[len(term)-2:], geo.GlueBytes) { + v, err := geojson.FilterGeoShapesOnRelation(shape, term[2:len(term)-2], relation, &reader, bufPool) + if err == nil && v { + found = true } } } @@ -120,9 +83,6 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { // reset state variables for each document found = false - startReading = false - finishReading = false - dvShapeValue = dvShapeValue[:0] if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { From 2e736707dba3d82dd6946bea15b265e8b1d62cd6 Mon Sep 17 00:00:00 2001 From: Likith B Date: Tue, 3 Feb 2026 16:12:50 +0530 Subject: [PATCH 15/16] Revert "Changed docvalues to better accommodate geoshapes" This reverts commit e849cda5eff0133415d70969f40829727a20aa21. --- index/scorch/snapshot_segment.go | 21 ++++++++----- search/searcher/search_geoshape.go | 50 +++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/index/scorch/snapshot_segment.go b/index/scorch/snapshot_segment.go index 1ff19769f..2348c08ca 100644 --- a/index/scorch/snapshot_segment.go +++ b/index/scorch/snapshot_segment.go @@ -15,6 +15,7 @@ package scorch import ( + "bytes" "os" "sync" "sync/atomic" @@ -179,9 +180,9 @@ func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.Updat type cachedFieldDocs struct { m sync.Mutex - readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. - err error // Non-nil if there was an error when preparing this cachedFieldDocs. - docs map[uint64][]string // Keyed by localDocNum, value is a list of terms delimited by 0xFF. + readyCh chan struct{} // closed when the cachedFieldDocs.docs is ready to be used. + err error // Non-nil if there was an error when preparing this cachedFieldDocs. + docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. size uint64 } @@ -227,7 +228,8 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() - cfd.docs[docNum] = append(cfd.docs[docNum], next.Term) + cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) 
+ cfd.docs[docNum] = append(cfd.docs[docNum], index.DocValueTermSeparator) cfd.size += uint64(len(next.Term) + 1) // map value nextPosting, err2 = postingsItr.Next() } @@ -264,7 +266,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e if !exists { c.cache[field] = &cachedFieldDocs{ readyCh: make(chan struct{}), - docs: make(map[uint64][]string), + docs: make(map[uint64][]byte), } go c.cache[field].prepareField(field, ss) @@ -327,8 +329,13 @@ func (c *cachedDocs) visitDoc(localDocNum uint64, c.m.RLock() if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { - for _, term := range tlist { - visitor(field, []byte(term)) + for { + i := bytes.IndexByte(tlist, index.DocValueTermSeparator) + if i < 0 { + break + } + visitor(field, tlist[0:i]) + tlist = tlist[i+1:] } } } diff --git a/search/searcher/search_geoshape.go b/search/searcher/search_geoshape.go index 32ebeb390..cd020fafc 100644 --- a/search/searcher/search_geoshape.go +++ b/search/searcher/search_geoshape.go @@ -63,8 +63,9 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea ) FilterFunc { // this is for accumulating the shape's actual complete value // spread across multiple docvalue visitor callbacks. + var dvShapeValue []byte + var startReading, finishReading, found bool var reader *bytes.Reader - var found bool var bufPool *s2.GeoBufferPool if bufPoolCallback, ok := ctx.Value(search.GeoBufferPoolCallbackKey).(search.GeoBufferPoolCallbackFunc); ok { @@ -72,10 +73,46 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea } dvVisitor := func(_ string, term []byte) { - if len(term) > 4 && bytes.Equal(term[0:2], geo.GlueBytes) && bytes.Equal(term[len(term)-2:], geo.GlueBytes) { - v, err := geojson.FilterGeoShapesOnRelation(shape, term[2:len(term)-2], relation, &reader, bufPool) - if err == nil && v { - found = true + if found { + // avoid redundant work if already found + return + } + tl := len(term) + // only consider the values which are GlueBytes prefixed or + // if it had already started reading the shape bytes from previous callbacks. + if startReading || tl > geo.GlueBytesOffset { + + if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) { + startReading = true + + if bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) { + term = term[:tl-geo.GlueBytesOffset] + finishReading = true + } + + dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...) + + } else if startReading && !finishReading { + if tl > geo.GlueBytesOffset && + bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) { + term = term[:tl-geo.GlueBytesOffset] + finishReading = true + } + + dvShapeValue = append(dvShapeValue, index.DocValueTermSeparator) + dvShapeValue = append(dvShapeValue, term...) + } + + // apply the filter once the entire docvalue is finished reading. 
+ if finishReading { + v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool) + if err == nil && v { + found = true + } + + dvShapeValue = dvShapeValue[:0] + startReading = false + finishReading = false } } } @@ -83,6 +120,9 @@ func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueRea return func(sctx *search.SearchContext, d *search.DocumentMatch) bool { // reset state variables for each document found = false + startReading = false + finishReading = false + dvShapeValue = dvShapeValue[:0] if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found { bytes := dvReader.BytesRead() if bytes > 0 { From 2700e7218f8172e28d4162eabeca26bcb34c7d2b Mon Sep 17 00:00:00 2001 From: Likith B Date: Wed, 11 Feb 2026 13:19:25 +0530 Subject: [PATCH 16/16] naming changes --- document/field_geopoint.go | 4 ++-- document/field_geoshape.go | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/document/field_geopoint.go b/document/field_geopoint.go index bf816b420..ef8938f70 100644 --- a/document/field_geopoint.go +++ b/document/field_geopoint.go @@ -186,8 +186,8 @@ func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, l // snappy compression and chunking are always skipped for geopoint // to avoid mem copies and faster lookups. options |= index.DocValues - options |= index.SkipSnappy - options |= index.SkipChunking + options |= index.SkipDVChunking + options |= index.SkipDVCompression return &GeoPointField{ name: name, diff --git a/document/field_geoshape.go b/document/field_geoshape.go index 93090028c..2eb7aa3f2 100644 --- a/document/field_geoshape.go +++ b/document/field_geoshape.go @@ -183,8 +183,8 @@ func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions [] // snappy compression and chunking are always skipped for geoshape // to avoid mem copies and faster lookups. options |= index.DocValues - options |= index.SkipSnappy - options |= index.SkipChunking + options |= index.SkipDVChunking + options |= index.SkipDVCompression return &GeoShapeField{ shape: shape, @@ -239,8 +239,8 @@ func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string, // snappy compression and chunking are always skipped for geoshape // to avoid mem copies and faster lookups. options |= index.DocValues - options |= index.SkipSnappy - options |= index.SkipChunking + options |= index.SkipDVChunking + options |= index.SkipDVCompression return &GeoShapeField{ shape: shape,
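
The sixteen patches above share one theme: taking allocations and repeated work out of the per-document geo query path. The sketches that follow model the main patterns in isolation. Every type and helper in them is a simplified stand-in for the bleve internals named in the diffs, not the real API, except where a hunk is quoted directly.

Patches 01-03 restructure buildDistFilter, buildRectFilter, and buildPolygonFilter so that the docvalue visitor closure, its lons/lats slices, and the found flag live outside the returned FilterFunc: one closure per filter instead of one per document, slices reset with [:0] so their backing arrays are reused, and an early return once found is set so no further terms are decoded. A minimal sketch of that shape, with visitDocValues standing in for index.DocValueReader.VisitDocValues and a fake decode in place of the prefix-coded Morton unhash:

package main

import "fmt"

// fakeDocValues stands in for a segment's docvalues: term bytes per doc.
var fakeDocValues = map[int][][]byte{
	1: {{0x20, 0x03}, {0x20, 0x09}},
	2: {{0x20, 0x0c}},
}

// visitDocValues plays the role of index.DocValueReader.VisitDocValues:
// it invokes the visitor once per term stored for the document.
func visitDocValues(doc int, visit func(field string, term []byte)) error {
	for _, term := range fakeDocValues[doc] {
		visit("geo", term)
	}
	return nil
}

// buildFilter mirrors the shape of buildDistFilter after the patches:
// the visitor closure, the lons/lats slices, and the found flag are
// hoisted out of the returned per-document filter, so they are
// allocated once per filter and reused for every document checked.
func buildFilter(maxLon, maxLat float64) func(doc int) bool {
	var lons, lats []float64
	var found bool
	visitor := func(_ string, term []byte) {
		if found {
			return // avoid redundant work if already found
		}
		// stand-in decode: the real visitor verifies the prefix-coded
		// term has shift 0, then Morton-unhashes an int64 into lon/lat.
		lons = append(lons, float64(term[1]))
		lats = append(lats, float64(term[1]))
		found = true
	}
	return func(doc int) bool {
		lons, lats = lons[:0], lats[:0] // reset length, keep backing arrays
		found = false
		if err := visitDocValues(doc, visitor); err == nil && found {
			return lons[0] <= maxLon && lats[0] <= maxLat
		}
		return false
	}
}

func main() {
	filter := buildFilter(10, 10)
	fmt.Println(filter(1), filter(2)) // true false
}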
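
Patch 01 also starts recycling rejected document matches: boolean.go puts refDoc back before advancing past it, and FilteringSearcher returns every candidate its filter rejects. The sketch below uses sync.Pool purely as a stand-in for bleve's search.DocumentMatchPool; the point is the discipline of returning a match the moment it can no longer appear in the results.

package main

import (
	"fmt"
	"sync"
)

// DocumentMatch stands in for search.DocumentMatch.
type DocumentMatch struct{ ID int }

var pool = sync.Pool{New: func() any { return new(DocumentMatch) }}

// next models FilteringSearcher.Next after patch 01: a match rejected
// by the filter goes straight back to the pool instead of being left
// for the garbage collector.
func next(candidates []int, accept func(*DocumentMatch) bool) *DocumentMatch {
	for _, id := range candidates {
		d := pool.Get().(*DocumentMatch)
		d.ID = id
		if accept(d) {
			return d
		}
		// recycle this document match now, since we do not need it anymore
		pool.Put(d)
	}
	return nil
}

func main() {
	even := func(d *DocumentMatch) bool { return d.ID%2 == 0 }
	if d := next([]int{1, 3, 4}, even); d != nil {
		fmt.Println("matched", d.ID) // matched 4
	}
}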
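
The geoshape filter (patch 02, rewritten in patch 14 and restored by the revert in patch 15) has to stitch a shape's value back together when it spans several docvalue terms: the first chunk arrives prefixed with geo.GlueBytes, the last chunk is suffixed with it, and intermediate chunks are joined with the docvalue term separator. The accumulator below models the surviving state machine; glue is an invented two-byte marker and apply stands in for geojson.FilterGeoShapesOnRelation:

package main

import (
	"bytes"
	"fmt"
)

// glue is an invented marker playing the role of geo.GlueBytes;
// len(glue) plays the role of geo.GlueBytesOffset.
var glue = []byte{0xde, 0xad}

// accumulator models the startReading/finishReading state machine in
// buildRelationFilterOnShapes: chunks are appended until the glue
// suffix is seen, then the filter runs once on the full value.
type accumulator struct {
	value         []byte
	startReading  bool
	finishReading bool
}

func (a *accumulator) visit(term []byte, apply func(full []byte)) {
	if !a.startReading && len(term) > len(glue) && bytes.Equal(glue, term[:len(glue)]) {
		a.startReading = true
		if bytes.Equal(glue, term[len(term)-len(glue):]) {
			term = term[:len(term)-len(glue)]
			a.finishReading = true
		}
		a.value = append(a.value, term[len(glue):]...)
	} else if a.startReading && !a.finishReading {
		if len(term) > len(glue) && bytes.Equal(glue, term[len(term)-len(glue):]) {
			term = term[:len(term)-len(glue)]
			a.finishReading = true
		}
		a.value = append(a.value, 0xff) // the docvalue term separator
		a.value = append(a.value, term...)
	}
	// apply the filter once the entire docvalue is finished reading
	if a.finishReading {
		apply(a.value)
		a.value = a.value[:0]
		a.startReading, a.finishReading = false, false
	}
}

func main() {
	var a accumulator
	first := append(append([]byte{}, glue...), []byte("part1")...) // glue-prefixed
	last := append([]byte("part2"), glue...)                       // glue-suffixed
	for _, chunk := range [][]byte{first, last} {
		a.visit(chunk, func(full []byte) { fmt.Printf("%q\n", full) })
	}
}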
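
Patch 04 downgrades cachedDocs.m from sync.Mutex to sync.RWMutex: hasFields and visitDoc only read the cache map, so concurrent searches no longer serialize on an exclusive lock, and only cache preparation writes. A reduced model of that split (the real visitDoc additionally drops and reacquires the read lock around the readyCh wait):

package main

import (
	"fmt"
	"sync"
)

// fieldCache models cachedDocs after patch 04: readers take RLock,
// the writer path takes the exclusive Lock.
type fieldCache struct {
	m     sync.RWMutex
	cache map[string]struct{}
}

// hasFields returns true if the cache has all the given fields;
// it mirrors cachedDocs.hasFields, which now runs under a read lock.
func (c *fieldCache) hasFields(fields []string) bool {
	c.m.RLock()
	defer c.m.RUnlock()
	for _, field := range fields {
		if _, exists := c.cache[field]; !exists {
			return false // found a field not in cache
		}
	}
	return true
}

// addField mirrors the prepareFields-style writer, which still needs
// the exclusive lock.
func (c *fieldCache) addField(field string) {
	c.m.Lock()
	defer c.m.Unlock()
	c.cache[field] = struct{}{}
}

func main() {
	c := &fieldCache{cache: make(map[string]struct{})}
	c.addField("geo")
	fmt.Println(c.hasFields([]string{"geo"}))         // true
	fmt.Println(c.hasFields([]string{"geo", "name"})) // false
}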
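
The same patch reverts subtractStrings from a map-based lookup to a labeled nested loop: for the handful of field names a query touches, an O(len(a)*len(b)) scan with no allocation beats building a map. This is the function as the patch leaves it, with only a driver added:

package main

import "fmt"

// subtractStrings returns the elements of a that are not present in b.
func subtractStrings(a, b []string) []string {
	if len(b) == 0 {
		return a
	}

	rv := make([]string, 0, len(a))
OUTER:
	for _, as := range a {
		for _, bs := range b {
			if as == bs {
				continue OUTER
			}
		}
		rv = append(rv, as)
	}
	return rv
}

func main() {
	fmt.Println(subtractStrings([]string{"geo", "name", "age"}, []string{"name"}))
	// [geo age]
}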
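
Patches 05-06 rework SortGeoDistance: the collected terms become [][]byte so UpdateVisitor no longer allocates a string per visited term, the first shift-0 term is returned directly instead of building an intermediate slice, and Value encodes the computed distance into a scratch buffer kept on the struct via the new MustNewPrefixCodedInt64Prealloc. The sketch models only the buffer-reuse contract; the byte layout is a placeholder, not bleve's real prefix coding:

package main

import (
	"encoding/binary"
	"fmt"
)

// encodeInto loosely mimics numeric.NewPrefixCodedInt64Prealloc: it
// encodes into prealloc when there is capacity, so repeated calls
// reuse one allocation. The layout here, a 0x20 shift byte plus a
// big-endian value, is only an illustration of the idea.
func encodeInto(v int64, prealloc []byte) []byte {
	buf := prealloc[:0]
	if cap(buf) < 9 {
		buf = make([]byte, 0, 9)
	}
	buf = append(buf, 0x20) // shift byte, like a shift-0 prefix-coded term
	return binary.BigEndian.AppendUint64(buf, uint64(v))
}

// sorter keeps the scratch buffer across calls, the way SortGeoDistance
// now keeps tmp across Value invocations.
type sorter struct{ tmp []byte }

func (s *sorter) value(dist int64) string {
	s.tmp = encodeInto(dist, s.tmp)
	return string(s.tmp) // the string copy is still required by the sort API
}

func main() {
	s := &sorter{}
	fmt.Printf("%x\n", s.value(42))
	fmt.Printf("%x\n", s.value(7)) // second call reuses s.tmp
}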
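
Finally, patches 07-11 and 16 route geo fields around docvalue chunking and snappy compression by OR-ing flags into the field's indexing options, settling on the names SkipDVChunking and SkipDVCompression. The bitmask mechanics, with illustrative constants rather than bleve's real values:

package main

import "fmt"

// FieldIndexingOptions mirrors the bitmask style of bleve's
// index.FieldIndexingOptions; these constants are illustrative.
type FieldIndexingOptions uint16

const (
	DocValues FieldIndexingOptions = 1 << iota
	SkipDVChunking
	SkipDVCompression
)

// newGeoPointOptions mimics NewGeoPointFieldWithIndexingOptions:
// whatever the caller requested, geo points always get docvalues and
// always skip docvalue chunking and compression.
func newGeoPointOptions(requested FieldIndexingOptions) FieldIndexingOptions {
	requested |= DocValues
	requested |= SkipDVChunking
	requested |= SkipDVCompression
	return requested
}

func main() {
	opts := newGeoPointOptions(0)
	fmt.Println(opts&DocValues != 0, opts&SkipDVChunking != 0) // true true
}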