From 1a84a6a554e78c3e662ce7f901e709ba867569d9 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 23 May 2025 12:55:32 -0600 Subject: [PATCH 1/4] MB-66395: Support for batch processing of vector search requests Requires: - https://github.com/blevesearch/scorch_segment_api/pull/62 - https://github.com/blevesearch/zapx/pull/332 --- go.mod | 4 +- go.sum | 8 +-- index/scorch/optimize_knn.go | 14 ++++- search_knn_test.go | 106 ++++++++++++++++++++++++++++++++++- 4 files changed, 122 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index bcdfa6f42..56901e903 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 github.com/blevesearch/gtreap v0.1.1 - github.com/blevesearch/scorch_segment_api/v2 v2.3.10 + github.com/blevesearch/scorch_segment_api/v2 v2.3.11-0.20250527202424-37f101287093 github.com/blevesearch/segment v0.9.1 github.com/blevesearch/snowball v0.6.1 github.com/blevesearch/snowballstem v0.9.0 @@ -26,7 +26,7 @@ require ( github.com/blevesearch/zapx/v13 v13.4.2 github.com/blevesearch/zapx/v14 v14.4.2 github.com/blevesearch/zapx/v15 v15.4.2 - github.com/blevesearch/zapx/v16 v16.2.4 + github.com/blevesearch/zapx/v16 v16.2.5-0.20250527210657-dce974b5d3ce github.com/couchbase/moss v0.2.0 github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v1.8.1 diff --git a/go.sum b/go.sum index 1a3b7c9ba..c81840ad8 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,8 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= -github.com/blevesearch/scorch_segment_api/v2 v2.3.10 h1:Yqk0XD1mE0fDZAJXTjawJ8If/85JxnLd8v5vG/jWE/s= -github.com/blevesearch/scorch_segment_api/v2 v2.3.10/go.mod h1:Z3e6ChN3qyN35yaQpl00MfI5s8AxUJbpTR/DL8QOQ+8= +github.com/blevesearch/scorch_segment_api/v2 v2.3.11-0.20250527202424-37f101287093 h1:QHWCknx3jQsu4KRjsFLBWNnyN1kBl5yOCgQ+VqEL7Jc= +github.com/blevesearch/scorch_segment_api/v2 v2.3.11-0.20250527202424-37f101287093/go.mod h1:Z3e6ChN3qyN35yaQpl00MfI5s8AxUJbpTR/DL8QOQ+8= github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A= @@ -44,8 +44,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= -github.com/blevesearch/zapx/v16 v16.2.4 h1:tGgfvleXTAkwsD5mEzgM3zCS/7pgocTCnO1oyAUjlww= -github.com/blevesearch/zapx/v16 v16.2.4/go.mod h1:Rti/REtuuMmzwsI8/C/qIzRaEoSK/wiFYw5e5ctUKKs= +github.com/blevesearch/zapx/v16 v16.2.5-0.20250527210657-dce974b5d3ce h1:JmqgNbznjRniwBFYj/YS/6wORlZJUpB/3Lry1awliOg= +github.com/blevesearch/zapx/v16 v16.2.5-0.20250527210657-dce974b5d3ce/go.mod h1:7j7kQoS5uZi4l7XeLD5Lg/LM/FXRx/bn1tODeluxozI= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index 3b3bc3d19..def4f5edc 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -38,8 +38,12 @@ type OptimizeVR struct { requiresFiltering bool } -// This setting _MUST_ only be changed during init and not after. -var BleveMaxKNNConcurrency = 10 +// These settings _MUST_ only be changed during init and not after. +var ( + BleveMaxKNNConcurrency = 10 + BleveVectorSearchBatchExecution = false + BleveVectorSearchBatchExecutionDelay = segment_api.DefaultBatchExecutionDelay +) func (o *OptimizeVR) invokeSearcherEndCallback() { if o.ctx != nil { @@ -80,7 +84,11 @@ func (o *OptimizeVR) Finish() error { }() for field, vrs := range o.vrs { vecIndex, err := segment.InterpretVectorIndex(field, - o.requiresFiltering, origSeg.deleted) + o.requiresFiltering, origSeg.deleted, + segment_api.InterpretVectorIndexOptions{ + Batch: BleveVectorSearchBatchExecution, + BatchExecutionDelay: BleveVectorSearchBatchExecutionDelay, + }) if err != nil { errorsM.Lock() errors = append(errors, err) diff --git a/search_knn_test.go b/search_knn_test.go index a2d207bfc..3db7159d3 100644 --- a/search_knn_test.go +++ b/search_knn_test.go @@ -1644,7 +1644,6 @@ func TestNestedVectors(t *testing.T) { } func TestNumVecsStat(t *testing.T) { - dataset, _, err := readDatasetAndQueries(testInputCompressedFile) if err != nil { t.Fatal(err) @@ -1701,3 +1700,108 @@ func TestNumVecsStat(t *testing.T) { } } } + +func TestSerialAndBatchRequestsToVectorIndex(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + docs := []map[string]interface{}{ + { + "id": "1", + "vec": []float32{1.0, 2.0, 3.0}, + }, + { + "id": "2", + "vec": []float32{4.0, 5.0, 6.0}, + }, + { + "id": "3", + "vec": []float32{7.0, 8.0, 9.0}, + }, + { + "id": "4", + "vec": []float32{10.0, 11.0, 12.0}, + }, + { + "id": "5", + "vec": []float32{13.0, 14.0, 15.0}, + }, + { + "id": "6", + "vec": []float32{16.0, 17.0, 18.0}, + }, + { + "id": "7", + "vec": []float32{19.0, 20.0, 21.0}, + }, + { + "id": "8", + "vec": []float32{22.0, 23.0, 24.0}, + }, + { + "id": "9", + "vec": []float32{25.0, 26.0, 27.0}, + }, + { + "id": "10", + "vec": []float32{28.0, 29.0, 30.0}, + }, + } + // Index mapping + indexMapping := NewIndexMapping() + vm := mapping.NewVectorFieldMapping() + vm.Dims = 3 + vm.Similarity = "l2_norm" + indexMapping.DefaultMapping.AddFieldMappingsAt("vec", vm) + + // Create index and upload documents + idx, err := New(tmpIndexPath, indexMapping) + if err != nil { + t.Fatal(err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + batch := idx.NewBatch() + for _, doc := range docs { + err = batch.Index(doc["id"].(string), doc) + if err != nil { + t.Fatal(err) + } + } + + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + + tests := []struct { + queryVec []float32 + }{ + { + queryVec: []float32{2, 5, 8}, + }, + { + queryVec: []float32{11, 14, 17}, + }, + { + queryVec: []float32{2, 5, 8, 11, 14, 17}, + }, + } + + for _, test := range tests { + searchReq := NewSearchRequest(query.NewMatchNoneQuery()) + searchReq.AddKNN("vec", test.queryVec, 3, 1) + + res, err := idx.Search(searchReq) + if err != nil { + t.Fatal(err) + } + + fmt.Println(test.queryVec, res.Hits) + } +} From 66b2ee7324a7cfc3fcbc2eb4548b56438d8f4177 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 23 May 2025 13:13:55 -0600 Subject: [PATCH 2/4] Fix unit test --- search_knn_test.go | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/search_knn_test.go b/search_knn_test.go index 3db7159d3..e1e3f2a34 100644 --- a/search_knn_test.go +++ b/search_knn_test.go @@ -26,6 +26,7 @@ import ( "fmt" "math" "math/rand" + "reflect" "sort" "strconv" "sync" @@ -1701,9 +1702,14 @@ func TestNumVecsStat(t *testing.T) { } } -func TestSerialAndBatchRequestsToVectorIndex(t *testing.T) { +func TestBatchingRequestsToVectorIndex(t *testing.T) { tmpIndexPath := createTmpIndexPath(t) - defer cleanupTmpIndexPath(t, tmpIndexPath) + prevValue := scorch.BleveVectorSearchBatchExecution + scorch.BleveVectorSearchBatchExecution = true + defer func() { + scorch.BleveVectorSearchBatchExecution = prevValue + cleanupTmpIndexPath(t, tmpIndexPath) + }() docs := []map[string]interface{}{ { @@ -1788,12 +1794,14 @@ func TestSerialAndBatchRequestsToVectorIndex(t *testing.T) { { queryVec: []float32{11, 14, 17}, }, - { - queryVec: []float32{2, 5, 8, 11, 14, 17}, - }, } - for _, test := range tests { + expectedHitsSortedByID := [][]string{ + {"1", "2", "3"}, + {"4", "5", "6"}, + } + + for testi, test := range tests { searchReq := NewSearchRequest(query.NewMatchNoneQuery()) searchReq.AddKNN("vec", test.queryVec, 3, 1) @@ -1802,6 +1810,19 @@ func TestSerialAndBatchRequestsToVectorIndex(t *testing.T) { t.Fatal(err) } - fmt.Println(test.queryVec, res.Hits) + if len(res.Hits) != 3 { + t.Fatalf("[%d] unexpected number of hits: %v", testi+1, len(res.Hits)) + } + + var gotHitIDs []string + for i := 0; i < len(res.Hits); i++ { + gotHitIDs = append(gotHitIDs, res.Hits[i].ID) + } + + sort.Strings(gotHitIDs) + if !reflect.DeepEqual(gotHitIDs, expectedHitsSortedByID[testi]) { + t.Fatalf("[%d] expect hits: %v, got hits: %v", testi+1, + expectedHitsSortedByID[testi], gotHitIDs) + } } } From cc20fd8933d672f79fc318a03f19c60b1cdfe259 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 27 May 2025 15:04:00 -0600 Subject: [PATCH 3/4] Address comments --- index/scorch/optimize_knn.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/index/scorch/optimize_knn.go b/index/scorch/optimize_knn.go index def4f5edc..f3d7db3e9 100644 --- a/index/scorch/optimize_knn.go +++ b/index/scorch/optimize_knn.go @@ -70,6 +70,14 @@ func (o *OptimizeVR) Finish() error { defer o.invokeSearcherEndCallback() + var interpretVectorIndexOptions *segment_api.InterpretVectorIndexOptions + if BleveVectorSearchBatchExecution { + interpretVectorIndexOptions = &segment_api.InterpretVectorIndexOptions{ + Batch: BleveVectorSearchBatchExecution, + BatchExecutionDelay: BleveVectorSearchBatchExecutionDelay, + } + } + wg := sync.WaitGroup{} semaphore := make(chan struct{}, BleveMaxKNNConcurrency) // Launch goroutines to get vector index for each segment @@ -84,11 +92,7 @@ func (o *OptimizeVR) Finish() error { }() for field, vrs := range o.vrs { vecIndex, err := segment.InterpretVectorIndex(field, - o.requiresFiltering, origSeg.deleted, - segment_api.InterpretVectorIndexOptions{ - Batch: BleveVectorSearchBatchExecution, - BatchExecutionDelay: BleveVectorSearchBatchExecutionDelay, - }) + o.requiresFiltering, origSeg.deleted, interpretVectorIndexOptions) if err != nil { errorsM.Lock() errors = append(errors, err) From 719776ab3d890a4133d2c32108c709443ee520e2 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Tue, 27 May 2025 15:09:11 -0600 Subject: [PATCH 4/4] go mod tidy (vecRequestBatching) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 56901e903..b9ed0db1d 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/blevesearch/zapx/v13 v13.4.2 github.com/blevesearch/zapx/v14 v14.4.2 github.com/blevesearch/zapx/v15 v15.4.2 - github.com/blevesearch/zapx/v16 v16.2.5-0.20250527210657-dce974b5d3ce + github.com/blevesearch/zapx/v16 v16.2.5-0.20250528201548-73384940be16 github.com/couchbase/moss v0.2.0 github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v1.8.1 diff --git a/go.sum b/go.sum index c81840ad8..58a2889f3 100644 --- a/go.sum +++ b/go.sum @@ -44,8 +44,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= -github.com/blevesearch/zapx/v16 v16.2.5-0.20250527210657-dce974b5d3ce h1:JmqgNbznjRniwBFYj/YS/6wORlZJUpB/3Lry1awliOg= -github.com/blevesearch/zapx/v16 v16.2.5-0.20250527210657-dce974b5d3ce/go.mod h1:7j7kQoS5uZi4l7XeLD5Lg/LM/FXRx/bn1tODeluxozI= +github.com/blevesearch/zapx/v16 v16.2.5-0.20250528201548-73384940be16 h1:6x2nRwd//BW6dRE3wdzQleJCK5bFugMFDrA6Z/sL5sw= +github.com/blevesearch/zapx/v16 v16.2.5-0.20250528201548-73384940be16/go.mod h1:7j7kQoS5uZi4l7XeLD5Lg/LM/FXRx/bn1tODeluxozI= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=