From 803e2e80b582ce5c7498bd53726d9ee9caf656d1 Mon Sep 17 00:00:00 2001
From: Patrick Ge <53650488+cococo2000@users.noreply.github.com>
Date: Fri, 3 Jan 2025 20:43:39 +0800
Subject: [PATCH 1/4] Update bvb-run.yml

Add sptag and pgvector to action check
---
 .github/workflows/bvb-run.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/bvb-run.yml b/.github/workflows/bvb-run.yml
index 62fc451..0aed2fb 100644
--- a/.github/workflows/bvb-run.yml
+++ b/.github/workflows/bvb-run.yml
@@ -36,6 +36,8 @@ jobs:
           - weaviate
           - redis
           - elasticsearch
+          - sptag
+          - pgvector
           # - vearch # Vearch test is disabled temporarily due to disk space constraints.
           # GPU support is not available in the free tier
           # - milvus_gpu

From 33e2fcfeffb0dd1acc8d0b1c0b54fb681f131cd1 Mon Sep 17 00:00:00 2001
From: "Gzx@151" <gezhongxin18@mails.ucas.ac.cn>
Date: Fri, 3 Jan 2025 22:53:59 +0800
Subject: [PATCH 2/4] fix sptag: add super.init - missing docker_client

---
 bigvectorbench/algorithms/sptag/module.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/bigvectorbench/algorithms/sptag/module.py b/bigvectorbench/algorithms/sptag/module.py
index 14a2cf1..d2143b5 100644
--- a/bigvectorbench/algorithms/sptag/module.py
+++ b/bigvectorbench/algorithms/sptag/module.py
@@ -1,12 +1,8 @@
 """ SPTAG module for BigVectorBench framework. """
 
-import subprocess
-from time import sleep
 import numpy as np
 import SPTAG
 from bigvectorbench.algorithms.base.module import BaseANN
-import csv
-import shutil
 
 def metric_mapping(_metric: str):
     """
@@ -31,6 +27,7 @@ class SPTAGBase(BaseANN):
     """SPTAG implementation"""
 
     def __init__(self, metric: str, dim: int):
+        super().__init__()
         self._metric = metric
         self._dim = dim
         self._metric_type = metric_mapping(metric)
@@ -109,7 +106,7 @@ def set_query_arguments(self):
 
     def query(self, v, n, filter_expr=None):
         if filter_expr is not None:
-            raise ValueError( f"[SPTAG] have not supported filter-query!!!" )
+            raise ValueError("[SPTAG] have not supported filter-query!!!")
         j = SPTAG.AnnIndex.Load(self.index_name)
         # print(j.Search(v.tobytes(), n)[0])
         # print(j.Search(v.tobytes(), n)[1])

From 06c7a3dc3b2c13387ac827e2e17476afda46a7ec Mon Sep 17 00:00:00 2001
From: "Gzx@151" <gezhongxin18@mails.ucas.ac.cn>
Date: Fri, 3 Jan 2025 22:58:52 +0800
Subject: [PATCH 3/4] add docker_tag filter

---
 bigvectorbench/main.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bigvectorbench/main.py b/bigvectorbench/main.py
index aa5bc7b..bc015fc 100644
--- a/bigvectorbench/main.py
+++ b/bigvectorbench/main.py
@@ -516,6 +516,10 @@ def main():
         force=args.force,
     )
 
+    if args.docker_tag:
+        logger.info("running only %s", args.docker_tag)
+        definitions = [d for d in definitions if d.docker_tag == args.docker_tag]
+
     if args.algorithm:
         logger.info("running only %s", args.algorithm)
         definitions = [d for d in definitions if d.algorithm == args.algorithm]

From 62fcd334ba8946f6b2edfb132d880f42611d1fba Mon Sep 17 00:00:00 2001
From: "Gzx@151" <gezhongxin18@mails.ucas.ac.cn>
Date: Fri, 3 Jan 2025 23:16:55 +0800
Subject: [PATCH 4/4] fix pgvector: only hnsw, add and fix insert, update op

---
 bigvectorbench/algorithms/pgvector/config.yml | 22 +++++------
 bigvectorbench/algorithms/pgvector/module.py  | 37 +++++++++----------
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/bigvectorbench/algorithms/pgvector/config.yml b/bigvectorbench/algorithms/pgvector/config.yml
index 3439f18..06011a0 100644
--- a/bigvectorbench/algorithms/pgvector/config.yml
+++ b/bigvectorbench/algorithms/pgvector/config.yml
@@ -15,16 +15,16 @@ float:
           arg_groups: [{M: 24, efConstruction: 200}]
           # args: {}
           query_args: [[10, 20, 40, 80, 120, 200, 400, 800]]
-    - base_args: ["@metric"]
-      constructor: PGVectorIVFFLAT
-      disabled: false
-      docker_tag: bigvectorbench-pgvector
-      module: bigvectorbench.algorithms.pgvector
-      name: pgvector-ivfflat
-      run_groups:
-        IVFFLAT_32:
-          arg_groups: [{ nlist: 32 }]
-          query_args: [[1, 4, 8, 16, 32]]
+    # - base_args: ["@metric"]
+    #   constructor: PGVectorIVFFLAT
+    #   disabled: false
+    #   docker_tag: bigvectorbench-pgvector
+    #   module: bigvectorbench.algorithms.pgvector
+    #   name: pgvector-ivfflat
+    #   run_groups:
+    #     IVFFLAT_32:
+    #       arg_groups: [{ nlist: 32 }]
+    #       query_args: [[1, 4, 8, 16, 32]]
         # IVFFLAT_64:
         #   arg_groups: [{ nlist: 64 }]
         #   query_args: [[4, 16, 32, 48, 64]]
@@ -45,4 +45,4 @@ float:
         #   query_args: [[256, 1024, 2048, 3072, 4096]]
         # IVFFLAT_8192:
         #   arg_groups: [{ nlist: 8192 }]
-        #   query_args: [[512, 2048, 4096, 6144, 8192]]
\ No newline at end of file
+        #   query_args: [[512, 2048, 4096, 6144, 8192]]
diff --git a/bigvectorbench/algorithms/pgvector/module.py b/bigvectorbench/algorithms/pgvector/module.py
index cd66587..527db6c 100644
--- a/bigvectorbench/algorithms/pgvector/module.py
+++ b/bigvectorbench/algorithms/pgvector/module.py
@@ -11,9 +11,10 @@
 
 class PGVector(BaseANN):
     def __init__(self, metric, method_param):
+        super().__init__()
         self._metric = metric
-        self._m = method_param['M']
-        self._ef_construction = method_param['efConstruction']
+        self._m = method_param.get("M", 32)
+        self._ef_construction = method_param.get("efConstruction", 40)
         self._cur = None
         self.labels = None
         self.label_names = None
@@ -26,7 +27,6 @@ def __init__(self, metric, method_param):
             self._query = "SELECT id FROM items ORDER BY embedding <-> %s LIMIT %s"
         else:
             raise RuntimeError(f"Unknown metric {metric}")
-        
 
     def get_vector_index(self):
         """Get vector index"""
@@ -53,7 +53,7 @@ def load_data(
             table_definition = f"id integer, embedding vector({embeddings.shape[1]})"
         cur.execute(f"CREATE TABLE items ({table_definition})")
         cur.execute("ALTER TABLE items ALTER COLUMN embedding SET STORAGE PLAIN")
-        
+
         if labels is not None and label_names is not None:
             with cur.copy(f"COPY items (id, embedding, {', '.join(label_names)}) FROM STDIN WITH (FORMAT BINARY)") as copy:
                 copy.set_types(["int4", "vector"] + ["int4" for _ in label_names])
@@ -64,16 +64,16 @@ def load_data(
                 copy.set_types(["int4", "vector"])
                 for i, embedding in enumerate(embeddings):
                     copy.write_row((i, embedding))
-        
+
         print("Creating index...")
-        
+
         if self._metric == "angular":
             cur.execute("CREATE INDEX ON items USING %s (embedding vector_cosine_ops) WITH (m = %d, ef_construction = %d)" % (self.index,self._m, self._ef_construction))
         elif self._metric == "euclidean":
             cur.execute("CREATE INDEX ON items USING %s (embedding vector_l2_ops) WITH (m = %d, ef_construction = %d)" % (self.index,self._m, self._ef_construction))
         else:
             raise RuntimeError(f"Unknown metric {self._metric}")
-        
+
         print("Done!")
         self._cur = cur
 
@@ -109,7 +109,7 @@ def get_memory_usage(self):
 
     def __str__(self):
         return f"PGVector(m={self._m}, ef_construction={self._ef_construction}, ef_search={self._ef_search})"
-    
+
     def insert(self, embeddings: np.ndarray, labels: np.ndarray | None = None) -> None:
         """
         Single insert data
@@ -122,9 +122,9 @@ def insert(self, embeddings: np.ndarray, labels: np.ndarray | None = None) -> No
             None
         """
         if labels is not None and self.label_names is not None:
-            insert_sentence = (f"INSERT INTO items (id,embedding,{', '.join(self.label_names)}) VALUES ({self.num_entities+1},{embeddings},{', '.join(labels)})")
+            insert_sentence = f"INSERT INTO items (id,embedding,{', '.join(self.label_names)}) VALUES ({self.num_entities+1},'[{', '.join(map(str, embeddings.tolist()))}]',{', '.join(map(str, labels.tolist()))})"
         else:
-            insert_sentence = (f"INSERT INTO items (id,embedding) VALUES ({self.num_entities+1},{embeddings}")
+            insert_sentence = f"INSERT INTO items (id,embedding) VALUES ({self.num_entities+1},'[{', '.join(map(str, embeddings.tolist()))}]')"
         self._cur.execute(insert_sentence)
         self.num_entities += 1
 
@@ -142,10 +142,10 @@ def update(
         Returns:
             None
         """
-        update_item = (f"embeddings = {embeddings},")
+        update_item = f"embedding = '[{', '.join(map(str, embeddings.tolist()))}]'"
         if labels is not None and self.label_names is not None:
-            for i in enumerate(self.label_names):
-                update_item += f"{self.label_names[i]} = {labels[i]}"
+            for i in range(len(self.label_names)):
+                update_item += f", {self.label_names[i]} = {str(labels[i].item())}"
         update_sentence = (f"UPDATE items SET {update_item} where id = {index}")
 
         self._cur.execute(update_sentence)
@@ -171,7 +171,7 @@ def delete(
 class PGVectorHNSW(PGVector):
     def __init__(self, metric: str, index_param: dict):
         super().__init__(metric, index_param)
-        self._nlinks = index_param.get("nlinks", 32)
+        self._m = index_param.get("M", 32)
         self._efConstruction = index_param.get("efConstruction", 40)
 
     def get_vector_index(self):
@@ -183,16 +183,15 @@ def set_query_arguments(self, efSearch: int = 40):
         Set query arguments for pgvector query with hnsw index
         """
         self.search_params = {
-            "metric_type": self._metric_type,
+            "metric_type": self._metric,
             "efSearch": efSearch,
         }
-        self.name = f"pgvector HNSW metric:{self._metric}, nlinks:{self._nlinks}, efConstruction:{self._efConstruction}, efSearch:{efSearch}"
+        self.name = f"pgvector HNSW metric:{self._metric}, M:{self._m}, efConstruction:{self._efConstruction}, efSearch:{efSearch}"
 
 class PGVectorIVFFLAT(PGVector):
     def __init__(self, metric: str, index_param: dict):
         super().__init__(metric, index_param)
         self._nlinks = index_param.get("nlinks", 32)
-        self._efConstruction = index_param.get("efConstruction", 40)
 
     def get_vector_index(self):
         """Get IVFFLAT vector index"""
@@ -203,7 +202,7 @@ def set_query_arguments(self, efSearch: int = 40):
         Set query arguments for pgvector query with ivfflat index
         """
         self.search_params = {
-            "metric_type": self._metric_type,
+            "metric_type": self._metric,
             "efSearch": efSearch,
         }
-        self.name = f"pgvector ivfflat metric:{self._metric}, nlinks:{self._nlinks}, efConstruction:{self._efConstruction}, efSearch:{efSearch}"
\ No newline at end of file
+        self.name = f"pgvector ivfflat metric:{self._metric}, nlinks:{self._nlinks}, efSearch:{efSearch}"