Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/bvb-run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ jobs:
- weaviate
- redis
- elasticsearch
- sptag
- pgvector
# - vearch # Vearch test is disabled temporarily due to disk space constraints.
# GPU support is not available in the free tier
# - milvus_gpu
Expand Down
22 changes: 11 additions & 11 deletions bigvectorbench/algorithms/pgvector/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ float:
arg_groups: [{M: 24, efConstruction: 200}]
# args: {}
query_args: [[10, 20, 40, 80, 120, 200, 400, 800]]
- base_args: ["@metric"]
constructor: PGVectorIVFFLAT
disabled: false
docker_tag: bigvectorbench-pgvector
module: bigvectorbench.algorithms.pgvector
name: pgvector-ivfflat
run_groups:
IVFFLAT_32:
arg_groups: [{ nlist: 32 }]
query_args: [[1, 4, 8, 16, 32]]
# - base_args: ["@metric"]
# constructor: PGVectorIVFFLAT
# disabled: false
# docker_tag: bigvectorbench-pgvector
# module: bigvectorbench.algorithms.pgvector
# name: pgvector-ivfflat
# run_groups:
# IVFFLAT_32:
# arg_groups: [{ nlist: 32 }]
# query_args: [[1, 4, 8, 16, 32]]
# IVFFLAT_64:
# arg_groups: [{ nlist: 64 }]
# query_args: [[4, 16, 32, 48, 64]]
Expand All @@ -45,4 +45,4 @@ float:
# query_args: [[256, 1024, 2048, 3072, 4096]]
# IVFFLAT_8192:
# arg_groups: [{ nlist: 8192 }]
# query_args: [[512, 2048, 4096, 6144, 8192]]
# query_args: [[512, 2048, 4096, 6144, 8192]]
37 changes: 18 additions & 19 deletions bigvectorbench/algorithms/pgvector/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@

class PGVector(BaseANN):
def __init__(self, metric, method_param):
super().__init__()
self._metric = metric
self._m = method_param['M']
self._ef_construction = method_param['efConstruction']
self._m = method_param.get("M", 32)
self._ef_construction = method_param.get("efConstruction", 40)
self._cur = None
self.labels = None
self.label_names = None
Expand All @@ -26,7 +27,6 @@ def __init__(self, metric, method_param):
self._query = "SELECT id FROM items ORDER BY embedding <-> %s LIMIT %s"
else:
raise RuntimeError(f"Unknown metric {metric}")


def get_vector_index(self):
"""Get vector index"""
Expand All @@ -53,7 +53,7 @@ def load_data(
table_definition = f"id integer, embedding vector({embeddings.shape[1]})"
cur.execute(f"CREATE TABLE items ({table_definition})")
cur.execute("ALTER TABLE items ALTER COLUMN embedding SET STORAGE PLAIN")

if labels is not None and label_names is not None:
with cur.copy(f"COPY items (id, embedding, {', '.join(label_names)}) FROM STDIN WITH (FORMAT BINARY)") as copy:
copy.set_types(["int4", "vector"] + ["int4" for _ in label_names])
Expand All @@ -64,16 +64,16 @@ def load_data(
copy.set_types(["int4", "vector"])
for i, embedding in enumerate(embeddings):
copy.write_row((i, embedding))

print("Creating index...")

if self._metric == "angular":
cur.execute("CREATE INDEX ON items USING %s (embedding vector_cosine_ops) WITH (m = %d, ef_construction = %d)" % (self.index,self._m, self._ef_construction))
elif self._metric == "euclidean":
cur.execute("CREATE INDEX ON items USING %s (embedding vector_l2_ops) WITH (m = %d, ef_construction = %d)" % (self.index,self._m, self._ef_construction))
else:
raise RuntimeError(f"Unknown metric {self._metric}")

print("Done!")
self._cur = cur

Expand Down Expand Up @@ -109,7 +109,7 @@ def get_memory_usage(self):

def __str__(self):
return f"PGVector(m={self._m}, ef_construction={self._ef_construction}, ef_search={self._ef_search})"

def insert(self, embeddings: np.ndarray, labels: np.ndarray | None = None) -> None:
"""
Single insert data
Expand All @@ -122,9 +122,9 @@ def insert(self, embeddings: np.ndarray, labels: np.ndarray | None = None) -> No
None
"""
if labels is not None and self.label_names is not None:
insert_sentence = (f"INSERT INTO items (id,embedding,{', '.join(self.label_names)}) VALUES ({self.num_entities+1},{embeddings},{', '.join(labels)})")
insert_sentence = f"INSERT INTO items (id,embedding,{', '.join(self.label_names)}) VALUES ({self.num_entities+1},'[{', '.join(map(str, embeddings.tolist()))}]',{', '.join(map(str, labels.tolist()))})"
else:
insert_sentence = (f"INSERT INTO items (id,embedding) VALUES ({self.num_entities+1},{embeddings}")
insert_sentence = f"INSERT INTO items (id,embedding) VALUES ({self.num_entities+1},'[{', '.join(map(str, embeddings.tolist()))}]')"
self._cur.execute(insert_sentence)
self.num_entities += 1

Expand All @@ -142,10 +142,10 @@ def update(
Returns:
None
"""
update_item = (f"embeddings = {embeddings},")
update_item = f"embedding = '[{', '.join(map(str, embeddings.tolist()))}]'"
if labels is not None and self.label_names is not None:
for i in enumerate(self.label_names):
update_item += f"{self.label_names[i]} = {labels[i]}"
for i in range(len(self.label_names)):
update_item += f", {self.label_names[i]} = {str(labels[i].item())}"
update_sentence = (f"UPDATE items SET {update_item} where id = {index}")

self._cur.execute(update_sentence)
Expand All @@ -171,7 +171,7 @@ def delete(
class PGVectorHNSW(PGVector):
def __init__(self, metric: str, index_param: dict):
super().__init__(metric, index_param)
self._nlinks = index_param.get("nlinks", 32)
self._m = index_param.get("M", 32)
self._efConstruction = index_param.get("efConstruction", 40)

def get_vector_index(self):
Expand All @@ -183,16 +183,15 @@ def set_query_arguments(self, efSearch: int = 40):
Set query arguments for pgvector query with hnsw index
"""
self.search_params = {
"metric_type": self._metric_type,
"metric_type": self._metric,
"efSearch": efSearch,
}
self.name = f"pgvector HNSW metric:{self._metric}, nlinks:{self._nlinks}, efConstruction:{self._efConstruction}, efSearch:{efSearch}"
self.name = f"pgvector HNSW metric:{self._metric}, M:{self._m}, efConstruction:{self._efConstruction}, efSearch:{efSearch}"

class PGVectorIVFFLAT(PGVector):
def __init__(self, metric: str, index_param: dict):
super().__init__(metric, index_param)
self._nlinks = index_param.get("nlinks", 32)
self._efConstruction = index_param.get("efConstruction", 40)

def get_vector_index(self):
"""Get IVFFLAT vector index"""
Expand All @@ -203,7 +202,7 @@ def set_query_arguments(self, efSearch: int = 40):
Set query arguments for pgvector query with ivfflat index
"""
self.search_params = {
"metric_type": self._metric_type,
"metric_type": self._metric,
"efSearch": efSearch,
}
self.name = f"pgvector ivfflat metric:{self._metric}, nlinks:{self._nlinks}, efConstruction:{self._efConstruction}, efSearch:{efSearch}"
self.name = f"pgvector ivfflat metric:{self._metric}, nlinks:{self._nlinks}, efSearch:{efSearch}"
7 changes: 2 additions & 5 deletions bigvectorbench/algorithms/sptag/module.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
""" SPTAG module for BigVectorBench framework. """

import subprocess
from time import sleep
import numpy as np
import SPTAG
from bigvectorbench.algorithms.base.module import BaseANN
import csv
import shutil

def metric_mapping(_metric: str):
"""
Expand All @@ -31,6 +27,7 @@ class SPTAGBase(BaseANN):
"""SPTAG implementation"""

def __init__(self, metric: str, dim: int):
super().__init__()
self._metric = metric
self._dim = dim
self._metric_type = metric_mapping(metric)
Expand Down Expand Up @@ -109,7 +106,7 @@ def set_query_arguments(self):

def query(self, v, n, filter_expr=None):
if filter_expr is not None:
raise ValueError( f"[SPTAG] have not supported filter-query!!!" )
raise ValueError("[SPTAG] have not supported filter-query!!!")
j = SPTAG.AnnIndex.Load(self.index_name)
# print(j.Search(v.tobytes(), n)[0])
# print(j.Search(v.tobytes(), n)[1])
Expand Down
4 changes: 4 additions & 0 deletions bigvectorbench/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,10 @@ def main():
force=args.force,
)

if args.docker_tag:
logger.info("running only %s", args.docker_tag)
definitions = [d for d in definitions if d.docker_tag == args.docker_tag]

if args.algorithm:
logger.info("running only %s", args.algorithm)
definitions = [d for d in definitions if d.algorithm == args.algorithm]
Expand Down
Loading