Skip to content

Commit 657bb2f

Browse files
authored
Integrated gRPC Service (#6)
1 parent 65509ee commit 657bb2f

File tree

16 files changed

+381
-62
lines changed

16 files changed

+381
-62
lines changed

.dockerignore

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
target/
2+
.git/
3+
examples/data/
4+
**/*.fvecs
5+
**/*.ivecs
6+
**/*.bvecs
7+
.env
8+
.DS_Store
9+
*.md
10+
LICENSE
11+
rustfmt.toml

Cargo.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Dockerfile

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
FROM rust:1.92-slim AS builder
2+
3+
# Install build dependencies
4+
RUN apt-get update && apt-get install -y \
5+
protobuf-compiler \
6+
libssl-dev \
7+
pkg-config \
8+
libc6-dev \
9+
&& rm -rf /var/lib/apt/lists/*
10+
11+
WORKDIR /usr/src/nyas
12+
13+
COPY . .
14+
15+
RUN chmod +x bolt.sh && ./bolt.sh build
16+
17+
FROM debian:bookworm-slim
18+
19+
# Install runtime dependencies if needed
20+
RUN apt-get update && apt-get install -y \
21+
ca-certificates \
22+
&& rm -rf /var/lib/apt/lists/*
23+
24+
WORKDIR /usr/local/bin
25+
26+
COPY --from=builder /usr/src/nyas/target/release/vecd .
27+
28+
EXPOSE 50051
29+
30+
CMD ["./vecd"]

README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,42 @@ It supports multiple distance metrics (L2, Cosine, Dot), hybrid CPU/GPU executio
1313
https://arxiv.org/abs/2105.09613
1414

1515
These papers influenced Nyas’s design, particularly in indexing structures, graph search, and efficient query traversal.
16+
17+
---
18+
19+
## Quick Start
20+
21+
### Prerequisites
22+
- [Rust](https://www.rust-lang.org/tools/install) (latest stable)
23+
- [Protobuf Compiler](https://grpc.io/docs/protoc-installation/) (for gRPC)
24+
- [Docker](https://docs.docker.com/engine/install/) (optional, for containerized run)
25+
26+
### Running the gRPC Service (vecd)
27+
28+
#### Using Cargo
29+
```bash
30+
# Start the VectorDB service
31+
cargo run -p vecd
32+
```
33+
34+
#### Using Docker
35+
```bash
36+
# Build the image
37+
docker build -t vecd .
38+
39+
# Run the container
40+
docker run -p 50051:50051 vecd
41+
```
42+
43+
### Running Examples
44+
45+
#### SIFT 10k gRPC Example
46+
This example demonstrates connecting to the running `vecd` service, inserting the SIFT 10k dataset, and calculating recall.
47+
48+
1. Ensure `vecd` is running (see above).
49+
2. Download the SIFT 10k dataset into `examples/data/siftsmall`: (ftp://ftp.irisa.fr/local/texmex/corpus/siftsmall.tar.gz)
50+
*(Note: The example expects `siftsmall_base.fvecs`, `siftsmall_query.fvecs`, and `siftsmall_groundtruth.ivecs`)*
51+
3. Run the example:
52+
```bash
53+
cargo run --bin sift10k_index_grpc --release
54+
```

bolt.sh

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,95 @@
22
set -e
33

44
RUST_VERSION="1.92.0"
5-
5+
PROTOBUF_VERSION="21.12"
66
format() {
77
cargo +nightly fmt;
88
}
99

10+
setup_protobuf() {
11+
echo "[INFO] Checking Protobuf installation..."
12+
13+
OS="$(uname -s)"
14+
ARCH="$(uname -m)"
15+
16+
if [ "$OS" = "Linux" ]; then
17+
sudo apt-get update -y
18+
sudo apt-get install -y protobuf-compiler unzip
19+
PROTO_OS="linux"
20+
elif [ "$OS" = "Darwin" ]; then
21+
if ! command -v brew >/dev/null 2>&1; then
22+
echo "[WARN] Homebrew not found. Manual install will proceed but system dependencies might be missing."
23+
fi
24+
PROTO_OS="osx"
25+
else
26+
echo "[ERROR] Unsupported OS: $OS"
27+
exit 1
28+
fi
29+
30+
if [ "$ARCH" = "x86_64" ]; then
31+
PROTO_ARCH="x86_64"
32+
elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
33+
if [ "$PROTO_OS" = "osx" ]; then
34+
PROTO_ARCH="aarch_64"
35+
else
36+
PROTO_ARCH="aarch_64"
37+
fi
38+
else
39+
echo "[ERROR] Unsupported architecture: $ARCH"
40+
exit 1
41+
fi
42+
43+
ZIP_FILE="protoc-$PROTOBUF_VERSION-$PROTO_OS-$PROTO_ARCH.zip"
44+
45+
if command -v protoc >/dev/null 2>&1; then
46+
CURRENT_VERSION=$(protoc --version | awk '{print $2}')
47+
echo "[INFO] Found Protobuf version $CURRENT_VERSION"
48+
if [ "$CURRENT_VERSION" != "$PROTOBUF_VERSION" ]; then
49+
echo "[INFO] Updating Protobuf to $PROTOBUF_VERSION..."
50+
curl -L "https://github.com/protocolbuffers/protobuf/releases/download/v$PROTOBUF_VERSION/$ZIP_FILE" -o protoc.zip
51+
unzip -o protoc.zip
52+
53+
SUDO=""
54+
if [ ! -w "/usr/local/bin" ] || [ ! -w "/usr/local/include" ]; then
55+
SUDO="sudo"
56+
fi
57+
58+
$SUDO mv bin/protoc /usr/local/bin/
59+
# Copy contents to avoid nesting and ensure we don't delete existing include dir if it has other things
60+
$SUDO mkdir -p /usr/local/include/google
61+
$SUDO cp -r include/google/* /usr/local/include/google/
62+
63+
rm -rf protoc.zip bin include readme.txt
64+
else
65+
echo "[OK] Protobuf is already $PROTOBUF_VERSION"
66+
fi
67+
else
68+
echo "[INFO] Protobuf not found. Installing Protobuf $PROTOBUF_VERSION..."
69+
curl -L "https://github.com/protocolbuffers/protobuf/releases/download/v$PROTOBUF_VERSION/$ZIP_FILE" -o protoc.zip
70+
unzip -o protoc.zip
71+
72+
SUDO=""
73+
if [ ! -w "/usr/local/bin" ] || [ ! -w "/usr/local/include" ]; then
74+
SUDO="sudo"
75+
fi
76+
77+
$SUDO mv bin/protoc /usr/local/bin/
78+
$SUDO mkdir -p /usr/local/include/google
79+
$SUDO cp -r include/google/* /usr/local/include/google/
80+
81+
rm -rf protoc.zip bin include readme.txt
82+
fi
83+
}
84+
1085
setup_rust(){
1186
echo "[INFO] Checking Rust installation..."
87+
88+
OS="$(uname -s)"
89+
if [ "$OS" = "Linux" ]; then
90+
sudo apt-get update -y
91+
sudo apt-get install -y build-essential curl
92+
fi
93+
1294
if command -v rustc >/dev/null 2>&1; then
1395
CURRENT_VERSION=$(rustc --version | awk '{print $2}')
1496
echo "[INFO] Found Rust version $CURRENT_VERSION"
@@ -24,6 +106,8 @@ setup_rust(){
24106
fi
25107

26108
export PATH="$HOME/.cargo/bin:$PATH"
109+
source "$HOME/.cargo/env" 2>/dev/null || true
110+
27111
rustc --version
28112
cargo --version
29113

@@ -36,6 +120,7 @@ setup_rust(){
36120

37121
setup() {
38122
setup_rust
123+
setup_protobuf
39124
}
40125

41126
clean() {
@@ -59,8 +144,8 @@ check() {
59144
}
60145

61146
build() {
62-
echo "[INFO] Building..."
63-
cargo build --release
147+
echo "[INFO] Building with native CPU optimizations..."
148+
RUSTFLAGS="-C target-cpu=native" cargo build --release
64149
echo "[OK] Build completed!"
65150
}
66151

@@ -119,7 +204,7 @@ main() {
119204
setup
120205
check
121206
build
122-
deploy
207+
# deploy removed as it is not defined
123208
;;
124209
help|""|*)
125210
help

examples/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,19 @@ version.workspace = true
1212
[dependencies]
1313
diskann = { path = "../nyas/diskann" }
1414
system = { path = "../nyas/system" }
15+
service = { path = "../nyas/service" }
1516
tokio.workspace = true
17+
tonic.workspace = true
1618
cpu-time = "1.0.0"
1719

1820
[[bin]]
1921
name = "sift10k_index"
2022
path = "src/sift10k_index.rs"
2123

24+
[[bin]]
25+
name = "sift10k_index_grpc"
26+
path = "src/sift10k_index_grpc.rs"
27+
2228
[[bin]]
2329
name = "sift1m_index"
2430
path = "src/sift1m_index.rs"

examples/src/index_utils.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ pub fn compute_recall(results: &[Vec<u32>], ground_truth: &[Vec<u32>], k: usize)
150150
total_matches as f64 / (num_queries * k) as f64
151151
}
152152

153+
#[allow(dead_code)]
153154
pub async fn compute_recall_at_k(
154155
index_view: &IndexView, query: &SiftDataset, ground_truth: &[Vec<u32>], k: usize,
155156
) {

examples/src/sift10k_index_grpc.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
use std::time::{Duration, SystemTime};
2+
3+
use cpu_time::ProcessTime;
4+
use service::vector::vector_db_client::VectorDbClient;
5+
use service::vector::{InsertVectorRequest, SearchVectorRequest};
6+
use tokio::io;
7+
use tonic::Request;
8+
9+
use crate::index_utils::SiftDataset;
10+
mod index_utils;
11+
12+
#[tokio::main]
13+
async fn main() -> io::Result<()> {
14+
let base_folder = "examples/data/siftsmall";
15+
let start_cpu = ProcessTime::now();
16+
let start_wall = SystemTime::now();
17+
18+
let (base, query, ground_truth) = tokio::try_join!(
19+
SiftDataset::from_fvecs(format!("{}/siftsmall_base.fvecs", base_folder)),
20+
SiftDataset::from_fvecs(format!("{}/siftsmall_query.fvecs", base_folder)),
21+
SiftDataset::from_ivecs(format!("{}/siftsmall_groundtruth.ivecs", base_folder))
22+
)?;
23+
24+
println!("Base dataset: {} vectors of dimension {}", base.vectors.len(), base.dimension);
25+
println!("Query dataset: {} vectors of dimension {}", query.vectors.len(), query.dimension);
26+
println!("Ground truth: {} queries", ground_truth.len());
27+
28+
let mut client = VectorDbClient::connect("http://0.0.0.0:50051")
29+
.await
30+
.map_err(|e| io::Error::new(io::ErrorKind::ConnectionRefused, e))?;
31+
32+
// Since we are connecting via gRPC, we don't check for local file existence here
33+
// as easily. For simplicity in this example, we'll try to insert if not found
34+
// or just always try to insert (server can handle duplicates if implemented).
35+
// However, the original code had:
36+
// let path = Path::new(index_name);
37+
// if !path.exists() { ... }
38+
39+
// We'll skip the check for now as the server manages the index.
40+
// If we want to avoid re-inserting, we'd need a way to check if index is built on server.
41+
42+
println!("Inserting vectors...");
43+
for (index, vector) in base.vectors.iter().enumerate() {
44+
let request = Request::new(InsertVectorRequest {
45+
id: index.to_string(),
46+
vector: vector.to_f32_vec(),
47+
});
48+
49+
let _response = client.insert_vector(request).await.map_err(io::Error::other)?;
50+
51+
if index % 1000 == 0 && index > 0 {
52+
println!("Inserted {} vectors", index);
53+
}
54+
}
55+
56+
let index_cpu_time = start_cpu.elapsed();
57+
let index_wall_time = start_wall.elapsed().unwrap();
58+
println!("Indexing time: CPU {:?}, Wall {:?}", index_cpu_time, index_wall_time);
59+
60+
for k in [1, 10, 100] {
61+
let mut results = Vec::new();
62+
for q in query.vectors.iter() {
63+
let request =
64+
Request::new(SearchVectorRequest { vector: q.to_f32_vec(), top_k: k as u32 });
65+
let response = client.search_vector(request).await.map_err(io::Error::other)?;
66+
let search_res = response.into_inner();
67+
68+
// Map string IDs back to u32
69+
let u32_ids: Vec<u32> =
70+
search_res.ids.iter().filter_map(|id: &String| id.parse::<u32>().ok()).collect();
71+
results.push(u32_ids);
72+
}
73+
let recall = index_utils::compute_recall(&results, &ground_truth, k);
74+
println!("Recall for k={}: {}", k, recall);
75+
}
76+
77+
let cpu_time: Duration = start_cpu.elapsed();
78+
let wall_time = start_wall.elapsed().unwrap();
79+
80+
println!("Total CPU time: {:?}", cpu_time);
81+
println!("Total Wall time: {:?}", wall_time);
82+
83+
Ok(())
84+
}

nyas/diskann/src/disk_index_storage.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -467,8 +467,11 @@ impl DiskIndexStorage {
467467
cached
468468
} else if let Ok(disk_data) = self.read_node(current.point_id).await {
469469
add_to_cache(&mut node_cache, disk_data);
470-
471-
node_cache.get(&current.point_id).unwrap()
470+
if let Some(node_pair) = node_cache.get(&current.point_id) {
471+
node_pair
472+
} else {
473+
continue;
474+
}
472475
} else {
473476
continue;
474477
};
@@ -500,8 +503,11 @@ impl DiskIndexStorage {
500503
vector.distance(query, metric_type)
501504
} else if let Ok(neighbor_data) = self.read_node(neighbor_id).await {
502505
add_to_cache(&mut node_cache, neighbor_data);
503-
let vector = &node_cache.get(&neighbor_id).unwrap().1;
504-
vector.distance(query, metric_type)
506+
if let Some(vector) = node_cache.get(&neighbor_id) {
507+
vector.1.distance(query, metric_type)
508+
} else {
509+
continue;
510+
}
505511
} else {
506512
continue;
507513
};

0 commit comments

Comments
 (0)