|
use std::time::{Duration, Instant, SystemTime};

use cpu_time::ProcessTime;
use service::vector::vector_db_client::VectorDbClient;
use service::vector::{InsertVectorRequest, SearchVectorRequest};
use tokio::io;
use tonic::Request;

use crate::index_utils::SiftDataset;
mod index_utils;
| 11 | + |
| 12 | +#[tokio::main] |
| 13 | +async fn main() -> io::Result<()> { |
| 14 | + let base_folder = "examples/data/siftsmall"; |
| 15 | + let start_cpu = ProcessTime::now(); |
| 16 | + let start_wall = SystemTime::now(); |
| 17 | + |
| 18 | + let (base, query, ground_truth) = tokio::try_join!( |
| 19 | + SiftDataset::from_fvecs(format!("{}/siftsmall_base.fvecs", base_folder)), |
| 20 | + SiftDataset::from_fvecs(format!("{}/siftsmall_query.fvecs", base_folder)), |
| 21 | + SiftDataset::from_ivecs(format!("{}/siftsmall_groundtruth.ivecs", base_folder)) |
| 22 | + )?; |
| 23 | + |
| 24 | + println!("Base dataset: {} vectors of dimension {}", base.vectors.len(), base.dimension); |
| 25 | + println!("Query dataset: {} vectors of dimension {}", query.vectors.len(), query.dimension); |
| 26 | + println!("Ground truth: {} queries", ground_truth.len()); |
| 27 | + |
| 28 | + let mut client = VectorDbClient::connect("http://0.0.0.0:50051") |
| 29 | + .await |
| 30 | + .map_err(|e| io::Error::new(io::ErrorKind::ConnectionRefused, e))?; |
| 31 | + |
| 32 | + // Since we are connecting via gRPC, we don't check for local file existence here |
| 33 | + // as easily. For simplicity in this example, we'll try to insert if not found |
| 34 | + // or just always try to insert (server can handle duplicates if implemented). |
| 35 | + // However, the original code had: |
| 36 | + // let path = Path::new(index_name); |
| 37 | + // if !path.exists() { ... } |
| 38 | + |
| 39 | + // We'll skip the check for now as the server manages the index. |
| 40 | + // If we want to avoid re-inserting, we'd need a way to check if index is built on server. |
| 41 | + |
| 42 | + println!("Inserting vectors..."); |
| 43 | + for (index, vector) in base.vectors.iter().enumerate() { |
| 44 | + let request = Request::new(InsertVectorRequest { |
| 45 | + id: index.to_string(), |
| 46 | + vector: vector.to_f32_vec(), |
| 47 | + }); |
| 48 | + |
| 49 | + let _response = client.insert_vector(request).await.map_err(io::Error::other)?; |
| 50 | + |
| 51 | + if index % 1000 == 0 && index > 0 { |
| 52 | + println!("Inserted {} vectors", index); |
| 53 | + } |
| 54 | + } |
| 55 | + |
| 56 | + let index_cpu_time = start_cpu.elapsed(); |
| 57 | + let index_wall_time = start_wall.elapsed().unwrap(); |
| 58 | + println!("Indexing time: CPU {:?}, Wall {:?}", index_cpu_time, index_wall_time); |
| 59 | + |
| 60 | + for k in [1, 10, 100] { |
| 61 | + let mut results = Vec::new(); |
| 62 | + for q in query.vectors.iter() { |
| 63 | + let request = |
| 64 | + Request::new(SearchVectorRequest { vector: q.to_f32_vec(), top_k: k as u32 }); |
| 65 | + let response = client.search_vector(request).await.map_err(io::Error::other)?; |
| 66 | + let search_res = response.into_inner(); |
| 67 | + |
| 68 | + // Map string IDs back to u32 |
| 69 | + let u32_ids: Vec<u32> = |
| 70 | + search_res.ids.iter().filter_map(|id: &String| id.parse::<u32>().ok()).collect(); |
| 71 | + results.push(u32_ids); |
| 72 | + } |
| 73 | + let recall = index_utils::compute_recall(&results, &ground_truth, k); |
| 74 | + println!("Recall for k={}: {}", k, recall); |
| 75 | + } |
| 76 | + |
| 77 | + let cpu_time: Duration = start_cpu.elapsed(); |
| 78 | + let wall_time = start_wall.elapsed().unwrap(); |
| 79 | + |
| 80 | + println!("Total CPU time: {:?}", cpu_time); |
| 81 | + println!("Total Wall time: {:?}", wall_time); |
| 82 | + |
| 83 | + Ok(()) |
| 84 | +} |
0 commit comments