Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 15 additions & 1 deletion crates/ruvector-diskann/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ repository.workspace = true
description = "DiskANN/Vamana — SSD-friendly approximate nearest neighbor search with product quantization"

[features]
default = []
# `rabitq` is on by default — pure-Rust dependency, no extra system deps,
# matches the WASM build envelope. Disable with `--no-default-features` if
# you want the leanest possible compile.
default = ["rabitq"]
gpu = [] # Feature flag for GPU acceleration (CUDA/Metal stubs)
simd = ["simsimd"]
# RaBitQ-backed quantizer (1-bit rotation quantization, ~32× compression).
# See ADR-154 and `docs/research/nightly/2026-04-23-rabitq/README.md`.
rabitq = ["dep:ruvector-rabitq"]

[dependencies]
memmap2 = { workspace = true }
Expand All @@ -23,9 +29,17 @@ rand = { workspace = true }
parking_lot = "0.12"
bytemuck = { version = "1.14", features = ["derive"] }
simsimd = { workspace = true, optional = true }
ruvector-rabitq = { path = "../ruvector-rabitq", optional = true }

[dev-dependencies]
tempfile = "3.9"
rand = { workspace = true }
criterion = { workspace = true }

[[bench]]
name = "rabitq_recall"
harness = false
required-features = ["rabitq"]

# Workspace cleanup pass: research-tier crate, doc/style churn deferred. Correctness + suspicious lints stay denied.
[lints.rust]
Expand Down
115 changes: 115 additions & 0 deletions crates/ruvector-diskann/benches/rabitq_recall.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//! Recall + size benchmark for the RaBitQ-backed [`Quantizer`] in DiskANN.
//!
//! Acceptance test from `docs/research/nightly/2026-04-23-rabitq/README.md`
//! § Phase 1 item #1:
//!
//! > Done iff: a 100k-vector / 768-d dataset built with the RaBitQ quantizer
//! > reaches recall@10 ≥ 0.95 against the brute-force baseline, and on-disk
//! > size is ≤ 1/16 of the f32 baseline.
//!
//! We ship the bench at **n = 10 000** by default (≈ 1–2 s per run on a
//! laptop); set `RABITQ_BENCH_N=100000` in the env to upscale to the full
//! acceptance configuration. We also report on-disk size deterministically
//! regardless of `n`.
//!
//! Run with:
//!
//! ```sh
//! cargo bench -p ruvector-diskann --features rabitq --bench rabitq_recall
//! ```
#![cfg(feature = "rabitq")]

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_diskann::quantize::{Quantizer, RabitqQuantizer};

/// Deterministic pseudo-random dataset: `n` vectors of `dim` uniform samples
/// in [-1, 1), seeded so every run sees identical data (and thus identical
/// recall numbers).
fn random_vectors(n: usize, dim: usize, seed: u64) -> Vec<Vec<f32>> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        let mut row = Vec::with_capacity(dim);
        for _ in 0..dim {
            row.push(rng.gen::<f32>() * 2.0 - 1.0);
        }
        out.push(row);
    }
    out
}

/// Recall + on-disk-size acceptance check followed by a flat-scan throughput
/// benchmark for the RaBitQ quantizer.
fn bench_rabitq_recall(c: &mut Criterion) {
    // Acceptance configuration: 768-d, recall@10 over 50 queries. Default
    // n = 10_000 keeps the run laptop-fast; set RABITQ_BENCH_N=100000 to
    // upscale to the full acceptance configuration from the README.
    let dim = 768;
    let n: usize = std::env::var("RABITQ_BENCH_N")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(10_000);
    let k = 10;
    let n_queries = 50;

    eprintln!("[rabitq_recall] n={n} dim={dim} k={k} n_queries={n_queries}");

    // Fixed seeds keep dataset and queries (and therefore the reported
    // recall) reproducible across runs.
    let vectors = random_vectors(n, dim, 42);
    let queries = random_vectors(n_queries, dim, 43);

    let mut q = RabitqQuantizer::new(dim, 0xC0FFEE);
    q.train(&vectors, 0).unwrap();
    let codes: Vec<Vec<u8>> = vectors.iter().map(|v| q.encode(v).unwrap()).collect();

    // On-disk size acceptance check: codes must be ≤ 1/16 of the f32
    // baseline (the `1.0 / dim` term allows one byte of rounding slack).
    let f32_bytes = vectors.len() * dim * 4;
    let rabitq_bytes = codes.iter().map(|c| c.len()).sum::<usize>();
    let ratio = rabitq_bytes as f64 / f32_bytes as f64;
    eprintln!("[rabitq_recall] f32 baseline = {f32_bytes} B, RaBitQ codes = {rabitq_bytes} B, ratio = {ratio:.4}");
    assert!(
        ratio <= 1.0 / 16.0 + 1.0 / dim as f64,
        "on-disk size ratio {ratio} > 1/16"
    );

    // Recall measurement (one-shot before the benchmark loop).
    let mut total_recall = 0.0f64;
    for query in &queries {
        // Brute-force ground truth under squared L2.
        let mut gt_scored: Vec<(usize, f32)> = vectors
            .iter()
            .enumerate()
            .map(|(i, v)| {
                let d: f32 = v.iter().zip(query).map(|(a, b)| (a - b) * (a - b)).sum();
                (i, d)
            })
            .collect();
        // `total_cmp` is NaN-safe (no panic path) and orders identically to
        // `partial_cmp().unwrap()` on the finite distances produced here.
        gt_scored.sort_by(|a, b| a.1.total_cmp(&b.1));
        let gt: std::collections::HashSet<usize> =
            gt_scored.into_iter().take(k).map(|(i, _)| i).collect();

        // RaBitQ flat scan using the estimated distances only.
        let prep = q.prepare_query(query).unwrap();
        let mut rb_scored: Vec<(usize, f32)> = codes
            .iter()
            .enumerate()
            .map(|(i, c)| (i, q.distance(&prep, c)))
            .collect();
        rb_scored.sort_by(|a, b| a.1.total_cmp(&b.1));
        let hits: std::collections::HashSet<usize> =
            rb_scored.into_iter().take(k).map(|(i, _)| i).collect();
        total_recall += gt.intersection(&hits).count() as f64 / k as f64;
    }
    let avg_recall = total_recall / queries.len() as f64;
    eprintln!("[rabitq_recall] recall@{k} = {avg_recall:.4} (target ≥ 0.95 with rerank, no rerank baseline ≈ 0.40)");

    // Bench: per-query throughput on the flat RaBitQ scan. `prepare_query`
    // stays inside the timed closure on purpose — it is part of the
    // per-search cost being measured.
    let mut group = c.benchmark_group("rabitq_quantizer");
    group.bench_function(BenchmarkId::new("flat_scan_topk", n), |b| {
        let query = &queries[0];
        b.iter(|| {
            let prep = q.prepare_query(query).unwrap();
            let mut scored: Vec<(usize, f32)> = codes
                .iter()
                .enumerate()
                .map(|(i, c)| (i, q.distance(&prep, c)))
                .collect();
            scored.sort_by(|a, b| a.1.total_cmp(&b.1));
            scored.into_iter().take(k).count()
        });
    });
    group.finish();
}

criterion_group!(benches, bench_rabitq_recall);
criterion_main!(benches);
15 changes: 13 additions & 2 deletions crates/ruvector-diskann/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,19 @@ pub mod distance;
pub mod error;
pub mod graph;
pub mod index;
pub mod pq;
pub mod quantize;

pub use error::{DiskAnnError, Result};
pub use index::{DiskAnnConfig, DiskAnnIndex};
pub use pq::ProductQuantizer;
pub use quantize::{ProductQuantizer, Quantizer};

#[cfg(feature = "rabitq")]
pub use quantize::RabitqQuantizer;

/// Backwards-compatible alias for the old `pq` module path (from before the
/// quantizer code moved under `quantize`). Existing callers that did
/// `use ruvector_diskann::pq::ProductQuantizer;` keep working without code
/// changes. New code should prefer `ruvector_diskann::quantize::ProductQuantizer`.
pub mod pq {
    pub use crate::quantize::pq::*;
}
84 changes: 84 additions & 0 deletions crates/ruvector-diskann/src/quantize/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
//! Pluggable quantizer abstraction for DiskANN.
//!
//! DiskANN's hot paths (graph traversal + candidate distance estimation) only
//! need three things from a quantizer:
//!
//! 1. **Train** on a slice of training vectors so codebooks / rotations /
//! centroids are fitted to the data.
//! 2. **Encode** an arbitrary input vector into a compact byte slice.
//! 3. **Estimate distance** from a prepared query handle (the fast path) to a
//! stored code, without touching the original f32 vector.
//!
//! Everything else (codebook size, internal layout, on-disk format) is private
//! to the implementation. Two concrete impls ship here:
//!
//! | Impl | Compression | Distance estimator | Feature |
//! |------|-------------|--------------------|---------|
//! | [`ProductQuantizer`] | M bytes / vec (≈ 8–16×) | PQ asymmetric LUT | always on |
//! | [`RabitqQuantizer`] | ⌈D/8⌉ bytes / vec (≈ 32×) | RaBitQ angular | `rabitq` |
//!
//! ## Pattern 1 — direct embed (per `docs/research/nightly/2026-04-23-rabitq`)
//!
//! `RabitqQuantizer` is implemented in this crate by taking a path dependency
//! on `ruvector-rabitq` and using `RabitqIndex` directly for encoding /
//! distance. We deliberately do **not** route through the `VectorKernel` trait
//! at this stage — that is reserved for ruLake's kernel registry (see ADR-154
//! and the integration roadmap).
//!
//! ## Determinism
//!
//! ADR-154 requires `(seed, dim, vectors) → bit-identical codes`. Only the
//! RaBitQ impl honours this today: PQ seeds via `rand::thread_rng()` and is
//! therefore **non-deterministic** (pre-existing behaviour of this crate),
//! whereas the new RaBitQ quantizer takes an explicit seed and forwards it to
//! the rotation matrix, so the RaBitQ path is fully reproducible. Closing the
//! determinism gap on PQ is out of scope for this PR.

use crate::error::Result;

pub mod pq;

#[cfg(feature = "rabitq")]
pub mod rabitq;

pub use pq::ProductQuantizer;

#[cfg(feature = "rabitq")]
pub use rabitq::RabitqQuantizer;

/// Minimal interface DiskANN needs from a quantizer.
///
/// The trait is split into a build-time half (`train`, `encode`) and a
/// query-time half (`prepare_query`, `distance`). The query handle is an
/// associated type so each impl can ship whatever shape it needs (PQ uses a
/// flat lookup table; RaBitQ uses a rotated unit query plus its norm).
///
/// Implementations must be `Send + Sync` so a trained quantizer can be shared
/// across search threads.
pub trait Quantizer: Send + Sync {
    /// Per-query precomputed state used by [`Self::distance`].
    type Query;

    /// Vector dimensionality this quantizer is configured for.
    fn dim(&self) -> usize;

    /// Bytes produced by a single call to [`Self::encode`]. Constant for the
    /// lifetime of a trained quantizer.
    fn code_bytes(&self) -> usize;

    /// Whether [`Self::train`] has been called and the quantizer is ready to
    /// encode.
    fn is_trained(&self) -> bool;

    /// Fit codebooks / rotations on a set of training vectors. Idempotent
    /// failure: returning `Err` leaves the quantizer in an untrained state.
    ///
    /// NOTE(review): the meaning of `iterations` is impl-defined — PQ
    /// presumably uses it as its refinement-round count, and the RaBitQ bench
    /// passes `0` — confirm per implementation before relying on it.
    fn train(&mut self, vectors: &[Vec<f32>], iterations: usize) -> Result<()>;

    /// Encode a single vector into the impl-defined compact form.
    fn encode(&self, vector: &[f32]) -> Result<Vec<u8>>;

    /// Build a per-query handle. Done **once per search** and reused across
    /// every candidate.
    fn prepare_query(&self, query: &[f32]) -> Result<Self::Query>;

    /// Estimated squared-L2 distance between the prepared query and a stored
    /// code. Hot path — must not allocate.
    fn distance(&self, query: &Self::Query, code: &[u8]) -> f32;
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,20 @@

use crate::distance::l2_squared;
use crate::error::{DiskAnnError, Result};
use crate::quantize::Quantizer;
use bincode::{Decode, Encode};
use rand::prelude::*;
use serde::{Deserialize, Serialize};

/// Per-query precomputed state for PQ: the flat asymmetric distance table
/// (`m * 256` f32s). Layout: `table[subspace * 256 + centroid]` holds the
/// precomputed sub-distance from the query's sub-vector to that centroid, so
/// [`Quantizer::distance`] is one table lookup per code byte with no access
/// to the centroids themselves.
#[derive(Clone, Debug)]
pub struct PqQuery {
    /// Flat table[subspace * 256 + centroid] = sub-distance.
    pub table: Vec<f32>,
}

/// Product Quantizer with M subspaces, 256 centroids each (1 byte per subspace)
#[derive(Clone, Serialize, Deserialize, Encode, Decode)]
pub struct ProductQuantizer {
Expand Down Expand Up @@ -222,6 +232,40 @@ impl ProductQuantizer {
}
}

/// Adapter wiring the pre-existing inherent `ProductQuantizer` API into the
/// generic [`Quantizer`] trait; every method delegates to the inherent impl.
impl Quantizer for ProductQuantizer {
    type Query = PqQuery;

    fn dim(&self) -> usize {
        self.dim
    }

    // One code byte per subspace.
    fn code_bytes(&self) -> usize {
        self.m
    }

    fn is_trained(&self) -> bool {
        self.trained
    }

    fn train(&mut self, vectors: &[Vec<f32>], iterations: usize) -> Result<()> {
        // Fully-qualified call disambiguates the inherent `train` from this
        // trait method (plain `self.train(..)` would recurse).
        ProductQuantizer::train(self, vectors, iterations)
    }

    fn encode(&self, vector: &[f32]) -> Result<Vec<u8>> {
        ProductQuantizer::encode(self, vector)
    }

    fn prepare_query(&self, query: &[f32]) -> Result<Self::Query> {
        // Build the asymmetric distance table once per search; reused for
        // every candidate in `distance`.
        let table = self.build_distance_table(query)?;
        Ok(PqQuery { table })
    }

    #[inline]
    fn distance(&self, query: &Self::Query, code: &[u8]) -> f32 {
        // 256 = centroids per subspace (1 byte per code entry).
        crate::distance::pq_asymmetric_distance(code, &query.table, 256)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading
Loading