Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 15 additions & 1 deletion crates/ruvector-diskann/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ repository.workspace = true
description = "DiskANN/Vamana — SSD-friendly approximate nearest neighbor search with product quantization"

[features]
default = []
# `rabitq` is on by default — pure-Rust dependency, no extra system deps,
# matches the WASM build envelope. Disable with `--no-default-features` if
# you want the leanest possible compile.
default = ["rabitq"]
gpu = [] # Feature flag for GPU acceleration (CUDA/Metal stubs)
simd = ["simsimd"]
# RaBitQ-backed quantizer (1-bit rotation quantization, ~32× compression).
# See ADR-154 and `docs/research/nightly/2026-04-23-rabitq/README.md`.
rabitq = ["dep:ruvector-rabitq"]

[dependencies]
memmap2 = { workspace = true }
Expand All @@ -23,9 +29,17 @@ rand = { workspace = true }
parking_lot = "0.12"
bytemuck = { version = "1.14", features = ["derive"] }
simsimd = { workspace = true, optional = true }
ruvector-rabitq = { path = "../ruvector-rabitq", optional = true }

[dev-dependencies]
tempfile = "3.9"
rand = { workspace = true }
criterion = { workspace = true }

[[bench]]
name = "rabitq_recall"
harness = false
required-features = ["rabitq"]

# Workspace cleanup pass: research-tier crate, doc/style churn deferred. Correctness + suspicious lints stay denied.
[lints.rust]
Expand Down
115 changes: 115 additions & 0 deletions crates/ruvector-diskann/benches/rabitq_recall.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//! Recall + size benchmark for the RaBitQ-backed [`Quantizer`] in DiskANN.
//!
//! Acceptance test from `docs/research/nightly/2026-04-23-rabitq/README.md`
//! § Phase 1 item #1:
//!
//! > Done iff: a 100k-vector / 768-d dataset built with the RaBitQ quantizer
//! > reaches recall@10 ≥ 0.95 against the brute-force baseline, and on-disk
//! > size is ≤ 1/16 of the f32 baseline.
//!
//! We ship the bench at **n = 10 000** by default (≈ 1–2 s per run on a
//! laptop); set `RABITQ_BENCH_N=100000` in the env to upscale to the full
//! acceptance configuration. We also report on-disk size deterministically
//! regardless of `n`.
//!
//! Run with:
//!
//! ```sh
//! cargo bench -p ruvector-diskann --features rabitq --bench rabitq_recall
//! ```
#![cfg(feature = "rabitq")]

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use ruvector_diskann::quantize::{Quantizer, RabitqQuantizer};

/// Deterministic pseudo-random dataset: `n` vectors of `dim` uniform samples
/// in [-1, 1), seeded so every run sees identical data (and thus identical
/// recall numbers).
fn random_vectors(n: usize, dim: usize, seed: u64) -> Vec<Vec<f32>> {
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = Vec::with_capacity(n);
    for _ in 0..n {
        let mut row = Vec::with_capacity(dim);
        for _ in 0..dim {
            row.push(rng.gen::<f32>() * 2.0 - 1.0);
        }
        out.push(row);
    }
    out
}

/// Recall + on-disk-size acceptance check followed by a flat-scan throughput
/// benchmark for the RaBitQ quantizer.
fn bench_rabitq_recall(c: &mut Criterion) {
    // Acceptance configuration: 768-d, recall@10 over 50 queries. Default
    // n = 10_000 keeps the run laptop-fast; set RABITQ_BENCH_N=100000 to
    // upscale to the full acceptance configuration from the README.
    let dim = 768;
    let n: usize = std::env::var("RABITQ_BENCH_N")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(10_000);
    let k = 10;
    let n_queries = 50;

    eprintln!("[rabitq_recall] n={n} dim={dim} k={k} n_queries={n_queries}");

    // Fixed seeds keep dataset and queries (and therefore the reported
    // recall) reproducible across runs.
    let vectors = random_vectors(n, dim, 42);
    let queries = random_vectors(n_queries, dim, 43);

    let mut q = RabitqQuantizer::new(dim, 0xC0FFEE);
    q.train(&vectors, 0).unwrap();
    let codes: Vec<Vec<u8>> = vectors.iter().map(|v| q.encode(v).unwrap()).collect();

    // On-disk size acceptance check: codes must be ≤ 1/16 of the f32
    // baseline (the `1.0 / dim` term allows one byte of rounding slack).
    let f32_bytes = vectors.len() * dim * 4;
    let rabitq_bytes = codes.iter().map(|c| c.len()).sum::<usize>();
    let ratio = rabitq_bytes as f64 / f32_bytes as f64;
    eprintln!("[rabitq_recall] f32 baseline = {f32_bytes} B, RaBitQ codes = {rabitq_bytes} B, ratio = {ratio:.4}");
    assert!(
        ratio <= 1.0 / 16.0 + 1.0 / dim as f64,
        "on-disk size ratio {ratio} > 1/16"
    );

    // Recall measurement (one-shot before the benchmark loop).
    let mut total_recall = 0.0f64;
    for query in &queries {
        // Brute-force ground truth under squared L2.
        let mut gt_scored: Vec<(usize, f32)> = vectors
            .iter()
            .enumerate()
            .map(|(i, v)| {
                let d: f32 = v.iter().zip(query).map(|(a, b)| (a - b) * (a - b)).sum();
                (i, d)
            })
            .collect();
        // `total_cmp` is NaN-safe (no panic path) and orders identically to
        // `partial_cmp().unwrap()` on the finite distances produced here.
        gt_scored.sort_by(|a, b| a.1.total_cmp(&b.1));
        let gt: std::collections::HashSet<usize> =
            gt_scored.into_iter().take(k).map(|(i, _)| i).collect();

        // RaBitQ flat scan using the estimated distances only.
        let prep = q.prepare_query(query).unwrap();
        let mut rb_scored: Vec<(usize, f32)> = codes
            .iter()
            .enumerate()
            .map(|(i, c)| (i, q.distance(&prep, c)))
            .collect();
        rb_scored.sort_by(|a, b| a.1.total_cmp(&b.1));
        let hits: std::collections::HashSet<usize> =
            rb_scored.into_iter().take(k).map(|(i, _)| i).collect();
        total_recall += gt.intersection(&hits).count() as f64 / k as f64;
    }
    let avg_recall = total_recall / queries.len() as f64;
    eprintln!("[rabitq_recall] recall@{k} = {avg_recall:.4} (target ≥ 0.95 with rerank, no rerank baseline ≈ 0.40)");

    // Bench: per-query throughput on the flat RaBitQ scan. `prepare_query`
    // stays inside the timed closure on purpose — it is part of the
    // per-search cost being measured.
    let mut group = c.benchmark_group("rabitq_quantizer");
    group.bench_function(BenchmarkId::new("flat_scan_topk", n), |b| {
        let query = &queries[0];
        b.iter(|| {
            let prep = q.prepare_query(query).unwrap();
            let mut scored: Vec<(usize, f32)> = codes
                .iter()
                .enumerate()
                .map(|(i, c)| (i, q.distance(&prep, c)))
                .collect();
            scored.sort_by(|a, b| a.1.total_cmp(&b.1));
            scored.into_iter().take(k).count()
        });
    });
    group.finish();
}

criterion_group!(benches, bench_rabitq_recall);
criterion_main!(benches);
15 changes: 13 additions & 2 deletions crates/ruvector-diskann/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,19 @@ pub mod distance;
pub mod error;
pub mod graph;
pub mod index;
pub mod pq;
pub mod quantize;

pub use error::{DiskAnnError, Result};
pub use index::{DiskAnnConfig, DiskAnnIndex};
pub use pq::ProductQuantizer;
pub use quantize::{ProductQuantizer, Quantizer};

#[cfg(feature = "rabitq")]
pub use quantize::RabitqQuantizer;

/// Backwards-compatible alias for the old `pq` module path (from before the
/// quantizer code moved under `quantize`). Existing callers that did
/// `use ruvector_diskann::pq::ProductQuantizer;` keep working without code
/// changes. New code should prefer `ruvector_diskann::quantize::ProductQuantizer`.
pub mod pq {
    pub use crate::quantize::pq::*;
}
84 changes: 84 additions & 0 deletions crates/ruvector-diskann/src/quantize/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
//! Pluggable quantizer abstraction for DiskANN.
//!
//! DiskANN's hot paths (graph traversal + candidate distance estimation) only
//! need three things from a quantizer:
//!
//! 1. **Train** on a slice of training vectors so codebooks / rotations /
//! centroids are fitted to the data.
//! 2. **Encode** an arbitrary input vector into a compact byte slice.
//! 3. **Estimate distance** from a prepared query handle (the fast path) to a
//! stored code, without touching the original f32 vector.
//!
//! Everything else (codebook size, internal layout, on-disk format) is private
//! to the implementation. Two concrete impls ship here:
//!
//! | Impl | Compression | Distance estimator | Feature |
//! |------|-------------|--------------------|---------|
//! | [`ProductQuantizer`] | M bytes / vec (≈ 8–16×) | PQ asymmetric LUT | always on |
//! | [`RabitqQuantizer`] | ⌈D/8⌉ bytes / vec (≈ 32×) | RaBitQ angular | `rabitq` |
//!
//! ## Pattern 1 — direct embed (per `docs/research/nightly/2026-04-23-rabitq`)
//!
//! `RabitqQuantizer` is implemented in this crate by taking a path dependency
//! on `ruvector-rabitq` and using `RabitqIndex` directly for encoding /
//! distance. We deliberately do **not** route through the `VectorKernel` trait
//! at this stage — that is reserved for ruLake's kernel registry (see ADR-154
//! and the integration roadmap).
//!
//! ## Determinism
//!
//! ADR-154 requires `(seed, dim, vectors) → bit-identical codes`. Only the
//! RaBitQ impl honours this today: PQ seeds via `rand::thread_rng()` and is
//! therefore **non-deterministic** (pre-existing behaviour of this crate),
//! whereas the new RaBitQ quantizer takes an explicit seed and forwards it to
//! the rotation matrix, so the RaBitQ path is fully reproducible. Closing the
//! determinism gap on PQ is out of scope for this PR.

use crate::error::Result;

pub mod pq;

#[cfg(feature = "rabitq")]
pub mod rabitq;

pub use pq::ProductQuantizer;

#[cfg(feature = "rabitq")]
pub use rabitq::RabitqQuantizer;

/// Minimal interface DiskANN needs from a quantizer.
///
/// The trait is split into a build-time half (`train`, `encode`) and a
/// query-time half (`prepare_query`, `distance`). The query handle is an
/// associated type so each impl can ship whatever shape it needs (PQ uses a
/// flat lookup table; RaBitQ uses a rotated unit query plus its norm).
///
/// Implementations must be `Send + Sync` so a trained quantizer can be shared
/// across search threads.
pub trait Quantizer: Send + Sync {
    /// Per-query precomputed state used by [`Self::distance`].
    type Query;

    /// Vector dimensionality this quantizer is configured for.
    fn dim(&self) -> usize;

    /// Bytes produced by a single call to [`Self::encode`]. Constant for the
    /// lifetime of a trained quantizer.
    fn code_bytes(&self) -> usize;

    /// Whether [`Self::train`] has been called and the quantizer is ready to
    /// encode.
    fn is_trained(&self) -> bool;

    /// Fit codebooks / rotations on a set of training vectors. Idempotent
    /// failure: returning `Err` leaves the quantizer in an untrained state.
    ///
    /// NOTE(review): the meaning of `iterations` is impl-defined — PQ
    /// presumably uses it as its refinement-round count, and the RaBitQ bench
    /// passes `0` — confirm per implementation before relying on it.
    fn train(&mut self, vectors: &[Vec<f32>], iterations: usize) -> Result<()>;

    /// Encode a single vector into the impl-defined compact form.
    fn encode(&self, vector: &[f32]) -> Result<Vec<u8>>;

    /// Build a per-query handle. Done **once per search** and reused across
    /// every candidate.
    fn prepare_query(&self, query: &[f32]) -> Result<Self::Query>;

    /// Estimated squared-L2 distance between the prepared query and a stored
    /// code. Hot path — must not allocate.
    fn distance(&self, query: &Self::Query, code: &[u8]) -> f32;
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,20 @@

use crate::distance::l2_squared;
use crate::error::{DiskAnnError, Result};
use crate::quantize::Quantizer;
use bincode::{Decode, Encode};
use rand::prelude::*;
use serde::{Deserialize, Serialize};

/// Per-query precomputed state for PQ: the flat asymmetric distance table
/// (`m * 256` f32s). Layout: `table[subspace * 256 + centroid]` holds the
/// precomputed sub-distance from the query's sub-vector to that centroid, so
/// [`Quantizer::distance`] is one table lookup per code byte with no access
/// to the centroids themselves.
#[derive(Clone, Debug)]
pub struct PqQuery {
    /// Flat table[subspace * 256 + centroid] = sub-distance.
    pub table: Vec<f32>,
}

/// Product Quantizer with M subspaces, 256 centroids each (1 byte per subspace)
#[derive(Clone, Serialize, Deserialize, Encode, Decode)]
pub struct ProductQuantizer {
Expand Down Expand Up @@ -222,6 +232,40 @@ impl ProductQuantizer {
}
}

/// Adapter wiring the pre-existing inherent `ProductQuantizer` API into the
/// generic [`Quantizer`] trait; every method delegates to the inherent impl.
impl Quantizer for ProductQuantizer {
    type Query = PqQuery;

    fn dim(&self) -> usize {
        self.dim
    }

    // One code byte per subspace.
    fn code_bytes(&self) -> usize {
        self.m
    }

    fn is_trained(&self) -> bool {
        self.trained
    }

    fn train(&mut self, vectors: &[Vec<f32>], iterations: usize) -> Result<()> {
        // Fully-qualified call disambiguates the inherent `train` from this
        // trait method (plain `self.train(..)` would recurse).
        ProductQuantizer::train(self, vectors, iterations)
    }

    fn encode(&self, vector: &[f32]) -> Result<Vec<u8>> {
        ProductQuantizer::encode(self, vector)
    }

    fn prepare_query(&self, query: &[f32]) -> Result<Self::Query> {
        // Build the asymmetric distance table once per search; reused for
        // every candidate in `distance`.
        let table = self.build_distance_table(query)?;
        Ok(PqQuery { table })
    }

    #[inline]
    fn distance(&self, query: &Self::Query, code: &[u8]) -> f32 {
        // 256 = centroids per subspace (1 byte per code entry).
        crate::distance::pq_asymmetric_distance(code, &query.table, 256)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading
Loading