From 9d1b50bc2f8c64833833c805f5ecd80e7b7e5981 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 15:16:04 +0200 Subject: [PATCH 01/74] add aws-sdk-sqs --- Cargo.lock | 230 +++++++++++++++++++++----- crates/bin/docs_rs_watcher/Cargo.toml | 1 + 2 files changed, 189 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c049e144..b1ceb81dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -486,7 +486,7 @@ dependencies = [ "aws-sdk-sts", "aws-smithy-async", "aws-smithy-http 0.63.6", - "aws-smithy-json 0.62.5", + "aws-smithy-json 0.62.6", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -536,9 +536,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.3" +version = "1.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dcd93c82209ac7413532388067dce79be5a8780c1786e5fae3df22e4dee2864" +checksum = "77ed8e8c52d2dc2390ad9f15647fe663f71e9780b4262c190fbb823a32721566" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -596,6 +596,30 @@ dependencies = [ "url", ] +[[package]] +name = "aws-sdk-sqs" +version = "1.99.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce61cf7e451891862a315dc96e1dbeb5e6a6f3740b354b5243217602b7e437b" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http 0.63.6", + "aws-smithy-json 0.62.6", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-sts" version = "1.103.0" @@ -606,7 +630,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http 0.63.6", - "aws-smithy-json 0.62.5", + "aws-smithy-json 0.62.6", "aws-smithy-observability", "aws-smithy-query", "aws-smithy-runtime", @@ -623,9 +647,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.3" +version = 
"1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68dc0b907359b120170613b5c09ccc61304eac3998ff6274b97d93ee6490115a" +checksum = "b7083fb918b38474ac65ffbf8a69fc8792d36879f4ac5f1667b43aec61efe9a5" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -738,17 +762,23 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2", + "h2 0.3.27", + "h2 0.4.14", + "http 0.2.12", "http 1.4.0", - "hyper", - "hyper-rustls", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.9.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.9", "hyper-util", "pin-project-lite", - "rustls", + "rustls 0.21.12", + "rustls 0.23.40", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", "tracing", ] @@ -764,10 +794,12 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.62.5" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a" +checksum = "517089205f18ab4adc5a3e02888cb139bbbbb2e168eac9f396216925d1fbeaf5" dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", ] @@ -792,15 +824,16 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.11.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0504b1ab12debb5959e5165ee5fe97dd387e7aa7ea6a477bfd7635dfe769a4f5" +checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" dependencies = [ "aws-smithy-async", "aws-smithy-http 0.63.6", "aws-smithy-http-client", "aws-smithy-observability", "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", "bytes", "fastrand", @@ -817,9 +850,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b71a13df6ada0aafbf21a73bdfcdf9324cfa9df77d96b8446045be3cde61b42e" +checksum = "dc117c179ecf39a62a0a3f49f600e9ac26a7ad7dd172177999f83933af776c32" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api-macros", @@ -844,11 +877,22 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "aws-smithy-schema" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 1.4.0", +] + [[package]] name = "aws-smithy-types" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" +checksum = "056b66dbce2f81cc0c1e2b05bb402eb58f8a3530479d650efadd5bbae9a4050b" dependencies = [ "base64-simd", "bytes", @@ -891,13 +935,14 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.15" +version = "1.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f4bbcaa9304ea40902d3d5f42a0428d1bd895a2b0f6999436fb279ffddc58ac" +checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531" dependencies = [ "aws-credential-types", "aws-smithy-async", "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", "rustc_version", "tracing", @@ -917,7 +962,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "itoa 1.0.18", "matchit", @@ -2445,6 +2490,7 @@ name = "docs_rs_watcher" version = "0.6.0" dependencies = [ "anyhow", + "aws-sdk-sqs", "clap", "crates-index", "crates-index-diff", @@ -3946,6 +3992,25 @@ dependencies = [ "phf 0.11.3", ] +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + 
"futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.14" @@ -4241,6 +4306,30 @@ dependencies = [ "typenum", ] +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa 1.0.18", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.9.0" @@ -4251,7 +4340,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", + "h2 0.4.14", "http 1.4.0", "http-body 1.0.1", "httparse", @@ -4263,6 +4352,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.9" @@ -4270,12 +4374,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ "http 1.4.0", - "hyper", + "hyper 1.9.0", "hyper-util", - "rustls", + "rustls 0.23.40", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", ] @@ -4285,7 +4389,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.9.0", "hyper-util", "pin-project-lite", "tokio", 
@@ -4300,7 +4404,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "native-tls", "tokio", @@ -4320,7 +4424,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper", + "hyper 1.9.0", "ipnet", "libc", "percent-encoding", @@ -4980,7 +5084,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "log", "pin-project-lite", @@ -6025,7 +6129,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", + "rustls 0.23.40", "socket2 0.6.3", "thiserror", "tokio", @@ -6046,7 +6150,7 @@ dependencies = [ "rand 0.9.4", "ring", "rustc-hash", - "rustls", + "rustls 0.23.40", "rustls-pki-types", "slab", "thiserror", @@ -6336,12 +6440,12 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.4.14", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.9.0", + "hyper-rustls 0.27.9", "hyper-tls", "hyper-util", "js-sys", @@ -6351,7 +6455,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.40", "rustls-pki-types", "rustls-platform-verifier", "serde", @@ -6359,7 +6463,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http", @@ -6445,6 +6549,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.40" @@ -6454,7 +6570,7 @@ dependencies = [ "aws-lc-rs", "once_cell", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] @@ -6492,10 +6608,10 
@@ dependencies = [ "jni", "log", "once_cell", - "rustls", + "rustls 0.23.40", "rustls-native-certs", "rustls-platform-verifier-android", - "rustls-webpki", + "rustls-webpki 0.103.13", "security-framework", "security-framework-sys", "webpki-root-certs", @@ -6508,6 +6624,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.13" @@ -6611,6 +6737,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "security-framework" version = "3.7.0" @@ -7738,13 +7874,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.40", "tokio", ] @@ -7823,7 +7969,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-timeout", "hyper-util", "percent-encoding", diff --git 
a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index 0b0f9ba6a..e0f80e245 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -8,6 +8,7 @@ edition.workspace = true [dependencies] anyhow = { workspace = true } +aws-sdk-sqs = "1.99.0" clap = { workspace = true } # NOTE: on the new infra, switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough crates-index = { version = "3.0.0", default-features = false, features = ["git", "git-https-reqwest", "git-performance", "parallel"] } From fddeb9a511f039ab0412e83738d25d253cd5f1f5 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 15:40:36 +0200 Subject: [PATCH 02/74] add shared subcrate for event types --- Cargo.lock | 13 +- crates/lib/docs_rs_crates_io/Cargo.toml | 19 +++ crates/lib/docs_rs_crates_io/src/events.rs | 184 +++++++++++++++++++++ crates/lib/docs_rs_crates_io/src/lib.rs | 1 + 4 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 crates/lib/docs_rs_crates_io/Cargo.toml create mode 100644 crates/lib/docs_rs_crates_io/src/events.rs create mode 100644 crates/lib/docs_rs_crates_io/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b1ceb81dd..7dc2708d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2152,6 +2152,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "docs_rs_crates_io" +version = "0.1.0" +dependencies = [ + "semver", + "serde", + "serde_json", +] + [[package]] name = "docs_rs_database" version = "0.0.0" @@ -6988,9 +6997,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa 1.0.18", "memchr", diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml 
b/crates/lib/docs_rs_crates_io/Cargo.toml new file mode 100644 index 000000000..d10606ee6 --- /dev/null +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "docs_rs_crates_io" +version = "0.1.0" +description = "types & logic for the direct integration between docs.rs & crates.io" + +authors.workspace = true +license.workspace = true +repository.workspace = true +edition.workspace = true + +[dependencies] +serde = { version = "1.0.228", features = ["derive"] } +semver = { version = "1.0.28", features = ["serde"] } + +[dev-dependencies] +serde_json = "1.0.150" + +[lints] +workspace = true diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs new file mode 100644 index 000000000..41a81dd17 --- /dev/null +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -0,0 +1,184 @@ +#![allow(clippy::disallowed_types)] + +use std::fmt; + +/// Identify a kind of change that occurred to a crate +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +#[serde(tag = "type", content = "payload", rename_all = "snake_case")] +pub enum Change { + /// A crate version was added. + Added(CrateVersion), + /// A crate version was unyanked. + Unyanked(CrateVersion), + /// A crate version was yanked. + Yanked(CrateVersion), + /// The name of the crate whose file was deleted, which implies all versions were deleted as well. + CrateDeleted { name: String }, + /// A crate version was deleted. + VersionDeleted(CrateVersion), +} + +impl Change { + /// Return the added crate, if this is this kind of change. + pub fn added(&self) -> Option<&CrateVersion> { + match self { + Change::Added(v) => Some(v), + _ => None, + } + } + + /// Return the yanked crate, if this is this kind of change. + pub fn yanked(&self) -> Option<&CrateVersion> { + match self { + Change::Yanked(v) => Some(v), + _ => None, + } + } + + /// Return the unyanked crate, if this is this kind of change. 
+ pub fn unyanked(&self) -> Option<&CrateVersion> { + match self { + Change::Unyanked(v) => Some(v), + _ => None, + } + } + + /// Return the deleted crate, if this is this kind of change. + pub fn crate_deleted(&self) -> Option<&str> { + match self { + Change::CrateDeleted { name, .. } => Some(name.as_str()), + _ => None, + } + } + + /// Return the deleted version crate, if this is this kind of change. + pub fn version_deleted(&self) -> Option<&CrateVersion> { + match self { + Change::VersionDeleted(v) => Some(v), + _ => None, + } + } +} + +impl fmt::Display for Change { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + Change::Added(_) => "added", + Change::Yanked(_) => "yanked", + Change::CrateDeleted { .. } => "crate deleted", + Change::VersionDeleted(_) => "version deleted", + Change::Unyanked(_) => "unyanked", + } + ) + } +} + +/// Pack all information we know about a change made to a version of a crate. +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +pub struct CrateVersion { + /// The crate name, i.e. `clap`. + pub name: String, + /// is the release yanked? + pub yanked: bool, + /// The semantic version of the crate. 
+ #[serde(rename = "vers")] + pub version: semver::Version, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn crate_version() -> CrateVersion { + CrateVersion { + name: "clap".into(), + yanked: false, + version: semver::Version::new(4, 5, 0), + } + } + + #[test] + fn crate_version_serializes_with_vers_field() { + let event = crate_version(); + + assert_eq!( + serde_json::to_value(&event).unwrap(), + json!({ + "name": "clap", + "yanked": false, + "vers": "4.5.0", + }) + ); + } + + #[test] + fn change_serializes_with_expected_variant_shapes() { + let crate_version = crate_version(); + + let cases = [ + ( + Change::Added(crate_version.clone()), + json!({ + "type": "added", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + Change::Unyanked(crate_version.clone()), + json!({ + "type": "unyanked", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + Change::Yanked(crate_version.clone()), + json!({ + "type": "yanked", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + Change::CrateDeleted { + name: "old-crate".into(), + }, + json!({ + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + }), + ), + ( + Change::VersionDeleted(crate_version), + json!({ + "type": "version_deleted", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ]; + + for (event, expected) in cases { + assert_eq!(serde_json::to_value(&event).unwrap(), expected); + } + } +} diff --git a/crates/lib/docs_rs_crates_io/src/lib.rs b/crates/lib/docs_rs_crates_io/src/lib.rs new file mode 100644 index 000000000..a9970c28f --- /dev/null +++ b/crates/lib/docs_rs_crates_io/src/lib.rs @@ -0,0 +1 @@ +pub mod events; From 87c438435e9996c40292844b35e68f542eb2488f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 16:14:48 +0200 Subject: [PATCH 03/74] feat(events): add event envelope metadata Wrap typed 
change payloads in a conventional event envelope with id, occurred_at, source, and schema_version. --- crates/lib/docs_rs_crates_io/src/events.rs | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 41a81dd17..d8eb10074 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -76,6 +76,22 @@ impl fmt::Display for Change { } } +/// A conventional event envelope for crate index changes. +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +pub struct Event { + /// Unique event identifier for deduplication and tracing. + pub id: String, + /// Timestamp when the underlying change occurred, as an RFC 3339 string. + pub occurred_at: String, + /// System that emitted the event. + pub source: String, + /// Version of the serialized event schema. + pub schema_version: u32, + /// The typed change payload. + #[serde(flatten)] + pub change: Change, +} + /// Pack all information we know about a change made to a version of a crate. 
#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] pub struct CrateVersion { @@ -101,6 +117,16 @@ mod tests { } } + fn event(change: Change) -> Event { + Event { + id: "evt_123".into(), + occurred_at: "2026-05-22T12:34:56Z".into(), + source: "crates-index".into(), + schema_version: 1, + change, + } + } + #[test] fn crate_version_serializes_with_vers_field() { let event = crate_version(); @@ -181,4 +207,25 @@ mod tests { assert_eq!(serde_json::to_value(&event).unwrap(), expected); } } + + #[test] + fn event_serializes_with_minimum_metadata() { + let event = event(Change::CrateDeleted { + name: "old-crate".into(), + }); + + assert_eq!( + serde_json::to_value(&event).unwrap(), + json!({ + "id": "evt_123", + "occurred_at": "2026-05-22T12:34:56Z", + "source": "crates-index", + "schema_version": 1, + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + }) + ); + } } From b7403da2686a3c44caf5ce4e0927f8bc2b63d8b8 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 16:25:14 +0200 Subject: [PATCH 04/74] refactor(events): version event payload types Rename the current wire payload to ChangeV1 and make the event envelope generic for future schema versions. --- crates/lib/docs_rs_crates_io/src/events.rs | 49 ++++++++++++---------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index d8eb10074..d2cd795d2 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -5,7 +5,7 @@ use std::fmt; /// Identify a kind of change that occurred to a crate #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] #[serde(tag = "type", content = "payload", rename_all = "snake_case")] -pub enum Change { +pub enum ChangeV1 { /// A crate version was added. Added(CrateVersion), /// A crate version was unyanked. 
@@ -18,11 +18,11 @@ pub enum Change { VersionDeleted(CrateVersion), } -impl Change { +impl ChangeV1 { /// Return the added crate, if this is this kind of change. pub fn added(&self) -> Option<&CrateVersion> { match self { - Change::Added(v) => Some(v), + ChangeV1::Added(v) => Some(v), _ => None, } } @@ -30,7 +30,7 @@ impl Change { /// Return the yanked crate, if this is this kind of change. pub fn yanked(&self) -> Option<&CrateVersion> { match self { - Change::Yanked(v) => Some(v), + ChangeV1::Yanked(v) => Some(v), _ => None, } } @@ -38,7 +38,7 @@ impl Change { /// Return the unyanked crate, if this is this kind of change. pub fn unyanked(&self) -> Option<&CrateVersion> { match self { - Change::Unyanked(v) => Some(v), + ChangeV1::Unyanked(v) => Some(v), _ => None, } } @@ -46,7 +46,7 @@ impl Change { /// Return the deleted crate, if this is this kind of change. pub fn crate_deleted(&self) -> Option<&str> { match self { - Change::CrateDeleted { name, .. } => Some(name.as_str()), + ChangeV1::CrateDeleted { name, .. } => Some(name.as_str()), _ => None, } } @@ -54,23 +54,23 @@ impl Change { /// Return the deleted version crate, if this is this kind of change. pub fn version_deleted(&self) -> Option<&CrateVersion> { match self { - Change::VersionDeleted(v) => Some(v), + ChangeV1::VersionDeleted(v) => Some(v), _ => None, } } } -impl fmt::Display for Change { +impl fmt::Display for ChangeV1 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{}", match *self { - Change::Added(_) => "added", - Change::Yanked(_) => "yanked", - Change::CrateDeleted { .. } => "crate deleted", - Change::VersionDeleted(_) => "version deleted", - Change::Unyanked(_) => "unyanked", + ChangeV1::Added(_) => "added", + ChangeV1::Yanked(_) => "yanked", + ChangeV1::CrateDeleted { .. 
} => "crate deleted", + ChangeV1::VersionDeleted(_) => "version deleted", + ChangeV1::Unyanked(_) => "unyanked", } ) } @@ -78,7 +78,7 @@ impl fmt::Display for Change { /// A conventional event envelope for crate index changes. #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] -pub struct Event { +pub struct Event<T> { /// Unique event identifier for deduplication and tracing. pub id: String, /// Timestamp when the underlying change occurred, as an RFC 3339 string. @@ -89,9 +89,12 @@ pub struct Event { pub schema_version: u32, /// The typed change payload. #[serde(flatten)] - pub change: Change, + pub change: T, } +/// The first version of the public event wire format. +pub type EventV1 = Event<ChangeV1>; + /// Pack all information we know about a change made to a version of a crate. #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] pub struct CrateVersion { @@ -117,8 +120,8 @@ mod tests { } } - fn event(change: Change) -> Event { - Event { + fn event(change: ChangeV1) -> EventV1 { + EventV1 { id: "evt_123".into(), occurred_at: "2026-05-22T12:34:56Z".into(), source: "crates-index".into(), @@ -147,7 +150,7 @@ mod tests { let cases = [ ( - Change::Added(crate_version.clone()), + ChangeV1::Added(crate_version.clone()), json!({ "type": "added", "payload": { @@ -158,7 +161,7 @@ mod tests { }), ), ( - Change::Unyanked(crate_version.clone()), + ChangeV1::Unyanked(crate_version.clone()), json!({ "type": "unyanked", "payload": { @@ -169,7 +172,7 @@ mod tests { }), ), ( - Change::Yanked(crate_version.clone()), + ChangeV1::Yanked(crate_version.clone()), json!({ "type": "yanked", "payload": { @@ -180,7 +183,7 @@ mod tests { }), ), ( - Change::CrateDeleted { + ChangeV1::CrateDeleted { name: "old-crate".into(), }, json!({ @@ -191,7 +194,7 @@ mod tests { }), ), ( - Change::VersionDeleted(crate_version), + ChangeV1::VersionDeleted(crate_version), json!({ "type": "version_deleted", "payload": { @@ -210,7 +213,7 @@ mod tests { #[test] fn
event_serializes_with_minimum_metadata() { - let event = event(Change::CrateDeleted { + let event = event(ChangeV1::CrateDeleted { name: "old-crate".into(), }); From e52cae891fa7d5f1349f613d7b50faf81b5add6f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 16:32:57 +0200 Subject: [PATCH 05/74] refactor(events): use typed event timestamps Remove event source metadata and store occurred_at as an RFC 3339 OffsetDateTime. --- Cargo.lock | 1 + crates/lib/docs_rs_crates_io/Cargo.toml | 1 + crates/lib/docs_rs_crates_io/src/events.rs | 39 ++++++++++++++++++---- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7dc2708d6..be33873ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2159,6 +2159,7 @@ dependencies = [ "semver", "serde", "serde_json", + "time", ] [[package]] diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml index d10606ee6..df404c935 100644 --- a/crates/lib/docs_rs_crates_io/Cargo.toml +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -11,6 +11,7 @@ edition.workspace = true [dependencies] serde = { version = "1.0.228", features = ["derive"] } semver = { version = "1.0.28", features = ["serde"] } +time = { version = "0.3.44", features = ["formatting", "parsing", "serde"] } [dev-dependencies] serde_json = "1.0.150" diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index d2cd795d2..34abfe92e 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -1,6 +1,7 @@ #![allow(clippy::disallowed_types)] use std::fmt; +use time::OffsetDateTime; /// Identify a kind of change that occurred to a crate #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] @@ -81,10 +82,9 @@ impl fmt::Display for ChangeV1 { pub struct Event<T> { /// Unique event identifier for deduplication and tracing.
pub id: String, - /// Timestamp when the underlying change occurred, as an RFC 3339 string. - pub occurred_at: String, - /// System that emitted the event. - pub source: String, + /// Timestamp when the underlying change occurred. + #[serde(with = "time::serde::rfc3339")] + pub occurred_at: OffsetDateTime, /// Version of the serialized event schema. pub schema_version: u32, /// The typed change payload. @@ -123,8 +123,11 @@ mod tests { fn event(change: ChangeV1) -> EventV1 { EventV1 { id: "evt_123".into(), - occurred_at: "2026-05-22T12:34:56Z".into(), - source: "crates-index".into(), + occurred_at: OffsetDateTime::parse( + "2026-05-22T12:34:56Z", + &time::format_description::well_known::Rfc3339, + ) + .unwrap(), schema_version: 1, change, } @@ -222,7 +225,6 @@ mod tests { json!({ "id": "evt_123", "occurred_at": "2026-05-22T12:34:56Z", - "source": "crates-index", "schema_version": 1, "type": "crate_deleted", "payload": { @@ -231,4 +233,27 @@ mod tests { }) ); } + + #[test] + fn event_deserializes_rfc3339_occurred_at() { + let event: EventV1 = serde_json::from_value(json!({ + "id": "evt_123", + "occurred_at": "2026-05-22T12:34:56Z", + "schema_version": 1, + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + })) + .unwrap(); + + assert_eq!( + event.occurred_at, + OffsetDateTime::parse( + "2026-05-22T12:34:56Z", + &time::format_description::well_known::Rfc3339, + ) + .unwrap() + ); + } } From 6b053a37c239139f875a6f8a64dcc3e62947597d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 16:35:11 +0200 Subject: [PATCH 06/74] refactor(events): use chrono timestamps Replace time::OffsetDateTime with chrono::DateTime for RFC 3339 event timestamps. 
--- Cargo.lock | 4 +++- crates/lib/docs_rs_crates_io/Cargo.toml | 2 +- crates/lib/docs_rs_crates_io/src/events.rs | 21 ++++++++------------- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be33873ff..d696e0ebd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1290,8 +1290,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link", ] @@ -2156,10 +2158,10 @@ dependencies = [ name = "docs_rs_crates_io" version = "0.1.0" dependencies = [ + "chrono", "semver", "serde", "serde_json", - "time", ] [[package]] diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml index df404c935..a513e14ac 100644 --- a/crates/lib/docs_rs_crates_io/Cargo.toml +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -9,9 +9,9 @@ repository.workspace = true edition.workspace = true [dependencies] +chrono = { version = "0.4.42", features = ["serde"] } serde = { version = "1.0.228", features = ["derive"] } semver = { version = "1.0.28", features = ["serde"] } -time = { version = "0.3.44", features = ["formatting", "parsing", "serde"] } [dev-dependencies] serde_json = "1.0.150" diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 34abfe92e..3dcf86760 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -1,7 +1,7 @@ #![allow(clippy::disallowed_types)] +use chrono::{DateTime, Utc}; use std::fmt; -use time::OffsetDateTime; /// Identify a kind of change that occurred to a crate #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] @@ -83,8 +83,7 @@ pub struct Event<T> { /// Unique event identifier for deduplication and tracing. pub id: String, /// Timestamp when the underlying change occurred.
- #[serde(with = "time::serde::rfc3339")] - pub occurred_at: OffsetDateTime, + pub occurred_at: DateTime<Utc>, /// Version of the serialized event schema. pub schema_version: u32, /// The typed change payload. @@ -123,11 +122,9 @@ mod tests { fn event(change: ChangeV1) -> EventV1 { EventV1 { id: "evt_123".into(), - occurred_at: OffsetDateTime::parse( - "2026-05-22T12:34:56Z", - &time::format_description::well_known::Rfc3339, - ) - .unwrap(), + occurred_at: DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") + .unwrap() + .with_timezone(&Utc), schema_version: 1, change, } @@ -249,11 +246,9 @@ mod tests { assert_eq!( event.occurred_at, - OffsetDateTime::parse( - "2026-05-22T12:34:56Z", - &time::format_description::well_known::Rfc3339, - ) - .unwrap() + DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") + .unwrap() + .with_timezone(&Utc) ); } } From 0b7dd8da0419b32bbc47c840b9802582e7dfd2df Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 17:03:56 +0200 Subject: [PATCH 07/74] wider deps --- crates/lib/docs_rs_crates_io/Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml index a513e14ac..96b373c89 100644 --- a/crates/lib/docs_rs_crates_io/Cargo.toml +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -9,12 +9,12 @@ repository.workspace = true edition.workspace = true [dependencies] -chrono = { version = "0.4.42", features = ["serde"] } -serde = { version = "1.0.228", features = ["derive"] } -semver = { version = "1.0.28", features = ["serde"] } +chrono = { version = "0.4", features = ["serde"] } +serde = { version = "1", features = ["derive"] } +semver = { version = "1", features = ["serde"] } [dev-dependencies] -serde_json = "1.0.150" +serde_json = "1.0" [lints] workspace = true From 277ea22f63faa20ff8cf8e141da148913effdf24 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 18:33:43 +0200 Subject: [PATCH 08/74] fix(watcher):
make version delete idempotent Treat duplicate version deletion events as a no-op so temporary event-based handling can safely replay them. --- crates/bin/docs_rs_watcher/src/db/delete.rs | 36 +++++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/db/delete.rs b/crates/bin/docs_rs_watcher/src/db/delete.rs index dbfa0e58e..742bbb634 100644 --- a/crates/bin/docs_rs_watcher/src/db/delete.rs +++ b/crates/bin/docs_rs_watcher/src/db/delete.rs @@ -170,14 +170,18 @@ async fn delete_version_from_database( format!("DELETE FROM {table} WHERE {column} IN (SELECT id FROM releases WHERE crate_id = $1 AND version = $2)").as_str()) .bind(crate_id).bind(version).execute(&mut *transaction).await?; } - let is_library: bool = sqlx::query_scalar!( + let Some(is_library) = sqlx::query_scalar!( "DELETE FROM releases WHERE crate_id = $1 AND version = $2 RETURNING is_library", crate_id.0, version as _, ) - .fetch_one(&mut *transaction) + .fetch_optional(&mut *transaction) .await? 
- .unwrap_or(false); + else { + transaction.commit().await?; + return Ok(false); + }; + let is_library = is_library.unwrap_or(false); sqlx::query!( "DELETE FROM queue WHERE name = $1 AND version = $2;", @@ -690,6 +694,32 @@ mod tests { Ok(()) } + #[tokio::test(flavor = "multi_thread")] + async fn test_delete_already_deleted_version_doesnt_error() -> Result<()> { + let env = TestEnvironment::new().await?; + let mut conn = env.async_conn().await?; + + env.fake_release() + .await + .name(&KRATE) + .version(V1) + .create() + .await?; + env.fake_release() + .await + .name(&KRATE) + .version(V2) + .create() + .await?; + + delete_version(&mut conn, env.storage()?, env.config(), &KRATE, &V1).await?; + delete_version(&mut conn, env.storage()?, env.config(), &KRATE, &V1).await?; + + assert!(crate_exists(&mut conn, &KRATE).await?); + + Ok(()) + } + #[tokio::test(flavor = "multi_thread")] async fn test_delete_version_waits_for_locked_queue_rows() -> Result<()> { let env = TestEnvironment::new().await?; From cb7453f3948d00a19834c4e3f9ac2d25605ea2bc Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 18:56:01 +0200 Subject: [PATCH 09/74] feat(watcher): add SQS config Add watcher config fields for an SQS queue URL and region to support an event-based path. 
--- crates/bin/docs_rs_watcher/src/config.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 7b5f17976..597549f29 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -7,6 +7,8 @@ use std::{path::PathBuf, time::Duration}; pub struct Config { pub registry_index_path: PathBuf, pub registry_url: Option, + pub sqs_queue_url: Option, + pub sqs_region: Option, /// How long to wait between registry checks pub delay_between_registry_fetches: Duration, @@ -29,6 +31,8 @@ impl AppConfig for Config { Ok(Self { registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?, registry_url: maybe_env("REGISTRY_URL")?, + sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, + sqs_region: maybe_env("DOCSRS_SQS_REGION")?, delay_between_registry_fetches: Duration::from_secs(env::( "DOCSRS_DELAY_BETWEEN_REGISTRY_FETCHES", 60, From de207e532af1fea82d5d78718a3e4723cba48930 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 18:56:48 +0200 Subject: [PATCH 10/74] refactor(watcher): parse SQS queue URL Use url::Url for the watcher SQS queue URL config so invalid values fail during config loading. 
--- crates/bin/docs_rs_watcher/Cargo.toml | 1 + crates/bin/docs_rs_watcher/src/config.rs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index e0f80e245..38eb11199 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -33,6 +33,7 @@ rayon = "1.6.1" sqlx = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } +url = { workspace = true } [dev-dependencies] docs_rs_config = { path = "../../lib/docs_rs_config", features = ["testing"] } diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 597549f29..404ade8f5 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -2,12 +2,13 @@ use anyhow::Result; use docs_rs_config::AppConfig; use docs_rs_env_vars::{env, maybe_env, require_env}; use std::{path::PathBuf, time::Duration}; +use url::Url; #[derive(Debug)] pub struct Config { pub registry_index_path: PathBuf, pub registry_url: Option, - pub sqs_queue_url: Option, + pub sqs_queue_url: Option, pub sqs_region: Option, /// How long to wait between registry checks From b311a599d391230dc6d882de28b9089ec9b5bd6b Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 May 2026 18:57:05 +0200 Subject: [PATCH 11/74] chore(lockfile): record watcher url dep Update Cargo.lock after making docs_rs_watcher depend directly on url. 
--- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index d696e0ebd..dfa12b8ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2529,6 +2529,7 @@ dependencies = [ "test-case", "tokio", "tracing", + "url", ] [[package]] From e84c343e18c6b8d7c9aaebe9c93d18800f44572c Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 23 May 2026 08:15:59 +0200 Subject: [PATCH 12/74] refactor(events): drop schema version Remove the redundant schema_version field from the crates.io event envelope and keep versioning in the typed payloads. --- crates/lib/docs_rs_crates_io/src/events.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 3dcf86760..5ba9b4abc 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -84,8 +84,6 @@ pub struct Event { pub id: String, /// Timestamp when the underlying change occurred. pub occurred_at: DateTime, - /// Version of the serialized event schema. - pub schema_version: u32, /// The typed change payload. 
#[serde(flatten)] pub change: T, @@ -125,7 +123,6 @@ mod tests { occurred_at: DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") .unwrap() .with_timezone(&Utc), - schema_version: 1, change, } } @@ -222,7 +219,6 @@ mod tests { json!({ "id": "evt_123", "occurred_at": "2026-05-22T12:34:56Z", - "schema_version": 1, "type": "crate_deleted", "payload": { "name": "old-crate" @@ -236,7 +232,6 @@ mod tests { let event: EventV1 = serde_json::from_value(json!({ "id": "evt_123", "occurred_at": "2026-05-22T12:34:56Z", - "schema_version": 1, "type": "crate_deleted", "payload": { "name": "old-crate" From 1e374cf4bfcfd5bdaf51ef8c8bc91e6e063f2b8b Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 23 May 2026 08:17:50 +0200 Subject: [PATCH 13/74] renames --- crates/lib/docs_rs_crates_io/src/events.rs | 52 +++++++++++----------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 5ba9b4abc..46f43b11a 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -6,7 +6,7 @@ use std::fmt; /// Identify a kind of change that occurred to a crate #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] #[serde(tag = "type", content = "payload", rename_all = "snake_case")] -pub enum ChangeV1 { +pub enum IndexChangeV1 { /// A crate version was added. Added(CrateVersion), /// A crate version was unyanked. @@ -19,11 +19,11 @@ pub enum ChangeV1 { VersionDeleted(CrateVersion), } -impl ChangeV1 { +impl IndexChangeV1 { /// Return the added crate, if this is this kind of change. pub fn added(&self) -> Option<&CrateVersion> { match self { - ChangeV1::Added(v) => Some(v), + IndexChangeV1::Added(v) => Some(v), _ => None, } } @@ -31,7 +31,7 @@ impl ChangeV1 { /// Return the yanked crate, if this is this kind of change. 
pub fn yanked(&self) -> Option<&CrateVersion> { match self { - ChangeV1::Yanked(v) => Some(v), + IndexChangeV1::Yanked(v) => Some(v), _ => None, } } @@ -39,7 +39,7 @@ impl ChangeV1 { /// Return the unyanked crate, if this is this kind of change. pub fn unyanked(&self) -> Option<&CrateVersion> { match self { - ChangeV1::Unyanked(v) => Some(v), + IndexChangeV1::Unyanked(v) => Some(v), _ => None, } } @@ -47,7 +47,7 @@ impl ChangeV1 { /// Return the deleted crate, if this is this kind of change. pub fn crate_deleted(&self) -> Option<&str> { match self { - ChangeV1::CrateDeleted { name, .. } => Some(name.as_str()), + IndexChangeV1::CrateDeleted { name, .. } => Some(name.as_str()), _ => None, } } @@ -55,42 +55,42 @@ impl ChangeV1 { /// Return the deleted version crate, if this is this kind of change. pub fn version_deleted(&self) -> Option<&CrateVersion> { match self { - ChangeV1::VersionDeleted(v) => Some(v), + IndexChangeV1::VersionDeleted(v) => Some(v), _ => None, } } } -impl fmt::Display for ChangeV1 { +impl fmt::Display for IndexChangeV1 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{}", match *self { - ChangeV1::Added(_) => "added", - ChangeV1::Yanked(_) => "yanked", - ChangeV1::CrateDeleted { .. } => "crate deleted", - ChangeV1::VersionDeleted(_) => "version deleted", - ChangeV1::Unyanked(_) => "unyanked", + IndexChangeV1::Added(_) => "added", + IndexChangeV1::Yanked(_) => "yanked", + IndexChangeV1::CrateDeleted { .. } => "crate deleted", + IndexChangeV1::VersionDeleted(_) => "version deleted", + IndexChangeV1::Unyanked(_) => "unyanked", } ) } } -/// A conventional event envelope for crate index changes. +/// A conventional event envelope for our events between crates.io & docs.rs #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] pub struct Event { /// Unique event identifier for deduplication and tracing. pub id: String, - /// Timestamp when the underlying change occurred. 
+ /// Timestamp when the event occured pub occurred_at: DateTime, - /// The typed change payload. + /// The typed payload. #[serde(flatten)] pub change: T, } /// The first version of the public event wire format. -pub type EventV1 = Event; +pub type IndexChangeEventV1 = Event; /// Pack all information we know about a change made to a version of a crate. #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] @@ -117,8 +117,8 @@ mod tests { } } - fn event(change: ChangeV1) -> EventV1 { - EventV1 { + fn event(change: IndexChangeV1) -> IndexChangeEventV1 { + IndexChangeEventV1 { id: "evt_123".into(), occurred_at: DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") .unwrap() @@ -147,7 +147,7 @@ mod tests { let cases = [ ( - ChangeV1::Added(crate_version.clone()), + IndexChangeV1::Added(crate_version.clone()), json!({ "type": "added", "payload": { @@ -158,7 +158,7 @@ mod tests { }), ), ( - ChangeV1::Unyanked(crate_version.clone()), + IndexChangeV1::Unyanked(crate_version.clone()), json!({ "type": "unyanked", "payload": { @@ -169,7 +169,7 @@ mod tests { }), ), ( - ChangeV1::Yanked(crate_version.clone()), + IndexChangeV1::Yanked(crate_version.clone()), json!({ "type": "yanked", "payload": { @@ -180,7 +180,7 @@ mod tests { }), ), ( - ChangeV1::CrateDeleted { + IndexChangeV1::CrateDeleted { name: "old-crate".into(), }, json!({ @@ -191,7 +191,7 @@ mod tests { }), ), ( - ChangeV1::VersionDeleted(crate_version), + IndexChangeV1::VersionDeleted(crate_version), json!({ "type": "version_deleted", "payload": { @@ -210,7 +210,7 @@ mod tests { #[test] fn event_serializes_with_minimum_metadata() { - let event = event(ChangeV1::CrateDeleted { + let event = event(IndexChangeV1::CrateDeleted { name: "old-crate".into(), }); @@ -229,7 +229,7 @@ mod tests { #[test] fn event_deserializes_rfc3339_occurred_at() { - let event: EventV1 = serde_json::from_value(json!({ + let event: IndexChangeEventV1 = serde_json::from_value(json!({ "id": "evt_123", "occurred_at": 
"2026-05-22T12:34:56Z", "type": "crate_deleted", From bb4d8456ea0a98fa7151c98809d9b18f5bd89112 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 23 May 2026 08:22:02 +0200 Subject: [PATCH 14/74] some cleanup --- crates/lib/docs_rs_crates_io/src/events.rs | 44 +--------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 46f43b11a..f01933db6 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -3,7 +3,7 @@ use chrono::{DateTime, Utc}; use std::fmt; -/// Identify a kind of change that occurred to a crate +/// A change that can happen to a crate on our index. #[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] #[serde(tag = "type", content = "payload", rename_all = "snake_case")] pub enum IndexChangeV1 { @@ -19,48 +19,6 @@ pub enum IndexChangeV1 { VersionDeleted(CrateVersion), } -impl IndexChangeV1 { - /// Return the added crate, if this is this kind of change. - pub fn added(&self) -> Option<&CrateVersion> { - match self { - IndexChangeV1::Added(v) => Some(v), - _ => None, - } - } - - /// Return the yanked crate, if this is this kind of change. - pub fn yanked(&self) -> Option<&CrateVersion> { - match self { - IndexChangeV1::Yanked(v) => Some(v), - _ => None, - } - } - - /// Return the unyanked crate, if this is this kind of change. - pub fn unyanked(&self) -> Option<&CrateVersion> { - match self { - IndexChangeV1::Unyanked(v) => Some(v), - _ => None, - } - } - - /// Return the deleted crate, if this is this kind of change. - pub fn crate_deleted(&self) -> Option<&str> { - match self { - IndexChangeV1::CrateDeleted { name, .. } => Some(name.as_str()), - _ => None, - } - } - - /// Return the deleted version crate, if this is this kind of change. 
- pub fn version_deleted(&self) -> Option<&CrateVersion> { - match self { - IndexChangeV1::VersionDeleted(v) => Some(v), - _ => None, - } - } -} - impl fmt::Display for IndexChangeV1 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( From c0436b166914cf1326d6e7104961ea9a477e0378 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 23 May 2026 09:22:13 +0200 Subject: [PATCH 15/74] no rustls --- Cargo.lock | 158 +++++--------------------- crates/bin/docs_rs_watcher/Cargo.toml | 2 +- 2 files changed, 27 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dfa12b8ed..cf62a1150 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -762,23 +762,17 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2 0.3.27", - "h2 0.4.14", - "http 0.2.12", + "h2", "http 1.4.0", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper 1.9.0", - "hyper-rustls 0.24.2", - "hyper-rustls 0.27.9", + "hyper", + "hyper-rustls", "hyper-util", "pin-project-lite", - "rustls 0.21.12", - "rustls 0.23.40", + "rustls", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower", "tracing", ] @@ -962,7 +956,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", + "hyper", "hyper-util", "itoa 1.0.18", "matchit", @@ -4005,25 +3999,6 @@ dependencies = [ "phf 0.11.3", ] -[[package]] -name = "h2" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap 2.14.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "h2" version = "0.4.14" @@ -4319,30 +4294,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa 1.0.18", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "hyper" version = "1.9.0" @@ -4353,7 +4304,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2 0.4.14", + "h2", "http 1.4.0", "http-body 1.0.1", "httparse", @@ -4365,21 +4316,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.32", - "log", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.27.9" @@ -4387,12 +4323,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ "http 1.4.0", - "hyper 1.9.0", + "hyper", "hyper-util", - "rustls 0.23.40", + "rustls", "rustls-native-certs", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower-service", ] @@ -4402,7 +4338,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.9.0", + "hyper", "hyper-util", "pin-project-lite", "tokio", @@ -4417,7 +4353,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.9.0", + "hyper", "hyper-util", "native-tls", "tokio", @@ -4437,7 +4373,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper 1.9.0", + "hyper", "ipnet", "libc", "percent-encoding", @@ 
-5097,7 +5033,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", + "hyper", "hyper-util", "log", "pin-project-lite", @@ -6142,7 +6078,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.40", + "rustls", "socket2 0.6.3", "thiserror", "tokio", @@ -6163,7 +6099,7 @@ dependencies = [ "rand 0.9.4", "ring", "rustc-hash", - "rustls 0.23.40", + "rustls", "rustls-pki-types", "slab", "thiserror", @@ -6453,12 +6389,12 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.4.14", + "h2", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", - "hyper-rustls 0.27.9", + "hyper", + "hyper-rustls", "hyper-tls", "hyper-util", "js-sys", @@ -6468,7 +6404,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.40", + "rustls", "rustls-pki-types", "rustls-platform-verifier", "serde", @@ -6476,7 +6412,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-util", "tower", "tower-http", @@ -6562,18 +6498,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - [[package]] name = "rustls" version = "0.23.40" @@ -6583,7 +6507,7 @@ dependencies = [ "aws-lc-rs", "once_cell", "rustls-pki-types", - "rustls-webpki 0.103.13", + "rustls-webpki", "subtle", "zeroize", ] @@ -6621,10 +6545,10 @@ dependencies = [ "jni", "log", "once_cell", - "rustls 0.23.40", + "rustls", "rustls-native-certs", "rustls-platform-verifier-android", - "rustls-webpki 0.103.13", + "rustls-webpki", "security-framework", "security-framework-sys", "webpki-root-certs", @@ -6637,16 +6561,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustls-webpki" version = "0.103.13" @@ -6750,16 +6664,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "security-framework" version = "3.7.0" @@ -7887,23 +7791,13 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.40", + "rustls", "tokio", ] @@ -7982,7 +7876,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", + "hyper", "hyper-timeout", "hyper-util", "percent-encoding", diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index 38eb11199..caeaefa97 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -8,7 +8,7 @@ edition.workspace = true [dependencies] anyhow = { workspace = true } -aws-sdk-sqs = "1.99.0" +aws-sdk-sqs = { version = "1.99.0", default-features = false, features = 
["default-https-client", "rt-tokio"] } clap = { workspace = true } # NOTE: on the new infra, switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough crates-index = { version = "3.0.0", default-features = false, features = ["git", "git-https-reqwest", "git-performance", "parallel"] } From 65af70e4b5519015b60215c419174f746f0f9699 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 3 Jun 2026 22:36:11 +0200 Subject: [PATCH 16/74] add docs_rs_crates_io subcrate for interaction / shared types --- Cargo.lock | 12 + crates/lib/docs_rs_crates_io/Cargo.toml | 20 ++ crates/lib/docs_rs_crates_io/src/events.rs | 249 +++++++++++++++++++++ crates/lib/docs_rs_crates_io/src/lib.rs | 1 + 4 files changed, 282 insertions(+) create mode 100644 crates/lib/docs_rs_crates_io/Cargo.toml create mode 100644 crates/lib/docs_rs_crates_io/src/events.rs create mode 100644 crates/lib/docs_rs_crates_io/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b65c61ca8..a46048b74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1235,8 +1235,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link", ] @@ -2077,6 +2079,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "docs_rs_crates_io" +version = "0.1.0" +dependencies = [ + "chrono", + "semver", + "serde", + "serde_json", +] + [[package]] name = "docs_rs_database" version = "0.0.0" diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml new file mode 100644 index 000000000..c6f9224b3 --- /dev/null +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "docs_rs_crates_io" +version = "0.1.0" +description = "types & logic for the direct integration between docs.rs & crates.io" + +authors.workspace = true +license.workspace = true +repository.workspace = 
true +edition.workspace = true + +[dependencies] +chrono = { version = "0.4", features = ["serde"] } +semver = { version = "1", features = ["serde"] } +serde = { version = "1", features = ["derive"] } + +[dev-dependencies] +serde_json = "1.0" + +[lints] +workspace = true diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs new file mode 100644 index 000000000..12cc5170b --- /dev/null +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -0,0 +1,249 @@ +#![allow(clippy::disallowed_types)] + +use chrono::{DateTime, Utc}; +use std::fmt; + +/// A change that can happen to a crate on our index. +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +#[serde(tag = "type", content = "payload", rename_all = "snake_case")] +pub enum IndexChangeV1 { + /// A crate version was added. + Added(CrateVersion), + /// A crate version was unyanked. + Unyanked(CrateVersion), + /// A crate version was yanked. + Yanked(CrateVersion), + /// The name of the crate whose file was deleted, which implies all versions were deleted as well. + CrateDeleted { name: String }, + /// A crate version was deleted. + VersionDeleted(CrateVersion), +} + +impl IndexChangeV1 { + /// Return the added crate, if this is this kind of change. + pub fn added(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::Added(v) => Some(v), + _ => None, + } + } + + /// Return the yanked crate, if this is this kind of change. + pub fn yanked(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::Yanked(v) => Some(v), + _ => None, + } + } + + /// Return the unyanked crate, if this is this kind of change. + pub fn unyanked(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::Unyanked(v) => Some(v), + _ => None, + } + } + + /// Return the deleted crate, if this is this kind of change. 
+ pub fn crate_deleted(&self) -> Option<&str> { + match self { + IndexChangeV1::CrateDeleted { name } => Some(name.as_str()), + _ => None, + } + } + + /// Return the deleted version crate, if this is this kind of change. + pub fn version_deleted(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::VersionDeleted(v) => Some(v), + _ => None, + } + } +} + +impl fmt::Display for IndexChangeV1 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + IndexChangeV1::Added(_) => "added", + IndexChangeV1::Yanked(_) => "yanked", + IndexChangeV1::CrateDeleted { .. } => "crate deleted", + IndexChangeV1::VersionDeleted(_) => "version deleted", + IndexChangeV1::Unyanked(_) => "unyanked", + } + ) + } +} + +/// A conventional event envelope for our events between crates.io & docs.rs +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +pub struct Event { + /// Unique event identifier for deduplication and tracing. + pub id: String, + /// Timestamp when the event occured + pub occurred_at: DateTime, + /// The typed payload. + #[serde(flatten)] + pub change: T, +} + +/// The first version of the public event wire format. +pub type IndexChangeEventV1 = Event; + +/// Pack all information we know about a change made to a version of a crate. +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +pub struct CrateVersion { + /// The crate name, i.e. `clap`. + pub name: String, + /// is the release yanked? + pub yanked: bool, + /// The semantic version of the crate. 
+ #[serde(rename = "vers")] + pub version: semver::Version, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn crate_version() -> CrateVersion { + CrateVersion { + name: "clap".into(), + yanked: false, + version: semver::Version::new(4, 5, 0), + } + } + + fn event(change: IndexChangeV1) -> IndexChangeEventV1 { + IndexChangeEventV1 { + id: "evt_123".into(), + occurred_at: DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") + .unwrap() + .with_timezone(&Utc), + change, + } + } + + #[test] + fn crate_version_serializes_with_vers_field() { + let event = crate_version(); + + assert_eq!( + serde_json::to_value(&event).unwrap(), + json!({ + "name": "clap", + "yanked": false, + "vers": "4.5.0", + }) + ); + } + + #[test] + fn change_serializes_with_expected_variant_shapes() { + let crate_version = crate_version(); + + let cases = [ + ( + IndexChangeV1::Added(crate_version.clone()), + json!({ + "type": "added", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + IndexChangeV1::Unyanked(crate_version.clone()), + json!({ + "type": "unyanked", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + IndexChangeV1::Yanked(crate_version.clone()), + json!({ + "type": "yanked", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + IndexChangeV1::CrateDeleted { + name: "old-crate".into(), + }, + json!({ + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + }), + ), + ( + IndexChangeV1::VersionDeleted(crate_version), + json!({ + "type": "version_deleted", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ]; + + for (event, expected) in cases { + assert_eq!(serde_json::to_value(&event).unwrap(), expected); + } + } + + #[test] + fn event_serializes_with_minimum_metadata() { + let event = event(IndexChangeV1::CrateDeleted { + name: "old-crate".into(), + }); + + assert_eq!( + 
serde_json::to_value(&event).unwrap(), + json!({ + "id": "evt_123", + "occurred_at": "2026-05-22T12:34:56Z", + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + }) + ); + } + + #[test] + fn event_deserializes_rfc3339_occurred_at() { + let event: IndexChangeEventV1 = serde_json::from_value(json!({ + "id": "evt_123", + "occurred_at": "2026-05-22T12:34:56Z", + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + })) + .unwrap(); + + assert_eq!( + event.occurred_at, + DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") + .unwrap() + .with_timezone(&Utc) + ); + } +} diff --git a/crates/lib/docs_rs_crates_io/src/lib.rs b/crates/lib/docs_rs_crates_io/src/lib.rs new file mode 100644 index 000000000..a9970c28f --- /dev/null +++ b/crates/lib/docs_rs_crates_io/src/lib.rs @@ -0,0 +1 @@ +pub mod events; From fed546f2f27c1e98e47d03e25b22934a9a01b261 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 13 Jun 2026 03:55:39 +0200 Subject: [PATCH 17/74] sort --- crates/lib/docs_rs_crates_io/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml index 96b373c89..c6f9224b3 100644 --- a/crates/lib/docs_rs_crates_io/Cargo.toml +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -10,8 +10,8 @@ edition.workspace = true [dependencies] chrono = { version = "0.4", features = ["serde"] } -serde = { version = "1", features = ["derive"] } semver = { version = "1", features = ["serde"] } +serde = { version = "1", features = ["derive"] } [dev-dependencies] serde_json = "1.0" From f056b427b9ac842fe577fbe8df2d2372ad4a86a6 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 13 Jun 2026 04:03:26 +0200 Subject: [PATCH 18/74] save --- Cargo.lock | 39 +++++++++++++++++---- crates/bin/docs_rs_watcher/src/db/delete.rs | 14 ++++++++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a46048b74..2ef05ee9a 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -537,9 +537,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.4" +version = "1.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ed8e8c52d2dc2390ad9f15647fe663f71e9780b4262c190fbb823a32721566" +checksum = "6c9b9de216a988dd54b754a82a7660cfe14cee4f6782ae4524470972fa0ccb39" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -599,6 +599,31 @@ dependencies = [ "url", ] +[[package]] +name = "aws-sdk-sqs" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0246bf049cfc003ce44599dff955b9353758de3afa68a053da9b2c7de20a07d8" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.1", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-sts" version = "1.106.0" @@ -682,9 +707,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.20" +version = "0.60.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faf09d74e5e32f76b8762da505a3cd59303e367a664ca67295387baa8c1d7548" +checksum = "78d8391e65fcea47c586a22e1a41f173b38615b112b2c6b7a44e80cec3e6b706" dependencies = [ "aws-smithy-types", "bytes", @@ -835,9 +860,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.9" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f93074121a1be41317b9aa607143ae17900631f7f59a99f2b905d519d6783b" +checksum = "32b42fcf341259d85ca10fac9a2f6448a8ec691c6955a18e45bc3b71a85fab85" dependencies = [ "base64-simd", "bytes", @@ -2428,6 +2453,7 @@ name = "docs_rs_watcher" version = "0.6.0" dependencies = [ "anyhow", + "aws-sdk-sqs", "clap", "crates-index", "crates-index-diff", @@ -2454,6 
+2480,7 @@ dependencies = [ "test-case", "tokio", "tracing", + "url", ] [[package]] diff --git a/crates/bin/docs_rs_watcher/src/db/delete.rs b/crates/bin/docs_rs_watcher/src/db/delete.rs index c65fa7a6b..40cd19341 100644 --- a/crates/bin/docs_rs_watcher/src/db/delete.rs +++ b/crates/bin/docs_rs_watcher/src/db/delete.rs @@ -453,6 +453,13 @@ mod tests { ); } + // running delete-crate again doesn't error. + assert!( + delete_crate(&mut conn, storage, env.config(), &FOO) + .await + .is_ok() + ); + Ok(()) } @@ -617,6 +624,13 @@ mod tests { vec!["Peter Rabbit".to_string()] ); + // running delete-version again doesn't fail. + assert!( + delete_version(&mut conn, storage, env.config(), &KRATE, &V1) + .await + .is_ok() + ); + // FIXME: remove for now until test frontend is async // let web = env.frontend(); // assert_success("/a/2.0.0/a/", web)?; From d0417990a4f10986d2ee4c04fa2e88240bc34dea Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 13 Jun 2026 04:12:16 +0200 Subject: [PATCH 19/74] make deletes repeatable --- crates/bin/docs_rs_watcher/src/db/delete.rs | 53 +++++++++++++-------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/db/delete.rs b/crates/bin/docs_rs_watcher/src/db/delete.rs index 40cd19341..4b97be297 100644 --- a/crates/bin/docs_rs_watcher/src/db/delete.rs +++ b/crates/bin/docs_rs_watcher/src/db/delete.rs @@ -67,7 +67,13 @@ pub async fn delete_version( return Ok(()); }; - let is_library = delete_version_from_database(conn, config, name, crate_id, version).await?; + let Some(is_library) = + delete_version_from_database(conn, config, name, crate_id, version).await? 
+ else { + // release doesn't exist + return Ok(()); + }; + let paths = if is_library { LIBRARY_STORAGE_PATHS_TO_DELETE } else { @@ -133,7 +139,18 @@ async fn delete_version_from_database( name: &KrateName, crate_id: CrateId, version: &Version, -) -> Result { +) -> Result> { + let Some(release_id) = sqlx::query_scalar!( + "SELECT id FROM releases WHERE crate_id = $1 AND version = $2", + crate_id as _, + version as _ + ) + .fetch_optional(&mut *conn) + .await? + else { + return Ok(None); + }; + let mut transaction = conn.begin().await?; let delete_lock_timeout = format!("{}ms", config.delete_lock_timeout.as_millis()); @@ -157,31 +174,27 @@ async fn delete_version_from_database( sqlx::query!( "DELETE FROM builds_logs bl USING builds b - JOIN releases r ON b.rid = r.id - WHERE bl.build_id = b.id AND r.crate_id = $1 AND r.version = $2;", - crate_id as _, - version as _ + WHERE bl.build_id = b.id AND b.rid = $1;", + release_id as _, ) .execute(&mut *transaction) .await?; for &(table, column) in METADATA { - sqlx::query(sqlx::AssertSqlSafe( - format!("DELETE FROM {table} WHERE {column} IN (SELECT id FROM releases WHERE crate_id = $1 AND version = $2)"))) - .bind(crate_id).bind(version).execute(&mut *transaction).await?; + sqlx::query(sqlx::AssertSqlSafe(format!( + "DELETE FROM {table} WHERE {column} = $1" + ))) + .bind(release_id) + .execute(&mut *transaction) + .await?; } - let Some(is_library) = sqlx::query_scalar!( - "DELETE FROM releases WHERE crate_id = $1 AND version = $2 RETURNING is_library", - crate_id.0, - version as _, + let is_library: bool = sqlx::query_scalar!( + "DELETE FROM releases WHERE id = $1 RETURNING is_library", + release_id as _, ) - .fetch_optional(&mut *transaction) + .fetch_one(&mut *transaction) .await? 
- else { - transaction.commit().await?; - return Ok(false); - }; - let is_library = is_library.unwrap_or(false); + .unwrap_or(false); sqlx::query!( "DELETE FROM queue WHERE name = $1 AND version = $2;", @@ -194,7 +207,7 @@ async fn delete_version_from_database( update_latest_version_id(&mut transaction, crate_id).await?; transaction.commit().await?; - Ok(is_library) + Ok(Some(is_library)) } /// Returns whether any release in this crate was a library From e1f4467e1a3eace5b2e0006983d6d34bec98a421 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 13 Jun 2026 05:31:14 +0200 Subject: [PATCH 20/74] first version --- Cargo.lock | 3 + Cargo.toml | 5 + crates/bin/docs_rs_watcher/Cargo.toml | 3 + crates/bin/docs_rs_watcher/src/config.rs | 4 + .../bin/docs_rs_watcher/src/index_watcher.rs | 57 +++++- crates/bin/docs_rs_watcher/src/lib.rs | 10 +- crates/bin/docs_rs_watcher/src/main.rs | 8 +- crates/bin/docs_rs_watcher/src/subscriber.rs | 192 ++++++++++++++++++ .../docs_rs_watcher/src/synchronization.rs | 27 +++ crates/lib/docs_rs_crates_io/src/events.rs | 10 + crates/lib/docs_rs_storage/Cargo.toml | 6 +- 11 files changed, 307 insertions(+), 18 deletions(-) create mode 100644 crates/bin/docs_rs_watcher/src/subscriber.rs create mode 100644 crates/bin/docs_rs_watcher/src/synchronization.rs diff --git a/Cargo.lock b/Cargo.lock index 2ef05ee9a..da131d79d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2453,6 +2453,7 @@ name = "docs_rs_watcher" version = "0.6.0" dependencies = [ "anyhow", + "aws-config", "aws-sdk-sqs", "clap", "crates-index", @@ -2460,6 +2461,7 @@ dependencies = [ "docs_rs_build_queue", "docs_rs_config", "docs_rs_context", + "docs_rs_crates_io", "docs_rs_database", "docs_rs_env_vars", "docs_rs_fastly", @@ -2476,6 +2478,7 @@ dependencies = [ "opentelemetry", "pretty_assertions", "rayon", + "serde_json", "sqlx", "test-case", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 1d178d433..3f7d1a58b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,11 @@ edition = 
"2024" anyhow = { version = "1.0.42", features = ["backtrace"] } askama = "0.16.0" async-stream = "0.3.5" +# The default `rustls` feature pulls in the legacy hyper 0.14 + rustls 0.21 +# stack via `aws-smithy-runtime/tls-rustls`, which includes the vulnerable +# `rustls-webpki` v0.101.x. Using only `default-https-client` avoids this by +# using the modern rustls 0.23 + hyper 1.x stack instead. +aws-config = { version = "1.0.0", default-features = false, features = ["default-https-client", "rt-tokio"] } axum-extra = { version = "0.12.0", features = ["middleware", "routing", "typed-header"] } base64 = "0.22" bon = { version = "3.8.1", features = ["experimental-overwritable"] } diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index caeaefa97..4b24a2d55 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -8,6 +8,7 @@ edition.workspace = true [dependencies] anyhow = { workspace = true } +aws-config = { workspace = true } aws-sdk-sqs = { version = "1.99.0", default-features = false, features = ["default-https-client", "rt-tokio"] } clap = { workspace = true } # NOTE: on the new infra, switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough @@ -16,6 +17,7 @@ crates-index = { version = "3.0.0", default-features = false, features = ["git", crates-index-diff = { version = "30.0.0", default-features = false, features = ["http-reqwest", "max-performance", "semver"] } docs_rs_build_queue = { path = "../../lib/docs_rs_build_queue" } docs_rs_config = { path = "../../lib/docs_rs_config" } +docs_rs_crates_io = { path = "../../lib/docs_rs_crates_io" } docs_rs_context = { path = "../../lib/docs_rs_context" } docs_rs_database = { path = "../../lib/docs_rs_database" } docs_rs_env_vars = { path = "../../lib/docs_rs_env_vars" } @@ -28,6 +30,7 @@ docs_rs_types = { path = "../../lib/docs_rs_types" } docs_rs_utils = { path = "../../lib/docs_rs_utils" } futures-util = 
{ workspace = true } itertools = { workspace = true } +serde_json = { workspace = true } opentelemetry = { workspace = true } rayon = "1.6.1" sqlx = { workspace = true } diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 404ade8f5..388eaa5db 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -10,6 +10,7 @@ pub struct Config { pub registry_url: Option, pub sqs_queue_url: Option, pub sqs_region: Option, + pub aws_sdk_max_retries: u32, /// How long to wait between registry checks pub delay_between_registry_fetches: Duration, @@ -32,8 +33,11 @@ impl AppConfig for Config { Ok(Self { registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?, registry_url: maybe_env("REGISTRY_URL")?, + sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, sqs_region: maybe_env("DOCSRS_SQS_REGION")?, + aws_sdk_max_retries: env("DOCSRS_AWS_SDK_MAX_RETRIES", 6u32)?, + delay_between_registry_fetches: Duration::from_secs(env::( "DOCSRS_DELAY_BETWEEN_REGISTRY_FETCHES", 60, diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index 68b964d79..871572be0 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -2,6 +2,7 @@ use crate::{ Config, db::{delete_crate, delete_version}, index::Index, + synchronization::CrateLocks, }; use anyhow::{Context as _, Result}; use crates_index_diff::Change; @@ -45,6 +46,30 @@ impl TryFrom for CrateVersion { } } +impl TryFrom<&docs_rs_crates_io::events::CrateVersion> for CrateVersion { + type Error = anyhow::Error; + + fn try_from(value: &docs_rs_crates_io::events::CrateVersion) -> Result { + Ok(Self { + name: value.name.parse()?, + version: value.version.clone().into(), + yanked: value.yanked, + }) + } +} + +impl TryFrom for CrateVersion { + type Error = anyhow::Error; + + fn try_from(value: 
docs_rs_crates_io::events::CrateVersion) -> Result { + Ok(Self { + name: value.name.parse()?, + version: value.version.into(), + yanked: value.yanked, + }) + } +} + #[cfg(test)] impl From for crates_index_diff::CrateVersion { fn from(value: CrateVersion) -> Self { @@ -92,6 +117,7 @@ async fn queue_crate_invalidation(krate: &KrateName, cdn: Option<&Cdn>) { /// Returns the number of crates added pub(crate) async fn get_new_crates( context: &Context, + locks: &CrateLocks, index: &Index, config: &Config, ) -> Result { @@ -115,7 +141,7 @@ pub(crate) async fn get_new_crates( debug!(last_seen_reference=%last_seen_reference, new_reference=%new_reference, "queueing changes"); - let crates_added = process_changes(context, &changes, config).await; + let crates_added = process_changes(context, &locks, &changes, config).await; if let Err(err) = context.build_queue()?.deprioritize_workspaces().await { error!(?err, "error deprioritizing workspaces"); @@ -129,11 +155,16 @@ pub(crate) async fn get_new_crates( Ok(crates_added) } -async fn process_changes(context: &Context, changes: &Vec, config: &Config) -> usize { +async fn process_changes( + context: &Context, + locks: &CrateLocks, + changes: &Vec, + config: &Config, +) -> usize { let mut crates_added = 0; for change in changes { - match process_change(context, change, config).await { + match process_change(context, locks, change, config).await { Ok(added) => { if added { crates_added += 1; @@ -148,7 +179,12 @@ async fn process_changes(context: &Context, changes: &Vec, config: &Conf } /// Process a crate change, returning whether the change was a crate addition or not. 
-async fn process_change(context: &Context, change: &Change, config: &Config) -> Result { +pub(crate) async fn process_change( + context: &Context, + locks: &CrateLocks, + change: &Change, + config: &Config, +) -> Result { let crate_version: CrateVersion = change .versions() .first() @@ -156,6 +192,8 @@ async fn process_change(context: &Context, change: &Change, config: &Config) -> .clone() .try_into()?; + let _guard = locks.lock(crate_version.name.to_string()).await; + match change { Change::Added(_release) => process_version_added(context, &crate_version).await?, Change::AddedAndYanked(_release) => { @@ -177,7 +215,10 @@ async fn process_change(context: &Context, change: &Change, config: &Config) -> } /// Processes crate changes, whether they got yanked or unyanked. -async fn process_version_yank_status(context: &Context, release: &CrateVersion) -> Result<()> { +pub(crate) async fn process_version_yank_status( + context: &Context, + release: &CrateVersion, +) -> Result<()> { // FIXME: delay yanks of crates that have not yet finished building // https://github.com/rust-lang/docs.rs/issues/1934 set_yanked(context, &release.name, &release.version, release.yanked).await?; @@ -185,7 +226,7 @@ async fn process_version_yank_status(context: &Context, release: &CrateVersion) Ok(()) } -async fn process_version_added(context: &Context, release: &CrateVersion) -> Result<()> { +pub(crate) async fn process_version_added(context: &Context, release: &CrateVersion) -> Result<()> { let mut conn = context.pool()?.get_async().await?; let priority = get_crate_priority(&mut conn, &release.name).await?; context @@ -216,7 +257,7 @@ async fn process_version_added(context: &Context, release: &CrateVersion) -> Res Ok(()) } -async fn process_version_deleted( +pub(crate) async fn process_version_deleted( context: &Context, config: &Config, release: &CrateVersion, @@ -250,7 +291,7 @@ async fn process_version_deleted( Ok(()) } -async fn process_crate_deleted( +pub(crate) async fn 
process_crate_deleted( context: &Context, config: &Config, krate: &KrateName, diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 833a6c688..05cf5cc68 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -5,6 +5,8 @@ mod index; pub mod index_watcher; mod rebuilds; mod service_metrics; +pub mod subscriber; +pub mod synchronization; #[cfg(test)] mod testing; @@ -13,7 +15,9 @@ pub use db::{delete_crate, delete_version}; pub use index::Index; pub use rebuilds::queue_rebuilds; -use crate::{index_watcher::get_new_crates, service_metrics::OtelServiceMetrics}; +use crate::{ + index_watcher::get_new_crates, service_metrics::OtelServiceMetrics, synchronization::CrateLocks, +}; use anyhow::Result; use docs_rs_context::Context; use docs_rs_utils::start_async_cron; @@ -24,7 +28,7 @@ use tracing::{debug, error, info, trace}; /// Run the registry watcher /// NOTE: this should only be run once, otherwise crates would be added /// to the queue multiple times. 
-pub async fn watch_registry(config: &Config, context: &Context) -> Result<()> { +pub async fn watch_registry(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { let mut last_gc = Instant::now(); let queue = context.build_queue()?; @@ -36,7 +40,7 @@ pub async fn watch_registry(config: &Config, context: &Context) -> Result<()> { debug!("Checking new crates"); let index = Index::from_config(config).await?; - match get_new_crates(context, &index, config).await { + match get_new_crates(context, locks, &index, config).await { Ok(n) => debug!("{} crates added to queue", n), Err(e) => { error!(?e, "Failed to get new crates"); diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index ba133b8e1..3a7ad4d34 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -3,7 +3,7 @@ use clap::{Parser, Subcommand}; use docs_rs_config::AppConfig as _; use docs_rs_context::Context; use docs_rs_types::{KrateName, Version}; -use docs_rs_watcher::{Config, Index, index_watcher}; +use docs_rs_watcher::{Config, Index, index_watcher, synchronization::CrateLocks}; use std::sync::Arc; #[tokio::main] @@ -81,7 +81,11 @@ impl CommandLine { // which should only run once, and all the time. 
docs_rs_watcher::start_background_service_metric_collector(&ctx).await?; - docs_rs_watcher::watch_registry(&config, &ctx).await?; + let locks = CrateLocks::new(); + tokio::try_join!( + docs_rs_watcher::watch_registry(&config, &ctx, &locks), + docs_rs_watcher::subscriber::listen(&config, &ctx, &locks), + )?; } Self::Queue { subcommand } => subcommand.handle_args(config, ctx).await?, Self::Database { subcommand } => subcommand.handle_args(config, ctx).await?, diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs new file mode 100644 index 000000000..78e76df5f --- /dev/null +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -0,0 +1,192 @@ +use crate::{ + Config, + index_watcher::{ + process_crate_deleted, process_version_added, process_version_deleted, + process_version_yank_status, + }, + synchronization::CrateLocks, +}; +use anyhow::{Context as _, Result, bail}; +use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; +use aws_sdk_sqs::Client; +use docs_rs_context::Context; +use docs_rs_crates_io::events::{IndexChangeEventV1, IndexChangeV1}; +use docs_rs_types::KrateName; +use docs_rs_utils::retry_async; +use std::time::Duration; +use tokio::time; +use tracing::{debug, error, instrument, warn}; + +/// visibility timeout: +/// should be longer than the longest time our server takes to handle a message. +/// +/// if we fetch a message, and don't delete it in this time, it will be redelivered. +const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); + +/// wait-time (long polling): +/// +/// How long should the request be kept open when there are no messages. +const WAIT_TIME: Duration = Duration::from_secs(30); + +/// when one long-polling request is finished, how long to sleep before starting the next? +const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); + +/// when we have an error handling a message, how long should SQS wait until +/// it redelivers this message. 
+const RETRY_DELAY: Duration = Duration::from_secs(30); + +pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { + let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { + bail!("missing sqs region or url, disabling crates.io subscriber"); + }; + let queue_url = queue_url.to_string(); + + let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; + let client = Client::from_conf( + aws_sdk_sqs::config::Builder::from(&shared_config) + .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) + .region(Region::new(region.clone())) + .build(), + ); + + let queue = context.build_queue()?; + + loop { + if queue.is_locked().await? { + debug!("Queue is locked, skipping checking new crates"); + time::sleep(WAIT_TIME).await; + continue; + } + + let response = match client + .receive_message() + .queue_url(queue_url.clone()) + .max_number_of_messages(10) + .wait_time_seconds(WAIT_TIME.as_secs() as i32) + .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) + .send() + .await + { + Ok(response) => response, + Err(err) => { + error!( + ?err, + queue_url, "error receiving messages from sqs, retrying" + ); + time::sleep(WAIT_TIME).await; + continue; + } + }; + + for message in response.messages() { + let Some(body) = message.body() else { + continue; + }; + + match retry_async( + || async move { process_message(context, config, locks, body).await }, + 3, + ) + .await + { + Ok(_) => { + if let Some(receipt_handle) = message.receipt_handle() { + // mark the message as "done" + if let Err(err) = client + .delete_message() + .queue_url(queue_url.clone()) + .receipt_handle(receipt_handle) + .send() + .await + { + // sqs will redeliver the message after the visibility timeout passed + error!( + ?err, + receipt_handle, queue_url, "error deleting message from queue" + ); + } + } + } + Err(err) => { + error!( + ?err, + ?message, + ?RETRY_DELAY, + body, + "error 
handling message. Retrying." + ); + + if let Some(receipt_handle) = message.receipt_handle() { + // Don't delete the message. + // It will become visible again after the visibility timeout. + if let Err(err) = client + .change_message_visibility() + .queue_url(queue_url.clone()) + .receipt_handle(receipt_handle) + // retry after some time + .visibility_timeout(RETRY_DELAY.as_secs() as i32) // retry + .send() + .await + { + // this error doesn't really matter, without the changed visibility + // timeout sqs will redeliver after the default visibility timeout. + warn!( + ?err, + receipt_handle, + queue_url, + "error setting visibility_timeout for retry" + ); + } + } + } + } + } + + time::sleep(SLEEP_BETWEEN_REQUESTS).await; + } +} + +#[instrument(skip(context, config, locks))] +async fn process_message( + context: &Context, + config: &Config, + locks: &CrateLocks, + body: &str, +) -> Result<()> { + let event: IndexChangeEventV1 = + serde_json::from_str(body).context("error parsing event from json")?; + + debug!(?event, "received event from sqs"); + + let _guard = locks.lock(event.change.name()).await; + + process_change(context, &event.change, config) + .await + .context("error processing change")?; + + Ok(()) +} + +/// Process a crate change, returning whether the change was a crate addition or not. +pub(crate) async fn process_change( + context: &Context, + change: &IndexChangeV1, + config: &Config, +) -> Result { + match change { + IndexChangeV1::Added(crate_version) => { + process_version_added(context, &crate_version.try_into().unwrap()).await? + } + IndexChangeV1::Unyanked(crate_version) | IndexChangeV1::Yanked(crate_version) => { + process_version_yank_status(context, &crate_version.try_into().unwrap()).await? + } + IndexChangeV1::CrateDeleted { name, .. } => { + let name: KrateName = name.parse()?; + process_crate_deleted(context, config, &name).await? 
+ } + IndexChangeV1::VersionDeleted(crate_version) => { + process_version_deleted(context, config, &crate_version.try_into().unwrap()).await? + } + }; + Ok(change.added().is_some()) +} diff --git a/crates/bin/docs_rs_watcher/src/synchronization.rs b/crates/bin/docs_rs_watcher/src/synchronization.rs new file mode 100644 index 000000000..d80be40c6 --- /dev/null +++ b/crates/bin/docs_rs_watcher/src/synchronization.rs @@ -0,0 +1,27 @@ +use std::{collections::HashMap, sync::Arc}; +use tokio::sync::{Mutex, OwnedMutexGuard}; + +#[derive(Clone, Default)] +pub struct CrateLocks { + locks: Arc>>>>, +} + +impl CrateLocks { + pub fn new() -> Self { + Self { + locks: Arc::new(Mutex::new(HashMap::new())), + } + } + + pub async fn lock(&self, crate_name: impl Into) -> OwnedMutexGuard<()> { + let lock = { + let mut locks = self.locks.lock().await; + locks + .entry(crate_name.into()) + .or_insert_with(|| Arc::new(Mutex::new(()))) + .clone() + }; + + lock.lock_owned().await + } +} diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 12cc5170b..986e814cb 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -59,6 +59,16 @@ impl IndexChangeV1 { _ => None, } } + + pub fn name(&self) -> &str { + match self { + IndexChangeV1::Added(crate_version) => &crate_version.name, + IndexChangeV1::Unyanked(crate_version) => &crate_version.name, + IndexChangeV1::Yanked(crate_version) => &crate_version.name, + IndexChangeV1::CrateDeleted { name } => &name, + IndexChangeV1::VersionDeleted(crate_version) => &crate_version.name, + } + } } impl fmt::Display for IndexChangeV1 { diff --git a/crates/lib/docs_rs_storage/Cargo.toml b/crates/lib/docs_rs_storage/Cargo.toml index 153de2489..6b0fb85ea 100644 --- a/crates/lib/docs_rs_storage/Cargo.toml +++ b/crates/lib/docs_rs_storage/Cargo.toml @@ -16,11 +16,7 @@ testing = [ anyhow = { workspace = true } async-compression = { version = "0.4.32", features = 
["bzip2", "deflate", "gzip", "tokio", "zstd"] } async-stream = { workspace = true } -# The default `rustls` feature pulls in the legacy hyper 0.14 + rustls 0.21 -# stack via `aws-smithy-runtime/tls-rustls`, which includes the vulnerable -# `rustls-webpki` v0.101.x. Using only `default-https-client` avoids this by -# using the modern rustls 0.23 + hyper 1.x stack instead. -aws-config = { version = "1.0.0", default-features = false, features = ["default-https-client", "rt-tokio"] } +aws-config = { workspace = true } aws-sdk-s3 = { version = "1.3.0", default-features = false, features = ["default-https-client", "rt-tokio"] } aws-smithy-types-convert = { version = "0.60.0", features = ["convert-chrono"] } base64 = { workspace = true } From bc3b16e3c067f4cb469e8fa08dfce1242906ca73 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 13 Jun 2026 05:33:48 +0200 Subject: [PATCH 21/74] errs --- crates/bin/docs_rs_watcher/src/index_watcher.rs | 4 +++- crates/lib/docs_rs_crates_io/src/events.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index 871572be0..87e00cd50 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -141,7 +141,7 @@ pub(crate) async fn get_new_crates( debug!(last_seen_reference=%last_seen_reference, new_reference=%new_reference, "queueing changes"); - let crates_added = process_changes(context, &locks, &changes, config).await; + let crates_added = process_changes(context, locks, &changes, config).await; if let Err(err) = context.build_queue()?.deprioritize_workspaces().await { error!(?err, "error deprioritizing workspaces"); @@ -558,8 +558,10 @@ mod tests { version: V2, ..Default::default() }; + let locks = CrateLocks::new(); let added = process_changes( &env, + &locks, &vec![ // Should be added correctly Change::Added(krate1.into()), diff --git 
a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 986e814cb..4b9dc4ea3 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -65,7 +65,7 @@ impl IndexChangeV1 { IndexChangeV1::Added(crate_version) => &crate_version.name, IndexChangeV1::Unyanked(crate_version) => &crate_version.name, IndexChangeV1::Yanked(crate_version) => &crate_version.name, - IndexChangeV1::CrateDeleted { name } => &name, + IndexChangeV1::CrateDeleted { name } => name, IndexChangeV1::VersionDeleted(crate_version) => &crate_version.name, } } From 8f49598882325c8f25ac1b01fe1af83ff1149d7d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 13 Jun 2026 05:34:30 +0200 Subject: [PATCH 22/74] todo --- crates/bin/docs_rs_watcher/src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index 3a7ad4d34..9ee93819c 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -82,6 +82,7 @@ impl CommandLine { docs_rs_watcher::start_background_service_metric_collector(&ctx).await?; let locks = CrateLocks::new(); + // FIXME: we don't want to exit in error case, do we? 
tokio::try_join!( docs_rs_watcher::watch_registry(&config, &ctx, &locks), docs_rs_watcher::subscriber::listen(&config, &ctx, &locks), From d927bc5e56e3b1e8565f467ba246541ef93544e5 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sun, 14 Jun 2026 14:59:54 +0200 Subject: [PATCH 23/74] prio --- crates/bin/docs_rs_watcher/src/subscriber.rs | 3 +++ crates/bin/docs_rs_watcher/src/synchronization.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 78e76df5f..2dce449e9 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -17,6 +17,9 @@ use std::time::Duration; use tokio::time; use tracing::{debug, error, instrument, warn}; +// TODO: +// * when should we run deprioritize_workspaces ? + /// visibility timeout: /// should be longer than the longest time our server takes to handle a message. /// diff --git a/crates/bin/docs_rs_watcher/src/synchronization.rs b/crates/bin/docs_rs_watcher/src/synchronization.rs index d80be40c6..b1720fe91 100644 --- a/crates/bin/docs_rs_watcher/src/synchronization.rs +++ b/crates/bin/docs_rs_watcher/src/synchronization.rs @@ -1,6 +1,9 @@ use std::{collections::HashMap, sync::Arc}; use tokio::sync::{Mutex, OwnedMutexGuard}; +/// shared locks so we can serialize changes to the same crate, +/// for the transition phase where we might get input from both +/// the git index and the sqs queue. 
#[derive(Clone, Default)] pub struct CrateLocks { locks: Arc>>>>, From 431662efc18f63d4c846d14b31f293c6ce70f786 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 3 Jun 2026 22:36:11 +0200 Subject: [PATCH 24/74] add docs_rs_crates_io subcrate for interaction / shared types --- Cargo.lock | 12 + crates/lib/docs_rs_crates_io/Cargo.toml | 20 ++ crates/lib/docs_rs_crates_io/src/events.rs | 249 +++++++++++++++++++++ crates/lib/docs_rs_crates_io/src/lib.rs | 1 + 4 files changed, 282 insertions(+) create mode 100644 crates/lib/docs_rs_crates_io/Cargo.toml create mode 100644 crates/lib/docs_rs_crates_io/src/events.rs create mode 100644 crates/lib/docs_rs_crates_io/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 0810f40d8..fcae544b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1235,8 +1235,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link", ] @@ -2108,6 +2110,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "docs_rs_crates_io" +version = "0.1.0" +dependencies = [ + "chrono", + "semver", + "serde", + "serde_json", +] + [[package]] name = "docs_rs_database" version = "0.0.0" diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml new file mode 100644 index 000000000..c6f9224b3 --- /dev/null +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "docs_rs_crates_io" +version = "0.1.0" +description = "types & logic for the direct integration between docs.rs & crates.io" + +authors.workspace = true +license.workspace = true +repository.workspace = true +edition.workspace = true + +[dependencies] +chrono = { version = "0.4", features = ["serde"] } +semver = { version = "1", features = ["serde"] } +serde = { version = "1", features = ["derive"] } + +[dev-dependencies] +serde_json = "1.0" + +[lints] 
+workspace = true diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs new file mode 100644 index 000000000..12cc5170b --- /dev/null +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -0,0 +1,249 @@ +#![allow(clippy::disallowed_types)] + +use chrono::{DateTime, Utc}; +use std::fmt; + +/// A change that can happen to a crate on our index. +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +#[serde(tag = "type", content = "payload", rename_all = "snake_case")] +pub enum IndexChangeV1 { + /// A crate version was added. + Added(CrateVersion), + /// A crate version was unyanked. + Unyanked(CrateVersion), + /// A crate version was yanked. + Yanked(CrateVersion), + /// The name of the crate whose file was deleted, which implies all versions were deleted as well. + CrateDeleted { name: String }, + /// A crate version was deleted. + VersionDeleted(CrateVersion), +} + +impl IndexChangeV1 { + /// Return the added crate, if this is this kind of change. + pub fn added(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::Added(v) => Some(v), + _ => None, + } + } + + /// Return the yanked crate, if this is this kind of change. + pub fn yanked(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::Yanked(v) => Some(v), + _ => None, + } + } + + /// Return the unyanked crate, if this is this kind of change. + pub fn unyanked(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::Unyanked(v) => Some(v), + _ => None, + } + } + + /// Return the deleted crate, if this is this kind of change. + pub fn crate_deleted(&self) -> Option<&str> { + match self { + IndexChangeV1::CrateDeleted { name } => Some(name.as_str()), + _ => None, + } + } + + /// Return the deleted version crate, if this is this kind of change. 
+ pub fn version_deleted(&self) -> Option<&CrateVersion> { + match self { + IndexChangeV1::VersionDeleted(v) => Some(v), + _ => None, + } + } +} + +impl fmt::Display for IndexChangeV1 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + IndexChangeV1::Added(_) => "added", + IndexChangeV1::Yanked(_) => "yanked", + IndexChangeV1::CrateDeleted { .. } => "crate deleted", + IndexChangeV1::VersionDeleted(_) => "version deleted", + IndexChangeV1::Unyanked(_) => "unyanked", + } + ) + } +} + +/// A conventional event envelope for our events between crates.io & docs.rs +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +pub struct Event { + /// Unique event identifier for deduplication and tracing. + pub id: String, + /// Timestamp when the event occured + pub occurred_at: DateTime, + /// The typed payload. + #[serde(flatten)] + pub change: T, +} + +/// The first version of the public event wire format. +pub type IndexChangeEventV1 = Event; + +/// Pack all information we know about a change made to a version of a crate. +#[derive(Clone, serde::Serialize, serde::Deserialize, Eq, PartialEq, Debug)] +pub struct CrateVersion { + /// The crate name, i.e. `clap`. + pub name: String, + /// is the release yanked? + pub yanked: bool, + /// The semantic version of the crate. 
+ #[serde(rename = "vers")] + pub version: semver::Version, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn crate_version() -> CrateVersion { + CrateVersion { + name: "clap".into(), + yanked: false, + version: semver::Version::new(4, 5, 0), + } + } + + fn event(change: IndexChangeV1) -> IndexChangeEventV1 { + IndexChangeEventV1 { + id: "evt_123".into(), + occurred_at: DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") + .unwrap() + .with_timezone(&Utc), + change, + } + } + + #[test] + fn crate_version_serializes_with_vers_field() { + let event = crate_version(); + + assert_eq!( + serde_json::to_value(&event).unwrap(), + json!({ + "name": "clap", + "yanked": false, + "vers": "4.5.0", + }) + ); + } + + #[test] + fn change_serializes_with_expected_variant_shapes() { + let crate_version = crate_version(); + + let cases = [ + ( + IndexChangeV1::Added(crate_version.clone()), + json!({ + "type": "added", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + IndexChangeV1::Unyanked(crate_version.clone()), + json!({ + "type": "unyanked", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + IndexChangeV1::Yanked(crate_version.clone()), + json!({ + "type": "yanked", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ( + IndexChangeV1::CrateDeleted { + name: "old-crate".into(), + }, + json!({ + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + }), + ), + ( + IndexChangeV1::VersionDeleted(crate_version), + json!({ + "type": "version_deleted", + "payload": { + "name": "clap", + "yanked": false, + "vers": "4.5.0", + } + }), + ), + ]; + + for (event, expected) in cases { + assert_eq!(serde_json::to_value(&event).unwrap(), expected); + } + } + + #[test] + fn event_serializes_with_minimum_metadata() { + let event = event(IndexChangeV1::CrateDeleted { + name: "old-crate".into(), + }); + + assert_eq!( + 
serde_json::to_value(&event).unwrap(), + json!({ + "id": "evt_123", + "occurred_at": "2026-05-22T12:34:56Z", + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + }) + ); + } + + #[test] + fn event_deserializes_rfc3339_occurred_at() { + let event: IndexChangeEventV1 = serde_json::from_value(json!({ + "id": "evt_123", + "occurred_at": "2026-05-22T12:34:56Z", + "type": "crate_deleted", + "payload": { + "name": "old-crate" + } + })) + .unwrap(); + + assert_eq!( + event.occurred_at, + DateTime::parse_from_rfc3339("2026-05-22T12:34:56Z") + .unwrap() + .with_timezone(&Utc) + ); + } +} diff --git a/crates/lib/docs_rs_crates_io/src/lib.rs b/crates/lib/docs_rs_crates_io/src/lib.rs new file mode 100644 index 000000000..a9970c28f --- /dev/null +++ b/crates/lib/docs_rs_crates_io/src/lib.rs @@ -0,0 +1 @@ +pub mod events; From 6a862eef736eb5b5679ea7f68b8d411e1ae45a40 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 14:46:18 +0200 Subject: [PATCH 25/74] remove "yanked" from event payload --- crates/lib/docs_rs_crates_io/src/events.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 12cc5170b..375c85690 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -97,8 +97,6 @@ pub type IndexChangeEventV1 = Event; pub struct CrateVersion { /// The crate name, i.e. `clap`. pub name: String, - /// is the release yanked? - pub yanked: bool, /// The semantic version of the crate. 
#[serde(rename = "vers")] pub version: semver::Version, @@ -112,7 +110,6 @@ mod tests { fn crate_version() -> CrateVersion { CrateVersion { name: "clap".into(), - yanked: false, version: semver::Version::new(4, 5, 0), } } @@ -135,7 +132,6 @@ mod tests { serde_json::to_value(&event).unwrap(), json!({ "name": "clap", - "yanked": false, "vers": "4.5.0", }) ); @@ -152,7 +148,6 @@ mod tests { "type": "added", "payload": { "name": "clap", - "yanked": false, "vers": "4.5.0", } }), @@ -163,7 +158,6 @@ mod tests { "type": "unyanked", "payload": { "name": "clap", - "yanked": false, "vers": "4.5.0", } }), @@ -174,7 +168,6 @@ mod tests { "type": "yanked", "payload": { "name": "clap", - "yanked": false, "vers": "4.5.0", } }), @@ -196,7 +189,6 @@ mod tests { "type": "version_deleted", "payload": { "name": "clap", - "yanked": false, "vers": "4.5.0", } }), From f4344f77807c923ec00fc012e2d34ea5a980a82e Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 14:48:58 +0200 Subject: [PATCH 26/74] use String for IndexChangeEvent -> version --- Cargo.lock | 1 - crates/lib/docs_rs_crates_io/Cargo.toml | 1 - crates/lib/docs_rs_crates_io/src/events.rs | 6 ++---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fcae544b1..8bc91de48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2115,7 +2115,6 @@ name = "docs_rs_crates_io" version = "0.1.0" dependencies = [ "chrono", - "semver", "serde", "serde_json", ] diff --git a/crates/lib/docs_rs_crates_io/Cargo.toml b/crates/lib/docs_rs_crates_io/Cargo.toml index c6f9224b3..497536d66 100644 --- a/crates/lib/docs_rs_crates_io/Cargo.toml +++ b/crates/lib/docs_rs_crates_io/Cargo.toml @@ -10,7 +10,6 @@ edition.workspace = true [dependencies] chrono = { version = "0.4", features = ["serde"] } -semver = { version = "1", features = ["serde"] } serde = { version = "1", features = ["derive"] } [dev-dependencies] diff --git a/crates/lib/docs_rs_crates_io/src/events.rs 
b/crates/lib/docs_rs_crates_io/src/events.rs index 375c85690..5bb4afa56 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -1,5 +1,3 @@ -#![allow(clippy::disallowed_types)] - use chrono::{DateTime, Utc}; use std::fmt; @@ -99,7 +97,7 @@ pub struct CrateVersion { pub name: String, /// The semantic version of the crate. #[serde(rename = "vers")] - pub version: semver::Version, + pub version: String, } #[cfg(test)] @@ -110,7 +108,7 @@ mod tests { fn crate_version() -> CrateVersion { CrateVersion { name: "clap".into(), - version: semver::Version::new(4, 5, 0), + version: "4.5.0".into(), } } From 216b68d9e2e9cf7d2292d0c3fda3f94fa0ffec7d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 16:08:31 +0200 Subject: [PATCH 27/74] WIP --- Cargo.lock | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 0810f40d8..8ee007bb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -599,6 +599,31 @@ dependencies = [ "url", ] +[[package]] +name = "aws-sdk-sqs" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0246bf049cfc003ce44599dff955b9353758de3afa68a053da9b2c7de20a07d8" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.2", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-sts" version = "1.107.0" @@ -1235,8 +1260,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link", ] @@ -2108,6 +2135,15 @@ dependencies = [ "tokio", ] +[[package]] +name = 
"docs_rs_crates_io" +version = "0.1.0" +dependencies = [ + "chrono", + "serde", + "serde_json", +] + [[package]] name = "docs_rs_database" version = "0.0.0" @@ -2447,12 +2483,15 @@ name = "docs_rs_watcher" version = "0.6.0" dependencies = [ "anyhow", + "aws-config", + "aws-sdk-sqs", "clap", "crates-index", "crates-index-diff", "docs_rs_build_queue", "docs_rs_config", "docs_rs_context", + "docs_rs_crates_io", "docs_rs_database", "docs_rs_env_vars", "docs_rs_fastly", @@ -2469,10 +2508,12 @@ dependencies = [ "opentelemetry", "pretty_assertions", "rayon", + "serde_json", "sqlx", "test-case", "tokio", "tracing", + "url", ] [[package]] From 7d6c4c20115654248e30464c704c72a7e1086e22 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 16:29:00 +0200 Subject: [PATCH 28/74] wip --- crates/bin/cratesfyi/src/daemon.rs | 11 ++++-- .../bin/docs_rs_watcher/src/index_watcher.rs | 35 +++++++------------ crates/bin/docs_rs_watcher/src/subscriber.rs | 7 ++-- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/crates/bin/cratesfyi/src/daemon.rs b/crates/bin/cratesfyi/src/daemon.rs index 2ea37bd6b..9f2393b03 100644 --- a/crates/bin/cratesfyi/src/daemon.rs +++ b/crates/bin/cratesfyi/src/daemon.rs @@ -2,9 +2,10 @@ use anyhow::{Error, anyhow}; use docs_rs_builder::{RustwideBuilder, queue_builder}; use docs_rs_config::AppConfig as _; use docs_rs_context::Context; +use docs_rs_watcher::synchronization::CrateLocks; use docs_rs_watcher::{ start_background_queue_rebuild, start_background_repository_stats_updater, - start_background_service_metric_collector, watch_registry, + start_background_service_metric_collector, }; use docs_rs_web::run_web_server; use std::sync::Arc; @@ -21,7 +22,13 @@ fn start_registry_watcher( // space this out to prevent it from clashing against the queue-builder thread on launch tokio::time::sleep(Duration::from_secs(30)).await; - watch_registry(&config, &context).await + let locks = CrateLocks::new(); + // FIXME: we don't want to exit in 
error case, do we? + // This is a spawn, so in the background, so we probably want to at least log the error? + tokio::try_join!( + docs_rs_watcher::watch_registry(&config, &context, &locks), + docs_rs_watcher::subscriber::listen(&config, &context, &locks), + ) }); Ok(()) diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index 87e00cd50..784700ccc 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -20,7 +20,6 @@ use tracing::{debug, error, info, warn}; pub(crate) struct CrateVersion { pub name: KrateName, pub version: Version, - pub yanked: bool, } #[cfg(test)] @@ -29,7 +28,6 @@ impl Default for CrateVersion { Self { name: docs_rs_types::testing::KRATE, version: docs_rs_types::testing::V1, - yanked: false, } } } @@ -41,7 +39,6 @@ impl TryFrom for CrateVersion { Ok(Self { name: value.name.parse()?, version: value.version.parse()?, - yanked: value.yanked, }) } } @@ -52,8 +49,7 @@ impl TryFrom<&docs_rs_crates_io::events::CrateVersion> for CrateVersion { fn try_from(value: &docs_rs_crates_io::events::CrateVersion) -> Result { Ok(Self { name: value.name.parse()?, - version: value.version.clone().into(), - yanked: value.yanked, + version: value.version.parse()?, }) } } @@ -64,8 +60,7 @@ impl TryFrom for CrateVersion { fn try_from(value: docs_rs_crates_io::events::CrateVersion) -> Result { Ok(Self { name: value.name.parse()?, - version: value.version.into(), - yanked: value.yanked, + version: value.version.parse()?, }) } } @@ -76,7 +71,6 @@ impl From for crates_index_diff::CrateVersion { Self { name: value.name.to_string().into(), version: value.version.to_string().into(), - yanked: value.yanked, ..Default::default() } } @@ -198,10 +192,13 @@ pub(crate) async fn process_change( Change::Added(_release) => process_version_added(context, &crate_version).await?, Change::AddedAndYanked(_release) => { process_version_added(context, 
&crate_version).await?; - process_version_yank_status(context, &crate_version).await?; + process_version_yank_status(context, &crate_version, true).await?; + } + Change::Unyanked(_release) => { + process_version_yank_status(context, &crate_version, false).await? } - Change::Unyanked(_release) | Change::Yanked(_release) => { - process_version_yank_status(context, &crate_version).await? + Change::Yanked(_release) => { + process_version_yank_status(context, &crate_version, true).await? } Change::CrateDeleted { name, .. } => { let name: KrateName = name.parse()?; @@ -218,10 +215,11 @@ pub(crate) async fn process_change( pub(crate) async fn process_version_yank_status( context: &Context, release: &CrateVersion, + yanked: bool, ) -> Result<()> { // FIXME: delay yanks of crates that have not yet finished building // https://github.com/rust-lang/docs.rs/issues/1934 - set_yanked(context, &release.name, &release.version, release.yanked).await?; + set_yanked(context, &release.name, &release.version, yanked).await?; queue_crate_invalidation(&release.name, context.cdn.as_deref()).await; Ok(()) } @@ -383,7 +381,6 @@ mod tests { let krate = CrateVersion { name: KRATE, version: V1, - ..Default::default() }; process_version_added(&env, &krate).await?; @@ -394,7 +391,6 @@ mod tests { let krate = CrateVersion { name: "krate".parse()?, version: V2.to_string().parse()?, - ..Default::default() }; process_version_added(&env, &krate).await?; @@ -427,9 +423,8 @@ mod tests { let krate = CrateVersion { name: KRATE, version: V1, - yanked: true, }; - process_version_yank_status(&env, &krate).await?; + process_version_yank_status(&env, &krate, true).await?; // And verify it's actually marked as yanked let row = sqlx::query!( @@ -446,9 +441,8 @@ mod tests { let krate = CrateVersion { name: KRATE, version: V1, - yanked: false, }; - process_version_yank_status(&env, &krate).await?; + process_version_yank_status(&env, &krate, false).await?; let row = sqlx::query!( "SELECT yanked @@ -511,7 +505,6 @@ 
mod tests { let krate = CrateVersion { name: KRATE, version: V2, - ..Default::default() }; process_version_deleted(&env, env.config(), &krate).await?; @@ -541,22 +534,18 @@ mod tests { let krate1 = CrateVersion { name: KRATE, version: V1, - ..Default::default() }; let krate2 = CrateVersion { name: "krate2".parse()?, version: V1, - ..Default::default() }; let krate_already_present = CrateVersion { name: "krate_already_present".parse()?, version: V1, - ..Default::default() }; let non_existing_version = CrateVersion { name: "krate_already_present".parse()?, version: V2, - ..Default::default() }; let locks = CrateLocks::new(); let added = process_changes( diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 2dce449e9..244cd2cff 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -180,8 +180,11 @@ pub(crate) async fn process_change( IndexChangeV1::Added(crate_version) => { process_version_added(context, &crate_version.try_into().unwrap()).await? } - IndexChangeV1::Unyanked(crate_version) | IndexChangeV1::Yanked(crate_version) => { - process_version_yank_status(context, &crate_version.try_into().unwrap()).await? + IndexChangeV1::Yanked(crate_version) => { + process_version_yank_status(context, &crate_version.try_into().unwrap(), true).await? + } + IndexChangeV1::Unyanked(crate_version) => { + process_version_yank_status(context, &crate_version.try_into().unwrap(), false).await? } IndexChangeV1::CrateDeleted { name, .. 
} => { let name: KrateName = name.parse()?; From 5f6dde2156ca3a6a6ff13049d07014a31af0f0a2 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 22:51:00 +0200 Subject: [PATCH 29/74] read --- crates/bin/docs_rs_watcher/src/config.rs | 4 ++++ .../bin/docs_rs_watcher/src/index_watcher.rs | 6 +++++ crates/bin/docs_rs_watcher/src/subscriber.rs | 22 +++++++++++++++---- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 388eaa5db..c14f77590 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -8,8 +8,10 @@ use url::Url; pub struct Config { pub registry_index_path: PathBuf, pub registry_url: Option, + pub registry_dry_run: bool, pub sqs_queue_url: Option, pub sqs_region: Option, + pub sqs_dry_run: bool, pub aws_sdk_max_retries: u32, /// How long to wait between registry checks @@ -33,9 +35,11 @@ impl AppConfig for Config { Ok(Self { registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?, registry_url: maybe_env("REGISTRY_URL")?, + registry_dry_run: env("DOCS_RS_REGISTRY_DRY_RUN", false)?, sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, sqs_region: maybe_env("DOCSRS_SQS_REGION")?, + sqs_dry_run: env("DOCS_RS_SQS_DRY_RUN", true)?, aws_sdk_max_retries: env("DOCSRS_AWS_SDK_MAX_RETRIES", 6u32)?, delay_between_registry_fetches: Duration::from_secs(env::( diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index 784700ccc..98a6febb2 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -158,6 +158,12 @@ async fn process_changes( let mut crates_added = 0; for change in changes { + debug!(?change, "received change from git index"); + + if config.registry_dry_run { + continue; + } + match process_change(context, locks, change, config).await { Ok(added) => { if added { 
diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 244cd2cff..168384d4b 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -13,7 +13,7 @@ use docs_rs_context::Context; use docs_rs_crates_io::events::{IndexChangeEventV1, IndexChangeV1}; use docs_rs_types::KrateName; use docs_rs_utils::retry_async; -use std::time::Duration; +use std::time::{Duration, Instant}; use tokio::time; use tracing::{debug, error, instrument, warn}; @@ -38,6 +38,9 @@ const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); /// it redelivers this message. const RETRY_DELAY: Duration = Duration::from_secs(30); +/// How long to wait before rechecking the priorities of queued crates. +const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); + pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { bail!("missing sqs region or url, disabling crates.io subscriber"); @@ -52,6 +55,7 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R .build(), ); + let mut last_priority_recheck = Instant::now(); let queue = context.build_queue()?; loop { @@ -145,6 +149,14 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R } } + if last_priority_recheck.elapsed() >= DELAY_BETWEEN_PRIORITY_RECHECK { + if let Err(err) = queue.deprioritize_workspaces().await { + error!(?err, "error deprioritizing workspaces"); + } + + last_priority_recheck = Instant::now(); + } + time::sleep(SLEEP_BETWEEN_REQUESTS).await; } } @@ -163,9 +175,11 @@ async fn process_message( let _guard = locks.lock(event.change.name()).await; - process_change(context, &event.change, config) - .await - .context("error processing change")?; + if !config.sqs_dry_run { + process_change(context, &event.change, config) + .await + 
.context("error processing change")?; + } Ok(()) } From e5149b16a9cd7b7b7710543fa50217cf47406a1f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 23:08:38 +0200 Subject: [PATCH 30/74] simp --- crates/bin/docs_rs_watcher/src/subscriber.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 168384d4b..3daef0af3 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -6,7 +6,7 @@ use crate::{ }, synchronization::CrateLocks, }; -use anyhow::{Context as _, Result, bail}; +use anyhow::{Context as _, Result}; use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; use aws_sdk_sqs::Client; use docs_rs_context::Context; @@ -43,8 +43,10 @@ const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { - bail!("missing sqs region or url, disabling crates.io subscriber"); + warn!("missing sqs region or url, disabling crates.io SQS subscriber"); + return Ok(()); }; + let queue_url = queue_url.to_string(); let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; From c82f60bcbf2fd2567b85e177426113563c6a38f2 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 23:25:56 +0200 Subject: [PATCH 31/74] err --- crates/bin/cratesfyi/src/daemon.rs | 9 +-------- crates/bin/docs_rs_watcher/src/lib.rs | 14 ++++++++++++++ crates/bin/docs_rs_watcher/src/main.rs | 7 +------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/crates/bin/cratesfyi/src/daemon.rs b/crates/bin/cratesfyi/src/daemon.rs index 9f2393b03..f8b7f5808 100644 --- a/crates/bin/cratesfyi/src/daemon.rs +++ b/crates/bin/cratesfyi/src/daemon.rs @@ -2,7 +2,6 @@ use anyhow::{Error, anyhow}; use 
docs_rs_builder::{RustwideBuilder, queue_builder}; use docs_rs_config::AppConfig as _; use docs_rs_context::Context; -use docs_rs_watcher::synchronization::CrateLocks; use docs_rs_watcher::{ start_background_queue_rebuild, start_background_repository_stats_updater, start_background_service_metric_collector, @@ -22,13 +21,7 @@ fn start_registry_watcher( // space this out to prevent it from clashing against the queue-builder thread on launch tokio::time::sleep(Duration::from_secs(30)).await; - let locks = CrateLocks::new(); - // FIXME: we don't want to exit in error case, do we? - // This is a spawn, so in the background, so we probably want to at least log the error? - tokio::try_join!( - docs_rs_watcher::watch_registry(&config, &context, &locks), - docs_rs_watcher::subscriber::listen(&config, &context, &locks), - ) + docs_rs_watcher::watch(&config, &context).await; }); Ok(()) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 05cf5cc68..c85e23a08 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -25,6 +25,20 @@ use std::{sync::Arc, time::Duration}; use tokio::time::{self, Instant}; use tracing::{debug, error, info, trace}; +pub async fn watch(config: &Config, context: &Context) { + let locks = CrateLocks::new(); + + loop { + if let Err(err) = tokio::try_join!( + crate::watch_registry(&config, &context, &locks), + crate::subscriber::listen(&config, &context, &locks), + ) { + error!(?err, "error watching registry or SQS, will retry"); + time::sleep(Duration::from_secs(10)).await; + } + } +} + /// Run the registry watcher /// NOTE: this should only be run once, otherwise crates would be added /// to the queue multiple times. 
diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index dc07a7515..574f24e88 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -82,12 +82,7 @@ impl CommandLine { // which should only run once, and all the time. docs_rs_watcher::start_background_service_metric_collector(&ctx).await?; - let locks = CrateLocks::new(); - // FIXME: we don't want to exit in error case, do we? - tokio::try_join!( - docs_rs_watcher::watch_registry(&config, &ctx, &locks), - docs_rs_watcher::subscriber::listen(&config, &ctx, &locks), - )?; + docs_rs_watcher::watch(&config, &ctx).await; } Self::Queue { subcommand } => subcommand.handle_args(config, ctx).await?, Self::Database { subcommand } => subcommand.handle_args(config, ctx).await?, From 6ff066643c56c8b164c973ed13f6110283e5bd9f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 23:36:08 +0200 Subject: [PATCH 32/74] msg --- crates/bin/docs_rs_watcher/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index c85e23a08..d1df9c8f4 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -33,7 +33,10 @@ pub async fn watch(config: &Config, context: &Context) { crate::watch_registry(&config, &context, &locks), crate::subscriber::listen(&config, &context, &locks), ) { - error!(?err, "error watching registry or SQS, will retry"); + error!( + ?err, + "unexpected error watching registry or SQS, will retry" + ); time::sleep(Duration::from_secs(10)).await; } } From 4ee1856cdb939caa4b03cdfa876c7a0cc5141e33 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 23:46:03 +0200 Subject: [PATCH 33/74] test(watcher): cover subscriber dispatch Add unit tests for process_change and process_message in the SQS subscriber and clean up small watcher warnings found during validation. 
--- crates/bin/docs_rs_watcher/src/lib.rs | 4 +- crates/bin/docs_rs_watcher/src/main.rs | 2 +- crates/bin/docs_rs_watcher/src/subscriber.rs | 173 +++++++++++++++++++ 3 files changed, 176 insertions(+), 3 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index d1df9c8f4..8e8f8a140 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -30,8 +30,8 @@ pub async fn watch(config: &Config, context: &Context) { loop { if let Err(err) = tokio::try_join!( - crate::watch_registry(&config, &context, &locks), - crate::subscriber::listen(&config, &context, &locks), + crate::watch_registry(config, context, &locks), + crate::subscriber::listen(config, context, &locks), ) { error!( ?err, diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index 574f24e88..d4138a375 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -3,7 +3,7 @@ use clap::{Parser, Subcommand}; use docs_rs_config::AppConfig as _; use docs_rs_context::Context; use docs_rs_types::{KrateName, Version}; -use docs_rs_watcher::{Config, Index, index_watcher, synchronization::CrateLocks}; +use docs_rs_watcher::{Config, Index, index_watcher}; use futures_util::FutureExt as _; use std::sync::Arc; diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 3daef0af3..08e18e736 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -212,3 +212,176 @@ pub(crate) async fn process_change( }; Ok(change.added().is_some()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::testing::TestEnvironment; + use docs_rs_config::AppConfig as _; + use docs_rs_crates_io::events::CrateVersion; + use docs_rs_types::testing::{KRATE, V1, V2}; + use pretty_assertions::assert_eq; + + fn added_event_json(name: &str, version: &str) -> String { + format!( 
+ r#"{{"id":"evt_123","occurred_at":"2026-06-01T12:00:00Z","type":"added","payload":{{"name":"{name}","vers":"{version}"}}}}"# + ) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_process_change_added_queues_crate() -> Result<()> { + let env = TestEnvironment::new().await?; + + let added = process_change( + &env, + &IndexChangeV1::Added(CrateVersion { + name: KRATE.to_string(), + version: V1.to_string(), + }), + env.config(), + ) + .await?; + + assert!(added); + let queue = env.build_queue()?.queued_crates().await?; + assert_eq!(queue.len(), 1); + assert_eq!(queue[0].name, KRATE); + assert_eq!(queue[0].version, V1); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_process_change_yanked_updates_release() -> Result<()> { + let env = TestEnvironment::new().await?; + let mut conn = env.async_conn().await?; + + let id = env + .fake_release() + .await + .name("krate") + .version(V1) + .create() + .await?; + + let added = process_change( + &env, + &IndexChangeV1::Yanked(CrateVersion { + name: KRATE.to_string(), + version: V1.to_string(), + }), + env.config(), + ) + .await?; + + assert!(!added); + let row = sqlx::query!( + "SELECT yanked + FROM releases + WHERE id = $1", + id.0 + ) + .fetch_one(&mut *conn) + .await?; + assert_eq!(row.yanked, Some(true)); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_process_change_version_deleted_removes_release() -> Result<()> { + let env = TestEnvironment::new().await?; + let mut conn = env.async_conn().await?; + + let rid_1 = env + .fake_release() + .await + .name("krate") + .version(V1) + .create() + .await?; + env.fake_release() + .await + .name("krate") + .version(V2) + .create() + .await?; + + let added = process_change( + &env, + &IndexChangeV1::VersionDeleted(CrateVersion { + name: KRATE.to_string(), + version: V2.to_string(), + }), + env.config(), + ) + .await?; + + assert!(!added); + let rows = sqlx::query!( + "SELECT id + FROM releases", + ) + .fetch_all(&mut 
*conn) + .await?; + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].id, rid_1.0); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_process_message_dispatches_added_event() -> Result<()> { + let mut config = Config::test_config()?; + config.sqs_dry_run = false; + let env = TestEnvironment::builder().config(config).build().await?; + + process_message( + &env, + env.config(), + &CrateLocks::new(), + &added_event_json("krate", &V1.to_string()), + ) + .await?; + + let queue = env.build_queue()?.queued_crates().await?; + assert_eq!(queue.len(), 1); + assert_eq!(queue[0].name, KRATE); + assert_eq!(queue[0].version, V1); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_process_message_respects_sqs_dry_run() -> Result<()> { + let env = TestEnvironment::new().await?; + + process_message( + &env, + env.config(), + &CrateLocks::new(), + &added_event_json("krate", &V1.to_string()), + ) + .await?; + + assert!(env.build_queue()?.queued_crates().await?.is_empty()); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_process_message_rejects_invalid_json() -> Result<()> { + let env = TestEnvironment::new().await?; + + let err = process_message(&env, env.config(), &CrateLocks::new(), "{not json").await; + + assert!(err.is_err()); + let err = format!("{:?}", err.unwrap_err()); + assert!( + err.contains("error parsing event from json"), + "unexpected error: {err}" + ); + + Ok(()) + } +} From 4955cbb9869ff2c752e4b7017b6da2428f2f597f Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 1 Jul 2026 23:50:47 +0200 Subject: [PATCH 34/74] refactor(watcher): isolate SQS subscriber transport Split the subscriber into poll, handle, and decode layers behind a small SQS client trait so transport behavior can be unit-tested without an emulator. 
--- crates/bin/docs_rs_watcher/src/subscriber.rs | 472 +++++++++++++++---- 1 file changed, 384 insertions(+), 88 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 08e18e736..de79b0f8e 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -13,6 +13,7 @@ use docs_rs_context::Context; use docs_rs_crates_io::events::{IndexChangeEventV1, IndexChangeV1}; use docs_rs_types::KrateName; use docs_rs_utils::retry_async; +use futures_util::future::BoxFuture; use std::time::{Duration, Instant}; use tokio::time; use tracing::{debug, error, instrument, warn}; @@ -41,6 +42,96 @@ const RETRY_DELAY: Duration = Duration::from_secs(30); /// How long to wait before rechecking the priorities of queued crates. const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); +#[derive(Clone, Debug, PartialEq, Eq)] +struct ReceivedMessage { + body: Option, + receipt_handle: Option, +} + +trait SqsClient: Sync { + fn receive_messages<'a>( + &'a self, + queue_url: &'a str, + ) -> BoxFuture<'a, Result>>; + fn delete_message<'a>( + &'a self, + queue_url: &'a str, + receipt_handle: &'a str, + ) -> BoxFuture<'a, Result<()>>; + fn retry_message<'a>( + &'a self, + queue_url: &'a str, + receipt_handle: &'a str, + delay: Duration, + ) -> BoxFuture<'a, Result<()>>; +} + +struct AwsSqsClient { + inner: Client, +} + +impl SqsClient for AwsSqsClient { + fn receive_messages<'a>( + &'a self, + queue_url: &'a str, + ) -> BoxFuture<'a, Result>> { + Box::pin(async move { + let response = self + .inner + .receive_message() + .queue_url(queue_url) + .max_number_of_messages(10) + .wait_time_seconds(WAIT_TIME.as_secs() as i32) + .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) + .send() + .await?; + + Ok(response + .messages() + .iter() + .map(|message| ReceivedMessage { + body: message.body().map(str::to_owned), + receipt_handle: 
message.receipt_handle().map(str::to_owned), + }) + .collect()) + }) + } + + fn delete_message<'a>( + &'a self, + queue_url: &'a str, + receipt_handle: &'a str, + ) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + self.inner + .delete_message() + .queue_url(queue_url) + .receipt_handle(receipt_handle) + .send() + .await?; + Ok(()) + }) + } + + fn retry_message<'a>( + &'a self, + queue_url: &'a str, + receipt_handle: &'a str, + delay: Duration, + ) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + self.inner + .change_message_visibility() + .queue_url(queue_url) + .receipt_handle(receipt_handle) + .visibility_timeout(delay.as_secs() as i32) + .send() + .await?; + Ok(()) + }) + } +} + pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { warn!("missing sqs region or url, disabling crates.io SQS subscriber"); @@ -50,13 +141,25 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R let queue_url = queue_url.to_string(); let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - let client = Client::from_conf( - aws_sdk_sqs::config::Builder::from(&shared_config) - .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) - .region(Region::new(region.clone())) - .build(), - ); + let client = AwsSqsClient { + inner: Client::from_conf( + aws_sdk_sqs::config::Builder::from(&shared_config) + .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) + .region(Region::new(region.clone())) + .build(), + ), + }; + + listen_with_client(&client, &queue_url, config, context, locks).await +} +async fn listen_with_client( + client: &dyn SqsClient, + queue_url: &str, + config: &Config, + context: &Context, + locks: &CrateLocks, +) -> Result<()> { let mut last_priority_recheck = Instant::now(); let queue = context.build_queue()?; @@ -67,88 
+170,13 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R continue; } - let response = match client - .receive_message() - .queue_url(queue_url.clone()) - .max_number_of_messages(10) - .wait_time_seconds(WAIT_TIME.as_secs() as i32) - .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) - .send() - .await - { - Ok(response) => response, - Err(err) => { - error!( - ?err, - queue_url, "error receiving messages from sqs, retrying" - ); - time::sleep(WAIT_TIME).await; - continue; - } - }; - - for message in response.messages() { - let Some(body) = message.body() else { - continue; - }; - - match retry_async( - || async move { process_message(context, config, locks, body).await }, - 3, - ) - .await - { - Ok(_) => { - if let Some(receipt_handle) = message.receipt_handle() { - // mark the message as "done" - if let Err(err) = client - .delete_message() - .queue_url(queue_url.clone()) - .receipt_handle(receipt_handle) - .send() - .await - { - // sqs will redeliver the message after the visibility timeout passed - error!( - ?err, - receipt_handle, queue_url, "error deleting message from queue" - ); - } - } - } - Err(err) => { - error!( - ?err, - ?message, - ?RETRY_DELAY, - body, - "error handling message. Retrying." - ); - - if let Some(receipt_handle) = message.receipt_handle() { - // Don't delete the message. - // It will become visible again after the visibility timeout. - if let Err(err) = client - .change_message_visibility() - .queue_url(queue_url.clone()) - .receipt_handle(receipt_handle) - // retry after some time - .visibility_timeout(RETRY_DELAY.as_secs() as i32) // retry - .send() - .await - { - // this error doesn't really matter, without the changed visibility - // timeout sqs will redeliver after the default visibility timeout. 
- warn!( - ?err, - receipt_handle, - queue_url, - "error setting visibility_timeout for retry" - ); - } - } - } - } + if let Err(err) = poll_once(client, queue_url, context, config, locks).await { + error!( + ?err, + queue_url, "error receiving messages from sqs, retrying" + ); + time::sleep(WAIT_TIME).await; + continue; } if last_priority_recheck.elapsed() >= DELAY_BETWEEN_PRIORITY_RECHECK { @@ -163,6 +191,73 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R } } +async fn poll_once( + client: &dyn SqsClient, + queue_url: &str, + context: &Context, + config: &Config, + locks: &CrateLocks, +) -> Result<()> { + let messages = client.receive_messages(queue_url).await?; + + for message in messages { + handle_message(client, queue_url, &message, context, config, locks).await; + } + + Ok(()) +} + +async fn handle_message( + client: &dyn SqsClient, + queue_url: &str, + message: &ReceivedMessage, + context: &Context, + config: &Config, + locks: &CrateLocks, +) { + let Some(body) = message.body.as_deref() else { + return; + }; + + match retry_async( + || async move { process_message(context, config, locks, body).await }, + 3, + ) + .await + { + Ok(_) => { + if let Some(receipt_handle) = message.receipt_handle.as_deref() + && let Err(err) = client.delete_message(queue_url, receipt_handle).await + { + error!( + ?err, + receipt_handle, queue_url, "error deleting message from queue" + ); + } + } + Err(err) => { + error!( + ?err, + ?message, + ?RETRY_DELAY, + body, + "error handling message. Retrying." 
+ ); + + if let Some(receipt_handle) = message.receipt_handle.as_deref() + && let Err(err) = client + .retry_message(queue_url, receipt_handle, RETRY_DELAY) + .await + { + warn!( + ?err, + receipt_handle, queue_url, "error setting visibility_timeout for retry" + ); + } + } + } +} + #[instrument(skip(context, config, locks))] async fn process_message( context: &Context, @@ -217,10 +312,110 @@ pub(crate) async fn process_change( mod tests { use super::*; use crate::testing::TestEnvironment; + use anyhow::anyhow; use docs_rs_config::AppConfig as _; use docs_rs_crates_io::events::CrateVersion; use docs_rs_types::testing::{KRATE, V1, V2}; use pretty_assertions::assert_eq; + use std::sync::{Arc, Mutex}; + + #[derive(Debug, Clone, PartialEq, Eq)] + enum FakeAction { + Delete { + queue_url: String, + receipt_handle: String, + }, + Retry { + queue_url: String, + receipt_handle: String, + delay: Duration, + }, + } + + #[derive(Clone)] + struct FakeSqsClient { + receive_result: Arc, String>>>, + actions: Arc>>, + delete_error: Arc>>, + retry_error: Arc>>, + } + + impl FakeSqsClient { + fn new() -> Self { + Self::default() + } + + fn with_messages(messages: Vec) -> Self { + Self { + receive_result: Arc::new(Mutex::new(Ok(messages))), + ..Self::default() + } + } + } + + impl Default for FakeSqsClient { + fn default() -> Self { + Self { + receive_result: Arc::new(Mutex::new(Ok(Vec::new()))), + actions: Arc::new(Mutex::new(Vec::new())), + delete_error: Arc::new(Mutex::new(None)), + retry_error: Arc::new(Mutex::new(None)), + } + } + } + + impl SqsClient for FakeSqsClient { + fn receive_messages<'a>( + &'a self, + _queue_url: &'a str, + ) -> BoxFuture<'a, Result>> { + Box::pin(async move { + self.receive_result + .lock() + .unwrap() + .clone() + .map_err(|err| anyhow!(err)) + }) + } + + fn delete_message<'a>( + &'a self, + queue_url: &'a str, + receipt_handle: &'a str, + ) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + self.actions.lock().unwrap().push(FakeAction::Delete 
{ + queue_url: queue_url.to_string(), + receipt_handle: receipt_handle.to_string(), + }); + if let Some(err) = self.delete_error.lock().unwrap().clone() { + Err(anyhow!(err)) + } else { + Ok(()) + } + }) + } + + fn retry_message<'a>( + &'a self, + queue_url: &'a str, + receipt_handle: &'a str, + delay: Duration, + ) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + self.actions.lock().unwrap().push(FakeAction::Retry { + queue_url: queue_url.to_string(), + receipt_handle: receipt_handle.to_string(), + delay, + }); + if let Some(err) = self.retry_error.lock().unwrap().clone() { + Err(anyhow!(err)) + } else { + Ok(()) + } + }) + } + } fn added_event_json(name: &str, version: &str) -> String { format!( @@ -384,4 +579,105 @@ mod tests { Ok(()) } + + #[tokio::test(flavor = "multi_thread")] + async fn test_handle_message_acknowledges_success() -> Result<()> { + let mut config = Config::test_config()?; + config.sqs_dry_run = false; + let env = TestEnvironment::builder().config(config).build().await?; + let client = FakeSqsClient::new(); + + handle_message( + &client, + "https://example.invalid/queue", + &ReceivedMessage { + body: Some(added_event_json("krate", &V1.to_string())), + receipt_handle: Some("receipt-1".to_string()), + }, + &env, + env.config(), + &CrateLocks::new(), + ) + .await; + + assert_eq!( + *client.actions.lock().unwrap(), + vec![FakeAction::Delete { + queue_url: "https://example.invalid/queue".to_string(), + receipt_handle: "receipt-1".to_string(), + }] + ); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_handle_message_retries_failed_processing() -> Result<()> { + let env = TestEnvironment::new().await?; + let client = FakeSqsClient::new(); + + handle_message( + &client, + "https://example.invalid/queue", + &ReceivedMessage { + body: Some("{bad json".to_string()), + receipt_handle: Some("receipt-2".to_string()), + }, + &env, + env.config(), + &CrateLocks::new(), + ) + .await; + + assert_eq!( + 
*client.actions.lock().unwrap(), + vec![FakeAction::Retry { + queue_url: "https://example.invalid/queue".to_string(), + receipt_handle: "receipt-2".to_string(), + delay: RETRY_DELAY, + }] + ); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_poll_once_processes_batch() -> Result<()> { + let mut config = Config::test_config()?; + config.sqs_dry_run = false; + let env = TestEnvironment::builder().config(config).build().await?; + let client = FakeSqsClient::with_messages(vec![ + ReceivedMessage { + body: Some(added_event_json("krate", &V1.to_string())), + receipt_handle: Some("receipt-1".to_string()), + }, + ReceivedMessage { + body: None, + receipt_handle: Some("receipt-ignored".to_string()), + }, + ]); + + poll_once( + &client, + "https://example.invalid/queue", + &env, + env.config(), + &CrateLocks::new(), + ) + .await?; + + let queue = env.build_queue()?.queued_crates().await?; + assert_eq!(queue.len(), 1); + assert_eq!(queue[0].name, KRATE); + assert_eq!(queue[0].version, V1); + assert_eq!( + *client.actions.lock().unwrap(), + vec![FakeAction::Delete { + queue_url: "https://example.invalid/queue".to_string(), + receipt_handle: "receipt-1".to_string(), + }] + ); + + Ok(()) + } } From 59fc37d41a5df3c55f2561515c7612d1974506b1 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:05:05 +0200 Subject: [PATCH 35/74] refactor --- crates/bin/docs_rs_watcher/src/lib.rs | 1 + .../docs_rs_watcher/src/message_queue/mod.rs | 21 ++ .../docs_rs_watcher/src/message_queue/sqs.rs | 90 ++++++++ crates/bin/docs_rs_watcher/src/subscriber.rs | 194 +++--------------- 4 files changed, 138 insertions(+), 168 deletions(-) create mode 100644 crates/bin/docs_rs_watcher/src/message_queue/mod.rs create mode 100644 crates/bin/docs_rs_watcher/src/message_queue/sqs.rs diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 8e8f8a140..342cb5a2c 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ 
b/crates/bin/docs_rs_watcher/src/lib.rs @@ -3,6 +3,7 @@ pub mod consistency; mod db; mod index; pub mod index_watcher; +mod message_queue; mod rebuilds; mod service_metrics; pub mod subscriber; diff --git a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs new file mode 100644 index 000000000..377f69cac --- /dev/null +++ b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs @@ -0,0 +1,21 @@ +use anyhow::Result; +use futures_util::future::BoxFuture; +use std::time::Duration; + +pub(crate) mod sqs; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct ReceivedMessage { + pub(crate) body: Option, + pub(crate) receipt_handle: Option, +} + +pub(crate) trait MessageQueueClient: Sync { + fn receive_messages<'a>(&'a self) -> BoxFuture<'a, Result>>; + fn delete_message<'a>(&'a self, receipt_handle: &'a str) -> BoxFuture<'a, Result<()>>; + fn retry_message<'a>( + &'a self, + receipt_handle: &'a str, + delay: Duration, + ) -> BoxFuture<'a, Result<()>>; +} diff --git a/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs b/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs new file mode 100644 index 000000000..9a46ba3d4 --- /dev/null +++ b/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs @@ -0,0 +1,90 @@ +use crate::{ + message_queue::{MessageQueueClient, ReceivedMessage}, + subscriber::WAIT_TIME, +}; +use anyhow::Result; +use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; +use aws_sdk_sqs::Client; +use futures_util::future::BoxFuture; +use std::time::Duration; +use url::Url; + +/// visibility timeout: +/// should be longer than the longest time our server takes to handle a message. +/// +/// if we fetch a message, and don't delete it in this time, it will be redelivered. 
+const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); + +pub(crate) struct AwsSqsClient { + inner: Client, + queue_url: String, +} + +impl AwsSqsClient { + pub(crate) async fn new(queue_url: &Url, region: impl Into, max_retries: u32) -> Self { + let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; + Self { + queue_url: queue_url.to_string(), + inner: Client::from_conf( + aws_sdk_sqs::config::Builder::from(&shared_config) + .retry_config(RetryConfig::standard().with_max_attempts(max_retries)) + .region(Region::new(region.into())) + .build(), + ), + } + } +} + +impl MessageQueueClient for AwsSqsClient { + fn receive_messages<'a>(&'a self) -> BoxFuture<'a, Result>> { + Box::pin(async move { + let response = self + .inner + .receive_message() + .queue_url(&self.queue_url) + .max_number_of_messages(10) + .wait_time_seconds(WAIT_TIME.as_secs() as i32) + .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) + .send() + .await?; + + Ok(response + .messages() + .iter() + .map(|message| ReceivedMessage { + body: message.body().map(str::to_owned), + receipt_handle: message.receipt_handle().map(str::to_owned), + }) + .collect()) + }) + } + + fn delete_message<'a>(&'a self, receipt_handle: &'a str) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + self.inner + .delete_message() + .queue_url(&self.queue_url) + .receipt_handle(receipt_handle) + .send() + .await?; + Ok(()) + }) + } + + fn retry_message<'a>( + &'a self, + receipt_handle: &'a str, + delay: Duration, + ) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + self.inner + .change_message_visibility() + .queue_url(&self.queue_url) + .receipt_handle(receipt_handle) + .visibility_timeout(delay.as_secs() as i32) + .send() + .await?; + Ok(()) + }) + } +} diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index de79b0f8e..17da87cb6 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ 
b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -4,16 +4,14 @@ use crate::{ process_crate_deleted, process_version_added, process_version_deleted, process_version_yank_status, }, + message_queue::{MessageQueueClient, ReceivedMessage, sqs::AwsSqsClient}, synchronization::CrateLocks, }; use anyhow::{Context as _, Result}; -use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; -use aws_sdk_sqs::Client; use docs_rs_context::Context; use docs_rs_crates_io::events::{IndexChangeEventV1, IndexChangeV1}; use docs_rs_types::KrateName; use docs_rs_utils::retry_async; -use futures_util::future::BoxFuture; use std::time::{Duration, Instant}; use tokio::time; use tracing::{debug, error, instrument, warn}; @@ -21,16 +19,10 @@ use tracing::{debug, error, instrument, warn}; // TODO: // * when should we run deprioritize_workspaces ? -/// visibility timeout: -/// should be longer than the longest time our server takes to handle a message. -/// -/// if we fetch a message, and don't delete it in this time, it will be redelivered. -const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); - /// wait-time (long polling): /// /// How long should the request be kept open when there are no messages. -const WAIT_TIME: Duration = Duration::from_secs(30); +pub(crate) const WAIT_TIME: Duration = Duration::from_secs(30); /// when one long-polling request is finished, how long to sleep before starting the next? const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); @@ -42,120 +34,19 @@ const RETRY_DELAY: Duration = Duration::from_secs(30); /// How long to wait before rechecking the priorities of queued crates. 
const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); -#[derive(Clone, Debug, PartialEq, Eq)] -struct ReceivedMessage { - body: Option, - receipt_handle: Option, -} - -trait SqsClient: Sync { - fn receive_messages<'a>( - &'a self, - queue_url: &'a str, - ) -> BoxFuture<'a, Result>>; - fn delete_message<'a>( - &'a self, - queue_url: &'a str, - receipt_handle: &'a str, - ) -> BoxFuture<'a, Result<()>>; - fn retry_message<'a>( - &'a self, - queue_url: &'a str, - receipt_handle: &'a str, - delay: Duration, - ) -> BoxFuture<'a, Result<()>>; -} - -struct AwsSqsClient { - inner: Client, -} - -impl SqsClient for AwsSqsClient { - fn receive_messages<'a>( - &'a self, - queue_url: &'a str, - ) -> BoxFuture<'a, Result>> { - Box::pin(async move { - let response = self - .inner - .receive_message() - .queue_url(queue_url) - .max_number_of_messages(10) - .wait_time_seconds(WAIT_TIME.as_secs() as i32) - .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) - .send() - .await?; - - Ok(response - .messages() - .iter() - .map(|message| ReceivedMessage { - body: message.body().map(str::to_owned), - receipt_handle: message.receipt_handle().map(str::to_owned), - }) - .collect()) - }) - } - - fn delete_message<'a>( - &'a self, - queue_url: &'a str, - receipt_handle: &'a str, - ) -> BoxFuture<'a, Result<()>> { - Box::pin(async move { - self.inner - .delete_message() - .queue_url(queue_url) - .receipt_handle(receipt_handle) - .send() - .await?; - Ok(()) - }) - } - - fn retry_message<'a>( - &'a self, - queue_url: &'a str, - receipt_handle: &'a str, - delay: Duration, - ) -> BoxFuture<'a, Result<()>> { - Box::pin(async move { - self.inner - .change_message_visibility() - .queue_url(queue_url) - .receipt_handle(receipt_handle) - .visibility_timeout(delay.as_secs() as i32) - .send() - .await?; - Ok(()) - }) - } -} - pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, 
&config.sqs_queue_url) else { warn!("missing sqs region or url, disabling crates.io SQS subscriber"); return Ok(()); }; - let queue_url = queue_url.to_string(); - - let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - let client = AwsSqsClient { - inner: Client::from_conf( - aws_sdk_sqs::config::Builder::from(&shared_config) - .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) - .region(Region::new(region.clone())) - .build(), - ), - }; + let client = AwsSqsClient::new(queue_url, region, config.aws_sdk_max_retries).await; - listen_with_client(&client, &queue_url, config, context, locks).await + listen_with_client(&client, config, context, locks).await } async fn listen_with_client( - client: &dyn SqsClient, - queue_url: &str, + client: &dyn MessageQueueClient, config: &Config, context: &Context, locks: &CrateLocks, @@ -170,11 +61,8 @@ async fn listen_with_client( continue; } - if let Err(err) = poll_once(client, queue_url, context, config, locks).await { - error!( - ?err, - queue_url, "error receiving messages from sqs, retrying" - ); + if let Err(err) = poll_once(client, context, config, locks).await { + error!(?err,); time::sleep(WAIT_TIME).await; continue; } @@ -192,24 +80,22 @@ async fn listen_with_client( } async fn poll_once( - client: &dyn SqsClient, - queue_url: &str, + client: &dyn MessageQueueClient, context: &Context, config: &Config, locks: &CrateLocks, ) -> Result<()> { - let messages = client.receive_messages(queue_url).await?; + let messages = client.receive_messages().await?; for message in messages { - handle_message(client, queue_url, &message, context, config, locks).await; + handle_message(client, &message, context, config, locks).await; } Ok(()) } async fn handle_message( - client: &dyn SqsClient, - queue_url: &str, + client: &dyn MessageQueueClient, message: &ReceivedMessage, context: &Context, config: &Config, @@ -227,12 +113,9 @@ async fn handle_message( { Ok(_) => { if let 
Some(receipt_handle) = message.receipt_handle.as_deref() - && let Err(err) = client.delete_message(queue_url, receipt_handle).await + && let Err(err) = client.delete_message(receipt_handle).await { - error!( - ?err, - receipt_handle, queue_url, "error deleting message from queue" - ); + error!(?err, receipt_handle, "error deleting message from queue"); } } Err(err) => { @@ -245,13 +128,11 @@ async fn handle_message( ); if let Some(receipt_handle) = message.receipt_handle.as_deref() - && let Err(err) = client - .retry_message(queue_url, receipt_handle, RETRY_DELAY) - .await + && let Err(err) = client.retry_message(receipt_handle, RETRY_DELAY).await { warn!( ?err, - receipt_handle, queue_url, "error setting visibility_timeout for retry" + receipt_handle, "error setting visibility_timeout for retry" ); } } @@ -316,31 +197,30 @@ mod tests { use docs_rs_config::AppConfig as _; use docs_rs_crates_io::events::CrateVersion; use docs_rs_types::testing::{KRATE, V1, V2}; + use futures_util::future::BoxFuture; use pretty_assertions::assert_eq; use std::sync::{Arc, Mutex}; #[derive(Debug, Clone, PartialEq, Eq)] enum FakeAction { Delete { - queue_url: String, receipt_handle: String, }, Retry { - queue_url: String, receipt_handle: String, delay: Duration, }, } #[derive(Clone)] - struct FakeSqsClient { + struct FakeMessageQueueClient { receive_result: Arc, String>>>, actions: Arc>>, delete_error: Arc>>, retry_error: Arc>>, } - impl FakeSqsClient { + impl FakeMessageQueueClient { fn new() -> Self { Self::default() } @@ -353,7 +233,7 @@ mod tests { } } - impl Default for FakeSqsClient { + impl Default for FakeMessageQueueClient { fn default() -> Self { Self { receive_result: Arc::new(Mutex::new(Ok(Vec::new()))), @@ -364,11 +244,8 @@ mod tests { } } - impl SqsClient for FakeSqsClient { - fn receive_messages<'a>( - &'a self, - _queue_url: &'a str, - ) -> BoxFuture<'a, Result>> { + impl MessageQueueClient for FakeMessageQueueClient { + fn receive_messages<'a>(&'a self) -> BoxFuture<'a, 
Result>> { Box::pin(async move { self.receive_result .lock() @@ -378,14 +255,9 @@ mod tests { }) } - fn delete_message<'a>( - &'a self, - queue_url: &'a str, - receipt_handle: &'a str, - ) -> BoxFuture<'a, Result<()>> { + fn delete_message<'a>(&'a self, receipt_handle: &'a str) -> BoxFuture<'a, Result<()>> { Box::pin(async move { self.actions.lock().unwrap().push(FakeAction::Delete { - queue_url: queue_url.to_string(), receipt_handle: receipt_handle.to_string(), }); if let Some(err) = self.delete_error.lock().unwrap().clone() { @@ -398,13 +270,11 @@ mod tests { fn retry_message<'a>( &'a self, - queue_url: &'a str, receipt_handle: &'a str, delay: Duration, ) -> BoxFuture<'a, Result<()>> { Box::pin(async move { self.actions.lock().unwrap().push(FakeAction::Retry { - queue_url: queue_url.to_string(), receipt_handle: receipt_handle.to_string(), delay, }); @@ -585,11 +455,10 @@ mod tests { let mut config = Config::test_config()?; config.sqs_dry_run = false; let env = TestEnvironment::builder().config(config).build().await?; - let client = FakeSqsClient::new(); + let client = FakeMessageQueueClient::new(); handle_message( &client, - "https://example.invalid/queue", &ReceivedMessage { body: Some(added_event_json("krate", &V1.to_string())), receipt_handle: Some("receipt-1".to_string()), @@ -603,7 +472,6 @@ mod tests { assert_eq!( *client.actions.lock().unwrap(), vec![FakeAction::Delete { - queue_url: "https://example.invalid/queue".to_string(), receipt_handle: "receipt-1".to_string(), }] ); @@ -614,11 +482,10 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_handle_message_retries_failed_processing() -> Result<()> { let env = TestEnvironment::new().await?; - let client = FakeSqsClient::new(); + let client = FakeMessageQueueClient::new(); handle_message( &client, - "https://example.invalid/queue", &ReceivedMessage { body: Some("{bad json".to_string()), receipt_handle: Some("receipt-2".to_string()), @@ -632,7 +499,6 @@ mod tests { assert_eq!( 
*client.actions.lock().unwrap(), vec![FakeAction::Retry { - queue_url: "https://example.invalid/queue".to_string(), receipt_handle: "receipt-2".to_string(), delay: RETRY_DELAY, }] @@ -646,7 +512,7 @@ mod tests { let mut config = Config::test_config()?; config.sqs_dry_run = false; let env = TestEnvironment::builder().config(config).build().await?; - let client = FakeSqsClient::with_messages(vec![ + let client = FakeMessageQueueClient::with_messages(vec![ ReceivedMessage { body: Some(added_event_json("krate", &V1.to_string())), receipt_handle: Some("receipt-1".to_string()), @@ -657,14 +523,7 @@ mod tests { }, ]); - poll_once( - &client, - "https://example.invalid/queue", - &env, - env.config(), - &CrateLocks::new(), - ) - .await?; + poll_once(&client, &env, env.config(), &CrateLocks::new()).await?; let queue = env.build_queue()?.queued_crates().await?; assert_eq!(queue.len(), 1); @@ -673,7 +532,6 @@ mod tests { assert_eq!( *client.actions.lock().unwrap(), vec![FakeAction::Delete { - queue_url: "https://example.invalid/queue".to_string(), receipt_handle: "receipt-1".to_string(), }] ); From 876f0836f6da44bd2783fcb63517d5e123d4de6a Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:15:07 +0200 Subject: [PATCH 36/74] refactor --- Cargo.lock | 1 + Cargo.toml | 1 + crates/bin/docs_rs_watcher/Cargo.toml | 1 + crates/bin/docs_rs_watcher/src/main.rs | 2 + .../docs_rs_watcher/src/message_queue/mod.rs | 12 +-- .../docs_rs_watcher/src/message_queue/sqs.rs | 82 ++++++++----------- crates/bin/docs_rs_watcher/src/subscriber.rs | 62 ++++++-------- .../lib/docs_rs_repository_stats/Cargo.toml | 2 +- 8 files changed, 72 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ee007bb0..26e4cc92e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2483,6 +2483,7 @@ name = "docs_rs_watcher" version = "0.6.0" dependencies = [ "anyhow", + "async-trait", "aws-config", "aws-sdk-sqs", "clap", diff --git a/Cargo.toml b/Cargo.toml index dd6c3544e..5c741a5e6 100644 
--- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ edition = "2024" [workspace.dependencies] anyhow = { version = "1.0.42", features = ["backtrace"] } askama = "0.16.0" +async-trait = "0.1.89" async-stream = "0.3.5" # The default `rustls` feature pulls in the legacy hyper 0.14 + rustls 0.21 # stack via `aws-smithy-runtime/tls-rustls`, which includes the vulnerable diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index 64844d680..0e5d2bf4b 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -9,6 +9,7 @@ edition.workspace = true [dependencies] anyhow = { workspace = true } aws-config = { workspace = true } +async-trait = { workspace = true } aws-sdk-sqs = { version = "1.99.0", default-features = false, features = ["default-https-client", "rt-tokio"] } clap = { workspace = true } # NOTE: on the new infra, switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index d4138a375..e5a276a9b 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -1,3 +1,5 @@ +#![recursion_limit = "256"] + use anyhow::{Context as _, Result}; use clap::{Parser, Subcommand}; use docs_rs_config::AppConfig as _; diff --git a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs index 377f69cac..f186a40d7 100644 --- a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs +++ b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs @@ -1,5 +1,4 @@ use anyhow::Result; -use futures_util::future::BoxFuture; use std::time::Duration; pub(crate) mod sqs; @@ -10,12 +9,9 @@ pub(crate) struct ReceivedMessage { pub(crate) receipt_handle: Option, } +#[async_trait::async_trait] pub(crate) trait MessageQueueClient: Sync { - fn receive_messages<'a>(&'a self) -> BoxFuture<'a, Result>>; - fn 
delete_message<'a>(&'a self, receipt_handle: &'a str) -> BoxFuture<'a, Result<()>>; - fn retry_message<'a>( - &'a self, - receipt_handle: &'a str, - delay: Duration, - ) -> BoxFuture<'a, Result<()>>; + async fn receive_messages(&self) -> Result>; + async fn delete_message(&self, receipt_handle: &str) -> Result<()>; + async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()>; } diff --git a/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs b/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs index 9a46ba3d4..267d959cd 100644 --- a/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs +++ b/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs @@ -5,7 +5,6 @@ use crate::{ use anyhow::Result; use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; use aws_sdk_sqs::Client; -use futures_util::future::BoxFuture; use std::time::Duration; use url::Url; @@ -35,56 +34,47 @@ impl AwsSqsClient { } } +#[async_trait::async_trait] impl MessageQueueClient for AwsSqsClient { - fn receive_messages<'a>(&'a self) -> BoxFuture<'a, Result>> { - Box::pin(async move { - let response = self - .inner - .receive_message() - .queue_url(&self.queue_url) - .max_number_of_messages(10) - .wait_time_seconds(WAIT_TIME.as_secs() as i32) - .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) - .send() - .await?; + async fn receive_messages(&self) -> Result> { + let response = self + .inner + .receive_message() + .queue_url(&self.queue_url) + .max_number_of_messages(10) + .wait_time_seconds(WAIT_TIME.as_secs() as i32) + .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) + .send() + .await?; - Ok(response - .messages() - .iter() - .map(|message| ReceivedMessage { - body: message.body().map(str::to_owned), - receipt_handle: message.receipt_handle().map(str::to_owned), - }) - .collect()) - }) + Ok(response + .messages() + .iter() + .map(|message| ReceivedMessage { + body: message.body().map(str::to_owned), + receipt_handle: 
message.receipt_handle().map(str::to_owned), + }) + .collect()) } - fn delete_message<'a>(&'a self, receipt_handle: &'a str) -> BoxFuture<'a, Result<()>> { - Box::pin(async move { - self.inner - .delete_message() - .queue_url(&self.queue_url) - .receipt_handle(receipt_handle) - .send() - .await?; - Ok(()) - }) + async fn delete_message(&self, receipt_handle: &str) -> Result<()> { + self.inner + .delete_message() + .queue_url(&self.queue_url) + .receipt_handle(receipt_handle) + .send() + .await?; + Ok(()) } - fn retry_message<'a>( - &'a self, - receipt_handle: &'a str, - delay: Duration, - ) -> BoxFuture<'a, Result<()>> { - Box::pin(async move { - self.inner - .change_message_visibility() - .queue_url(&self.queue_url) - .receipt_handle(receipt_handle) - .visibility_timeout(delay.as_secs() as i32) - .send() - .await?; - Ok(()) - }) + async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()> { + self.inner + .change_message_visibility() + .queue_url(&self.queue_url) + .receipt_handle(receipt_handle) + .visibility_timeout(delay.as_secs() as i32) + .send() + .await?; + Ok(()) } } diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 17da87cb6..9d553e7ff 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -197,7 +197,6 @@ mod tests { use docs_rs_config::AppConfig as _; use docs_rs_crates_io::events::CrateVersion; use docs_rs_types::testing::{KRATE, V1, V2}; - use futures_util::future::BoxFuture; use pretty_assertions::assert_eq; use std::sync::{Arc, Mutex}; @@ -244,46 +243,37 @@ mod tests { } } + #[async_trait::async_trait] impl MessageQueueClient for FakeMessageQueueClient { - fn receive_messages<'a>(&'a self) -> BoxFuture<'a, Result>> { - Box::pin(async move { - self.receive_result - .lock() - .unwrap() - .clone() - .map_err(|err| anyhow!(err)) - }) + async fn receive_messages(&self) -> Result> { + self.receive_result + .lock() + 
.unwrap() + .clone() + .map_err(|err| anyhow!(err)) } - fn delete_message<'a>(&'a self, receipt_handle: &'a str) -> BoxFuture<'a, Result<()>> { - Box::pin(async move { - self.actions.lock().unwrap().push(FakeAction::Delete { - receipt_handle: receipt_handle.to_string(), - }); - if let Some(err) = self.delete_error.lock().unwrap().clone() { - Err(anyhow!(err)) - } else { - Ok(()) - } - }) + async fn delete_message(&self, receipt_handle: &str) -> Result<()> { + self.actions.lock().unwrap().push(FakeAction::Delete { + receipt_handle: receipt_handle.to_string(), + }); + if let Some(err) = self.delete_error.lock().unwrap().clone() { + Err(anyhow!(err)) + } else { + Ok(()) + } } - fn retry_message<'a>( - &'a self, - receipt_handle: &'a str, - delay: Duration, - ) -> BoxFuture<'a, Result<()>> { - Box::pin(async move { - self.actions.lock().unwrap().push(FakeAction::Retry { - receipt_handle: receipt_handle.to_string(), - delay, - }); - if let Some(err) = self.retry_error.lock().unwrap().clone() { - Err(anyhow!(err)) - } else { - Ok(()) - } - }) + async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()> { + self.actions.lock().unwrap().push(FakeAction::Retry { + receipt_handle: receipt_handle.to_string(), + delay, + }); + if let Some(err) = self.retry_error.lock().unwrap().clone() { + Err(anyhow!(err)) + } else { + Ok(()) + } } } diff --git a/crates/lib/docs_rs_repository_stats/Cargo.toml b/crates/lib/docs_rs_repository_stats/Cargo.toml index 6b3c9f46e..5626e4416 100644 --- a/crates/lib/docs_rs_repository_stats/Cargo.toml +++ b/crates/lib/docs_rs_repository_stats/Cargo.toml @@ -7,7 +7,7 @@ edition = "2024" [dependencies] anyhow = { workspace = true } -async-trait = "0.1.89" +async-trait = { workspace = true } chrono = { workspace = true } docs_rs_cargo_metadata = { path = "../docs_rs_cargo_metadata" } docs_rs_config = { path = "../docs_rs_config" } From c06a1e5f0f8fd31d4d63987046b80e8d787d34f0 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 
2 Jul 2026 00:22:45 +0200 Subject: [PATCH 37/74] rescursion --- crates/bin/cratesfyi/src/main.rs | 2 + .../docs_rs_watcher/src/message_queue/fake.rs | 83 ++++++++++++++++++ .../docs_rs_watcher/src/message_queue/mod.rs | 2 + crates/bin/docs_rs_watcher/src/subscriber.rs | 84 +------------------ 4 files changed, 91 insertions(+), 80 deletions(-) create mode 100644 crates/bin/docs_rs_watcher/src/message_queue/fake.rs diff --git a/crates/bin/cratesfyi/src/main.rs b/crates/bin/cratesfyi/src/main.rs index ccdb6c767..e6f65026f 100644 --- a/crates/bin/cratesfyi/src/main.rs +++ b/crates/bin/cratesfyi/src/main.rs @@ -1,3 +1,5 @@ +#![recursion_limit = "256"] + use anyhow::Result; use clap::Parser; use cratesfyi::daemon::start_daemon; diff --git a/crates/bin/docs_rs_watcher/src/message_queue/fake.rs b/crates/bin/docs_rs_watcher/src/message_queue/fake.rs new file mode 100644 index 000000000..463238daa --- /dev/null +++ b/crates/bin/docs_rs_watcher/src/message_queue/fake.rs @@ -0,0 +1,83 @@ +use crate::message_queue::{MessageQueueClient, ReceivedMessage}; +use anyhow::{Result, anyhow}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum FakeAction { + Delete { + receipt_handle: String, + }, + Retry { + receipt_handle: String, + delay: Duration, + }, +} + +#[derive(Clone)] +pub(crate) struct FakeMessageQueueClient { + pub(crate) receive_result: Arc, String>>>, + pub(crate) actions: Arc>>, + pub(crate) delete_error: Arc>>, + pub(crate) retry_error: Arc>>, +} + +impl FakeMessageQueueClient { + pub(crate) fn new() -> Self { + Self::default() + } + + pub(crate) fn with_messages(messages: Vec) -> Self { + Self { + receive_result: Arc::new(Mutex::new(Ok(messages))), + ..Self::default() + } + } +} + +impl Default for FakeMessageQueueClient { + fn default() -> Self { + Self { + receive_result: Arc::new(Mutex::new(Ok(Vec::new()))), + actions: Arc::new(Mutex::new(Vec::new())), + delete_error: Arc::new(Mutex::new(None)), + 
retry_error: Arc::new(Mutex::new(None)), + } + } +} + +#[async_trait::async_trait] +impl MessageQueueClient for FakeMessageQueueClient { + async fn receive_messages(&self) -> Result> { + self.receive_result + .lock() + .unwrap() + .clone() + .map_err(|err| anyhow!(err)) + } + + async fn delete_message(&self, receipt_handle: &str) -> Result<()> { + self.actions.lock().unwrap().push(FakeAction::Delete { + receipt_handle: receipt_handle.to_string(), + }); + if let Some(err) = self.delete_error.lock().unwrap().clone() { + Err(anyhow!(err)) + } else { + Ok(()) + } + } + + async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()> { + self.actions.lock().unwrap().push(FakeAction::Retry { + receipt_handle: receipt_handle.to_string(), + delay, + }); + if let Some(err) = self.retry_error.lock().unwrap().clone() { + Err(anyhow!(err)) + } else { + Ok(()) + } + } +} diff --git a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs index f186a40d7..424d31343 100644 --- a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs +++ b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs @@ -1,6 +1,8 @@ use anyhow::Result; use std::time::Duration; +#[cfg(test)] +pub(crate) mod fake; pub(crate) mod sqs; #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 9d553e7ff..26ca00167 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -192,90 +192,14 @@ pub(crate) async fn process_change( #[cfg(test)] mod tests { use super::*; - use crate::testing::TestEnvironment; - use anyhow::anyhow; + use crate::{ + message_queue::fake::{FakeAction, FakeMessageQueueClient}, + testing::TestEnvironment, + }; use docs_rs_config::AppConfig as _; use docs_rs_crates_io::events::CrateVersion; use docs_rs_types::testing::{KRATE, V1, V2}; use pretty_assertions::assert_eq; - use 
std::sync::{Arc, Mutex}; - - #[derive(Debug, Clone, PartialEq, Eq)] - enum FakeAction { - Delete { - receipt_handle: String, - }, - Retry { - receipt_handle: String, - delay: Duration, - }, - } - - #[derive(Clone)] - struct FakeMessageQueueClient { - receive_result: Arc, String>>>, - actions: Arc>>, - delete_error: Arc>>, - retry_error: Arc>>, - } - - impl FakeMessageQueueClient { - fn new() -> Self { - Self::default() - } - - fn with_messages(messages: Vec) -> Self { - Self { - receive_result: Arc::new(Mutex::new(Ok(messages))), - ..Self::default() - } - } - } - - impl Default for FakeMessageQueueClient { - fn default() -> Self { - Self { - receive_result: Arc::new(Mutex::new(Ok(Vec::new()))), - actions: Arc::new(Mutex::new(Vec::new())), - delete_error: Arc::new(Mutex::new(None)), - retry_error: Arc::new(Mutex::new(None)), - } - } - } - - #[async_trait::async_trait] - impl MessageQueueClient for FakeMessageQueueClient { - async fn receive_messages(&self) -> Result> { - self.receive_result - .lock() - .unwrap() - .clone() - .map_err(|err| anyhow!(err)) - } - - async fn delete_message(&self, receipt_handle: &str) -> Result<()> { - self.actions.lock().unwrap().push(FakeAction::Delete { - receipt_handle: receipt_handle.to_string(), - }); - if let Some(err) = self.delete_error.lock().unwrap().clone() { - Err(anyhow!(err)) - } else { - Ok(()) - } - } - - async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()> { - self.actions.lock().unwrap().push(FakeAction::Retry { - receipt_handle: receipt_handle.to_string(), - delay, - }); - if let Some(err) = self.retry_error.lock().unwrap().clone() { - Err(anyhow!(err)) - } else { - Ok(()) - } - } - } fn added_event_json(name: &str, version: &str) -> String { format!( From 5a9ef5759646cab7800d902f5eea5e16488a6ea9 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:36:06 +0200 Subject: [PATCH 38/74] refactor(watcher): simplify subscriber flow Replace the transport trait and helper fan-out 
with a smaller listen/handle/process structure. Keep tests at the decision boundary with MessageOutcome and remove the recursion-limit workaround. --- Cargo.lock | 1 - crates/bin/docs_rs_watcher/Cargo.toml | 1 - crates/bin/docs_rs_watcher/src/lib.rs | 1 - crates/bin/docs_rs_watcher/src/main.rs | 2 - .../docs_rs_watcher/src/message_queue/fake.rs | 83 ------ .../docs_rs_watcher/src/message_queue/mod.rs | 19 -- .../docs_rs_watcher/src/message_queue/sqs.rs | 80 ------ crates/bin/docs_rs_watcher/src/subscriber.rs | 249 ++++++++++-------- 8 files changed, 137 insertions(+), 299 deletions(-) delete mode 100644 crates/bin/docs_rs_watcher/src/message_queue/fake.rs delete mode 100644 crates/bin/docs_rs_watcher/src/message_queue/mod.rs delete mode 100644 crates/bin/docs_rs_watcher/src/message_queue/sqs.rs diff --git a/Cargo.lock b/Cargo.lock index 26e4cc92e..8ee007bb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2483,7 +2483,6 @@ name = "docs_rs_watcher" version = "0.6.0" dependencies = [ "anyhow", - "async-trait", "aws-config", "aws-sdk-sqs", "clap", diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index 0e5d2bf4b..64844d680 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -9,7 +9,6 @@ edition.workspace = true [dependencies] anyhow = { workspace = true } aws-config = { workspace = true } -async-trait = { workspace = true } aws-sdk-sqs = { version = "1.99.0", default-features = false, features = ["default-https-client", "rt-tokio"] } clap = { workspace = true } # NOTE: on the new infra, switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 342cb5a2c..8e8f8a140 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -3,7 +3,6 @@ pub mod consistency; mod db; mod index; pub mod index_watcher; -mod message_queue; 
mod rebuilds; mod service_metrics; pub mod subscriber; diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index e5a276a9b..d4138a375 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -1,5 +1,3 @@ -#![recursion_limit = "256"] - use anyhow::{Context as _, Result}; use clap::{Parser, Subcommand}; use docs_rs_config::AppConfig as _; diff --git a/crates/bin/docs_rs_watcher/src/message_queue/fake.rs b/crates/bin/docs_rs_watcher/src/message_queue/fake.rs deleted file mode 100644 index 463238daa..000000000 --- a/crates/bin/docs_rs_watcher/src/message_queue/fake.rs +++ /dev/null @@ -1,83 +0,0 @@ -use crate::message_queue::{MessageQueueClient, ReceivedMessage}; -use anyhow::{Result, anyhow}; -use std::{ - sync::{Arc, Mutex}, - time::Duration, -}; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum FakeAction { - Delete { - receipt_handle: String, - }, - Retry { - receipt_handle: String, - delay: Duration, - }, -} - -#[derive(Clone)] -pub(crate) struct FakeMessageQueueClient { - pub(crate) receive_result: Arc, String>>>, - pub(crate) actions: Arc>>, - pub(crate) delete_error: Arc>>, - pub(crate) retry_error: Arc>>, -} - -impl FakeMessageQueueClient { - pub(crate) fn new() -> Self { - Self::default() - } - - pub(crate) fn with_messages(messages: Vec) -> Self { - Self { - receive_result: Arc::new(Mutex::new(Ok(messages))), - ..Self::default() - } - } -} - -impl Default for FakeMessageQueueClient { - fn default() -> Self { - Self { - receive_result: Arc::new(Mutex::new(Ok(Vec::new()))), - actions: Arc::new(Mutex::new(Vec::new())), - delete_error: Arc::new(Mutex::new(None)), - retry_error: Arc::new(Mutex::new(None)), - } - } -} - -#[async_trait::async_trait] -impl MessageQueueClient for FakeMessageQueueClient { - async fn receive_messages(&self) -> Result> { - self.receive_result - .lock() - .unwrap() - .clone() - .map_err(|err| anyhow!(err)) - } - - async fn delete_message(&self, 
receipt_handle: &str) -> Result<()> { - self.actions.lock().unwrap().push(FakeAction::Delete { - receipt_handle: receipt_handle.to_string(), - }); - if let Some(err) = self.delete_error.lock().unwrap().clone() { - Err(anyhow!(err)) - } else { - Ok(()) - } - } - - async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()> { - self.actions.lock().unwrap().push(FakeAction::Retry { - receipt_handle: receipt_handle.to_string(), - delay, - }); - if let Some(err) = self.retry_error.lock().unwrap().clone() { - Err(anyhow!(err)) - } else { - Ok(()) - } - } -} diff --git a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs b/crates/bin/docs_rs_watcher/src/message_queue/mod.rs deleted file mode 100644 index 424d31343..000000000 --- a/crates/bin/docs_rs_watcher/src/message_queue/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -use anyhow::Result; -use std::time::Duration; - -#[cfg(test)] -pub(crate) mod fake; -pub(crate) mod sqs; - -#[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ReceivedMessage { - pub(crate) body: Option, - pub(crate) receipt_handle: Option, -} - -#[async_trait::async_trait] -pub(crate) trait MessageQueueClient: Sync { - async fn receive_messages(&self) -> Result>; - async fn delete_message(&self, receipt_handle: &str) -> Result<()>; - async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()>; -} diff --git a/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs b/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs deleted file mode 100644 index 267d959cd..000000000 --- a/crates/bin/docs_rs_watcher/src/message_queue/sqs.rs +++ /dev/null @@ -1,80 +0,0 @@ -use crate::{ - message_queue::{MessageQueueClient, ReceivedMessage}, - subscriber::WAIT_TIME, -}; -use anyhow::Result; -use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; -use aws_sdk_sqs::Client; -use std::time::Duration; -use url::Url; - -/// visibility timeout: -/// should be longer than the longest time our server takes to handle a message. 
-/// -/// if we fetch a message, and don't delete it in this time, it will be redelivered. -const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); - -pub(crate) struct AwsSqsClient { - inner: Client, - queue_url: String, -} - -impl AwsSqsClient { - pub(crate) async fn new(queue_url: &Url, region: impl Into, max_retries: u32) -> Self { - let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - Self { - queue_url: queue_url.to_string(), - inner: Client::from_conf( - aws_sdk_sqs::config::Builder::from(&shared_config) - .retry_config(RetryConfig::standard().with_max_attempts(max_retries)) - .region(Region::new(region.into())) - .build(), - ), - } - } -} - -#[async_trait::async_trait] -impl MessageQueueClient for AwsSqsClient { - async fn receive_messages(&self) -> Result> { - let response = self - .inner - .receive_message() - .queue_url(&self.queue_url) - .max_number_of_messages(10) - .wait_time_seconds(WAIT_TIME.as_secs() as i32) - .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) - .send() - .await?; - - Ok(response - .messages() - .iter() - .map(|message| ReceivedMessage { - body: message.body().map(str::to_owned), - receipt_handle: message.receipt_handle().map(str::to_owned), - }) - .collect()) - } - - async fn delete_message(&self, receipt_handle: &str) -> Result<()> { - self.inner - .delete_message() - .queue_url(&self.queue_url) - .receipt_handle(receipt_handle) - .send() - .await?; - Ok(()) - } - - async fn retry_message(&self, receipt_handle: &str, delay: Duration) -> Result<()> { - self.inner - .change_message_visibility() - .queue_url(&self.queue_url) - .receipt_handle(receipt_handle) - .visibility_timeout(delay.as_secs() as i32) - .send() - .await?; - Ok(()) - } -} diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 26ca00167..ffaaa6e92 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -4,10 +4,11 
@@ use crate::{ process_crate_deleted, process_version_added, process_version_deleted, process_version_yank_status, }, - message_queue::{MessageQueueClient, ReceivedMessage, sqs::AwsSqsClient}, synchronization::CrateLocks, }; use anyhow::{Context as _, Result}; +use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; +use aws_sdk_sqs::Client; use docs_rs_context::Context; use docs_rs_crates_io::events::{IndexChangeEventV1, IndexChangeV1}; use docs_rs_types::KrateName; @@ -34,25 +35,34 @@ const RETRY_DELAY: Duration = Duration::from_secs(30); /// How long to wait before rechecking the priorities of queued crates. const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); +/// visibility timeout: +/// should be longer than the longest time our server takes to handle a message. +/// +/// if we fetch a message, and don't delete it in this time, it will be redelivered. +const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ReceivedMessage { + body: Option, + receipt_handle: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum MessageOutcome { + Ack, + RetryLater(Duration), + Ignore, +} + pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { warn!("missing sqs region or url, disabling crates.io SQS subscriber"); return Ok(()); }; - - let client = AwsSqsClient::new(queue_url, region, config.aws_sdk_max_retries).await; - - listen_with_client(&client, config, context, locks).await -} - -async fn listen_with_client( - client: &dyn MessageQueueClient, - config: &Config, - context: &Context, - locks: &CrateLocks, -) -> Result<()> { let mut last_priority_recheck = Instant::now(); let queue = context.build_queue()?; + let client = build_client(region, config.aws_sdk_max_retries).await; + let queue_url = queue_url.to_string(); loop { if queue.is_locked().await? 
{ @@ -61,10 +71,46 @@ async fn listen_with_client( continue; } - if let Err(err) = poll_once(client, context, config, locks).await { - error!(?err,); - time::sleep(WAIT_TIME).await; - continue; + let messages = match receive_messages(&client, &queue_url).await { + Ok(messages) => messages, + Err(err) => { + error!( + ?err, + queue_url, "error receiving messages from sqs, retrying" + ); + time::sleep(WAIT_TIME).await; + continue; + } + }; + + for message in messages { + match handle_message(context, config, locks, message.body.as_deref()).await { + MessageOutcome::Ack => { + if let Some(receipt_handle) = message.receipt_handle.as_deref() + && let Err(err) = delete_message(&client, &queue_url, receipt_handle).await + { + error!(?err, receipt_handle, "error deleting message from queue"); + } + } + MessageOutcome::RetryLater(delay) => { + error!( + ?message, + ?delay, + body = message.body.as_deref().unwrap_or_default(), + "error handling message. Retrying." + ); + if let Some(receipt_handle) = message.receipt_handle.as_deref() + && let Err(err) = + retry_message(&client, &queue_url, receipt_handle, delay).await + { + warn!( + ?err, + receipt_handle, "error setting visibility_timeout for retry" + ); + } + } + MessageOutcome::Ignore => {} + } } if last_priority_recheck.elapsed() >= DELAY_BETWEEN_PRIORITY_RECHECK { @@ -79,30 +125,71 @@ async fn listen_with_client( } } -async fn poll_once( - client: &dyn MessageQueueClient, - context: &Context, - config: &Config, - locks: &CrateLocks, -) -> Result<()> { - let messages = client.receive_messages().await?; +async fn build_client(region: &str, max_retries: u32) -> Client { + let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - for message in messages { - handle_message(client, &message, context, config, locks).await; - } + Client::from_conf( + aws_sdk_sqs::config::Builder::from(&shared_config) + .retry_config(RetryConfig::standard().with_max_attempts(max_retries)) + 
.region(Region::new(region.to_string())) + .build(), + ) +} + +async fn receive_messages(client: &Client, queue_url: &str) -> Result> { + let response = client + .receive_message() + .queue_url(queue_url) + .max_number_of_messages(10) + .wait_time_seconds(WAIT_TIME.as_secs() as i32) + .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) + .send() + .await?; + + Ok(response + .messages() + .iter() + .map(|message| ReceivedMessage { + body: message.body().map(str::to_owned), + receipt_handle: message.receipt_handle().map(str::to_owned), + }) + .collect()) +} +async fn delete_message(client: &Client, queue_url: &str, receipt_handle: &str) -> Result<()> { + client + .delete_message() + .queue_url(queue_url) + .receipt_handle(receipt_handle) + .send() + .await?; + Ok(()) +} + +async fn retry_message( + client: &Client, + queue_url: &str, + receipt_handle: &str, + delay: Duration, +) -> Result<()> { + client + .change_message_visibility() + .queue_url(queue_url) + .receipt_handle(receipt_handle) + .visibility_timeout(delay.as_secs() as i32) + .send() + .await?; Ok(()) } async fn handle_message( - client: &dyn MessageQueueClient, - message: &ReceivedMessage, context: &Context, config: &Config, locks: &CrateLocks, -) { - let Some(body) = message.body.as_deref() else { - return; + body: Option<&str>, +) -> MessageOutcome { + let Some(body) = body else { + return MessageOutcome::Ignore; }; match retry_async( @@ -111,30 +198,15 @@ async fn handle_message( ) .await { - Ok(_) => { - if let Some(receipt_handle) = message.receipt_handle.as_deref() - && let Err(err) = client.delete_message(receipt_handle).await - { - error!(?err, receipt_handle, "error deleting message from queue"); - } - } + Ok(_) => MessageOutcome::Ack, Err(err) => { error!( ?err, - ?message, ?RETRY_DELAY, body, "error handling message. Retrying." 
); - - if let Some(receipt_handle) = message.receipt_handle.as_deref() - && let Err(err) = client.retry_message(receipt_handle, RETRY_DELAY).await - { - warn!( - ?err, - receipt_handle, "error setting visibility_timeout for retry" - ); - } + MessageOutcome::RetryLater(RETRY_DELAY) } } } @@ -192,10 +264,7 @@ pub(crate) async fn process_change( #[cfg(test)] mod tests { use super::*; - use crate::{ - message_queue::fake::{FakeAction, FakeMessageQueueClient}, - testing::TestEnvironment, - }; + use crate::testing::TestEnvironment; use docs_rs_config::AppConfig as _; use docs_rs_crates_io::events::CrateVersion; use docs_rs_types::testing::{KRATE, V1, V2}; @@ -369,25 +438,16 @@ mod tests { let mut config = Config::test_config()?; config.sqs_dry_run = false; let env = TestEnvironment::builder().config(config).build().await?; - let client = FakeMessageQueueClient::new(); - - handle_message( - &client, - &ReceivedMessage { - body: Some(added_event_json("krate", &V1.to_string())), - receipt_handle: Some("receipt-1".to_string()), - }, - &env, - env.config(), - &CrateLocks::new(), - ) - .await; assert_eq!( - *client.actions.lock().unwrap(), - vec![FakeAction::Delete { - receipt_handle: "receipt-1".to_string(), - }] + handle_message( + &env, + env.config(), + &CrateLocks::new(), + Some(&added_event_json("krate", &V1.to_string())), + ) + .await, + MessageOutcome::Ack ); Ok(()) @@ -396,59 +456,24 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_handle_message_retries_failed_processing() -> Result<()> { let env = TestEnvironment::new().await?; - let client = FakeMessageQueueClient::new(); - - handle_message( - &client, - &ReceivedMessage { - body: Some("{bad json".to_string()), - receipt_handle: Some("receipt-2".to_string()), - }, - &env, - env.config(), - &CrateLocks::new(), - ) - .await; assert_eq!( - *client.actions.lock().unwrap(), - vec![FakeAction::Retry { - receipt_handle: "receipt-2".to_string(), - delay: RETRY_DELAY, - }] + handle_message(&env, 
env.config(), &CrateLocks::new(), Some("{bad json")).await, + MessageOutcome::RetryLater(RETRY_DELAY) ); Ok(()) } #[tokio::test(flavor = "multi_thread")] - async fn test_poll_once_processes_batch() -> Result<()> { - let mut config = Config::test_config()?; - config.sqs_dry_run = false; - let env = TestEnvironment::builder().config(config).build().await?; - let client = FakeMessageQueueClient::with_messages(vec![ - ReceivedMessage { - body: Some(added_event_json("krate", &V1.to_string())), - receipt_handle: Some("receipt-1".to_string()), - }, - ReceivedMessage { - body: None, - receipt_handle: Some("receipt-ignored".to_string()), - }, - ]); - - poll_once(&client, &env, env.config(), &CrateLocks::new()).await?; + async fn test_handle_message_ignores_missing_body() -> Result<()> { + let env = TestEnvironment::new().await?; - let queue = env.build_queue()?.queued_crates().await?; - assert_eq!(queue.len(), 1); - assert_eq!(queue[0].name, KRATE); - assert_eq!(queue[0].version, V1); assert_eq!( - *client.actions.lock().unwrap(), - vec![FakeAction::Delete { - receipt_handle: "receipt-1".to_string(), - }] + handle_message(&env, env.config(), &CrateLocks::new(), None).await, + MessageOutcome::Ignore ); + assert!(env.build_queue()?.queued_crates().await?.is_empty()); Ok(()) } From f80a63408ad86eeeb4bc2e606acdd8eda9ce1365 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:39:23 +0200 Subject: [PATCH 39/74] kk --- Cargo.toml | 1 - crates/bin/docs_rs_watcher/src/subscriber.rs | 81 +++++++------------ .../lib/docs_rs_repository_stats/Cargo.toml | 2 +- 3 files changed, 31 insertions(+), 53 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5c741a5e6..dd6c3544e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,6 @@ edition = "2024" [workspace.dependencies] anyhow = { version = "1.0.42", features = ["backtrace"] } askama = "0.16.0" -async-trait = "0.1.89" async-stream = "0.3.5" # The default `rustls` feature pulls in the legacy hyper 0.14 + rustls 0.21 # 
stack via `aws-smithy-runtime/tls-rustls`, which includes the vulnerable diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index ffaaa6e92..abcdd57c4 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -71,8 +71,23 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R continue; } - let messages = match receive_messages(&client, &queue_url).await { - Ok(messages) => messages, + let messages = match client + .receive_message() + .queue_url(&queue_url) + .max_number_of_messages(10) + .wait_time_seconds(WAIT_TIME.as_secs() as i32) + .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) + .send() + .await + { + Ok(response) => response + .messages() + .iter() + .map(|message| ReceivedMessage { + body: message.body().map(str::to_owned), + receipt_handle: message.receipt_handle().map(str::to_owned), + }) + .collect::>(), Err(err) => { error!( ?err, @@ -87,7 +102,12 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R match handle_message(context, config, locks, message.body.as_deref()).await { MessageOutcome::Ack => { if let Some(receipt_handle) = message.receipt_handle.as_deref() - && let Err(err) = delete_message(&client, &queue_url, receipt_handle).await + && let Err(err) = client + .delete_message() + .queue_url(&queue_url) + .receipt_handle(receipt_handle) + .send() + .await { error!(?err, receipt_handle, "error deleting message from queue"); } @@ -100,8 +120,13 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R "error handling message. Retrying." 
); if let Some(receipt_handle) = message.receipt_handle.as_deref() - && let Err(err) = - retry_message(&client, &queue_url, receipt_handle, delay).await + && let Err(err) = client + .change_message_visibility() + .queue_url(&queue_url) + .receipt_handle(receipt_handle) + .visibility_timeout(delay.as_secs() as i32) + .send() + .await { warn!( ?err, @@ -136,52 +161,6 @@ async fn build_client(region: &str, max_retries: u32) -> Client { ) } -async fn receive_messages(client: &Client, queue_url: &str) -> Result> { - let response = client - .receive_message() - .queue_url(queue_url) - .max_number_of_messages(10) - .wait_time_seconds(WAIT_TIME.as_secs() as i32) - .visibility_timeout(VISIBILITY_TIMEOUT.as_secs() as i32) - .send() - .await?; - - Ok(response - .messages() - .iter() - .map(|message| ReceivedMessage { - body: message.body().map(str::to_owned), - receipt_handle: message.receipt_handle().map(str::to_owned), - }) - .collect()) -} - -async fn delete_message(client: &Client, queue_url: &str, receipt_handle: &str) -> Result<()> { - client - .delete_message() - .queue_url(queue_url) - .receipt_handle(receipt_handle) - .send() - .await?; - Ok(()) -} - -async fn retry_message( - client: &Client, - queue_url: &str, - receipt_handle: &str, - delay: Duration, -) -> Result<()> { - client - .change_message_visibility() - .queue_url(queue_url) - .receipt_handle(receipt_handle) - .visibility_timeout(delay.as_secs() as i32) - .send() - .await?; - Ok(()) -} - async fn handle_message( context: &Context, config: &Config, diff --git a/crates/lib/docs_rs_repository_stats/Cargo.toml b/crates/lib/docs_rs_repository_stats/Cargo.toml index 5626e4416..6b3c9f46e 100644 --- a/crates/lib/docs_rs_repository_stats/Cargo.toml +++ b/crates/lib/docs_rs_repository_stats/Cargo.toml @@ -7,7 +7,7 @@ edition = "2024" [dependencies] anyhow = { workspace = true } -async-trait = { workspace = true } +async-trait = "0.1.89" chrono = { workspace = true } docs_rs_cargo_metadata = { path = 
"../docs_rs_cargo_metadata" } docs_rs_config = { path = "../docs_rs_config" } From b6ef36653c03358a42c72dac7bc18eb781f775a2 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:41:05 +0200 Subject: [PATCH 40/74] kk --- crates/bin/docs_rs_watcher/src/subscriber.rs | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index abcdd57c4..f6d83fdac 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -41,12 +41,6 @@ const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); /// if we fetch a message, and don't delete it in this time, it will be redelivered. const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); -#[derive(Clone, Debug, PartialEq, Eq)] -struct ReceivedMessage { - body: Option, - receipt_handle: Option, -} - #[derive(Debug, Clone, PartialEq, Eq)] enum MessageOutcome { Ack, @@ -80,14 +74,7 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R .send() .await { - Ok(response) => response - .messages() - .iter() - .map(|message| ReceivedMessage { - body: message.body().map(str::to_owned), - receipt_handle: message.receipt_handle().map(str::to_owned), - }) - .collect::>(), + Ok(response) => response.messages().to_vec(), Err(err) => { error!( ?err, From 28015cf964f63d3fbfbadd231a94499b723c6628 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:42:41 +0200 Subject: [PATCH 41/74] kk --- crates/bin/docs_rs_watcher/src/subscriber.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index f6d83fdac..b1a6bca05 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -55,7 +55,14 @@ pub async fn listen(config: &Config, 
context: &Context, locks: &CrateLocks) -> R }; let mut last_priority_recheck = Instant::now(); let queue = context.build_queue()?; - let client = build_client(region, config.aws_sdk_max_retries).await; + let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; + let client = Client::from_conf( + aws_sdk_sqs::config::Builder::from(&shared_config) + .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) + .region(Region::new(region.to_string())) + .build(), + ); + let queue_url = queue_url.to_string(); loop { @@ -137,17 +144,6 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R } } -async fn build_client(region: &str, max_retries: u32) -> Client { - let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - - Client::from_conf( - aws_sdk_sqs::config::Builder::from(&shared_config) - .retry_config(RetryConfig::standard().with_max_attempts(max_retries)) - .region(Region::new(region.to_string())) - .build(), - ) -} - async fn handle_message( context: &Context, config: &Config, From e70bafa7167e3e60fb75c509ecc3b9196da47f47 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:46:49 +0200 Subject: [PATCH 42/74] feat(watcher): add local ElasticMQ support Wire ElasticMQ into docker compose for watcher development and allow the watcher SQS client to target a custom endpoint URL. --- .docker.env.sample | 5 +++++ crates/bin/docs_rs_watcher/src/config.rs | 2 ++ crates/bin/docs_rs_watcher/src/subscriber.rs | 13 +++++++------ docker-compose.yml | 19 +++++++++++++++++++ dockerfiles/elasticmq.conf | 18 ++++++++++++++++++ 5 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 dockerfiles/elasticmq.conf diff --git a/.docker.env.sample b/.docker.env.sample index 743cc0613..9488e78e1 100644 --- a/.docker.env.sample +++ b/.docker.env.sample @@ -10,3 +10,8 @@ DOCSRS_TOOLCHAIN=nightly # for the registry watcher, automatically queued reqbuidls. 
DOCSRS_MAX_QUEUED_REBUILDS: 10 +# optional overrides for local ElasticMQ testing +# DOCSRS_SQS_QUEUE_URL=http://elasticmq:9324/queue/docsrs-events +# DOCSRS_SQS_REGION=elasticmq +# DOCSRS_SQS_ENDPOINT_URL=http://elasticmq:9324 +# DOCS_RS_SQS_DRY_RUN=false diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index c14f77590..565277d4f 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -11,6 +11,7 @@ pub struct Config { pub registry_dry_run: bool, pub sqs_queue_url: Option, pub sqs_region: Option, + pub sqs_endpoint_url: Option, pub sqs_dry_run: bool, pub aws_sdk_max_retries: u32, @@ -39,6 +40,7 @@ impl AppConfig for Config { sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, sqs_region: maybe_env("DOCSRS_SQS_REGION")?, + sqs_endpoint_url: maybe_env("DOCSRS_SQS_ENDPOINT_URL")?, sqs_dry_run: env("DOCS_RS_SQS_DRY_RUN", true)?, aws_sdk_max_retries: env("DOCSRS_AWS_SDK_MAX_RETRIES", 6u32)?, diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index b1a6bca05..4d2fe4d49 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -56,12 +56,13 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R let mut last_priority_recheck = Instant::now(); let queue = context.build_queue()?; let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; - let client = Client::from_conf( - aws_sdk_sqs::config::Builder::from(&shared_config) - .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) - .region(Region::new(region.to_string())) - .build(), - ); + let mut client_config = aws_sdk_sqs::config::Builder::from(&shared_config) + .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) + .region(Region::new(region.to_string())); + if let Some(endpoint_url) = &config.sqs_endpoint_url { 
+ client_config = client_config.endpoint_url(endpoint_url); + } + let client = Client::from_conf(client_config.build()); let queue_url = queue_url.to_string(); diff --git a/docker-compose.yml b/docker-compose.yml index b94f73248..c8acd94ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ # - repo-stats updater # - cdn invalidator # - release-rebuild-enqueuer +# * `elasticmq` -> local SQS-compatible queue for watcher testing # # optional profile: `metrics`: # * `opentelemetry` -> a debug opentelemetry receiver @@ -120,6 +121,7 @@ x-registry-watcher: ®istry-watcher depends_on: - db - s3 + - elasticmq volumes: - "./ignored/docker-registry-watcher/prefix:/opt/docsrs/prefix" - crates-io-index:/opt/docsrs/crates.io-index @@ -132,6 +134,10 @@ x-registry-watcher: ®istry-watcher REGISTRY_INDEX_PATH: /opt/docsrs/crates.io-index # configure the rebuild-queuer, DOCSRS_MAX_QUEUED_REBUILDS: ${DOCSRS_MAX_QUEUED_REBUILDS:-10} + DOCSRS_SQS_QUEUE_URL: ${DOCSRS_SQS_QUEUE_URL:-http://elasticmq:9324/queue/docsrs-events} + DOCSRS_SQS_REGION: ${DOCSRS_SQS_REGION:-elasticmq} + DOCSRS_SQS_ENDPOINT_URL: ${DOCSRS_SQS_ENDPOINT_URL:-http://elasticmq:9324} + DOCS_RS_SQS_DRY_RUN: ${DOCS_RS_SQS_DRY_RUN:-false} env_file: - .docker.env @@ -170,6 +176,19 @@ services: # watcher-CLI should not be run as background daemon, just manually - manual + elasticmq: + image: softwaremill/elasticmq + ports: + - "127.0.0.1:9324:9324" + - "127.0.0.1:9325:9325" + volumes: + - "./dockerfiles/elasticmq.conf:/opt/elasticmq.conf:ro" + command: ["-Dconfig.file=/opt/elasticmq.conf"] + profiles: + - watcher + - full + - manual + builder-a: <<: *builder volumes: diff --git a/dockerfiles/elasticmq.conf b/dockerfiles/elasticmq.conf new file mode 100644 index 000000000..fb77fac15 --- /dev/null +++ b/dockerfiles/elasticmq.conf @@ -0,0 +1,18 @@ +include classpath("application.conf") + +node-address { + protocol = http + host = "*" + port = 9324 + context-path = "" +} + +rest-sqs { + enabled = true + 
bind-port = 9324 + bind-hostname = "0.0.0.0" +} + +queues { + docsrs-events { } +} From 697c831ff86cd2667b444213559a778d58558135 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:51:10 +0200 Subject: [PATCH 43/74] no ui --- docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index c8acd94ec..e0b2e93c5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -180,7 +180,6 @@ services: image: softwaremill/elasticmq ports: - "127.0.0.1:9324:9324" - - "127.0.0.1:9325:9325" volumes: - "./dockerfiles/elasticmq.conf:/opt/elasticmq.conf:ro" command: ["-Dconfig.file=/opt/elasticmq.conf"] From 72121335eefb62e03431023a67f444b73b79bfb1 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:58:17 +0200 Subject: [PATCH 44/74] logs --- crates/bin/docs_rs_watcher/src/main.rs | 2 ++ crates/bin/docs_rs_watcher/src/subscriber.rs | 3 +++ 2 files changed, 5 insertions(+) diff --git a/crates/bin/docs_rs_watcher/src/main.rs b/crates/bin/docs_rs_watcher/src/main.rs index d4138a375..e5a276a9b 100644 --- a/crates/bin/docs_rs_watcher/src/main.rs +++ b/crates/bin/docs_rs_watcher/src/main.rs @@ -1,3 +1,5 @@ +#![recursion_limit = "256"] + use anyhow::{Context as _, Result}; use clap::{Parser, Subcommand}; use docs_rs_config::AppConfig as _; diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 4d2fe4d49..bb9408068 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -55,6 +55,8 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R }; let mut last_priority_recheck = Instant::now(); let queue = context.build_queue()?; + + debug!("creating SQS client..."); let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; let mut client_config = aws_sdk_sqs::config::Builder::from(&shared_config) 
.retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) @@ -73,6 +75,7 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R continue; } + debug!("receiving messages..."); let messages = match client .receive_message() .queue_url(&queue_url) From fe8ed9d9e58ea0db43074b9e8068e57a394d9c45 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 00:59:01 +0200 Subject: [PATCH 45/74] fix wait time --- crates/bin/docs_rs_watcher/src/subscriber.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index bb9408068..3bd8f46d5 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -23,7 +23,8 @@ use tracing::{debug, error, instrument, warn}; /// wait-time (long polling): /// /// How long should the request be kept open when there are no messages. -pub(crate) const WAIT_TIME: Duration = Duration::from_secs(30); +/// SQS only accepts values in the range 0..=20 seconds. +pub(crate) const WAIT_TIME: Duration = Duration::from_secs(20); /// when one long-polling request is finished, how long to sleep before starting the next? const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); From 405014999a750e92a328bdd58a85c77b967e687e Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:04:27 +0200 Subject: [PATCH 46/74] dummy --- justfiles/utils.just | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/justfiles/utils.just b/justfiles/utils.just index 866f6a677..412fb1df6 100644 --- a/justfiles/utils.just +++ b/justfiles/utils.just @@ -1,9 +1,15 @@ _ensure_db_and_s3_are_running: _touch-docker-env - # dependencies in the docker-cli file are ignored - # here. Instead we explicitly start any dependent services first. 
- docker compose up -d db s3 --wait + # dependencies in the docker-cli file are ignored + # here. Instead we explicitly start any dependent services first. + docker compose up -d db s3 --wait _touch-docker-env: - touch .docker.env + touch .docker.env +send-sqs-payload: + aws sqs send-message \ + --endpoint-url $DOCSRS_SQS_ENDPOINT_URL \ + --region elasticmq \ + --queue-url $DOCSRS_SQS_QUEUE_URL \ + --message-body '{"id":"evt_1","occurred_at":"2026-07-02T12:00:00Z","type":"added","payload":{"name":"demo-crate","vers":"1.2.3"}}' From 28ebe7173c983bd2b4b3de960a5f13d00987316b Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:12:41 +0200 Subject: [PATCH 47/74] sort --- crates/bin/docs_rs_watcher/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index 64844d680..fc7cdb60d 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -17,8 +17,8 @@ crates-index = { version = "3.0.0", default-features = false, features = ["git", crates-index-diff = { version = "31.0.0", default-features = false, features = ["http-reqwest", "max-performance", "semver"] } docs_rs_build_queue = { path = "../../lib/docs_rs_build_queue" } docs_rs_config = { path = "../../lib/docs_rs_config" } -docs_rs_crates_io = { path = "../../lib/docs_rs_crates_io" } docs_rs_context = { path = "../../lib/docs_rs_context" } +docs_rs_crates_io = { path = "../../lib/docs_rs_crates_io" } docs_rs_database = { path = "../../lib/docs_rs_database" } docs_rs_env_vars = { path = "../../lib/docs_rs_env_vars" } docs_rs_fastly = { path = "../../lib/docs_rs_fastly" } @@ -30,9 +30,9 @@ docs_rs_types = { path = "../../lib/docs_rs_types" } docs_rs_utils = { path = "../../lib/docs_rs_utils" } futures-util = { workspace = true } itertools = { workspace = true } -serde_json = { workspace = true } opentelemetry = { workspace = true } rayon = "1.6.1" +serde_json = { 
workspace = true } sqlx = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } From 11c8ecba158740bd5a270d8c82290e647165a87c Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:19:17 +0200 Subject: [PATCH 48/74] no locks --- ...29e35750da2bea995fb0c433893addb253214.json | 15 ------- ...86e6c7eed78a2d4f01f316772949f5d688f42.json | 14 +++++++ ...83bd73f73ecc1cf9b4fc24c457d5f26fd582b.json | 29 ++++++++++++++ ...f4254924db75fc7f76a6c78d17a3fc06d663.json} | 7 ++-- ...29e35750da2bea995fb0c433893addb253214.json | 15 ------- ...86e6c7eed78a2d4f01f316772949f5d688f42.json | 14 +++++++ ...83bd73f73ecc1cf9b4fc24c457d5f26fd582b.json | 29 ++++++++++++++ ...f4254924db75fc7f76a6c78d17a3fc06d663.json} | 7 ++-- crates/bin/docs_rs_watcher/src/config.rs | 6 +-- .../bin/docs_rs_watcher/src/index_watcher.rs | 20 ++-------- crates/bin/docs_rs_watcher/src/lib.rs | 15 +++---- crates/bin/docs_rs_watcher/src/subscriber.rs | 40 ++++++------------- .../docs_rs_watcher/src/synchronization.rs | 30 -------------- 13 files changed, 115 insertions(+), 126 deletions(-) delete mode 100644 .sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json create mode 100644 .sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json create mode 100644 .sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json rename .sqlx/{query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json => query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json} (65%) delete mode 100644 crates/bin/cratesfyi/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json create mode 100644 crates/bin/cratesfyi/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json create mode 100644 crates/bin/cratesfyi/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json rename 
crates/bin/cratesfyi/.sqlx/{query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json => query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json} (65%) delete mode 100644 crates/bin/docs_rs_watcher/src/synchronization.rs diff --git a/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json b/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json deleted file mode 100644 index 2109f69cf..000000000 --- a/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "DELETE FROM builds_logs bl\n USING builds b\n JOIN releases r ON b.rid = r.id\n WHERE bl.build_id = b.id AND r.crate_id = $1 AND r.version = $2;", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Int4", - "Text" - ] - }, - "nullable": [] - }, - "hash": "2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214" -} diff --git a/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json b/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json new file mode 100644 index 000000000..89cbc239c --- /dev/null +++ b/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM builds_logs bl\n USING builds b\n WHERE bl.build_id = b.id AND b.rid = $1;", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42" +} diff --git a/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json b/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json new file mode 100644 index 000000000..effb9f9ac --- /dev/null +++ b/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json @@ -0,0 +1,29 @@ +{ 
+ "db_name": "PostgreSQL", + "query": "SELECT id FROM releases WHERE crate_id = $1 AND version = $2", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int4", + "origin": { + "Table": { + "table": "releases", + "name": "id" + } + } + } + ], + "parameters": { + "Left": [ + "Int4", + "Text" + ] + }, + "nullable": [ + false + ] + }, + "hash": "7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b" +} diff --git a/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json b/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json similarity index 65% rename from .sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json rename to .sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json index 380bd9ea6..1f16abd55 100644 --- a/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json +++ b/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "DELETE FROM releases WHERE crate_id = $1 AND version = $2 RETURNING is_library", + "query": "DELETE FROM releases WHERE id = $1 RETURNING is_library", "describe": { "columns": [ { @@ -17,13 +17,12 @@ ], "parameters": { "Left": [ - "Int4", - "Text" + "Int4" ] }, "nullable": [ true ] }, - "hash": "014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948" + "hash": "fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663" } diff --git a/crates/bin/cratesfyi/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json b/crates/bin/cratesfyi/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json deleted file mode 100644 index 2109f69cf..000000000 --- a/crates/bin/cratesfyi/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "DELETE 
FROM builds_logs bl\n USING builds b\n JOIN releases r ON b.rid = r.id\n WHERE bl.build_id = b.id AND r.crate_id = $1 AND r.version = $2;", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Int4", - "Text" - ] - }, - "nullable": [] - }, - "hash": "2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214" -} diff --git a/crates/bin/cratesfyi/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json b/crates/bin/cratesfyi/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json new file mode 100644 index 000000000..89cbc239c --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM builds_logs bl\n USING builds b\n WHERE bl.build_id = b.id AND b.rid = $1;", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42" +} diff --git a/crates/bin/cratesfyi/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json b/crates/bin/cratesfyi/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json new file mode 100644 index 000000000..effb9f9ac --- /dev/null +++ b/crates/bin/cratesfyi/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json @@ -0,0 +1,29 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT id FROM releases WHERE crate_id = $1 AND version = $2", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int4", + "origin": { + "Table": { + "table": "releases", + "name": "id" + } + } + } + ], + "parameters": { + "Left": [ + "Int4", + "Text" + ] + }, + "nullable": [ + false + ] + }, + "hash": "7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b" +} diff --git 
a/crates/bin/cratesfyi/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json b/crates/bin/cratesfyi/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json similarity index 65% rename from crates/bin/cratesfyi/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json rename to crates/bin/cratesfyi/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json index 380bd9ea6..1f16abd55 100644 --- a/crates/bin/cratesfyi/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json +++ b/crates/bin/cratesfyi/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "DELETE FROM releases WHERE crate_id = $1 AND version = $2 RETURNING is_library", + "query": "DELETE FROM releases WHERE id = $1 RETURNING is_library", "describe": { "columns": [ { @@ -17,13 +17,12 @@ ], "parameters": { "Left": [ - "Int4", - "Text" + "Int4" ] }, "nullable": [ true ] }, - "hash": "014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948" + "hash": "fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663" } diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 565277d4f..3907199ad 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -8,11 +8,10 @@ use url::Url; pub struct Config { pub registry_index_path: PathBuf, pub registry_url: Option, - pub registry_dry_run: bool, pub sqs_queue_url: Option, pub sqs_region: Option, pub sqs_endpoint_url: Option, - pub sqs_dry_run: bool, + pub sqs_active: bool, pub aws_sdk_max_retries: u32, /// How long to wait between registry checks @@ -36,12 +35,11 @@ impl AppConfig for Config { Ok(Self { registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?, registry_url: maybe_env("REGISTRY_URL")?, - registry_dry_run: 
env("DOCS_RS_REGISTRY_DRY_RUN", false)?, sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, sqs_region: maybe_env("DOCSRS_SQS_REGION")?, sqs_endpoint_url: maybe_env("DOCSRS_SQS_ENDPOINT_URL")?, - sqs_dry_run: env("DOCS_RS_SQS_DRY_RUN", true)?, + sqs_active: env("DOCS_RS_SQS_DRY_RUN", true)?, aws_sdk_max_retries: env("DOCSRS_AWS_SDK_MAX_RETRIES", 6u32)?, delay_between_registry_fetches: Duration::from_secs(env::( diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index 98a6febb2..a3ccda247 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -2,7 +2,6 @@ use crate::{ Config, db::{delete_crate, delete_version}, index::Index, - synchronization::CrateLocks, }; use anyhow::{Context as _, Result}; use crates_index_diff::Change; @@ -111,7 +110,6 @@ async fn queue_crate_invalidation(krate: &KrateName, cdn: Option<&Cdn>) { /// Returns the number of crates added pub(crate) async fn get_new_crates( context: &Context, - locks: &CrateLocks, index: &Index, config: &Config, ) -> Result { @@ -135,7 +133,7 @@ pub(crate) async fn get_new_crates( debug!(last_seen_reference=%last_seen_reference, new_reference=%new_reference, "queueing changes"); - let crates_added = process_changes(context, locks, &changes, config).await; + let crates_added = process_changes(context, &changes, config).await; if let Err(err) = context.build_queue()?.deprioritize_workspaces().await { error!(?err, "error deprioritizing workspaces"); @@ -149,22 +147,17 @@ pub(crate) async fn get_new_crates( Ok(crates_added) } -async fn process_changes( - context: &Context, - locks: &CrateLocks, - changes: &Vec, - config: &Config, -) -> usize { +async fn process_changes(context: &Context, changes: &Vec, config: &Config) -> usize { let mut crates_added = 0; for change in changes { debug!(?change, "received change from git index"); - if config.registry_dry_run { + if config.sqs_active { continue; } - 
match process_change(context, locks, change, config).await { + match process_change(context, change, config).await { Ok(added) => { if added { crates_added += 1; @@ -181,7 +174,6 @@ async fn process_changes( /// Process a crate change, returning whether the change was a crate addition or not. pub(crate) async fn process_change( context: &Context, - locks: &CrateLocks, change: &Change, config: &Config, ) -> Result { @@ -192,8 +184,6 @@ pub(crate) async fn process_change( .clone() .try_into()?; - let _guard = locks.lock(crate_version.name.to_string()).await; - match change { Change::Added(_release) => process_version_added(context, &crate_version).await?, Change::AddedAndYanked(_release) => { @@ -553,10 +543,8 @@ mod tests { name: "krate_already_present".parse()?, version: V2, }; - let locks = CrateLocks::new(); let added = process_changes( &env, - &locks, &vec![ // Should be added correctly Change::Added(krate1.into()), diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 8e8f8a140..4bc7abe57 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -6,7 +6,6 @@ pub mod index_watcher; mod rebuilds; mod service_metrics; pub mod subscriber; -pub mod synchronization; #[cfg(test)] mod testing; @@ -15,9 +14,7 @@ pub use db::{delete_crate, delete_version}; pub use index::Index; pub use rebuilds::queue_rebuilds; -use crate::{ - index_watcher::get_new_crates, service_metrics::OtelServiceMetrics, synchronization::CrateLocks, -}; +use crate::{index_watcher::get_new_crates, service_metrics::OtelServiceMetrics}; use anyhow::Result; use docs_rs_context::Context; use docs_rs_utils::start_async_cron; @@ -26,12 +23,10 @@ use tokio::time::{self, Instant}; use tracing::{debug, error, info, trace}; pub async fn watch(config: &Config, context: &Context) { - let locks = CrateLocks::new(); - loop { if let Err(err) = tokio::try_join!( - crate::watch_registry(config, context, &locks), - 
crate::subscriber::listen(config, context, &locks), + crate::watch_registry(config, context), + crate::subscriber::listen(config, context), ) { error!( ?err, @@ -45,7 +40,7 @@ pub async fn watch(config: &Config, context: &Context) { /// Run the registry watcher /// NOTE: this should only be run once, otherwise crates would be added /// to the queue multiple times. -pub async fn watch_registry(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { +pub async fn watch_registry(config: &Config, context: &Context) -> Result<()> { let mut last_gc = Instant::now(); let queue = context.build_queue()?; @@ -57,7 +52,7 @@ pub async fn watch_registry(config: &Config, context: &Context, locks: &CrateLoc debug!("Checking new crates"); let index = Index::from_config(config).await?; - match get_new_crates(context, locks, &index, config).await { + match get_new_crates(context, &index, config).await { Ok(n) => debug!("{} crates added to queue", n), Err(e) => { error!(?e, "Failed to get new crates"); diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 3bd8f46d5..ebc6a48b8 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -4,7 +4,6 @@ use crate::{ process_crate_deleted, process_version_added, process_version_deleted, process_version_yank_status, }, - synchronization::CrateLocks, }; use anyhow::{Context as _, Result}; use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; @@ -49,7 +48,7 @@ enum MessageOutcome { Ignore, } -pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> Result<()> { +pub async fn listen(config: &Config, context: &Context) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { warn!("missing sqs region or url, disabling crates.io SQS subscriber"); return Ok(()); @@ -98,7 +97,7 @@ pub async fn listen(config: &Config, context: &Context, locks: 
&CrateLocks) -> R }; for message in messages { - match handle_message(context, config, locks, message.body.as_deref()).await { + match handle_message(context, config, message.body.as_deref()).await { MessageOutcome::Ack => { if let Some(receipt_handle) = message.receipt_handle.as_deref() && let Err(err) = client @@ -149,18 +148,13 @@ pub async fn listen(config: &Config, context: &Context, locks: &CrateLocks) -> R } } -async fn handle_message( - context: &Context, - config: &Config, - locks: &CrateLocks, - body: Option<&str>, -) -> MessageOutcome { +async fn handle_message(context: &Context, config: &Config, body: Option<&str>) -> MessageOutcome { let Some(body) = body else { return MessageOutcome::Ignore; }; match retry_async( - || async move { process_message(context, config, locks, body).await }, + || async move { process_message(context, config, body).await }, 3, ) .await @@ -178,21 +172,14 @@ async fn handle_message( } } -#[instrument(skip(context, config, locks))] -async fn process_message( - context: &Context, - config: &Config, - locks: &CrateLocks, - body: &str, -) -> Result<()> { +#[instrument(skip(context, config))] +async fn process_message(context: &Context, config: &Config, body: &str) -> Result<()> { let event: IndexChangeEventV1 = serde_json::from_str(body).context("error parsing event from json")?; debug!(?event, "received event from sqs"); - let _guard = locks.lock(event.change.name()).await; - - if !config.sqs_dry_run { + if !config.sqs_active { process_change(context, &event.change, config) .await .context("error processing change")?; @@ -348,13 +335,12 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_process_message_dispatches_added_event() -> Result<()> { let mut config = Config::test_config()?; - config.sqs_dry_run = false; + config.sqs_active = false; let env = TestEnvironment::builder().config(config).build().await?; process_message( &env, env.config(), - &CrateLocks::new(), &added_event_json("krate", &V1.to_string()), ) 
.await?; @@ -374,7 +360,6 @@ mod tests { process_message( &env, env.config(), - &CrateLocks::new(), &added_event_json("krate", &V1.to_string()), ) .await?; @@ -388,7 +373,7 @@ mod tests { async fn test_process_message_rejects_invalid_json() -> Result<()> { let env = TestEnvironment::new().await?; - let err = process_message(&env, env.config(), &CrateLocks::new(), "{not json").await; + let err = process_message(&env, env.config(), "{not json").await; assert!(err.is_err()); let err = format!("{:?}", err.unwrap_err()); @@ -403,14 +388,13 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_handle_message_acknowledges_success() -> Result<()> { let mut config = Config::test_config()?; - config.sqs_dry_run = false; + config.sqs_active = false; let env = TestEnvironment::builder().config(config).build().await?; assert_eq!( handle_message( &env, env.config(), - &CrateLocks::new(), Some(&added_event_json("krate", &V1.to_string())), ) .await, @@ -425,7 +409,7 @@ mod tests { let env = TestEnvironment::new().await?; assert_eq!( - handle_message(&env, env.config(), &CrateLocks::new(), Some("{bad json")).await, + handle_message(&env, env.config(), Some("{bad json")).await, MessageOutcome::RetryLater(RETRY_DELAY) ); @@ -437,7 +421,7 @@ mod tests { let env = TestEnvironment::new().await?; assert_eq!( - handle_message(&env, env.config(), &CrateLocks::new(), None).await, + handle_message(&env, env.config(), None).await, MessageOutcome::Ignore ); assert!(env.build_queue()?.queued_crates().await?.is_empty()); diff --git a/crates/bin/docs_rs_watcher/src/synchronization.rs b/crates/bin/docs_rs_watcher/src/synchronization.rs deleted file mode 100644 index b1720fe91..000000000 --- a/crates/bin/docs_rs_watcher/src/synchronization.rs +++ /dev/null @@ -1,30 +0,0 @@ -use std::{collections::HashMap, sync::Arc}; -use tokio::sync::{Mutex, OwnedMutexGuard}; - -/// shared locks so we can serialize changes to the same crate, -/// for the transition phase where we might get input 
from both -/// the git index and the sqs queue. -#[derive(Clone, Default)] -pub struct CrateLocks { - locks: Arc>>>>, -} - -impl CrateLocks { - pub fn new() -> Self { - Self { - locks: Arc::new(Mutex::new(HashMap::new())), - } - } - - pub async fn lock(&self, crate_name: impl Into) -> OwnedMutexGuard<()> { - let lock = { - let mut locks = self.locks.lock().await; - locks - .entry(crate_name.into()) - .or_insert_with(|| Arc::new(Mutex::new(()))) - .clone() - }; - - lock.lock_owned().await - } -} From b7525670f0a1c595c962c52d12f3c0a89cf86029 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:21:15 +0200 Subject: [PATCH 49/74] config --- crates/bin/docs_rs_watcher/src/config.rs | 2 +- crates/bin/docs_rs_watcher/src/lib.rs | 30 +++++++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 3907199ad..cee913066 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -39,7 +39,7 @@ impl AppConfig for Config { sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, sqs_region: maybe_env("DOCSRS_SQS_REGION")?, sqs_endpoint_url: maybe_env("DOCSRS_SQS_ENDPOINT_URL")?, - sqs_active: env("DOCS_RS_SQS_DRY_RUN", true)?, + sqs_active: env("DOCS_RS_SQS_ACTIVE", false)?, aws_sdk_max_retries: env("DOCSRS_AWS_SDK_MAX_RETRIES", 6u32)?, delay_between_registry_fetches: Duration::from_secs(env::( diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 4bc7abe57..56a72707b 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -24,15 +24,27 @@ use tracing::{debug, error, info, trace}; pub async fn watch(config: &Config, context: &Context) { loop { - if let Err(err) = tokio::try_join!( - crate::watch_registry(config, context), - crate::subscriber::listen(config, context), - ) { - error!( - ?err, - "unexpected error watching registry 
or SQS, will retry" - ); - time::sleep(Duration::from_secs(10)).await; + if config.sqs_active { + if let Err(err) = crate::subscriber::listen(config, context).await { + error!(?err, "unexpected error watching SQS, will retry"); + time::sleep(Duration::from_secs(10)).await; + } + } else { + // intermediate mode: + // - still fetch from git for events + // - listen so SQS, and log the events so we can test SQS connection, and compare events + // + // Later: just SQS + if let Err(err) = tokio::try_join!( + crate::watch_registry(config, context), + crate::subscriber::listen(config, context), + ) { + error!( + ?err, + "unexpected error watching registry or SQS, will retry" + ); + time::sleep(Duration::from_secs(10)).await; + } } } } From 6c6d69437fdf7293203742bd0226c6311c41744a Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:24:32 +0200 Subject: [PATCH 50/74] sqlx --- ...29e35750da2bea995fb0c433893addb253214.json | 15 ---------- ...86e6c7eed78a2d4f01f316772949f5d688f42.json | 14 +++++++++ ...83bd73f73ecc1cf9b4fc24c457d5f26fd582b.json | 29 +++++++++++++++++++ ...f4254924db75fc7f76a6c78d17a3fc06d663.json} | 7 ++--- crates/bin/docs_rs_watcher/src/config.rs | 1 + 5 files changed, 47 insertions(+), 19 deletions(-) delete mode 100644 crates/bin/docs_rs_watcher/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json create mode 100644 crates/bin/docs_rs_watcher/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json create mode 100644 crates/bin/docs_rs_watcher/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json rename crates/bin/docs_rs_watcher/.sqlx/{query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json => query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json} (65%) diff --git a/crates/bin/docs_rs_watcher/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json 
b/crates/bin/docs_rs_watcher/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json deleted file mode 100644 index 2109f69cf..000000000 --- a/crates/bin/docs_rs_watcher/.sqlx/query-2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "DELETE FROM builds_logs bl\n USING builds b\n JOIN releases r ON b.rid = r.id\n WHERE bl.build_id = b.id AND r.crate_id = $1 AND r.version = $2;", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Int4", - "Text" - ] - }, - "nullable": [] - }, - "hash": "2dc065cc08f262c937c54f9cc8629e35750da2bea995fb0c433893addb253214" -} diff --git a/crates/bin/docs_rs_watcher/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json b/crates/bin/docs_rs_watcher/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json new file mode 100644 index 000000000..89cbc239c --- /dev/null +++ b/crates/bin/docs_rs_watcher/.sqlx/query-66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "DELETE FROM builds_logs bl\n USING builds b\n WHERE bl.build_id = b.id AND b.rid = $1;", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4" + ] + }, + "nullable": [] + }, + "hash": "66b0ba6978880b79ce7a179bbe986e6c7eed78a2d4f01f316772949f5d688f42" +} diff --git a/crates/bin/docs_rs_watcher/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json b/crates/bin/docs_rs_watcher/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json new file mode 100644 index 000000000..effb9f9ac --- /dev/null +++ b/crates/bin/docs_rs_watcher/.sqlx/query-7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b.json @@ -0,0 +1,29 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT id FROM releases WHERE crate_id = $1 AND version = $2", + "describe": { + "columns": [ + { 
+ "ordinal": 0, + "name": "id", + "type_info": "Int4", + "origin": { + "Table": { + "table": "releases", + "name": "id" + } + } + } + ], + "parameters": { + "Left": [ + "Int4", + "Text" + ] + }, + "nullable": [ + false + ] + }, + "hash": "7b7dd5795cddcb66b140b57157983bd73f73ecc1cf9b4fc24c457d5f26fd582b" +} diff --git a/crates/bin/docs_rs_watcher/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json b/crates/bin/docs_rs_watcher/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json similarity index 65% rename from crates/bin/docs_rs_watcher/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json rename to crates/bin/docs_rs_watcher/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json index 380bd9ea6..1f16abd55 100644 --- a/crates/bin/docs_rs_watcher/.sqlx/query-014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948.json +++ b/crates/bin/docs_rs_watcher/.sqlx/query-fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "DELETE FROM releases WHERE crate_id = $1 AND version = $2 RETURNING is_library", + "query": "DELETE FROM releases WHERE id = $1 RETURNING is_library", "describe": { "columns": [ { @@ -17,13 +17,12 @@ ], "parameters": { "Left": [ - "Int4", - "Text" + "Int4" ] }, "nullable": [ true ] }, - "hash": "014a054d852f0937191e1a54f742d4b4c454361689fb3841cc12fd7dd1094948" + "hash": "fab139cabc0987a1f2ad706060a3f4254924db75fc7f76a6c78d17a3fc06d663" } diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index cee913066..2ed4109a1 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -11,6 +11,7 @@ pub struct Config { pub sqs_queue_url: Option, pub sqs_region: Option, pub sqs_endpoint_url: Option, + /// temporary, to switch between the sources for the index (git index vs SQS) pub 
sqs_active: bool, pub aws_sdk_max_retries: u32, From af73c65869affa8f5e38c59284e8feb2728a6ce3 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:35:59 +0200 Subject: [PATCH 51/74] config, fixes, comment --- .../bin/docs_rs_watcher/src/index_watcher.rs | 6 +++++- crates/bin/docs_rs_watcher/src/lib.rs | 4 ++++ crates/bin/docs_rs_watcher/src/subscriber.rs | 19 +++++++++---------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index a3ccda247..ef44a78c0 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -153,7 +153,11 @@ async fn process_changes(context: &Context, changes: &Vec, config: &Conf for change in changes { debug!(?change, "received change from git index"); - if config.sqs_active { + if !config.sqs_active { + // just to be safe. + // Generally we don't even start the git-index-watcher when + // SQS is active. + // Will be removed with the git index watcher code when SQS is stable. continue; } diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 56a72707b..70ef74a0a 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -22,6 +22,10 @@ use std::{sync::Arc, time::Duration}; use tokio::time::{self, Instant}; use tracing::{debug, error, info, trace}; +/// main index-watcher / subscriber loop. +/// mostly wraps either the git index watcher loop, or the sqs subscriber loop. +/// Only here so unexpected errors lead to a sentry report & restart instead of +/// the daemon / watcher just stopping. 
pub async fn watch(config: &Config, context: &Context) { loop { if config.sqs_active { diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index ebc6a48b8..5dd52710a 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -16,14 +16,11 @@ use std::time::{Duration, Instant}; use tokio::time; use tracing::{debug, error, instrument, warn}; -// TODO: -// * when should we run deprioritize_workspaces ? - /// wait-time (long polling): /// /// How long should the request be kept open when there are no messages. /// SQS only accepts values in the range 0..=20 seconds. -pub(crate) const WAIT_TIME: Duration = Duration::from_secs(20); +const WAIT_TIME: Duration = Duration::from_secs(20); /// when one long-polling request is finished, how long to sleep before starting the next? const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); @@ -33,12 +30,13 @@ const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); const RETRY_DELAY: Duration = Duration::from_secs(30); /// How long to wait before rechecking the priorities of queued crates. +/// Right now only runs `deprioritize_workspaces`. const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); /// visibility timeout: /// should be longer than the longest time our server takes to handle a message. /// -/// if we fetch a message, and don't delete it in this time, it will be redelivered. +/// If we fetch a message, and don't delete it in this time, it will be redelivered. 
const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); #[derive(Debug, Clone, PartialEq, Eq)] @@ -179,7 +177,7 @@ async fn process_message(context: &Context, config: &Config, body: &str) -> Resu debug!(?event, "received event from sqs"); - if !config.sqs_active { + if config.sqs_active { process_change(context, &event.change, config) .await .context("error processing change")?; @@ -354,8 +352,10 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_process_message_respects_sqs_dry_run() -> Result<()> { - let env = TestEnvironment::new().await?; + async fn test_process_message_respects_sqs_active() -> Result<()> { + let mut config = Config::test_config()?; + config.sqs_active = false; + let env = TestEnvironment::builder().config(config).build().await?; process_message( &env, @@ -387,8 +387,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_handle_message_acknowledges_success() -> Result<()> { - let mut config = Config::test_config()?; - config.sqs_active = false; + let config = Config::test_config()?; let env = TestEnvironment::builder().config(config).build().await?; assert_eq!( From 44c0dd658e0823ffa24edc5e5b55e3721e61ec1a Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:39:55 +0200 Subject: [PATCH 52/74] fix test --- crates/bin/docs_rs_watcher/src/config.rs | 7 +++++++ crates/bin/docs_rs_watcher/src/index_watcher.rs | 2 +- crates/bin/docs_rs_watcher/src/subscriber.rs | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 2ed4109a1..af0bc8539 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -56,4 +56,11 @@ impl AppConfig for Config { repository: docs_rs_repository_stats::Config::from_environment()?, }) } + + #[cfg(test)] + fn test_config() -> Result { + let mut config = Self::from_environment()?; + config.sqs_active = false; + Ok(config) + } 
} diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index ef44a78c0..689213df7 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -153,7 +153,7 @@ async fn process_changes(context: &Context, changes: &Vec, config: &Conf for change in changes { debug!(?change, "received change from git index"); - if !config.sqs_active { + if config.sqs_active { // just to be safe. // Generally we don't even start the git-index-watcher when // SQS is active. diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 5dd52710a..62b3fd33c 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -333,7 +333,7 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_process_message_dispatches_added_event() -> Result<()> { let mut config = Config::test_config()?; - config.sqs_active = false; + config.sqs_active = true; let env = TestEnvironment::builder().config(config).build().await?; process_message( From 8f3bd6e7e630d31b006ee0e7f1cff95137745ac2 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:48:00 +0200 Subject: [PATCH 53/74] nnma --- crates/bin/docs_rs_watcher/src/subscriber.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 62b3fd33c..9e9bf6fb3 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -39,6 +39,8 @@ const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); /// If we fetch a message, and don't delete it in this time, it will be redelivered. const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); +/// Result type for `handle_message`, so we can unit-test it without needing +/// fake SQS. 
#[derive(Debug, Clone, PartialEq, Eq)] enum MessageOutcome { Ack, From 98caa4bce7e11da94cee99f5675057c2fd6a49b8 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:50:57 +0200 Subject: [PATCH 54/74] refactor(watcher): clarify subscriber naming Rename the SQS subscriber entrypoints to better reflect the transport, body-handling, and event-processing layers. --- crates/bin/docs_rs_watcher/src/lib.rs | 4 +- crates/bin/docs_rs_watcher/src/subscriber.rs | 43 +++++++++++--------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 70ef74a0a..9362264ed 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -29,7 +29,7 @@ use tracing::{debug, error, info, trace}; pub async fn watch(config: &Config, context: &Context) { loop { if config.sqs_active { - if let Err(err) = crate::subscriber::listen(config, context).await { + if let Err(err) = crate::subscriber::run_sqs_subscriber(config, context).await { error!(?err, "unexpected error watching SQS, will retry"); time::sleep(Duration::from_secs(10)).await; } @@ -41,7 +41,7 @@ pub async fn watch(config: &Config, context: &Context) { // Later: just SQS if let Err(err) = tokio::try_join!( crate::watch_registry(config, context), - crate::subscriber::listen(config, context), + crate::subscriber::run_sqs_subscriber(config, context), ) { error!( ?err, diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 9e9bf6fb3..baa241e03 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -14,7 +14,7 @@ use docs_rs_types::KrateName; use docs_rs_utils::retry_async; use std::time::{Duration, Instant}; use tokio::time; -use tracing::{debug, error, instrument, warn}; +use tracing::{debug, error, warn}; /// wait-time (long polling): /// @@ -39,7 +39,7 @@ const 
DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); /// If we fetch a message, and don't delete it in this time, it will be redelivered. const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); -/// Result type for `handle_message`, so we can unit-test it without needing +/// Result type for `handle_message_body`, so we can unit-test it without needing /// fake SQS. #[derive(Debug, Clone, PartialEq, Eq)] enum MessageOutcome { @@ -48,7 +48,7 @@ enum MessageOutcome { Ignore, } -pub async fn listen(config: &Config, context: &Context) -> Result<()> { +pub async fn run_sqs_subscriber(config: &Config, context: &Context) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { warn!("missing sqs region or url, disabling crates.io SQS subscriber"); return Ok(()); @@ -97,7 +97,7 @@ pub async fn listen(config: &Config, context: &Context) -> Result<()> { }; for message in messages { - match handle_message(context, config, message.body.as_deref()).await { + match handle_message_body(context, config, message.body.as_deref()).await { MessageOutcome::Ack => { if let Some(receipt_handle) = message.receipt_handle.as_deref() && let Err(err) = client @@ -148,13 +148,17 @@ pub async fn listen(config: &Config, context: &Context) -> Result<()> { } } -async fn handle_message(context: &Context, config: &Config, body: Option<&str>) -> MessageOutcome { +async fn handle_message_body( + context: &Context, + config: &Config, + body: Option<&str>, +) -> MessageOutcome { let Some(body) = body else { return MessageOutcome::Ignore; }; match retry_async( - || async move { process_message(context, config, body).await }, + || async move { process_sqs_event(context, config, body).await }, 3, ) .await @@ -172,8 +176,7 @@ async fn handle_message(context: &Context, config: &Config, body: Option<&str>) } } -#[instrument(skip(context, config))] -async fn process_message(context: &Context, config: &Config, body: &str) -> Result<()> { 
+async fn process_sqs_event(context: &Context, config: &Config, body: &str) -> Result<()> { let event: IndexChangeEventV1 = serde_json::from_str(body).context("error parsing event from json")?; @@ -333,12 +336,12 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_process_message_dispatches_added_event() -> Result<()> { + async fn test_process_sqs_event_dispatches_added_event() -> Result<()> { let mut config = Config::test_config()?; config.sqs_active = true; let env = TestEnvironment::builder().config(config).build().await?; - process_message( + process_sqs_event( &env, env.config(), &added_event_json("krate", &V1.to_string()), @@ -354,12 +357,12 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_process_message_respects_sqs_active() -> Result<()> { + async fn test_process_sqs_event_respects_sqs_active() -> Result<()> { let mut config = Config::test_config()?; config.sqs_active = false; let env = TestEnvironment::builder().config(config).build().await?; - process_message( + process_sqs_event( &env, env.config(), &added_event_json("krate", &V1.to_string()), @@ -372,10 +375,10 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_process_message_rejects_invalid_json() -> Result<()> { + async fn test_process_sqs_event_rejects_invalid_json() -> Result<()> { let env = TestEnvironment::new().await?; - let err = process_message(&env, env.config(), "{not json").await; + let err = process_sqs_event(&env, env.config(), "{not json").await; assert!(err.is_err()); let err = format!("{:?}", err.unwrap_err()); @@ -388,12 +391,12 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_handle_message_acknowledges_success() -> Result<()> { + async fn test_handle_message_body_acknowledges_success() -> Result<()> { let config = Config::test_config()?; let env = TestEnvironment::builder().config(config).build().await?; assert_eq!( - handle_message( + handle_message_body( &env, env.config(), 
Some(&added_event_json("krate", &V1.to_string())), @@ -406,11 +409,11 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_handle_message_retries_failed_processing() -> Result<()> { + async fn test_handle_message_body_retries_failed_processing() -> Result<()> { let env = TestEnvironment::new().await?; assert_eq!( - handle_message(&env, env.config(), Some("{bad json")).await, + handle_message_body(&env, env.config(), Some("{bad json")).await, MessageOutcome::RetryLater(RETRY_DELAY) ); @@ -418,11 +421,11 @@ mod tests { } #[tokio::test(flavor = "multi_thread")] - async fn test_handle_message_ignores_missing_body() -> Result<()> { + async fn test_handle_message_body_ignores_missing_body() -> Result<()> { let env = TestEnvironment::new().await?; assert_eq!( - handle_message(&env, env.config(), None).await, + handle_message_body(&env, env.config(), None).await, MessageOutcome::Ignore ); assert!(env.build_queue()?.queued_crates().await?.is_empty()); From 5b1d257d274f1bf8f6be3e36a0330f26f36531f5 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:53:41 +0200 Subject: [PATCH 55/74] fix(watcher): keep registry watcher alive on SQS errors Supervise the SQS subscriber separately in mixed mode so SQS failures are logged and restarted without stopping the legacy registry watcher. 
--- crates/bin/docs_rs_watcher/src/lib.rs | 36 +++++++++++++++++++++------ 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 9362264ed..8500529ca 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -39,20 +39,40 @@ pub async fn watch(config: &Config, context: &Context) { // - listen so SQS, and log the events so we can test SQS connection, and compare events // // Later: just SQS - if let Err(err) = tokio::try_join!( - crate::watch_registry(config, context), - crate::subscriber::run_sqs_subscriber(config, context), - ) { - error!( - ?err, - "unexpected error watching registry or SQS, will retry" - ); + let sqs_is_configured = config.sqs_region.is_some() && config.sqs_queue_url.is_some(); + + let registry_result = if sqs_is_configured { + tokio::select! { + result = crate::watch_registry(config, context) => result, + _ = supervise_sqs_subscriber(config, context) => unreachable!("SQS supervisor never returns"), + } + } else { + crate::watch_registry(config, context).await + }; + + if let Err(err) = registry_result { + error!(?err, "unexpected error watching registry, will retry"); time::sleep(Duration::from_secs(10)).await; } } } } +async fn supervise_sqs_subscriber(config: &Config, context: &Context) { + loop { + match crate::subscriber::run_sqs_subscriber(config, context).await { + Ok(()) => { + error!("SQS subscriber exited unexpectedly, restarting"); + } + Err(err) => { + error!(?err, "unexpected error watching SQS, restarting"); + } + } + + time::sleep(Duration::from_secs(10)).await; + } +} + /// Run the registry watcher /// NOTE: this should only be run once, otherwise crates would be added /// to the queue multiple times. 
From da0323006203b6ef7990990ce63c55335cd319e6 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 01:59:22 +0200 Subject: [PATCH 56/74] clean --- crates/bin/docs_rs_watcher/src/lib.rs | 36 +++++++-------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 8500529ca..1c2c946f7 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -15,7 +15,7 @@ pub use index::Index; pub use rebuilds::queue_rebuilds; use crate::{index_watcher::get_new_crates, service_metrics::OtelServiceMetrics}; -use anyhow::Result; +use anyhow::{Error, Result}; use docs_rs_context::Context; use docs_rs_utils::start_async_cron; use std::{sync::Arc, time::Duration}; @@ -38,19 +38,16 @@ pub async fn watch(config: &Config, context: &Context) { // - still fetch from git for events // - listen so SQS, and log the events so we can test SQS connection, and compare events // - // Later: just SQS - let sqs_is_configured = config.sqs_region.is_some() && config.sqs_queue_url.is_some(); + // We don't retry on unespected SQS errors yet. - let registry_result = if sqs_is_configured { - tokio::select! { - result = crate::watch_registry(config, context) => result, - _ = supervise_sqs_subscriber(config, context) => unreachable!("SQS supervisor never returns"), + if let (Err(err), _) = tokio::join!(crate::watch_registry(config, context), async { + // unexpected SQS errors are caught here, and we don't retry. + if let Err(err) = crate::subscriber::run_sqs_subscriber(config, context).await { + error!(?err, "error setting up SQS test subscriber"); } - } else { - crate::watch_registry(config, context).await - }; - - if let Err(err) = registry_result { + Ok::<_, Error>(()) + }) { + // unexpected index watcher errors lead to a report & retry. 
error!(?err, "unexpected error watching registry, will retry"); time::sleep(Duration::from_secs(10)).await; } @@ -58,21 +55,6 @@ pub async fn watch(config: &Config, context: &Context) { } } -async fn supervise_sqs_subscriber(config: &Config, context: &Context) { - loop { - match crate::subscriber::run_sqs_subscriber(config, context).await { - Ok(()) => { - error!("SQS subscriber exited unexpectedly, restarting"); - } - Err(err) => { - error!(?err, "unexpected error watching SQS, restarting"); - } - } - - time::sleep(Duration::from_secs(10)).await; - } -} - /// Run the registry watcher /// NOTE: this should only be run once, otherwise crates would be added /// to the queue multiple times. From 405ab70c29e6cb74b8f3f59220faef556a291a46 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 02:02:18 +0200 Subject: [PATCH 57/74] comments --- crates/bin/docs_rs_watcher/src/subscriber.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index baa241e03..de188c065 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -27,16 +27,20 @@ const SLEEP_BETWEEN_REQUESTS: Duration = Duration::from_secs(1); /// when we have an error handling a message, how long should SQS wait until /// it redelivers this message. +/// +/// With FIFO queues, other messages will wait behind. const RETRY_DELAY: Duration = Duration::from_secs(30); -/// How long to wait before rechecking the priorities of queued crates. +/// How regularly to recheck the priorities of queued crates. /// Right now only runs `deprioritize_workspaces`. const DELAY_BETWEEN_PRIORITY_RECHECK: Duration = Duration::from_secs(60); /// visibility timeout: -/// should be longer than the longest time our server takes to handle a message. 
+/// SQS visibility timeout is the period after a consumer receives a message during +/// which that message is hidden from other consumers, and if it is not deleted before +/// the timeout expires, it becomes visible again for redelivery. /// -/// If we fetch a message, and don't delete it in this time, it will be redelivered. +/// Should be longer than the longest time our server takes to handle a message. const VISIBILITY_TIMEOUT: Duration = Duration::from_secs(60); /// Result type for `handle_message_body`, so we can unit-test it without needing From ea9a7b3e499836341dd9ce7000ae2e8f57671136 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 02:11:00 +0200 Subject: [PATCH 58/74] added dummy --- crates/bin/docs_rs_watcher/src/lib.rs | 14 +++++++++++--- crates/bin/docs_rs_watcher/src/metrics.rs | 20 ++++++++++++++++++++ crates/bin/docs_rs_watcher/src/subscriber.rs | 7 ++++++- 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 crates/bin/docs_rs_watcher/src/metrics.rs diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index 1c2c946f7..e683f7d87 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -3,6 +3,7 @@ pub mod consistency; mod db; mod index; pub mod index_watcher; +mod metrics; mod rebuilds; mod service_metrics; pub mod subscriber; @@ -14,7 +15,9 @@ pub use db::{delete_crate, delete_version}; pub use index::Index; pub use rebuilds::queue_rebuilds; -use crate::{index_watcher::get_new_crates, service_metrics::OtelServiceMetrics}; +use crate::{ + index_watcher::get_new_crates, metrics::WatcherMetrics, service_metrics::OtelServiceMetrics, +}; use anyhow::{Error, Result}; use docs_rs_context::Context; use docs_rs_utils::start_async_cron; @@ -27,9 +30,12 @@ use tracing::{debug, error, info, trace}; /// Only here so unexpected errors lead to a sentry report & restart instead of /// the daemon / watcher just stopping. 
pub async fn watch(config: &Config, context: &Context) { + let metrics = WatcherMetrics::new(context.meter_provider()); + loop { if config.sqs_active { - if let Err(err) = crate::subscriber::run_sqs_subscriber(config, context).await { + if let Err(err) = crate::subscriber::run_sqs_subscriber(config, context, &metrics).await + { error!(?err, "unexpected error watching SQS, will retry"); time::sleep(Duration::from_secs(10)).await; } @@ -42,7 +48,9 @@ pub async fn watch(config: &Config, context: &Context) { if let (Err(err), _) = tokio::join!(crate::watch_registry(config, context), async { // unexpected SQS errors are caught here, and we don't retry. - if let Err(err) = crate::subscriber::run_sqs_subscriber(config, context).await { + if let Err(err) = + crate::subscriber::run_sqs_subscriber(config, context, &metrics).await + { error!(?err, "error setting up SQS test subscriber"); } Ok::<_, Error>(()) diff --git a/crates/bin/docs_rs_watcher/src/metrics.rs b/crates/bin/docs_rs_watcher/src/metrics.rs new file mode 100644 index 000000000..baf05b51e --- /dev/null +++ b/crates/bin/docs_rs_watcher/src/metrics.rs @@ -0,0 +1,20 @@ +use docs_rs_opentelemetry::AnyMeterProvider; +use opentelemetry::metrics::Counter; + +#[derive(Debug)] +pub(crate) struct WatcherMetrics { + dummy: Counter, +} + +impl WatcherMetrics { + pub(crate) fn new(meter_provider: &AnyMeterProvider) -> Self { + let meter = meter_provider.meter("watcher"); + const PREFIX: &str = "docsrs.watcher"; + Self { + dummy: meter + .u64_counter(format!("{PREFIX}.dummy")) + .with_unit("1") + .build(), + } + } +} diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index de188c065..a5cb7223d 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -4,6 +4,7 @@ use crate::{ process_crate_deleted, process_version_added, process_version_deleted, process_version_yank_status, }, + metrics::WatcherMetrics, }; use 
anyhow::{Context as _, Result}; use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; @@ -52,7 +53,11 @@ enum MessageOutcome { Ignore, } -pub async fn run_sqs_subscriber(config: &Config, context: &Context) -> Result<()> { +pub async fn run_sqs_subscriber( + config: &Config, + context: &Context, + metrics: &WatcherMetrics, +) -> Result<()> { let (Some(region), Some(queue_url)) = (&config.sqs_region, &config.sqs_queue_url) else { warn!("missing sqs region or url, disabling crates.io SQS subscriber"); return Ok(()); From 79ba59df8a85c8e0fc4f26bbbc469fdc4f1e23d5 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 02:17:17 +0200 Subject: [PATCH 59/74] feat(watcher): instrument SQS subscriber metrics Add starter counters and histograms for SQS intake, failures, retries, processing time, event lag, and applied change types, and record them from the subscriber flow. --- Cargo.lock | 1 + crates/bin/docs_rs_watcher/Cargo.toml | 1 + crates/bin/docs_rs_watcher/src/metrics.rs | 52 ++++++++++- crates/bin/docs_rs_watcher/src/subscriber.rs | 93 ++++++++++++++++++-- 4 files changed, 135 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ee007bb0..db727b068 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2485,6 +2485,7 @@ dependencies = [ "anyhow", "aws-config", "aws-sdk-sqs", + "chrono", "clap", "crates-index", "crates-index-diff", diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index fc7cdb60d..a9abb3371 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -11,6 +11,7 @@ anyhow = { workspace = true } aws-config = { workspace = true } aws-sdk-sqs = { version = "1.99.0", default-features = false, features = ["default-https-client", "rt-tokio"] } clap = { workspace = true } +chrono = { workspace = true } # NOTE: on the new infra, switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough crates-index = { version = 
"3.0.0", default-features = false, features = ["git", "git-https-reqwest", "git-performance", "parallel"] } # NOTE: on the new infra, switch back from `http-reqwest` to `http-curl` once the curl version is new enough diff --git a/crates/bin/docs_rs_watcher/src/metrics.rs b/crates/bin/docs_rs_watcher/src/metrics.rs index baf05b51e..3f0bbb02a 100644 --- a/crates/bin/docs_rs_watcher/src/metrics.rs +++ b/crates/bin/docs_rs_watcher/src/metrics.rs @@ -1,9 +1,18 @@ use docs_rs_opentelemetry::AnyMeterProvider; -use opentelemetry::metrics::Counter; +use opentelemetry::{ + KeyValue, + metrics::{Counter, Histogram}, +}; #[derive(Debug)] pub(crate) struct WatcherMetrics { - dummy: Counter, + pub(crate) sqs_messages_received_total: Counter, + pub(crate) sqs_poll_errors_total: Counter, + pub(crate) sqs_message_failures_total: Counter, + pub(crate) sqs_retries_total: Counter, + pub(crate) changes_applied_total: Counter, + pub(crate) sqs_message_processing_seconds: Histogram, + pub(crate) sqs_event_lag_seconds: Histogram, } impl WatcherMetrics { @@ -11,10 +20,45 @@ impl WatcherMetrics { let meter = meter_provider.meter("watcher"); const PREFIX: &str = "docsrs.watcher"; Self { - dummy: meter - .u64_counter(format!("{PREFIX}.dummy")) + sqs_messages_received_total: meter + .u64_counter(format!("{PREFIX}.sqs_messages_received_total")) .with_unit("1") .build(), + sqs_poll_errors_total: meter + .u64_counter(format!("{PREFIX}.sqs_poll_errors_total")) + .with_unit("1") + .build(), + sqs_message_failures_total: meter + .u64_counter(format!("{PREFIX}.sqs_message_failures_total")) + .with_unit("1") + .build(), + sqs_retries_total: meter + .u64_counter(format!("{PREFIX}.sqs_retries_total")) + .with_unit("1") + .build(), + changes_applied_total: meter + .u64_counter(format!("{PREFIX}.changes_applied_total")) + .with_unit("1") + .build(), + sqs_message_processing_seconds: meter + .f64_histogram(format!("{PREFIX}.sqs_message_processing_seconds")) + .with_boundaries(vec![ + 0.001, 0.005, 0.01, 
0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, + ]) + .with_unit("s") + .build(), + sqs_event_lag_seconds: meter + .f64_histogram(format!("{PREFIX}.sqs_event_lag_seconds")) + .with_boundaries(vec![ + 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 900.0, 3600.0, + ]) + .with_unit("s") + .build(), } } + + pub(crate) fn record_change_applied(&self, change_type: &'static str) { + self.changes_applied_total + .add(1, &[KeyValue::new("type", change_type)]); + } } diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index a5cb7223d..781877085 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -9,6 +9,7 @@ use crate::{ use anyhow::{Context as _, Result}; use aws_config::{BehaviorVersion, Region, retry::RetryConfig}; use aws_sdk_sqs::Client; +use chrono::Utc; use docs_rs_context::Context; use docs_rs_crates_io::events::{IndexChangeEventV1, IndexChangeV1}; use docs_rs_types::KrateName; @@ -53,7 +54,7 @@ enum MessageOutcome { Ignore, } -pub async fn run_sqs_subscriber( +pub(crate) async fn run_sqs_subscriber( config: &Config, context: &Context, metrics: &WatcherMetrics, @@ -96,6 +97,7 @@ pub async fn run_sqs_subscriber( { Ok(response) => response.messages().to_vec(), Err(err) => { + metrics.sqs_poll_errors_total.add(1, &[]); error!( ?err, queue_url, "error receiving messages from sqs, retrying" @@ -104,9 +106,12 @@ pub async fn run_sqs_subscriber( continue; } }; + metrics + .sqs_messages_received_total + .add(messages.len() as u64, &[]); for message in messages { - match handle_message_body(context, config, message.body.as_deref()).await { + match handle_message_body(context, config, metrics, message.body.as_deref()).await { MessageOutcome::Ack => { if let Some(receipt_handle) = message.receipt_handle.as_deref() && let Err(err) = client @@ -160,20 +165,32 @@ pub async fn run_sqs_subscriber( async fn handle_message_body( context: &Context, config: &Config, + 
metrics: &WatcherMetrics, body: Option<&str>, ) -> MessageOutcome { let Some(body) = body else { return MessageOutcome::Ignore; }; + let start = Instant::now(); match retry_async( - || async move { process_sqs_event(context, config, body).await }, + || async move { process_sqs_event(context, config, metrics, body).await }, 3, ) .await { - Ok(_) => MessageOutcome::Ack, + Ok(_) => { + metrics + .sqs_message_processing_seconds + .record(start.elapsed().as_secs_f64(), &[]); + MessageOutcome::Ack + } Err(err) => { + metrics + .sqs_message_processing_seconds + .record(start.elapsed().as_secs_f64(), &[]); + metrics.sqs_message_failures_total.add(1, &[]); + metrics.sqs_retries_total.add(1, &[]); error!( ?err, ?RETRY_DELAY, @@ -185,21 +202,39 @@ async fn handle_message_body( } } -async fn process_sqs_event(context: &Context, config: &Config, body: &str) -> Result<()> { +async fn process_sqs_event( + context: &Context, + config: &Config, + metrics: &WatcherMetrics, + body: &str, +) -> Result<()> { let event: IndexChangeEventV1 = serde_json::from_str(body).context("error parsing event from json")?; debug!(?event, "received event from sqs"); + let lag_seconds = (Utc::now() - event.occurred_at).num_milliseconds().max(0) as f64 / 1000.0; + metrics.sqs_event_lag_seconds.record(lag_seconds, &[]); if config.sqs_active { process_change(context, &event.change, config) .await .context("error processing change")?; + metrics.record_change_applied(change_type(&event.change)); } Ok(()) } +fn change_type(change: &IndexChangeV1) -> &'static str { + match change { + IndexChangeV1::Added(_) => "added", + IndexChangeV1::Yanked(_) => "yanked", + IndexChangeV1::Unyanked(_) => "unyanked", + IndexChangeV1::CrateDeleted { .. } => "crate_deleted", + IndexChangeV1::VersionDeleted(_) => "version_deleted", + } +} + /// Process a crate change, returning whether the change was a crate addition or not. 
pub(crate) async fn process_change( context: &Context, @@ -349,10 +384,12 @@ mod tests { let mut config = Config::test_config()?; config.sqs_active = true; let env = TestEnvironment::builder().config(config).build().await?; + let metrics = WatcherMetrics::new(&env.context().meter_provider); process_sqs_event( &env, env.config(), + &metrics, &added_event_json("krate", &V1.to_string()), ) .await?; @@ -361,6 +398,20 @@ mod tests { assert_eq!(queue.len(), 1); assert_eq!(queue[0].name, KRATE); assert_eq!(queue[0].version, V1); + let collected = env.collected_metrics(); + let applied_metric = + collected.get_metric("watcher", "docsrs.watcher.changes_applied_total")?; + let applied = applied_metric.get_u64_counter(); + let change_type = applied + .attributes() + .find(|kv| kv.key.as_str() == "type") + .unwrap() + .value + .to_string(); + assert_eq!(change_type, "added"); + assert_eq!(applied.value(), 1); + let lag_metric = collected.get_metric("watcher", "docsrs.watcher.sqs_event_lag_seconds")?; + assert_eq!(lag_metric.get_f64_histogram().count(), 1); Ok(()) } @@ -370,10 +421,12 @@ mod tests { let mut config = Config::test_config()?; config.sqs_active = false; let env = TestEnvironment::builder().config(config).build().await?; + let metrics = WatcherMetrics::new(&env.context().meter_provider); process_sqs_event( &env, env.config(), + &metrics, &added_event_json("krate", &V1.to_string()), ) .await?; @@ -386,8 +439,9 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_process_sqs_event_rejects_invalid_json() -> Result<()> { let env = TestEnvironment::new().await?; + let metrics = WatcherMetrics::new(&env.context().meter_provider); - let err = process_sqs_event(&env, env.config(), "{not json").await; + let err = process_sqs_event(&env, env.config(), &metrics, "{not json").await; assert!(err.is_err()); let err = format!("{:?}", err.unwrap_err()); @@ -403,16 +457,22 @@ mod tests { async fn test_handle_message_body_acknowledges_success() -> Result<()> { let 
config = Config::test_config()?; let env = TestEnvironment::builder().config(config).build().await?; + let metrics = WatcherMetrics::new(&env.context().meter_provider); assert_eq!( handle_message_body( &env, env.config(), + &metrics, Some(&added_event_json("krate", &V1.to_string())), ) .await, MessageOutcome::Ack ); + let collected = env.collected_metrics(); + let processing_metric = + collected.get_metric("watcher", "docsrs.watcher.sqs_message_processing_seconds")?; + assert_eq!(processing_metric.get_f64_histogram().count(), 1); Ok(()) } @@ -420,11 +480,27 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_handle_message_body_retries_failed_processing() -> Result<()> { let env = TestEnvironment::new().await?; + let metrics = WatcherMetrics::new(&env.context().meter_provider); assert_eq!( - handle_message_body(&env, env.config(), Some("{bad json")).await, + handle_message_body(&env, env.config(), &metrics, Some("{bad json")).await, MessageOutcome::RetryLater(RETRY_DELAY) ); + let collected = env.collected_metrics(); + assert_eq!( + collected + .get_metric("watcher", "docsrs.watcher.sqs_message_failures_total")? + .get_u64_counter() + .value(), + 1 + ); + assert_eq!( + collected + .get_metric("watcher", "docsrs.watcher.sqs_retries_total")? 
+ .get_u64_counter() + .value(), + 1 + ); Ok(()) } @@ -432,9 +508,10 @@ mod tests { #[tokio::test(flavor = "multi_thread")] async fn test_handle_message_body_ignores_missing_body() -> Result<()> { let env = TestEnvironment::new().await?; + let metrics = WatcherMetrics::new(&env.context().meter_provider); assert_eq!( - handle_message_body(&env, env.config(), None).await, + handle_message_body(&env, env.config(), &metrics, None).await, MessageOutcome::Ignore ); assert!(env.build_queue()?.queued_crates().await?.is_empty()); From 1bc440c182bef17ca56e21556e4e30369784f68d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 02:27:25 +0200 Subject: [PATCH 60/74] metrics --- crates/bin/docs_rs_watcher/src/metrics.rs | 5 ----- crates/bin/docs_rs_watcher/src/subscriber.rs | 21 ++++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/metrics.rs b/crates/bin/docs_rs_watcher/src/metrics.rs index 3f0bbb02a..b3b71f8bb 100644 --- a/crates/bin/docs_rs_watcher/src/metrics.rs +++ b/crates/bin/docs_rs_watcher/src/metrics.rs @@ -8,7 +8,6 @@ use opentelemetry::{ pub(crate) struct WatcherMetrics { pub(crate) sqs_messages_received_total: Counter, pub(crate) sqs_poll_errors_total: Counter, - pub(crate) sqs_message_failures_total: Counter, pub(crate) sqs_retries_total: Counter, pub(crate) changes_applied_total: Counter, pub(crate) sqs_message_processing_seconds: Histogram, @@ -28,10 +27,6 @@ impl WatcherMetrics { .u64_counter(format!("{PREFIX}.sqs_poll_errors_total")) .with_unit("1") .build(), - sqs_message_failures_total: meter - .u64_counter(format!("{PREFIX}.sqs_message_failures_total")) - .with_unit("1") - .build(), sqs_retries_total: meter .u64_counter(format!("{PREFIX}.sqs_retries_total")) .with_unit("1") diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 781877085..6ecbbdc85 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ 
b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -16,7 +16,7 @@ use docs_rs_types::KrateName; use docs_rs_utils::retry_async; use std::time::{Duration, Instant}; use tokio::time; -use tracing::{debug, error, warn}; +use tracing::{debug, error, field, instrument, warn}; /// wait-time (long polling): /// @@ -189,7 +189,6 @@ async fn handle_message_body( metrics .sqs_message_processing_seconds .record(start.elapsed().as_secs_f64(), &[]); - metrics.sqs_message_failures_total.add(1, &[]); metrics.sqs_retries_total.add(1, &[]); error!( ?err, @@ -202,6 +201,7 @@ async fn handle_message_body( } } +#[instrument(skip_all, fields(change_type = field::Empty, krate = field::Empty))] async fn process_sqs_event( context: &Context, config: &Config, @@ -211,6 +211,14 @@ async fn process_sqs_event( let event: IndexChangeEventV1 = serde_json::from_str(body).context("error parsing event from json")?; + let change_type = change_type(&event.change); + + { + let span = tracing::Span::current(); + span.record("change_type", change_type); + span.record("krate", event.change.name()); + } + debug!(?event, "received event from sqs"); let lag_seconds = (Utc::now() - event.occurred_at).num_milliseconds().max(0) as f64 / 1000.0; metrics.sqs_event_lag_seconds.record(lag_seconds, &[]); @@ -219,7 +227,7 @@ async fn process_sqs_event( process_change(context, &event.change, config) .await .context("error processing change")?; - metrics.record_change_applied(change_type(&event.change)); + metrics.record_change_applied(change_type); } Ok(()) @@ -487,13 +495,6 @@ mod tests { MessageOutcome::RetryLater(RETRY_DELAY) ); let collected = env.collected_metrics(); - assert_eq!( - collected - .get_metric("watcher", "docsrs.watcher.sqs_message_failures_total")? - .get_u64_counter() - .value(), - 1 - ); assert_eq!( collected .get_metric("watcher", "docsrs.watcher.sqs_retries_total")? 
From 84e5739c5b691892e904e4faa41cfe55b1446b1e Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 02:40:29 +0200 Subject: [PATCH 61/74] more instrument --- crates/bin/docs_rs_watcher/src/subscriber.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 6ecbbdc85..4da53dfdd 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -244,6 +244,7 @@ fn change_type(change: &IndexChangeV1) -> &'static str { } /// Process a crate change, returning whether the change was a crate addition or not. +#[instrument(skip(context, config))] pub(crate) async fn process_change( context: &Context, change: &IndexChangeV1, From 1835ae7e972f7524583a6947bfefe65f29a4d27d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 02:59:01 +0200 Subject: [PATCH 62/74] small things --- crates/bin/docs_rs_watcher/src/config.rs | 15 +++++---- crates/bin/docs_rs_watcher/src/metrics.rs | 5 +-- crates/bin/docs_rs_watcher/src/subscriber.rs | 35 +++++++++----------- crates/lib/docs_rs_crates_io/src/events.rs | 22 ++++++------ 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index af0bc8539..99b73224b 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -6,21 +6,22 @@ use url::Url; #[derive(Debug)] pub struct Config { + /// registry watching config. Also used for database-synchronize pub registry_index_path: PathBuf, pub registry_url: Option, + /// How long to wait between registry checks + pub delay_between_registry_fetches: Duration, + // Time between 'git gc --auto' calls in seconds + pub registry_gc_interval: u64, + + /// SQS watching config.
pub sqs_queue_url: Option, pub sqs_region: Option, - pub sqs_endpoint_url: Option, + pub sqs_endpoint_url: Option, /// temporary, to switch between the sources for the index (git index vs SQS) pub sqs_active: bool, pub aws_sdk_max_retries: u32, - /// How long to wait between registry checks - pub delay_between_registry_fetches: Duration, - - // Time between 'git gc --auto' calls in seconds - pub registry_gc_interval: u64, - // automatic rebuild configuration pub max_queued_rebuilds: Option, diff --git a/crates/bin/docs_rs_watcher/src/metrics.rs b/crates/bin/docs_rs_watcher/src/metrics.rs index b3b71f8bb..358fa239e 100644 --- a/crates/bin/docs_rs_watcher/src/metrics.rs +++ b/crates/bin/docs_rs_watcher/src/metrics.rs @@ -1,3 +1,4 @@ +use docs_rs_crates_io::events::IndexChangeV1; use docs_rs_opentelemetry::AnyMeterProvider; use opentelemetry::{ KeyValue, @@ -52,8 +53,8 @@ impl WatcherMetrics { } } - pub(crate) fn record_change_applied(&self, change_type: &'static str) { + pub(crate) fn record_change_applied(&self, change: &IndexChangeV1) { self.changes_applied_total - .add(1, &[KeyValue::new("type", change_type)]); + .add(1, &[KeyValue::new("type", change.kind())]); } } diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 4da53dfdd..bd76ad936 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -72,7 +72,7 @@ pub(crate) async fn run_sqs_subscriber( .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) .region(Region::new(region.to_string())); if let Some(endpoint_url) = &config.sqs_endpoint_url { - client_config = client_config.endpoint_url(endpoint_url); + client_config = client_config.endpoint_url(endpoint_url.to_string()); } let client = Client::from_conf(client_config.build()); @@ -227,7 +227,7 @@ async fn process_sqs_event( process_change(context, &event.change, config) .await .context("error processing change")?; - 
metrics.record_change_applied(change_type); + metrics.record_change_applied(&event.change); } Ok(()) @@ -243,32 +243,32 @@ fn change_type(change: &IndexChangeV1) -> &'static str { } } -/// Process a crate change, returning whether the change was a crate addition or not. +/// Process a crate change #[instrument(skip(context, config))] pub(crate) async fn process_change( context: &Context, change: &IndexChangeV1, config: &Config, -) -> Result { +) -> Result<()> { match change { IndexChangeV1::Added(crate_version) => { - process_version_added(context, &crate_version.try_into().unwrap()).await? + process_version_added(context, &crate_version.try_into()?).await? } IndexChangeV1::Yanked(crate_version) => { - process_version_yank_status(context, &crate_version.try_into().unwrap(), true).await? + process_version_yank_status(context, &crate_version.try_into()?, true).await? } IndexChangeV1::Unyanked(crate_version) => { - process_version_yank_status(context, &crate_version.try_into().unwrap(), false).await? + process_version_yank_status(context, &crate_version.try_into()?, false).await? } IndexChangeV1::CrateDeleted { name, .. } => { let name: KrateName = name.parse()?; process_crate_deleted(context, config, &name).await? } IndexChangeV1::VersionDeleted(crate_version) => { - process_version_deleted(context, config, &crate_version.try_into().unwrap()).await? + process_version_deleted(context, config, &crate_version.try_into()?).await? 
} }; - Ok(change.added().is_some()) + Ok(()) } #[cfg(test)] @@ -290,7 +290,7 @@ mod tests { async fn test_process_change_added_queues_crate() -> Result<()> { let env = TestEnvironment::new().await?; - let added = process_change( + process_change( &env, &IndexChangeV1::Added(CrateVersion { name: KRATE.to_string(), @@ -300,7 +300,6 @@ mod tests { ) .await?; - assert!(added); let queue = env.build_queue()?.queued_crates().await?; assert_eq!(queue.len(), 1); assert_eq!(queue[0].name, KRATE); @@ -322,7 +321,7 @@ mod tests { .create() .await?; - let added = process_change( + process_change( &env, &IndexChangeV1::Yanked(CrateVersion { name: KRATE.to_string(), @@ -332,8 +331,7 @@ mod tests { ) .await?; - assert!(!added); - let row = sqlx::query!( + let yanked = sqlx::query_scalar!( "SELECT yanked FROM releases WHERE id = $1", @@ -341,7 +339,7 @@ mod tests { ) .fetch_one(&mut *conn) .await?; - assert_eq!(row.yanked, Some(true)); + assert_eq!(yanked, Some(true)); Ok(()) } @@ -365,7 +363,7 @@ mod tests { .create() .await?; - let added = process_change( + process_change( &env, &IndexChangeV1::VersionDeleted(CrateVersion { name: KRATE.to_string(), @@ -375,15 +373,14 @@ mod tests { ) .await?; - assert!(!added); - let rows = sqlx::query!( + let rows = sqlx::query_scalar!( "SELECT id FROM releases", ) .fetch_all(&mut *conn) .await?; assert_eq!(rows.len(), 1); - assert_eq!(rows[0].id, rid_1.0); + assert_eq!(rows[0], rid_1.0); Ok(()) } diff --git a/crates/lib/docs_rs_crates_io/src/events.rs b/crates/lib/docs_rs_crates_io/src/events.rs index 27487df8f..6e1b722ff 100644 --- a/crates/lib/docs_rs_crates_io/src/events.rs +++ b/crates/lib/docs_rs_crates_io/src/events.rs @@ -67,21 +67,21 @@ impl IndexChangeV1 { IndexChangeV1::VersionDeleted(crate_version) => &crate_version.name, } } + + pub fn kind(&self) -> &'static str { + match *self { + IndexChangeV1::Added(_) => "added", + IndexChangeV1::Yanked(_) => "yanked", + IndexChangeV1::CrateDeleted { .. 
} => "crate deleted", + IndexChangeV1::VersionDeleted(_) => "version deleted", + IndexChangeV1::Unyanked(_) => "unyanked", + } + } } impl fmt::Display for IndexChangeV1 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{}", - match *self { - IndexChangeV1::Added(_) => "added", - IndexChangeV1::Yanked(_) => "yanked", - IndexChangeV1::CrateDeleted { .. } => "crate deleted", - IndexChangeV1::VersionDeleted(_) => "version deleted", - IndexChangeV1::Unyanked(_) => "unyanked", - } - ) + f.write_str(self.kind()) } } From 9fdefe37a807afc1f4e113624294f791e6464cda Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:03:09 +0200 Subject: [PATCH 63/74] kk --- crates/bin/docs_rs_watcher/src/subscriber.rs | 25 +++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index bd76ad936..da5d20d67 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -277,10 +277,13 @@ mod tests { use crate::testing::TestEnvironment; use docs_rs_config::AppConfig as _; use docs_rs_crates_io::events::CrateVersion; - use docs_rs_types::testing::{KRATE, V1, V2}; + use docs_rs_types::{ + Version, + testing::{KRATE, V1, V2}, + }; use pretty_assertions::assert_eq; - fn added_event_json(name: &str, version: &str) -> String { + fn added_event_json(name: &KrateName, version: &Version) -> String { format!( r#"{{"id":"evt_123","occurred_at":"2026-06-01T12:00:00Z","type":"added","payload":{{"name":"{name}","vers":"{version}"}}}}"# ) @@ -392,13 +395,7 @@ mod tests { let env = TestEnvironment::builder().config(config).build().await?; let metrics = WatcherMetrics::new(&env.context().meter_provider); - process_sqs_event( - &env, - env.config(), - &metrics, - &added_event_json("krate", &V1.to_string()), - ) - .await?; + process_sqs_event(&env, env.config(), &metrics, &added_event_json(&KRATE, 
&V1)).await?; let queue = env.build_queue()?.queued_crates().await?; assert_eq!(queue.len(), 1); @@ -429,13 +426,7 @@ mod tests { let env = TestEnvironment::builder().config(config).build().await?; let metrics = WatcherMetrics::new(&env.context().meter_provider); - process_sqs_event( - &env, - env.config(), - &metrics, - &added_event_json("krate", &V1.to_string()), - ) - .await?; + process_sqs_event(&env, env.config(), &metrics, &added_event_json(&KRATE, &V1)).await?; assert!(env.build_queue()?.queued_crates().await?.is_empty()); @@ -470,7 +461,7 @@ mod tests { &env, env.config(), &metrics, - Some(&added_event_json("krate", &V1.to_string())), + Some(&added_event_json(&KRATE, &V1)), ) .await, MessageOutcome::Ack From ecd7952b2a0dcee95d3263cb98283d230e1382a3 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:10:38 +0200 Subject: [PATCH 64/74] names --- crates/bin/docs_rs_watcher/src/metrics.rs | 12 +++--- crates/bin/docs_rs_watcher/src/subscriber.rs | 42 +++++++++----------- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/metrics.rs b/crates/bin/docs_rs_watcher/src/metrics.rs index 358fa239e..5de1af2f1 100644 --- a/crates/bin/docs_rs_watcher/src/metrics.rs +++ b/crates/bin/docs_rs_watcher/src/metrics.rs @@ -11,8 +11,8 @@ pub(crate) struct WatcherMetrics { pub(crate) sqs_poll_errors_total: Counter, pub(crate) sqs_retries_total: Counter, pub(crate) changes_applied_total: Counter, - pub(crate) sqs_message_processing_seconds: Histogram, - pub(crate) sqs_event_lag_seconds: Histogram, + pub(crate) sqs_message_processing_time: Histogram, + pub(crate) sqs_event_lag: Histogram, } impl WatcherMetrics { @@ -36,15 +36,15 @@ impl WatcherMetrics { .u64_counter(format!("{PREFIX}.changes_applied_total")) .with_unit("1") .build(), - sqs_message_processing_seconds: meter - .f64_histogram(format!("{PREFIX}.sqs_message_processing_seconds")) + sqs_message_processing_time: meter + 
.f64_histogram(format!("{PREFIX}.sqs_message_processing_time")) .with_boundaries(vec![ 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, ]) .with_unit("s") .build(), - sqs_event_lag_seconds: meter - .f64_histogram(format!("{PREFIX}.sqs_event_lag_seconds")) + sqs_event_lag: meter + .f64_histogram(format!("{PREFIX}.sqs_event_lag")) .with_boundaries(vec![ 0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 900.0, 3600.0, ]) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index da5d20d67..9d12e0282 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -181,13 +181,13 @@ async fn handle_message_body( { Ok(_) => { metrics - .sqs_message_processing_seconds + .sqs_message_processing_time .record(start.elapsed().as_secs_f64(), &[]); MessageOutcome::Ack } Err(err) => { metrics - .sqs_message_processing_seconds + .sqs_message_processing_time .record(start.elapsed().as_secs_f64(), &[]); metrics.sqs_retries_total.add(1, &[]); error!( @@ -211,17 +211,16 @@ async fn process_sqs_event( let event: IndexChangeEventV1 = serde_json::from_str(body).context("error parsing event from json")?; - let change_type = change_type(&event.change); - { let span = tracing::Span::current(); - span.record("change_type", change_type); + span.record("change_type", event.change.kind()); span.record("krate", event.change.name()); } debug!(?event, "received event from sqs"); - let lag_seconds = (Utc::now() - event.occurred_at).num_milliseconds().max(0) as f64 / 1000.0; - metrics.sqs_event_lag_seconds.record(lag_seconds, &[]); + metrics + .sqs_event_lag + .record((Utc::now() - event.occurred_at).as_seconds_f64(), &[]); if config.sqs_active { process_change(context, &event.change, config) @@ -233,16 +232,6 @@ async fn process_sqs_event( Ok(()) } -fn change_type(change: &IndexChangeV1) -> &'static str { - match change { - IndexChangeV1::Added(_) => "added", - 
IndexChangeV1::Yanked(_) => "yanked", - IndexChangeV1::Unyanked(_) => "unyanked", - IndexChangeV1::CrateDeleted { .. } => "crate_deleted", - IndexChangeV1::VersionDeleted(_) => "version_deleted", - } -} - /// Process a crate change #[instrument(skip(context, config))] pub(crate) async fn process_change( @@ -284,9 +273,16 @@ mod tests { use pretty_assertions::assert_eq; fn added_event_json(name: &KrateName, version: &Version) -> String { - format!( - r#"{{"id":"evt_123","occurred_at":"2026-06-01T12:00:00Z","type":"added","payload":{{"name":"{name}","vers":"{version}"}}}}"# - ) + serde_json::to_string(&serde_json::json!({ + "id":"evt_123", + "occurred_at":"2026-06-01T12:00:00Z", + "type":"added", + "payload":{ + "name": name.to_string(), + "vers": version.to_string(), + } + })) + .unwrap() } #[tokio::test(flavor = "multi_thread")] @@ -319,7 +315,7 @@ mod tests { let id = env .fake_release() .await - .name("krate") + .name(KRATE) .version(V1) .create() .await?; @@ -355,13 +351,13 @@ mod tests { let rid_1 = env .fake_release() .await - .name("krate") + .name(KRATE) .version(V1) .create() .await?; env.fake_release() .await - .name("krate") + .name(KRATE) .version(V2) .create() .await?; From c0f5b0131da46f1ba97efd7e5a870fe69cc72c3c Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:11:22 +0200 Subject: [PATCH 65/74] sort --- crates/bin/docs_rs_watcher/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_watcher/Cargo.toml b/crates/bin/docs_rs_watcher/Cargo.toml index a9abb3371..286295618 100644 --- a/crates/bin/docs_rs_watcher/Cargo.toml +++ b/crates/bin/docs_rs_watcher/Cargo.toml @@ -10,8 +10,8 @@ edition.workspace = true anyhow = { workspace = true } aws-config = { workspace = true } aws-sdk-sqs = { version = "1.99.0", default-features = false, features = ["default-https-client", "rt-tokio"] } -clap = { workspace = true } chrono = { workspace = true } +clap = { workspace = true } # NOTE: on the new infra, 
switch back from `git-https-reqwest` to `git-https` (curl) once the curl version is new enough crates-index = { version = "3.0.0", default-features = false, features = ["git", "git-https-reqwest", "git-performance", "parallel"] } # NOTE: on the new infra, switch back from `http-reqwest` to `http-curl` once the curl version is new enough From a8f480d812b4cd7059c641508ee655b8acc35a09 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:15:11 +0200 Subject: [PATCH 66/74] chore(watcher): refine SQS processing buckets Add buckets around the one-minute visibility timeout so slow message handlers are easier to spot in metrics. --- crates/bin/docs_rs_watcher/src/metrics.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/bin/docs_rs_watcher/src/metrics.rs b/crates/bin/docs_rs_watcher/src/metrics.rs index 5de1af2f1..fbdf7763d 100644 --- a/crates/bin/docs_rs_watcher/src/metrics.rs +++ b/crates/bin/docs_rs_watcher/src/metrics.rs @@ -40,6 +40,7 @@ impl WatcherMetrics { .f64_histogram(format!("{PREFIX}.sqs_message_processing_time")) .with_boundaries(vec![ 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, + 45.0, 55.0, 60.0, 65.0, 90.0, 120.0, ]) .with_unit("s") .build(), From 3b5a9b18bed32bb120597c67387369b0c0f0369a Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:16:30 +0200 Subject: [PATCH 67/74] clean --- crates/bin/docs_rs_watcher/src/subscriber.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 9d12e0282..0dab46dd4 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -201,7 +201,7 @@ async fn handle_message_body( } } -#[instrument(skip_all, fields(change_type = field::Empty, krate = field::Empty))] +#[instrument(skip_all)] async fn process_sqs_event( context: &Context, config: &Config, @@ -211,12 +211,6 @@ async fn 
process_sqs_event( let event: IndexChangeEventV1 = serde_json::from_str(body).context("error parsing event from json")?; - { - let span = tracing::Span::current(); - span.record("change_type", event.change.kind()); - span.record("krate", event.change.name()); - } - debug!(?event, "received event from sqs"); metrics .sqs_event_lag From 089ba22a1fe187e2ba15fe25d03f203c1e4e2352 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:22:25 +0200 Subject: [PATCH 68/74] fix test --- crates/bin/docs_rs_watcher/src/subscriber.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 0dab46dd4..2fece574c 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -16,7 +16,7 @@ use docs_rs_types::KrateName; use docs_rs_utils::retry_async; use std::time::{Duration, Instant}; use tokio::time; -use tracing::{debug, error, field, instrument, warn}; +use tracing::{debug, error, instrument, warn}; /// wait-time (long polling): /// @@ -403,7 +403,7 @@ mod tests { .to_string(); assert_eq!(change_type, "added"); assert_eq!(applied.value(), 1); - let lag_metric = collected.get_metric("watcher", "docsrs.watcher.sqs_event_lag_seconds")?; + let lag_metric = collected.get_metric("watcher", "docsrs.watcher.sqs_event_lag")?; assert_eq!(lag_metric.get_f64_histogram().count(), 1); Ok(()) @@ -458,7 +458,7 @@ mod tests { ); let collected = env.collected_metrics(); let processing_metric = - collected.get_metric("watcher", "docsrs.watcher.sqs_message_processing_seconds")?; + collected.get_metric("watcher", "docsrs.watcher.sqs_message_processing_time")?; assert_eq!(processing_metric.get_f64_histogram().count(), 1); Ok(()) From 206d1ea2585b74ec1793372f01734d9c460cc70d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:23:14 +0200 Subject: [PATCH 69/74] fix env --- 
crates/bin/docs_rs_watcher/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_watcher/src/config.rs b/crates/bin/docs_rs_watcher/src/config.rs index 99b73224b..e8bdc4f82 100644 --- a/crates/bin/docs_rs_watcher/src/config.rs +++ b/crates/bin/docs_rs_watcher/src/config.rs @@ -41,7 +41,7 @@ impl AppConfig for Config { sqs_queue_url: maybe_env("DOCSRS_SQS_QUEUE_URL")?, sqs_region: maybe_env("DOCSRS_SQS_REGION")?, sqs_endpoint_url: maybe_env("DOCSRS_SQS_ENDPOINT_URL")?, - sqs_active: env("DOCS_RS_SQS_ACTIVE", false)?, + sqs_active: env("DOCSRS_SQS_ACTIVE", false)?, aws_sdk_max_retries: env("DOCSRS_AWS_SDK_MAX_RETRIES", 6u32)?, delay_between_registry_fetches: Duration::from_secs(env::( From 307b678adcd740023688f130f22df34af6ed662d Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:28:14 +0200 Subject: [PATCH 70/74] tryform --- crates/bin/docs_rs_watcher/src/index_watcher.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/index_watcher.rs b/crates/bin/docs_rs_watcher/src/index_watcher.rs index 689213df7..ffa8ffa2f 100644 --- a/crates/bin/docs_rs_watcher/src/index_watcher.rs +++ b/crates/bin/docs_rs_watcher/src/index_watcher.rs @@ -31,10 +31,10 @@ impl Default for CrateVersion { } } -impl TryFrom for CrateVersion { +impl TryFrom<&crates_index_diff::CrateVersion> for CrateVersion { type Error = anyhow::Error; - fn try_from(value: crates_index_diff::CrateVersion) -> Result { + fn try_from(value: &crates_index_diff::CrateVersion) -> Result { Ok(Self { name: value.name.parse()?, version: value.version.parse()?, @@ -53,17 +53,6 @@ impl TryFrom<&docs_rs_crates_io::events::CrateVersion> for CrateVersion { } } -impl TryFrom for CrateVersion { - type Error = anyhow::Error; - - fn try_from(value: docs_rs_crates_io::events::CrateVersion) -> Result { - Ok(Self { - name: value.name.parse()?, - version: value.version.parse()?, - }) - } -} - #[cfg(test)] 
impl From for crates_index_diff::CrateVersion { fn from(value: CrateVersion) -> Self { @@ -185,7 +174,6 @@ pub(crate) async fn process_change( .versions() .first() .expect("always exists") - .clone() .try_into()?; match change { From 00f0bca46ac861117d3c3d59bb368c6c997106e8 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:29:51 +0200 Subject: [PATCH 71/74] pub --- crates/bin/docs_rs_watcher/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bin/docs_rs_watcher/src/lib.rs b/crates/bin/docs_rs_watcher/src/lib.rs index e683f7d87..4b51356ef 100644 --- a/crates/bin/docs_rs_watcher/src/lib.rs +++ b/crates/bin/docs_rs_watcher/src/lib.rs @@ -6,7 +6,7 @@ pub mod index_watcher; mod metrics; mod rebuilds; mod service_metrics; -pub mod subscriber; +mod subscriber; #[cfg(test)] mod testing; From c6e40c0078e9939a0b5b3a322264788faded26c1 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:32:16 +0200 Subject: [PATCH 72/74] sql --- crates/bin/docs_rs_watcher/src/subscriber.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/bin/docs_rs_watcher/src/subscriber.rs b/crates/bin/docs_rs_watcher/src/subscriber.rs index 2fece574c..e201267b8 100644 --- a/crates/bin/docs_rs_watcher/src/subscriber.rs +++ b/crates/bin/docs_rs_watcher/src/subscriber.rs @@ -366,14 +366,12 @@ mod tests { ) .await?; - let rows = sqlx::query_scalar!( - "SELECT id - FROM releases", - ) - .fetch_all(&mut *conn) - .await?; - assert_eq!(rows.len(), 1); - assert_eq!(rows[0], rid_1.0); + assert_eq!( + sqlx::query_scalar!("SELECT id FROM releases") + .fetch_all(&mut *conn) + .await?, + vec![rid_1.0] + ); Ok(()) } From 0644add289bab49f158b8312f007ff440a49f5d9 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:33:14 +0200 Subject: [PATCH 73/74] anme --- .docker.env.sample | 2 +- docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.docker.env.sample 
b/.docker.env.sample index 9488e78e1..59577253f 100644 --- a/.docker.env.sample +++ b/.docker.env.sample @@ -14,4 +14,4 @@ DOCSRS_MAX_QUEUED_REBUILDS: 10 # DOCSRS_SQS_QUEUE_URL=http://elasticmq:9324/queue/docsrs-events # DOCSRS_SQS_REGION=elasticmq # DOCSRS_SQS_ENDPOINT_URL=http://elasticmq:9324 -# DOCS_RS_SQS_DRY_RUN=false +# DOCSRS_SQS_ACTIVE=false diff --git a/docker-compose.yml b/docker-compose.yml index e0b2e93c5..690d66246 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -137,7 +137,7 @@ x-registry-watcher: ®istry-watcher DOCSRS_SQS_QUEUE_URL: ${DOCSRS_SQS_QUEUE_URL:-http://elasticmq:9324/queue/docsrs-events} DOCSRS_SQS_REGION: ${DOCSRS_SQS_REGION:-elasticmq} DOCSRS_SQS_ENDPOINT_URL: ${DOCSRS_SQS_ENDPOINT_URL:-http://elasticmq:9324} - DOCS_RS_SQS_DRY_RUN: ${DOCS_RS_SQS_DRY_RUN:-false} + DOCSRS_SQS_ACTIVE: ${DOCSRS_SQS_ACTIVE:-false} env_file: - .docker.env From dd9c9d59c8a1c2e1e3f846a7e608e5e1423eb0e5 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 2 Jul 2026 03:37:40 +0200 Subject: [PATCH 74/74] sqlx --- ...8420f7da0f0445c6e43d5d64617226c24fba1.json | 26 +++++++++++++++++++ ...8420f7da0f0445c6e43d5d64617226c24fba1.json | 26 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 .sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json create mode 100644 crates/bin/docs_rs_watcher/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json diff --git a/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json b/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json new file mode 100644 index 000000000..5d451984e --- /dev/null +++ b/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT id FROM releases", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int4", + "origin": { + "Table": { + "table": 
"releases", + "name": "id" + } + } + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false + ] + }, + "hash": "5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1" +} diff --git a/crates/bin/docs_rs_watcher/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json b/crates/bin/docs_rs_watcher/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json new file mode 100644 index 000000000..5d451984e --- /dev/null +++ b/crates/bin/docs_rs_watcher/.sqlx/query-5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT id FROM releases", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int4", + "origin": { + "Table": { + "table": "releases", + "name": "id" + } + } + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false + ] + }, + "hash": "5f5fa0e89b4e13c690b1648a18e8420f7da0f0445c6e43d5d64617226c24fba1" +}