diff --git a/subprojects/crates/db/src/connection.rs b/subprojects/crates/db/src/connection.rs index 83c0d8f01..edb344f4c 100644 --- a/subprojects/crates/db/src/connection.rs +++ b/subprojects/crates/db/src/connection.rs @@ -356,12 +356,20 @@ impl Connection { CROSS JOIN LATERAL ( SELECT o.path FROM buildsteps s + -- If this step was resolved, look up outputs from + -- the resolved drv's successful buildstep instead. + -- resolvedDrvPath is a bare basename; drvPath is a + -- full path, so strip the directory prefix to compare. + LEFT JOIN buildsteps sr + ON sr.drvPath = $2 || '/' || s.resolvedDrvPath + AND sr.status = 0 JOIN buildstepoutputs o - ON s.build = o.build AND s.stepnr = o.stepnr + ON o.build = COALESCE(sr.build, s.build) + AND o.stepnr = COALESCE(sr.stepnr, s.stepnr) WHERE s.drvPath = r.drv_path AND o.name = i.chain[r.step] AND o.path IS NOT NULL - AND s.status = 0 + AND (s.status = 0 OR s.status = 13) ORDER BY s.build DESC LIMIT 1 ) sub @@ -377,6 +385,7 @@ impl Connection { ", ) .bind(&json_input) + .bind(store_dir.to_str()) .fetch_all(&mut *self.conn) .await?; @@ -1006,6 +1015,29 @@ impl Transaction<'_> { Ok(step_nr) } + /// Record which resolved drv path this step was resolved to. + #[tracing::instrument(skip(self), err)] + pub async fn set_resolved_to( + &mut self, + origin_build_id: crate::models::BuildID, + origin_step_nr: i32, + resolved_drv_path: &StorePath, + ) -> sqlx::Result<()> { + sqlx::query( + r" + UPDATE buildsteps + SET resolvedDrvPath = $3 + WHERE build = $1 AND stepnr = $2 + ", + ) + .bind(origin_build_id) + .bind(origin_step_nr) + .bind(resolved_drv_path.to_string()) + .execute(&mut *self.tx) + .await?; + Ok(()) + } + #[tracing::instrument( skip(self, start_time, stop_time, build_id, drv_path, outputs,), err, @@ -1284,11 +1316,24 @@ mod tests { } async fn insert_step(conn: &mut Connection, build: i32, stepnr: i32, drv_path: &StorePath) { + insert_step_with_status(conn, build, stepnr, drv_path, 0, None).await; + } + + async fn insert_step_with_status( + conn: &mut Connection, + build: i32, + stepnr: i32, + drv_path: &StorePath, + status: i32, + resolved_drv_path: Option<&StorePath>, + ) { let sd = test_store_dir(); - sqlx::query("INSERT INTO BuildSteps (build, stepnr, type, busy, drvPath, status) VALUES ($1, $2, 0, 0, $3, 0)") + sqlx::query("INSERT INTO BuildSteps (build, stepnr, type, busy, drvPath, status, resolvedDrvPath) VALUES ($1, $2, 0, 0, $3, $4, $5)") .bind(build) .bind(stepnr) .bind(sd.display(drv_path).to_string()) + .bind(status) + .bind(resolved_drv_path.map(|p| p.to_string())) .execute(&mut *conn.conn) .await .unwrap(); @@ -1426,6 +1471,70 @@ mod tests { ); } + /// A step that was resolved (status=13) with `resolvedDrvPath` pointing + /// to a different drv whose successful buildstep has the outputs. + #[tokio::test] + async fn resolve_through_resolved_step() { + let (_pg, mut conn) = setup().await; + + // Step 1: unresolved ca-depending-on-ca.drv, status=Resolved(13), + // resolvedDrvPath points to the resolved drv + insert_step_with_status( + &mut conn, + 1, + 1, + &sp("unresolved.drv"), + 13, + Some(&sp("resolved.drv")), + ) + .await; + // A successful buildstep for the resolved drv (could be any build) + insert_step(&mut conn, 2, 1, &sp("resolved.drv")).await; + insert_output(&mut conn, 2, 1, "out", &sp("result")).await; + + // Looking up via the unresolved drv path should find the output + // through the resolvedDrvPath chain. + let results = conn + .resolve_drv_output_chains(&test_store_dir(), &[(&sp("unresolved.drv"), &[&on("out")])]) + .await + .unwrap(); + assert_eq!(results, vec![Some(sp("result"))]); + } + + /// A depth-2 chain where the first step was resolved: + /// unresolved.drv (status=13, resolvedDrvPath→resolved.drv) → + /// resolved.drv outputs a .drv path → that .drv has the final output. + #[tokio::test] + async fn resolve_depth_2_through_resolved_step() { + let (_pg, mut conn) = setup().await; + + // Build 1: unresolved step, resolved to bbb-resolved.drv + insert_step_with_status( + &mut conn, + 1, + 1, + &sp("unresolved.drv"), + 13, + Some(&sp("resolved.drv")), + ) + .await; + insert_step(&mut conn, 2, 1, &sp("resolved.drv")).await; + insert_output(&mut conn, 2, 1, "out", &sp("intermediate.drv")).await; + + // Build 3: the intermediate drv + insert_step(&mut conn, 3, 1, &sp("intermediate.drv")).await; + insert_output(&mut conn, 3, 1, "out", &sp("final")).await; + + let results = conn + .resolve_drv_output_chains( + &test_store_dir(), + &[(&sp("unresolved.drv"), &[&on("out"), &on("out")])], + ) + .await + .unwrap(); + assert_eq!(results, vec![Some(sp("final"))]); + } + /// Batch with ragged depths: one depth-1 (Opaque), one depth-2 (Built), /// one depth-3 (Built(Built(...))). #[tokio::test] @@ -1511,101 +1620,40 @@ mod tests { assert_eq!(result, Some(sp("new-result"))); } - // -- Simulate the Rust-side loop that replaces the recursive SQL ---------- - // - // These mirror the resolved-step tests from the DB-column approach, - // but use resolve_drv_output + an in-memory map instead of - // resolvedDrvPath in the SQL. - - /// Helper: resolve a chain one level at a time using `resolve_drv_output`, - /// translating through `resolved_map` between levels. - async fn resolve_chain_with_map( - conn: &mut Connection, - resolved_map: &std::collections::HashMap, - root: &StorePath, - outputs: &[&OutputName], - ) -> Option { - let sd = test_store_dir(); - let mut current = root.clone(); - for output_name in outputs { - let translated = resolved_map.get(¤t).cloned().unwrap_or(current); - current = conn - .resolve_drv_output(&sd, &translated, output_name) - .await - .unwrap()?; - } - Some(current) - } - - /// Depth-1: unresolved.drv was resolved to resolved.drv, which has - /// the outputs. The in-memory map translates before lookup. + /// Depth-1 lookup where the only buildstep for the drv has + /// status=Resolved(13) with `resolvedDrvPath` pointing to + /// a different drv whose successful buildstep has the outputs. + /// This matches the production scenario: ca-depending-on-ca.drv + /// was resolved to a different drv, and ca-depending-on-ca- + /// depending-on-ca needs to look up its output. #[tokio::test] - async fn resolve_with_map_depth_1() { + async fn resolve_depth_1_via_resolved_step() { let (_pg, mut conn) = setup().await; - // resolved.drv was built successfully - insert_step(&mut conn, 2, 1, &sp("resolved.drv")).await; - insert_output(&mut conn, 2, 1, "out", &sp("result")).await; - - let mut map = std::collections::HashMap::new(); - map.insert(sp("unresolved.drv"), sp("resolved.drv")); - - let result = - resolve_chain_with_map(&mut conn, &map, &sp("unresolved.drv"), &[&on("out")]).await; - assert_eq!(result, Some(sp("result"))); - } - - /// Depth-2: unresolved.drv was resolved to resolved.drv, whose output - /// is an intermediate.drv that has the final output. - #[tokio::test] - async fn resolve_with_map_depth_2() { - let (_pg, mut conn) = setup().await; - - insert_step(&mut conn, 2, 1, &sp("resolved.drv")).await; - insert_output(&mut conn, 2, 1, "out", &sp("intermediate.drv")).await; - insert_step(&mut conn, 3, 1, &sp("intermediate.drv")).await; - insert_output(&mut conn, 3, 1, "out", &sp("final")).await; - - let mut map = std::collections::HashMap::new(); - map.insert(sp("unresolved.drv"), sp("resolved.drv")); - - let result = resolve_chain_with_map( + // Build 1, step 1: unresolved ca-depending-on-ca.drv + // status=13 (Resolved), resolvedDrvPath points to the resolved drv + insert_step_with_status( &mut conn, - &map, - &sp("unresolved.drv"), - &[&on("out"), &on("out")], + 1, + 1, + &sp("unresolved-ca-dep.drv"), + 13, + Some(&sp("resolved-ca-dep.drv")), ) .await; - assert_eq!(result, Some(sp("final"))); - } - - /// Depth-2 where the intermediate result was also resolved: - /// root.drv.drv (not resolved) → intermediate.drv (resolved) → final - #[tokio::test] - async fn resolve_with_map_intermediate_resolved() { - let (_pg, mut conn) = setup().await; - - // root.drv.drv^out → unresolved-intermediate.drv - insert_step(&mut conn, 1, 1, &sp("root.drv.drv")).await; - insert_output(&mut conn, 1, 1, "out", &sp("unresolved-intermediate.drv")).await; + // Build 2: the resolved drv was built successfully + insert_step(&mut conn, 2, 1, &sp("resolved-ca-dep.drv")).await; + insert_output(&mut conn, 2, 1, "out", &sp("ca-dep-output")).await; - // resolved-intermediate.drv^out → final-result - insert_step(&mut conn, 2, 1, &sp("resolved-intermediate.drv")).await; - insert_output(&mut conn, 2, 1, "out", &sp("final-result")).await; - - let mut map = std::collections::HashMap::new(); - map.insert( - sp("unresolved-intermediate.drv"), - sp("resolved-intermediate.drv"), - ); - - let result = resolve_chain_with_map( - &mut conn, - &map, - &sp("root.drv.drv"), - &[&on("out"), &on("out")], - ) - .await; - assert_eq!(result, Some(sp("final-result"))); + // Depth-1 chain: look up "out" of the unresolved drv path. + // The query should follow resolvedDrvPath to find the output. + let results = conn + .resolve_drv_output_chains( + &test_store_dir(), + &[(&sp("unresolved-ca-dep.drv"), &[&on("out")])], + ) + .await + .unwrap(); + assert_eq!(results, vec![Some(sp("ca-dep-output"))]); } } diff --git a/subprojects/crates/db/src/models.rs b/subprojects/crates/db/src/models.rs index f51f30e3c..4c7e662f1 100644 --- a/subprojects/crates/db/src/models.rs +++ b/subprojects/crates/db/src/models.rs @@ -24,7 +24,7 @@ pub enum BuildStatus { LogLimitExceeded = 10, NarSizeLimitExceeded = 11, NotDeterministic = 12, - /// step was resolved to a CA derivation + /// step was resolved to a CA derivation, see resolvedTo FK Resolved = 13, /// not stored Busy = 100, diff --git a/subprojects/hydra-queue-runner/src/state/mod.rs b/subprojects/hydra-queue-runner/src/state/mod.rs index 2e990ae84..d31ea538f 100644 --- a/subprojects/hydra-queue-runner/src/state/mod.rs +++ b/subprojects/hydra-queue-runner/src/state/mod.rs @@ -83,15 +83,6 @@ pub struct State { pub steps: Steps, pub queues: Queues, - /// In-memory mapping from unresolved CA drv path to resolved drv - /// path. Used to translate drv paths before SQL output lookups, - /// temporarily avoiding the need for a `resolvedDrvPath` column in - /// the database. - /// - /// FIXME: Replace this with proper persisted column, so we don't have to re-resolve on - /// restart. - pub resolved_drv_map: parking_lot::RwLock>, - pub fod_checker: Option>, pub started_at: jiff::Timestamp, @@ -141,7 +132,6 @@ impl State { cli, db, machines: Machines::new(), - resolved_drv_map: parking_lot::RwLock::new(HashMap::new()), log_dir, builds: Builds::new(), jobsets: Jobsets::new(), @@ -417,13 +407,11 @@ impl State { // Resolve `Built` input placeholders to concrete store // paths using outputs recorded in the DB. - let resolved_map = self.resolved_drv_map.read().clone(); - let basic_drv = - StepInfo::try_resolve(self.store.store_dir(), &self.db, drv_ref, &resolved_map) - .await - .ok_or_else(|| { - anyhow::anyhow!("Failed to resolve CAFloating derivation {drv}") - })?; + let basic_drv = StepInfo::try_resolve(self.store.store_dir(), &self.db, drv_ref) + .await + .ok_or_else(|| { + anyhow::anyhow!("Failed to resolve CAFloating derivation {drv}") + })?; let resolved_path = self.store.write_derivation(&basic_drv).await?; // If resolution changed the drv, we need a two-phase // build; otherwise just build the original directly. @@ -434,11 +422,12 @@ impl State { if let Some(resolved_path) = resolved_path { tracing::info!("resolved CA derivation {drv} -> {resolved_path}"); - // Record the resolved drv path in memory so future - // output lookups can translate through it. - self.resolved_drv_map - .write() - .insert(drv.clone(), resolved_path.clone()); + // Record the resolved drv path on the original step so + // future output lookups can follow the chain. + let mut tx = db.begin_transaction().await?; + tx.set_resolved_to(build_id, step_nr, &resolved_path) + .await?; + tx.commit().await?; // Finish original step as "resolved" in the DB and in-memory step_info.step.set_finished(true); diff --git a/subprojects/hydra-queue-runner/src/state/step_info.rs b/subprojects/hydra-queue-runner/src/state/step_info.rs index d8b6ecaeb..20684b8ec 100644 --- a/subprojects/hydra-queue-runner/src/state/step_info.rs +++ b/subprojects/hydra-queue-runner/src/state/step_info.rs @@ -44,7 +44,6 @@ impl StepInfo { store_dir: &StoreDir, db: &db::Database, drv: &Derivation, - resolved_drv_map: &hashbrown::HashMap, ) -> Option { // Input-addressed derivations should not be resolved because this would change their // output paths. @@ -75,53 +74,32 @@ impl StepInfo { let mut conn = db.get().await.ok()?; - // Memoize depth-1 lookups across all chains resolved in this call. - let mut memo = - std::collections::HashMap::<(StorePath, OutputName), Option>::new(); - drv.try_resolve(store_dir, &mut |inputs| { tokio::task::block_in_place(|| { - let rt = tokio::runtime::Handle::current(); - + // Flatten each SingleDerivedPath chain into (root, [outputs...]) + // and resolve everything in a single recursive SQL query. let chains: Vec<_> = inputs .iter() .map(|(drv_path, output_name)| flatten_chain(drv_path, output_name)) .collect(); - // Resolve each chain one level at a time, translating - // through the in-memory resolved-drv map between levels. - chains + // SQL needs forward order; OutputNameChain stores reversed. + let chain_refs: Vec<_> = chains + .iter() + .map(|(root, chain)| (root, chain.0.iter().rev().collect::>())) + .collect(); + + let sql_input: Vec<_> = chain_refs .iter() - .map(|(root, chain)| { - let mut current = root.clone(); - // OutputNameChain is in stack order; iterate - // reversed for forward (root-to-leaf) order. - for output_name in chain.0.iter().rev() { - let translated = resolved_drv_map - .get(¤t) - .cloned() - .unwrap_or_else(|| current.clone()); - let key = (translated, output_name.clone()); - let result = match memo.get(&key) { - Some(cached) => cached.clone(), - None => { - let r = rt - .block_on( - conn.resolve_drv_output(store_dir, &key.0, &key.1), - ) - .unwrap_or_else(|e| { - tracing::warn!("resolve_drv_output failed: {e}"); - None - }); - memo.insert(key, r.clone()); - r - } - }; - current = result?; - } - Some(current) + .map(|(root, outputs)| (*root, outputs.as_slice())) + .collect(); + + tokio::runtime::Handle::current() + .block_on(conn.resolve_drv_output_chains(store_dir, &sql_input)) + .unwrap_or_else(|e| { + tracing::warn!("resolve_drv_output_chains failed: {e}"); + vec![None; inputs.len()] }) - .collect() }) }) } diff --git a/subprojects/hydra-tests/content-addressed/basic.t b/subprojects/hydra-tests/content-addressed/basic.t index 3d7255ad0..bc5731936 100644 --- a/subprojects/hydra-tests/content-addressed/basic.t +++ b/subprojects/hydra-tests/content-addressed/basic.t @@ -90,6 +90,22 @@ my $downstream2_out = $downstream2->buildoutputs->find({ name => "out" }); is($downstream1_out->path, $downstream2_out->path, "Both downstream builds should create the same content-addressed output path"); +my $downstream1_step = $db->resultset('BuildSteps')->find({ + build => $downstream1->id, + drvPath => $downstream1->drvpath, +}); + +my $downstream2_step = $db->resultset('BuildSteps')->find({ + build => $downstream2->id, + drvPath => $downstream2->drvpath, +}); + +ok(length($downstream1_step->resolveddrvpath) > 32, + "Downstream build should resolve to a valid store path"); + +is($downstream1_step->resolveddrvpath, $downstream2_step->resolveddrvpath, + "Both downstream builds should resolve to the same derivation"); + ok($downstream1->iscachedbuild || $downstream2->iscachedbuild, "One downstream build should be cached"); diff --git a/subprojects/hydra/lib/Hydra/Schema/Result/BuildSteps.pm b/subprojects/hydra/lib/Hydra/Schema/Result/BuildSteps.pm index 02e4e8c17..97caf85aa 100644 --- a/subprojects/hydra/lib/Hydra/Schema/Result/BuildSteps.pm +++ b/subprojects/hydra/lib/Hydra/Schema/Result/BuildSteps.pm @@ -113,6 +113,11 @@ __PACKAGE__->table("buildsteps"); data_type: 'boolean' is_nullable: 1 +=head2 resolveddrvpath + + data_type: 'text' + is_nullable: 1 + =cut __PACKAGE__->add_columns( @@ -146,6 +151,8 @@ __PACKAGE__->add_columns( { data_type => "integer", is_nullable => 1 }, "isnondeterministic", { data_type => "boolean", is_nullable => 1 }, + "resolveddrvpath", + { data_type => "text", is_nullable => 1 }, ); =head1 PRIMARY KEY @@ -215,8 +222,8 @@ __PACKAGE__->belongs_to( ); -# Created by DBIx::Class::Schema::Loader v0.07049 @ 2021-08-26 12:02:36 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:GzztRd7OwomaT3Xi7NB2RQ +# Created by DBIx::Class::Schema::Loader v0.07051 @ 2026-04-14 13:27:30 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:Ktg5f7JBFiPfboSb0HBsMQ my %hint = ( columns => [ @@ -225,6 +232,7 @@ my %hint = ( "stepnr", "drvpath", "starttime", + "resolveddrvpath", ], eager_relations => { build => 'id' diff --git a/subprojects/hydra/sql/check-migrations.nix b/subprojects/hydra/sql/check-migrations.nix index 994569620..2687b2405 100644 --- a/subprojects/hydra/sql/check-migrations.nix +++ b/subprojects/hydra/sql/check-migrations.nix @@ -23,14 +23,18 @@ let allVersions = lib.unique (migrationVersions ++ schemaVersions); + maxMigrationVersion = lib.last (lib.sort lib.lessThan migrationVersions); + # A version N is covered when we have schema N-1, schema N, and upgrade-N.sql. # Schema-only versions (like version 1, the base) are also covered. + # The highest migration version is special: it uses hydra.sql (the current + # schema) as its target, so it doesn't need a vendored schema file. isCovered = n: ( builtins.elem n migrationVersions && builtins.elem (n - 1) schemaVersions - && builtins.elem n schemaVersions + && (builtins.elem n schemaVersions || n == maxMigrationVersion) ) || (!builtins.elem n migrationVersions && builtins.elem n schemaVersions); @@ -46,7 +50,9 @@ builtins.listToAttrs ( let from = to - 1; schemaBefore = ./schemas/hydra-${toString from}.sql; - schemaAfter = ./schemas/hydra-${toString to}.sql; + # The latest migration tests against the current schema directly, + # so we don't need to vendor a copy of it. + schemaAfter = if to == maxMigrationVersion then ./hydra.sql else ./schemas/hydra-${toString to}.sql; migration = ./migrations/upgrade-${toString to}.sql; in { diff --git a/subprojects/hydra/sql/hydra.sql b/subprojects/hydra/sql/hydra.sql index 869a24024..ba6fded93 100644 --- a/subprojects/hydra/sql/hydra.sql +++ b/subprojects/hydra/sql/hydra.sql @@ -293,9 +293,21 @@ create table BuildSteps ( -- Whether this build step produced different results when repeated. isNonDeterministic boolean, + -- If this step was resolved to a different CA derivation, stores the + -- resolved drv path so outputs can be looked up by drv path. + -- + -- Note: Unlike the other store path fields, this one does *not* include + -- the store dir. Eventually, it would be nice to migrate the other fields + -- to also not include the store dir, as it is repeated / denormalizes the + -- database. There is nothing special about this field that makes it not + -- include it, it is just newer. + resolvedDrvPath text, + primary key (build, stepnr), foreign key (build) references Builds(id) on delete cascade, - foreign key (propagatedFrom) references Builds(id) on delete cascade + foreign key (propagatedFrom) references Builds(id) on delete cascade, + -- status = 13 (Resolved) iff resolvedDrvPath is set + check ((status = 13) = (resolvedDrvPath is not null)) ); diff --git a/subprojects/hydra/sql/schemas/hydra-85.sql b/subprojects/hydra/sql/schemas/hydra-85.sql index 11b270138..869a24024 100644 --- a/subprojects/hydra/sql/schemas/hydra-85.sql +++ b/subprojects/hydra/sql/schemas/hydra-85.sql @@ -628,6 +628,8 @@ create rule IdempotentInsert as on insert to FailedPaths +-- TODO: Drop this table in a future migration. The queue runner no longer +-- writes status here; all status is served via its REST API instead. create table SystemStatus ( what text primary key not null, status json not null diff --git a/subprojects/hydra/sql/upgrade-86.sql b/subprojects/hydra/sql/upgrade-86.sql new file mode 100644 index 000000000..9f00fc5de --- /dev/null +++ b/subprojects/hydra/sql/upgrade-86.sql @@ -0,0 +1,3 @@ +ALTER TABLE BuildSteps ADD COLUMN resolvedDrvPath text; +ALTER TABLE BuildSteps ADD CONSTRAINT buildsteps_resolved_consistent + CHECK ((status = 13) = (resolvedDrvPath IS NOT NULL));