diff --git a/.claude/skills/testing-hashql/references/mir-builder-guide.md b/.claude/skills/testing-hashql/references/mir-builder-guide.md
index da36d57d1c3..7b122d27f4c 100644
--- a/.claude/skills/testing-hashql/references/mir-builder-guide.md
+++ b/.claude/skills/testing-hashql/references/mir-builder-guide.md
@@ -56,13 +56,22 @@ body!(interner, env; <source> @ <def-id> / <arity> -> <return-type> {
 | Component | Description | Example |
 | --------- | ----------- | ------- |
-| `<source>` | Body source type | `fn` (closure) or `thunk` |
+| `<source>` | Body source type | `fn`, `thunk`, `[ctor expr]`, `intrinsic` |
 | `<def-id>` | DefId (literal or variable) | `0`, `42`, `my_def_id` |
 | `<arity>` | Number of function arguments | `0`, `1`, `2` |
 | `<return-type>` | Return type | `Int`, `Bool`, `(Int, Bool)` |
 
 The `<def-id>` can be a numeric literal (`0`, `1`, `42`) or a variable identifier (`callee_id`, `my_def_id`). When using a variable, it must be a `DefId` in scope.
 
+**Source types:**
+
+| Syntax | Maps to | Use case |
+| ------ | ------- | -------- |
+| `fn` | `Source::Closure` | Regular closures/functions |
+| `thunk` | `Source::Thunk` | Thunk bodies (zero-arg delayed computations) |
+| `[ctor sym::path]` | `Source::Ctor(sym)` | Constructor bodies (always inlined) |
+| `intrinsic` | `Source::Intrinsic` | Intrinsic bodies (never inlined) |
+
 ### Types
 
 | Syntax | Description | Example |
diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_administrative_reduction.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_administrative_reduction.rs
index 35c352154c8..dfb3758954a 100644
--- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_administrative_reduction.rs
+++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_administrative_reduction.rs
@@ -2,7 +2,7 @@ use std::io::Write as _;
 
 use hashql_ast::node::expr::Expr;
 use hashql_core::{
-    heap::{Heap, Scratch},
+    heap::{Heap, ResetAllocator as _, Scratch},
     id::IdVec,
     r#type::environment::Environment,
 };
@@ -52,6 +52,7 @@ pub(crate) fn mir_pass_transform_administrative_reduction<'heap>(
         &mut GlobalTransformState::new(&mut changed),
         &mut bodies,
     );
+    scratch.reset();
 
     process_issues(diagnostics, context.diagnostics)?;
 
     Ok((root, bodies, scratch))
diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_cfg_simplify.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_cfg_simplify.rs
index cdcaec8fdd2..fa54cc8f4f7 100644
--- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_cfg_simplify.rs
+++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_cfg_simplify.rs
@@ -5,7 +5,7 @@ use std::{
 
 use hashql_ast::node::expr::Expr;
 use hashql_core::{
-    heap::{Heap, Scratch},
+    heap::{Heap, ResetAllocator as _, Scratch},
     r#type::environment::Environment,
 };
 use hashql_diagnostics::DiagnosticIssues;
@@ -65,6 +65,7 @@ pub(crate) fn mir_pass_transform_cfg_simplify<'heap>(
     for body in bodies.as_mut_slice() {
         let _: Changed = pass.run(&mut context, body);
     }
+    scratch.reset();
 
     process_issues(diagnostics, context.diagnostics)?;
 
diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_forward_substitution.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_forward_substitution.rs
index 3a34474525e..9f0c1327dca 100644
--- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_forward_substitution.rs
+++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_forward_substitution.rs
@@ -2,7 +2,7 @@ use std::io::Write as _;
 
 use hashql_ast::node::expr::Expr;
 use hashql_core::{
-    heap::{Heap, Scratch},
+    heap::{Heap, ResetAllocator as _, Scratch},
     r#type::environment::Environment,
 };
 use hashql_diagnostics::DiagnosticIssues;
@@ -46,6 +46,7 @@ pub(crate) fn mir_pass_transform_forward_substitution<'heap>(
     for body in bodies.as_mut_slice() {
         let _: Changed = pass.run(&mut context, body);
     }
+    scratch.reset();
 
     process_issues(diagnostics, context.diagnostics)?;
 
     Ok((root, bodies, scratch))
diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inline.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inline.rs
new file mode 100644
index 00000000000..8d96f3e31f4
--- /dev/null
+++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inline.rs
@@ -0,0 +1,180 @@
+use std::io::Write as _;
+
+use hashql_ast::node::expr::Expr;
+use hashql_core::{
+    heap::{Heap, ResetAllocator as _, Scratch},
+    r#type::environment::Environment,
+};
+use hashql_diagnostics::DiagnosticIssues;
+use hashql_mir::{
+    body::Body,
+    context::MirContext,
+    def::{DefId, DefIdVec},
+    intern::Interner,
+    pass::{
+        Changed, GlobalTransformPass as _, GlobalTransformState,
+        transform::{Inline, InlineConfig},
+    },
+};
+
+use super::{
+    RunContext, Suite, SuiteDiagnostic,
+    common::process_issues,
+    mir_pass_transform_pre_inlining::{
+        MirRenderer, RenderContext, Stage, mir_pass_transform_pre_inlining,
+    },
+};
+use crate::suite::{
+    mir_pass_transform_pre_inlining::{D2Renderer, TextRenderer},
+    mir_reify::{d2_output_enabled, mir_spawn_d2},
+};
+
+pub(crate) fn mir_pass_transform_inline<'heap>(
+    heap: &'heap Heap,
+    expr: Expr<'heap>,
+    config: InlineConfig,
+    interner: &Interner<'heap>,
+    mut render: impl MirRenderer,
+    environment: &mut Environment<'heap>,
+    diagnostics: &mut Vec<SuiteDiagnostic>,
+) -> Result<(DefId, DefIdVec<Body<'heap>>, Scratch), SuiteDiagnostic> {
+    let (root, mut bodies, mut scratch) = mir_pass_transform_pre_inlining(
+        heap,
+        expr,
+        interner,
+        &mut render,
+        environment,
+        diagnostics,
+    )?;
+
+    let mut context = MirContext {
+        heap,
+        env: environment,
+        interner,
+        diagnostics: DiagnosticIssues::new(),
+    };
+
+    let mut pass = Inline::new_in(config, &mut scratch);
+    let _: Changed = pass.run(
+        &mut context,
+        &mut GlobalTransformState::new_in(&bodies, heap),
+        &mut bodies,
+    );
+    scratch.reset();
+
+    process_issues(diagnostics, context.diagnostics)?;
+
+    render.render(
+        &mut RenderContext {
+            heap,
+            env: environment,
+            stage: Stage {
+                id: "inline",
+                title: "Inlined MIR",
+            },
+            root,
+        },
+        &bodies,
+    );
+
+    Ok((root, bodies, scratch))
+}
+
+pub(crate) struct MirPassTransformInline;
+
+impl Suite for MirPassTransformInline {
+    fn priority(&self) -> usize {
+        1
+    }
+
+    fn name(&self) -> &'static str {
+        "mir/pass/transform/inline"
+    }
+
+    fn description(&self) -> &'static str {
+        "Inlining in the MIR"
+    }
+
+    fn secondary_file_extensions(&self) -> &[&str] {
+        &["svg"]
+    }
+
+    fn run<'heap>(
+        &self,
+        RunContext {
+            heap,
+            diagnostics,
+            suite_directives,
+            reports,
+            secondary_outputs,
+            ..
+        }: RunContext<'_, 'heap>,
+        expr: Expr<'heap>,
+    ) -> Result<String, SuiteDiagnostic> {
+        let mut environment = Environment::new(heap);
+        let interner = Interner::new(heap);
+
+        let mut config = InlineConfig::default();
+
+        #[expect(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
+        if let Some(aggressive_inline_cutoff) = suite_directives
+            .get("aggressive-inline-cutoff")
+            .and_then(toml::Value::as_integer)
+        {
+            config.aggressive_inline_cutoff = aggressive_inline_cutoff as usize;
+        }
+
+        #[expect(clippy::cast_possible_truncation)]
+        if let Some(rvalue_input_cost) = suite_directives
+            .get("rvalue-input-cost")
+            .and_then(toml::Value::as_float)
+        {
+            config.cost.rvalue_input = rvalue_input_cost as f32;
+        }
+
+        #[expect(clippy::cast_possible_truncation)]
+        if let Some(max_cost) = suite_directives
+            .get("max-cost")
+            .and_then(toml::Value::as_float)
+        {
+            config.heuristics.max = max_cost as f32;
+        }
+
+        let skip_output = suite_directives
+            .get("skip-output")
+            .and_then(toml::Value::as_bool)
+            .unwrap_or(false);
+
+        let mut buffer = Vec::new();
+        let mut d2 = d2_output_enabled(self, suite_directives, reports).then(mir_spawn_d2);
+
+        mir_pass_transform_inline(
+            heap,
+            expr,
+            config,
+            &interner,
+            (
+                TextRenderer::new(&mut buffer),
+                d2.as_mut().map(|(writer, _)| D2Renderer::new(writer)),
+            ),
+            &mut environment,
+            diagnostics,
+        )?;
+
+        if let Some((mut writer, handle)) = d2 {
+            writer.flush().expect("should be able to write to buffer");
+            drop(writer);
+
+            let diagram = handle.join().expect("should be able to join handle");
+            let diagram = String::from_utf8_lossy_owned(diagram);
+
+            secondary_outputs.insert("svg", diagram);
+        }
+
+        if skip_output {
+            return Ok("[output intentionally skipped]".to_owned());
+        }
+
+        Ok(String::from_utf8_lossy_owned(buffer))
+    }
+}
diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inst_simplify.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inst_simplify.rs
index f1f50d855a8..cabef1f12a1 100644
--- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inst_simplify.rs
+++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_inst_simplify.rs
@@ -2,7 +2,7 @@ use std::io::Write as _;
 
 use hashql_ast::node::expr::Expr;
 use hashql_core::{
-    heap::{Heap, Scratch},
+    heap::{Heap, ResetAllocator as _, Scratch},
     r#type::environment::Environment,
 };
 use hashql_diagnostics::DiagnosticIssues;
@@ -52,6 +52,7 @@ pub(crate) fn mir_pass_transform_inst_simplify<'heap>(
     for body in bodies.as_mut_slice() {
         let _: Changed = pass.run(&mut context, body);
     }
+    scratch.reset();
 
     process_issues(diagnostics, context.diagnostics)?;
 
     Ok((root, bodies, scratch))
diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_pre_inlining.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_pre_inlining.rs
index 904f54d7799..e35e3e726b9 100644
--- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_pre_inlining.rs
+++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_pre_inlining.rs
@@ -22,8 +22,8 @@ use crate::suite::{
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub(crate) struct Stage {
-    id: &'static str,
-    title: &'static str,
+    pub id: &'static str,
+    pub title: &'static str,
 }
 
 pub(crate) struct RenderContext<'env, 'heap> {
diff --git a/libs/@local/hashql/compiletest/src/suite/mod.rs b/libs/@local/hashql/compiletest/src/suite/mod.rs
index 94049a01e68..cc66b47a226 100644
--- a/libs/@local/hashql/compiletest/src/suite/mod.rs
+++ b/libs/@local/hashql/compiletest/src/suite/mod.rs
@@ -25,6 +25,7 @@
mod mir_pass_transform_administrative_reduction; mod mir_pass_transform_cfg_simplify; mod mir_pass_transform_dse; mod mir_pass_transform_forward_substitution; +mod mir_pass_transform_inline; mod mir_pass_transform_inst_simplify; mod mir_pass_transform_pre_inlining; mod mir_reify; @@ -61,6 +62,7 @@ use self::{ mir_pass_transform_cfg_simplify::MirPassTransformCfgSimplify, mir_pass_transform_dse::MirPassTransformDse, mir_pass_transform_forward_substitution::MirPassTransformForwardSubstitution, + mir_pass_transform_inline::MirPassTransformInline, mir_pass_transform_inst_simplify::MirPassTransformInstSimplify, mir_pass_transform_pre_inlining::MirPassTransformPreInlining, mir_reify::MirReifySuite, parse_syntax_dump::ParseSyntaxDumpSuite, @@ -162,6 +164,7 @@ const SUITES: &[&dyn Suite] = &[ &MirPassTransformCfgSimplify, &MirPassTransformDse, &MirPassTransformForwardSubstitution, + &MirPassTransformInline, &MirPassTransformInstSimplify, &MirPassTransformPreInlining, &MirReifySuite, diff --git a/libs/@local/hashql/core/package.json b/libs/@local/hashql/core/package.json index 5025e2b33e0..60f659b7864 100644 --- a/libs/@local/hashql/core/package.json +++ b/libs/@local/hashql/core/package.json @@ -9,7 +9,7 @@ "fix:clippy": "just clippy --fix", "lint:clippy": "just clippy", "test:codspeed": "cargo codspeed run -p hashql-core", - "test:miri": "cargo miri nextest run -- co_sort try_scan heap::transfer stable_empty_slice", + "test:miri": "cargo miri nextest run -- co_sort try_scan heap::transfer stable_empty_slice id::slice tarjan::tests::members", "test:unit": "mise run test:unit @rust/hashql-core" }, "dependencies": { diff --git a/libs/@local/hashql/core/src/collections/work_queue.rs b/libs/@local/hashql/core/src/collections/work_queue.rs index b90226471d4..b87f1f99bbc 100644 --- a/libs/@local/hashql/core/src/collections/work_queue.rs +++ b/libs/@local/hashql/core/src/collections/work_queue.rs @@ -85,7 +85,7 @@ where #[must_use] pub fn new_in(domain_size: usize, alloc: A) -> Self { Self { - queue: VecDeque::new_in(alloc), + queue: VecDeque::with_capacity_in(domain_size, alloc), set: DenseBitSet::new_empty(domain_size), } } diff --git a/libs/@local/hashql/core/src/graph/algorithms/dominators/mod.rs b/libs/@local/hashql/core/src/graph/algorithms/dominators/mod.rs index ea28b1e3a53..c1e5ebf871e 100644 --- a/libs/@local/hashql/core/src/graph/algorithms/dominators/mod.rs +++ b/libs/@local/hashql/core/src/graph/algorithms/dominators/mod.rs @@ -465,7 +465,7 @@ fn compute_access_time( } let mut max = EdgeIndex::from_u32(0); - for edge in edges.iter_mut() { + for edge in &mut edges { max.increment_by(edge.end.as_usize()); edge.start = max; edge.end = max; diff --git a/libs/@local/hashql/core/src/graph/algorithms/tarjan/mod.rs b/libs/@local/hashql/core/src/graph/algorithms/tarjan/mod.rs index 71e1337e46d..e1ead9cf860 100644 --- a/libs/@local/hashql/core/src/graph/algorithms/tarjan/mod.rs +++ b/libs/@local/hashql/core/src/graph/algorithms/tarjan/mod.rs @@ -12,13 +12,17 @@ mod tests; use alloc::{alloc::Global, vec::Vec}; use core::{alloc::Allocator, iter, ops::Range, slice}; +#[cfg(debug_assertions)] +use crate::id::bit_vec::DenseBitSet; use crate::{ collections::{FastHashSet, fast_hash_set_in}, graph::{DirectedGraph, EdgeId, Successors}, - id::{HasId, Id, IdVec}, + id::{HasId, Id, IdSlice, IdVec}, newtype, }; +newtype!(pub struct SccId(u32 is 0..=u32::MAX)); + newtype!(struct DiscoveryTime(usize is 0..=usize::MAX)); /// Trait for attaching metadata to nodes and strongly connected components during traversal. 
@@ -147,6 +151,27 @@ struct Component {
     successors: Range<EdgeId>,
 }
 
+pub struct Members<S, N, A: Allocator> {
+    offsets: Box<IdSlice<S, usize>, A>,
+    nodes: Box<[N], A>,
+}
+
+impl<S, N, A: Allocator> Members<S, N, A>
+where
+    S: Id,
+{
+    pub fn sccs(&self) -> impl ExactSizeIterator<Item = S> + DoubleEndedIterator {
+        // Offsets is 1 longer than the number of SCCs
+        self.offsets.ids().take(self.offsets.len() - 1)
+    }
+
+    pub fn of(&self, id: S) -> &[N] {
+        let range = self.offsets[id]..self.offsets[id.plus(1)];
+
+        &self.nodes[range]
+    }
+}
+
 /// Storage for the computed SCCs and their relationships.
 ///
 /// Intentionally opaque to avoid exposing internal details.
@@ -165,6 +190,8 @@ struct Data {
 /// connected component from the original graph. Two SCCs are connected if there was an
 /// edge between any of their constituent nodes in the original graph.
 pub struct StronglyConnectedComponents<N, S, M: Metadata<N> = (), A: Allocator = Global> {
+    alloc: A,
+
     /// Metadata tracker used during construction.
     pub metadata: M,
 
@@ -186,6 +213,89 @@ where
     pub fn annotation(&self, scc: S) -> &M::Annotation {
         &self.data.components[scc].annotation
     }
+
+    #[inline]
+    pub fn members(&self) -> Members<S, N, A>
+    where
+        A: Clone,
+    {
+        self.members_in(Global)
+    }
+
+    #[expect(unsafe_code, clippy::debug_assert_with_mut_call)]
+    pub fn members_in<B: Allocator>(&self, scratch: B) -> Members<S, N, A>
+    where
+        A: Clone,
+    {
+        let num_sccs = self.data.components.len();
+        let num_nodes = self.data.nodes.len();
+
+        // Pass 1: count nodes per SCC
+        let mut counts = IdVec::from_domain_in(0, &self.data.components, scratch);
+        for &scc in &self.data.nodes {
+            counts[scc] += 1;
+        }
+
+        // Build offsets via prefix sum
+        let mut offsets =
+            IdSlice::from_boxed_slice(Box::new_uninit_slice_in(num_sccs + 1, self.alloc.clone()));
+
+        let mut total = 0;
+        for (index, &count) in counts.iter_enumerated() {
+            offsets[index].write(total);
+            total += count;
+        }
+        offsets[S::from_usize(num_sccs)].write(total);
+
+        // SAFETY: All `num_sccs + 1` elements are initialized:
+        // - The loop writes indices 0..num_sccs (one per SCC via iter_enumerated)
+        // - Final index at num_sccs is written just before this
+        let offsets = unsafe { IdSlice::boxed_assume_init(offsets) };
+
+        // Reuse the counts vector, for cursors, copying from the offsets vector
+        let mut cursor = counts;
+        cursor
+            .raw
+            .copy_from_slice(&offsets[..S::from_usize(num_sccs)]);
+
+        debug_assert_eq!(total, num_nodes);
+        debug_assert_eq!(offsets.len(), num_sccs + 1);
+
+        // Pass 2: place nodes
+        let mut nodes = Box::new_uninit_slice_in(num_nodes, self.alloc.clone());
+        #[cfg(debug_assertions)]
+        let mut nodes_written = DenseBitSet::new_empty(num_nodes);
+
+        for (node, &scc) in self.data.nodes.iter_enumerated() {
+            #[cfg(debug_assertions)]
+            {
+                debug_assert!(
+                    nodes_written.insert(N::from_usize(cursor[scc])),
+                    "Cursor {} has been visited multiple times.",
+                    cursor[scc]
+                );
+            }
+
+            nodes[cursor[scc]].write(node);
+
+            cursor[scc] += 1;
+        }
+
+        #[cfg(debug_assertions)]
+        {
+            debug_assert_eq!(nodes_written.count(), num_nodes);
+        }
+
+        // SAFETY: Every element in `nodes[0..num_nodes]` is initialized exactly once:
+        // - The loop iterates over all `num_nodes` nodes exactly once.
+        // - Each node writes to `nodes[cursor[scc]]` then increments `cursor[scc]`.
+        // - Cursors start at disjoint offsets (prefix sum of counts per SCC).
+        // - The sum of all counts equals `num_nodes` (each node belongs to exactly one SCC).
+        // - Therefore, the cursor writes partition and fully cover `0..num_nodes`.
+        let nodes = unsafe { nodes.assume_init() };
+
+        Members { offsets, nodes }
+    }
 }
 
 impl<N, S, M: Metadata<N>, A: Allocator> DirectedGraph for StronglyConnectedComponents<N, S, M, A>
 where
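> Reviewer note: `members_in` above is a counting sort into CSR form (an offsets array plus one flat node array). A minimal standalone sketch of the same two-pass construction, using plain `usize` ids and `Vec` in place of the crate's `Id`, `IdSlice`, and allocator machinery:

```rust
// Sketch only: `node_to_scc[n]` plays the role of `self.data.nodes`.
fn group_by_scc(node_to_scc: &[usize], num_sccs: usize) -> (Vec<usize>, Vec<usize>) {
    // Pass 1: count the nodes in each SCC.
    let mut counts = vec![0usize; num_sccs];
    for &scc in node_to_scc {
        counts[scc] += 1;
    }

    // Prefix sum: offsets[s]..offsets[s + 1] will delimit SCC `s`.
    let mut offsets = vec![0usize; num_sccs + 1];
    let mut total = 0;
    for (scc, &count) in counts.iter().enumerate() {
        offsets[scc] = total;
        total += count;
    }
    offsets[num_sccs] = total;

    // Pass 2: place each node at its SCC's cursor, then bump the cursor.
    let mut cursor = offsets[..num_sccs].to_vec();
    let mut nodes = vec![0usize; total];
    for (node, &scc) in node_to_scc.iter().enumerate() {
        nodes[cursor[scc]] = node;
        cursor[scc] += 1;
    }

    (offsets, nodes)
}

fn main() {
    // Nodes 0 and 2 share SCC 0; node 1 is alone in SCC 1.
    let (offsets, nodes) = group_by_scc(&[0, 1, 0], 2);
    assert_eq!(&nodes[offsets[0]..offsets[1]], &[0, 2]); // members of SCC 0
    assert_eq!(&nodes[offsets[1]..offsets[2]], &[1]); // members of SCC 1
}
```

> The prefix sum starts each SCC's cursor at a disjoint offset, which is exactly the invariant the `SAFETY` comment above relies on to justify `assume_init`.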
@@ -249,6 +359,8 @@ where
 /// - **Time**: O(V + E) where V is nodes and E is edges
 /// - **Space**: O(V) for the various stacks and state tracking
 pub struct Tarjan<'graph, G, N, S, M: Metadata<N> = (), A: Allocator = Global> {
+    alloc: A,
+
     /// Reference to the input graph.
     graph: &'graph G,
     /// Metadata tracker for annotations.
     metadata: M,
@@ -326,6 +438,8 @@ where
         let node_count = graph.node_count();
 
         Self {
+            alloc: alloc.clone(),
+
             graph,
 
             metadata,
@@ -362,6 +476,7 @@ where
         }
 
         StronglyConnectedComponents {
+            alloc: self.alloc,
             data: self.data,
             metadata: self.metadata,
         }
diff --git a/libs/@local/hashql/core/src/graph/algorithms/tarjan/tests.rs b/libs/@local/hashql/core/src/graph/algorithms/tarjan/tests.rs
index 7e7c5fe9dc8..aaa6c518107 100644
--- a/libs/@local/hashql/core/src/graph/algorithms/tarjan/tests.rs
+++ b/libs/@local/hashql/core/src/graph/algorithms/tarjan/tests.rs
@@ -473,3 +473,70 @@ fn metadata_merge_within_scc() {
     assert_eq!(sccs.annotation(sccs.scc(n!(3))).max, 4);
     assert_eq!(sccs.annotation(sccs.scc(n!(5))).min, 5);
 }
+
+/// Tests that `members()` correctly returns nodes for each SCC in a simple DAG.
+#[test]
+fn members_simple_dag() {
+    let graph = TestGraph::new(&[(0, 1), (0, 2), (1, 3), (2, 3)]);
+    let sccs: Sccs = Tarjan::new(&graph).run();
+
+    let members = sccs.members();
+
+    // Each node is its own SCC in a DAG
+    for i in 0..4 {
+        let scc = sccs.scc(n!(i));
+        let scc_members = members.of(scc);
+        assert_eq!(scc_members.len(), 1);
+        assert_eq!(scc_members[0], n!(i));
+    }
+}
+
+/// Tests that `members()` correctly returns all nodes in a single large SCC.
+#[test]
+fn members_single_scc() {
+    let graph = TestGraph::new(&[(0, 1), (1, 2), (1, 3), (2, 0), (3, 2)]);
+    let sccs: Sccs = Tarjan::new(&graph).run();
+
+    let members = sccs.members();
+    let scc = sccs.scc(n!(0));
+
+    let mut scc_members: Vec<_> = members.of(scc).to_vec();
+    scc_members.sort();
+
+    assert_eq!(scc_members, vec![n!(0), n!(1), n!(2), n!(3)]);
+}
+
+/// Tests that `members()` correctly partitions nodes across multiple SCCs.
+#[test]
+fn members_multiple_sccs() {
+    let graph = TestGraph::new(&[(0, 1), (1, 2), (2, 1), (3, 2)]);
+    let sccs: Sccs = Tarjan::new(&graph).run();
+
+    let members = sccs.members();
+
+    // Nodes 1 and 2 form a cycle (same SCC)
+    let scc_1_2 = sccs.scc(n!(1));
+    assert_eq!(sccs.scc(n!(2)), scc_1_2);
+
+    let mut members_1_2: Vec<_> = members.of(scc_1_2).to_vec();
+    members_1_2.sort();
+    assert_eq!(members_1_2, vec![n!(1), n!(2)]);
+
+    // Nodes 0 and 3 are their own SCCs
+    let scc_0 = sccs.scc(n!(0));
+    let scc_3 = sccs.scc(n!(3));
+
+    assert_eq!(members.of(scc_0), &[n!(0)]);
+    assert_eq!(members.of(scc_3), &[n!(3)]);
+}
+
+/// Tests that `members()` works correctly on an empty graph.
+#[test]
+fn members_empty() {
+    let graph = TestGraph::new(&[]);
+    let sccs: Sccs = Tarjan::new(&graph).run();
+
+    let members = sccs.members();
+    assert_eq!(members.offsets.len(), 1); // Just the sentinel
+    assert!(members.nodes.is_empty());
+}
diff --git a/libs/@local/hashql/core/src/graph/linked.rs b/libs/@local/hashql/core/src/graph/linked.rs
index 5b6f5257d92..28a4be5f955 100644
--- a/libs/@local/hashql/core/src/graph/linked.rs
+++ b/libs/@local/hashql/core/src/graph/linked.rs
@@ -87,6 +87,7 @@ pub struct Node {
 impl HasId for Node {
     type Id = NodeId;
 
+    #[inline]
     fn id(&self) -> Self::Id {
         self.id
     }
@@ -427,7 +428,7 @@ impl LinkedGraph {
     /// Removes all edges from the graph while preserving nodes.
     pub fn clear_edges(&mut self) {
         self.edges.clear();
-        for node in self.nodes.iter_mut() {
+        for node in &mut self.nodes {
             node.edges = [TOMBSTONE; DIRECTIONS];
         }
     }
diff --git a/libs/@local/hashql/core/src/heap/allocator.rs b/libs/@local/hashql/core/src/heap/allocator.rs
index 396418b7fe7..c453ef7a189 100644
--- a/libs/@local/hashql/core/src/heap/allocator.rs
+++ b/libs/@local/hashql/core/src/heap/allocator.rs
@@ -6,6 +6,8 @@ use bump_scope::{Bump, BumpBox, BumpScope};
 
 use super::{BumpAllocator, bump::ResetAllocator};
 
+pub struct Checkpoint(bump_scope::Checkpoint);
+
 /// Internal arena allocator.
 #[derive(Debug)]
 pub(super) struct Allocator(Bump);
@@ -32,6 +34,7 @@ impl Allocator {
 }
 
 impl BumpAllocator for Allocator {
+    type Checkpoint = Checkpoint;
     type Scoped<'scope> = AllocatorScope<'scope>;
 
     #[inline]
@@ -39,6 +42,17 @@
         self.0.scoped(|scope| func(AllocatorScope(scope)))
     }
 
+    #[inline]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        Checkpoint(self.0.checkpoint())
+    }
+
+    #[inline]
+    unsafe fn rollback(&self, checkpoint: Self::Checkpoint) {
+        // SAFETY: The same safety preconditions apply
+        unsafe { self.0.reset_to(checkpoint.0) }
+    }
+
     #[inline]
     fn try_allocate_slice_copy<T: Copy>(&self, slice: &[T]) -> Result<&mut [T], alloc::AllocError> {
         self.0
@@ -144,6 +158,7 @@ unsafe impl alloc::Allocator for Allocator {
 pub struct AllocatorScope<'scope>(BumpScope<'scope>);
 
 impl BumpAllocator for AllocatorScope<'_> {
+    type Checkpoint = Checkpoint;
     type Scoped<'scope> = AllocatorScope<'scope>;
 
     #[inline]
@@ -151,6 +166,17 @@
         self.0.scoped(|scope| func(AllocatorScope(scope)))
     }
 
+    #[inline]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        Checkpoint(self.0.checkpoint())
+    }
+
+    #[inline]
+    unsafe fn rollback(&self, checkpoint: Self::Checkpoint) {
+        // SAFETY: The same safety preconditions apply
+        unsafe { self.0.reset_to(checkpoint.0) }
+    }
+
     #[inline]
     fn try_allocate_slice_copy<T: Copy>(&self, slice: &[T]) -> Result<&mut [T], alloc::AllocError> {
         self.0
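> Reviewer note: a usage sketch for the checkpoint/rollback API added to `Allocator` and `AllocatorScope` above. It assumes the `Scratch` allocator from this crate; the elided allocation work in the middle is purely illustrative:

```rust
use hashql_core::heap::{BumpAllocator as _, Scratch};

fn pass_local_work(scratch: &Scratch) {
    let checkpoint = scratch.checkpoint();

    // ... perform temporary allocations in `scratch`, making sure no
    // reference to them survives past this block ...

    // SAFETY: `checkpoint` was created by this allocator instance, `reset`
    // has not been called since, and no references to memory allocated
    // after the checkpoint are still live.
    unsafe { scratch.rollback(checkpoint) };
}
```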
diff --git a/libs/@local/hashql/core/src/heap/bump.rs b/libs/@local/hashql/core/src/heap/bump.rs
index d9689bd9bb8..59264340149 100644
--- a/libs/@local/hashql/core/src/heap/bump.rs
+++ b/libs/@local/hashql/core/src/heap/bump.rs
@@ -74,6 +74,7 @@ pub trait BumpAllocator: Allocator {
     /// This associated type allows each allocator to define its own scoped variant
     /// while ensuring it also implements [`BumpAllocator`].
     type Scoped<'scope>: BumpAllocator;
+    type Checkpoint;
 
     /// Executes a closure with a scoped sub-arena.
     ///
@@ -86,6 +87,26 @@
     /// should not outlive the computation itself.
     fn scoped<T>(&mut self, func: impl FnOnce(Self::Scoped<'_>) -> T) -> T;
 
+    /// Creates a checkpoint of the current bump position.
+    ///
+    /// The checkpoint can later be passed to [`rollback`] to reset the allocator
+    /// to this position, freeing all allocations made after the checkpoint.
+    ///
+    /// [`rollback`]: Self::rollback
+    fn checkpoint(&self) -> Self::Checkpoint;
+
+    /// Resets the bump position to a previously created checkpoint.
+    ///
+    /// All memory allocated after the checkpoint was created becomes available
+    /// for reuse by future allocations.
+    ///
+    /// # Safety
+    ///
+    /// - The `checkpoint` must have been created by this allocator instance.
+    /// - [`ResetAllocator::reset`] must not have been called since the checkpoint was created.
+    /// - There must be no live references to memory allocated after the checkpoint.
+    unsafe fn rollback(&self, checkpoint: Self::Checkpoint);
+
     /// Copies a slice into the arena, returning a mutable reference to the copy.
     ///
     /// This is useful for transferring borrowed data into arena-owned memory.
@@ -203,6 +224,7 @@ impl<A> BumpAllocator for &mut A
 where
     A: BumpAllocator,
 {
+    type Checkpoint = A::Checkpoint;
     type Scoped<'scope> = A::Scoped<'scope>;
 
     #[inline]
@@ -210,6 +232,18 @@
         A::scoped(self, func)
     }
 
+    #[inline]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        A::checkpoint(self)
+    }
+
+    unsafe fn rollback(&self, checkpoint: Self::Checkpoint) {
+        // SAFETY: same safety requirements as `A::rollback`
+        unsafe {
+            A::rollback(self, checkpoint);
+        }
+    }
+
     #[inline]
     fn try_allocate_slice_copy<T: Copy>(&self, slice: &[T]) -> Result<&mut [T], AllocError> {
         A::try_allocate_slice_copy(self, slice)
diff --git a/libs/@local/hashql/core/src/heap/mod.rs b/libs/@local/hashql/core/src/heap/mod.rs
index 35daa0d19d6..f2f33329d28 100644
--- a/libs/@local/hashql/core/src/heap/mod.rs
+++ b/libs/@local/hashql/core/src/heap/mod.rs
@@ -106,7 +106,7 @@ use std::sync::Mutex;
 use ::alloc::{boxed, collections::vec_deque, vec};
 use hashbrown::HashSet;
 
-use self::allocator::{Allocator, AllocatorScope};
+use self::allocator::{Allocator, AllocatorScope, Checkpoint};
 pub use self::{
     bump::{BumpAllocator, ResetAllocator},
     clone::{CloneIn, TryCloneIn},
@@ -303,6 +303,7 @@ impl Default for Heap {
 }
 
 impl BumpAllocator for Heap {
+    type Checkpoint = Checkpoint;
     type Scoped<'scope> = AllocatorScope<'scope>;
 
     #[inline]
@@ -310,6 +311,17 @@
         self.inner.scoped(func)
     }
 
+    #[inline]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        self.inner.checkpoint()
+    }
+
+    #[inline]
+    unsafe fn rollback(&self, checkpoint: Self::Checkpoint) {
+        // SAFETY: Same safety guarantees as `Allocator::rollback`.
+        unsafe { self.inner.rollback(checkpoint) }
+    }
+
     #[inline]
     fn try_allocate_slice_copy<T: Copy>(&self, slice: &[T]) -> Result<&mut [T], alloc::AllocError> {
         self.inner.try_allocate_slice_copy(slice)
diff --git a/libs/@local/hashql/core/src/heap/scratch.rs b/libs/@local/hashql/core/src/heap/scratch.rs
index ad69f642385..ce87a754e75 100644
--- a/libs/@local/hashql/core/src/heap/scratch.rs
+++ b/libs/@local/hashql/core/src/heap/scratch.rs
@@ -2,7 +2,11 @@
 
 use core::{alloc, mem, ptr};
 
-use super::{AllocatorScope, BumpAllocator, allocator::Allocator, bump::ResetAllocator};
+use super::{
+    AllocatorScope, BumpAllocator,
+    allocator::{Allocator, Checkpoint},
+    bump::ResetAllocator,
+};
 
 /// A resettable scratch allocator for temporary allocations.
 ///
@@ -53,6 +57,7 @@ impl Default for Scratch {
 }
 
 impl BumpAllocator for Scratch {
+    type Checkpoint = Checkpoint;
     type Scoped<'scope> = AllocatorScope<'scope>;
 
     #[inline]
@@ -60,6 +65,17 @@
         self.inner.scoped(func)
     }
 
+    #[inline]
+    fn checkpoint(&self) -> Self::Checkpoint {
+        self.inner.checkpoint()
+    }
+
+    #[inline]
+    unsafe fn rollback(&self, checkpoint: Self::Checkpoint) {
+        // SAFETY: Same safety guarantees as `Allocator::rollback`.
+        unsafe { self.inner.rollback(checkpoint) }
+    }
+
     #[inline]
     fn try_allocate_slice_copy<T: Copy>(&self, slice: &[T]) -> Result<&mut [T], alloc::AllocError> {
         self.inner.try_allocate_slice_copy(slice)
diff --git a/libs/@local/hashql/core/src/id/slice.rs b/libs/@local/hashql/core/src/id/slice.rs
index 99c77ce7791..fc2fbea6802 100644
--- a/libs/@local/hashql/core/src/id/slice.rs
+++ b/libs/@local/hashql/core/src/id/slice.rs
@@ -1,6 +1,8 @@
 use core::{
+    alloc::Allocator,
     fmt::{self, Debug},
     marker::PhantomData,
+    mem::MaybeUninit,
     ops::{Index, IndexMut},
     ptr,
     slice::{self, GetDisjointMutError, GetDisjointMutIndex, SliceIndex},
@@ -75,6 +77,37 @@ where
         unsafe { &mut *(ptr::from_mut(self) as *mut [T]) }
     }
 
+    /// Creates an `IdSlice` from a boxed slice.
+    #[inline]
+    #[expect(unsafe_code, reason = "repr(transparent)")]
+    pub fn from_boxed_slice<A: Allocator>(slice: Box<[T], A>) -> Box<Self, A> {
+        let (ptr, alloc) = Box::into_raw_with_allocator(slice);
+
+        // SAFETY: `IdSlice` is repr(transparent) and we simply cast the underlying pointer.
+        unsafe { Box::from_raw_in(ptr as *mut Self, alloc) }
+    }
+
+    /// Converts to `Box<IdSlice<I, T>, A>`.
+    ///
+    /// See [`Box::assume_init`] for additional details.
+    ///
+    /// # Safety
+    ///
+    /// As with [`MaybeUninit::assume_init`], it is up to the caller to guarantee that the values
+    /// really are in an initialized state. Calling this when the content is not yet fully
+    /// initialized causes immediate undefined behavior.
+    #[expect(unsafe_code)]
+    pub unsafe fn boxed_assume_init<A: Allocator>(
+        slice: Box<IdSlice<I, MaybeUninit<T>>, A>,
+    ) -> Box<Self, A> {
+        let (ptr, alloc) = Box::into_raw_with_allocator(slice);
+
+        // SAFETY: The caller guarantees all elements are initialized and valid `T`s.
+        // `MaybeUninit<T>` is #[repr(transparent)] over `T`, and `IdSlice` is #[repr(transparent)]
+        // over `[T]`, so the pointer cast (and its slice metadata) is layout-correct.
+        unsafe { Box::from_raw_in(ptr as *mut [MaybeUninit<T>] as *mut [T] as *mut Self, alloc) }
+    }
+
     /// Gets a reference to an element or subslice by ID index.
     ///
     /// See [`slice::get`] for details.
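> Reviewer note: the casts in `from_boxed_slice`/`boxed_assume_init` lean entirely on `#[repr(transparent)]`. The same pattern in miniature, for a hypothetical unsized wrapper type:

```rust
#[repr(transparent)]
struct Wrapper([u8]);

fn wrap(boxed: Box<[u8]>) -> Box<Wrapper> {
    let ptr = Box::into_raw(boxed);
    // SAFETY: `Wrapper` is repr(transparent) over `[u8]`, so the pointer
    // (including its slice-length metadata) has an identical layout.
    unsafe { Box::from_raw(ptr as *mut Wrapper) }
}
```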
@@ -383,3 +416,96 @@
         IdSlice::from_raw_mut(Default::default())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    #![expect(unsafe_code, clippy::cast_possible_truncation)]
+    use alloc::boxed::Box;
+    use core::mem::MaybeUninit;
+
+    use super::IdSlice;
+    use crate::{id::Id as _, newtype};
+
+    newtype!(struct TestId(u32 is 0..=0xFFFF_FF00));
+
+    #[test]
+    fn from_raw_indexing() {
+        let data = [10, 20, 30];
+        let slice = IdSlice::<TestId, _>::from_raw(&data);
+
+        assert_eq!(slice.len(), 3);
+        assert_eq!(slice[TestId::from_usize(0)], 10);
+        assert_eq!(slice[TestId::from_usize(1)], 20);
+        assert_eq!(slice[TestId::from_usize(2)], 30);
+    }
+
+    #[test]
+    fn from_raw_empty() {
+        let data: [u32; 0] = [];
+        let slice = IdSlice::<TestId, _>::from_raw(&data);
+
+        assert!(slice.is_empty());
+    }
+
+    #[test]
+    fn from_raw_mut_modification() {
+        let mut data = [1, 2, 3];
+        let slice = IdSlice::<TestId, _>::from_raw_mut(&mut data);
+
+        slice[TestId::from_usize(1)] = 42;
+
+        assert_eq!(data[1], 42);
+    }
+
+    #[test]
+    fn from_raw_mut_empty() {
+        let mut data: [u32; 0] = [];
+        let slice = IdSlice::<TestId, _>::from_raw_mut(&mut data);
+
+        assert!(slice.is_empty());
+    }
+
+    #[test]
+    fn from_boxed_slice_roundtrip() {
+        let boxed: Box<[u32]> = Box::new([1, 2, 3]);
+        let id_slice = IdSlice::<TestId, _>::from_boxed_slice(boxed);
+
+        assert_eq!(id_slice.len(), 3);
+        assert_eq!(id_slice[TestId::from_usize(0)], 1);
+        assert_eq!(id_slice[TestId::from_usize(2)], 3);
+    }
+
+    #[test]
+    fn from_boxed_slice_empty() {
+        let boxed: Box<[u32]> = Box::new([]);
+        let id_slice = IdSlice::<TestId, _>::from_boxed_slice(boxed);
+
+        assert!(id_slice.is_empty());
+    }
+
+    #[test]
+    fn boxed_assume_init_fully_initialized() {
+        let mut uninit: Box<[MaybeUninit<u32>]> = Box::new_uninit_slice(4);
+        for (i, slot) in uninit.iter_mut().enumerate() {
+            slot.write(i as u32 * 10);
+        }
+
+        let id_slice = IdSlice::<TestId, _>::from_boxed_slice(uninit);
+        // SAFETY: All elements were initialized in the loop above
+        let init = unsafe { IdSlice::boxed_assume_init(id_slice) };
+
+        assert_eq!(init.len(), 4);
+        assert_eq!(init[TestId::from_usize(0)], 0);
+        assert_eq!(init[TestId::from_usize(3)], 30);
+    }
+
+    #[test]
+    fn boxed_assume_init_empty() {
+        let uninit: Box<[MaybeUninit<u32>]> = Box::new_uninit_slice(0);
+        let id_slice = IdSlice::<TestId, _>::from_boxed_slice(uninit);
+        // SAFETY: Empty slice is trivially initialized
+        let init = unsafe { IdSlice::boxed_assume_init(id_slice) };
+
+        assert!(init.is_empty());
+    }
+}
diff --git a/libs/@local/hashql/core/src/id/vec.rs b/libs/@local/hashql/core/src/id/vec.rs
index c714955602b..5a234514a40 100644
--- a/libs/@local/hashql/core/src/id/vec.rs
+++ b/libs/@local/hashql/core/src/id/vec.rs
@@ -7,6 +7,7 @@ use core::{
     hash::{Hash, Hasher},
     marker::PhantomData,
     ops::{Deref, DerefMut},
+    slice,
 };
 
 use super::{Id, slice::IdSlice};
@@ -558,6 +559,34 @@
     }
 }
 
+impl<'this, I, T, A> IntoIterator for &'this IdVec<I, T, A>
+where
+    I: Id,
+    A: Allocator,
+{
+    type IntoIter = slice::Iter<'this, T>;
+    type Item = &'this T;
+
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        self.raw.iter()
+    }
+}
+
+impl<'this, I, T, A> IntoIterator for &'this mut IdVec<I, T, A>
+where
+    I: Id,
+    A: Allocator,
+{
+    type IntoIter = slice::IterMut<'this, T>;
+    type Item = &'this mut T;
+
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        self.raw.iter_mut()
+    }
+}
+
 impl<I, T, A> Default for IdVec<I, T, A>
 where
     I: Id,
diff --git a/libs/@local/hashql/core/src/lib.rs b/libs/@local/hashql/core/src/lib.rs
index 82152def8d9..64671748fd5 100644
--- a/libs/@local/hashql/core/src/lib.rs
+++ b/libs/@local/hashql/core/src/lib.rs
@@ -19,7 +19,9 @@
     allocator_api,
     assert_matches,
     binary_heap_into_iter_sorted,
+    clone_from_ref,
     debug_closure_helpers,
+    extend_one,
     formatting_options,
     get_disjoint_mut_helpers,
     iter_intersperse,
@@ -30,8 +32,6 @@
     step_trait,
     try_trait_v2,
     variant_count,
-    clone_from_ref,
-    extend_one
 )]
 
 extern crate alloc;
diff --git a/libs/@local/hashql/mir/benches/transform.rs b/libs/@local/hashql/mir/benches/transform.rs
index 19296ac7ef3..d1c700dc4d4 100644
--- a/libs/@local/hashql/mir/benches/transform.rs
+++ b/libs/@local/hashql/mir/benches/transform.rs
@@ -22,7 +22,7 @@ use hashql_mir::{
     intern::Interner,
     op,
     pass::{
-        GlobalTransformPass as _, GlobalTransformState, TransformPass,
+        GlobalTransformPass as _, GlobalTransformState, TransformPass as _,
         transform::{
             CfgSimplify, DeadStoreElimination, ForwardSubstitution, InstSimplify, PreInlining,
         },
@@ -316,12 +316,20 @@ fn create_complex_cfg<'heap>(env: &Environment<'heap>, interner: &Interner<'heap>)
 fn run_bencher<T>(
     bencher: &mut Bencher,
     body: for<'heap> fn(&Environment<'heap>, &Interner<'heap>) -> Body<'heap>,
-    mut func: impl for<'env, 'heap> FnMut(&mut MirContext<'env, 'heap>, &mut Body<'heap>) -> T,
+    mut func: impl for<'env, 'heap> FnMut(
+        &mut MirContext<'env, 'heap>,
+        &mut Body<'heap>,
+        &mut Scratch,
+    ) -> T,
 ) {
     // NOTE: `heap` must not be moved or reassigned; `heap_ptr` assumes its address is stable
     // for the entire duration of this function.
     let mut heap = Heap::new();
     let heap_ptr = &raw mut heap;
+    // NOTE: `scratch` must not be moved or reassigned; `scratch_ptr` assumes its address is stable
+    // for the entire duration of this function.
+    let mut scratch = Scratch::new();
+    let scratch_ptr = &raw mut scratch;
 
     // Using `iter_custom` here would be better, but codspeed doesn't support it yet.
     //
@@ -340,6 +348,17 @@
         let heap = unsafe { &mut *heap_ptr };
         heap.reset();
 
+        // SAFETY: We create a `&mut Scratch` from the raw pointer to call `reset()`. This is
+        // sound because:
+        // - `scratch` outlives the entire `iter_batched` call (it's a local in the outer
+        //   scope).
+        // - `BatchSize::PerIteration` ensures this closure completes and its borrows end before
+        //   the routine closure runs, so no aliasing occurs.
+        // - No other references to `scratch` exist during this closure's execution.
+        // - This code runs single-threaded.
+        let scratch = unsafe { &mut *scratch_ptr };
+        scratch.reset();
+
         let env = Environment::new(heap);
         let interner = Interner::new(heap);
         let body = body(&env, &interner);
@@ -352,6 +371,17 @@
         // - The `env`, `interner`, and `body` already hold shared borrows of `heap`
         // - Adding another `&Heap` is just shared-shared aliasing, which is allowed
         let heap = unsafe { &*heap_ptr };
+        // SAFETY: We create a mutable `&mut Scratch` reference. This is sound because:
+        // - The `&mut Scratch` from setup no longer exists (setup closure has returned), it is
+        //   only used to reset.
+        // - The `env`, `interner`, and `body` do *not* reference `scratch`.
+        // - Therefore due to the sequential nature of the code, `scratch` is the sole reference
+        //   to the variable and not aliased.
+        // - Scratch space data does *not* escape the closure, the return type `T` of `func` is
+        //   irrespective of the scratch space and even if, is immediately dropped after
+        //   execution through criterion, only after which the scratch space is reset.
+        //   Therefore, no additional references exist.
+ let scratch = unsafe { &mut *scratch_ptr }; let mut context = MirContext { heap, @@ -360,40 +390,32 @@ fn run_bencher( diagnostics: DiagnosticIssues::new(), }; - let value = func(black_box(&mut context), black_box(body)); + let value = func(black_box(&mut context), black_box(body), black_box(scratch)); (context.diagnostics, value) }, BatchSize::PerIteration, ); } -#[inline] -fn run( - bencher: &mut Bencher, - body: for<'heap> fn(&Environment<'heap>, &Interner<'heap>) -> Body<'heap>, - mut pass: impl for<'env, 'heap> TransformPass<'env, 'heap>, -) { - run_bencher( - bencher, - body, - #[inline] - |context, body| pass.run(context, body), - ); -} - fn cfg_simplify(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("cfg_simplify"); group.bench_function("linear", |bencher| { - run(bencher, create_linear_cfg, CfgSimplify::new()); + run_bencher(bencher, create_linear_cfg, |context, body, scratch| { + CfgSimplify::new_in(scratch).run(context, body) + }); }); group.bench_function("diamond", |bencher| { - run(bencher, create_diamond_cfg, CfgSimplify::new()); + run_bencher(bencher, create_diamond_cfg, |context, body, scratch| { + CfgSimplify::new_in(scratch).run(context, body) + }); }); group.bench_function("complex", |bencher| { - run(bencher, create_complex_cfg, CfgSimplify::new()); + run_bencher(bencher, create_complex_cfg, |context, body, scratch| { + CfgSimplify::new_in(scratch).run(context, body) + }); }); } @@ -401,13 +423,21 @@ fn forward_substitution(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("forward_substitution"); group.bench_function("linear", |bencher| { - run(bencher, create_linear_cfg, ForwardSubstitution::new()); + run_bencher(bencher, create_linear_cfg, |context, body, scratch| { + ForwardSubstitution::new_in(scratch).run(context, body) + }); }); + group.bench_function("diamond", |bencher| { - run(bencher, create_diamond_cfg, ForwardSubstitution::new()); + run_bencher(bencher, create_diamond_cfg, |context, body, scratch| { + ForwardSubstitution::new_in(scratch).run(context, body) + }); }); + group.bench_function("complex", |bencher| { - run(bencher, create_complex_cfg, ForwardSubstitution::new()); + run_bencher(bencher, create_complex_cfg, |context, body, scratch| { + ForwardSubstitution::new_in(scratch).run(context, body) + }); }); } @@ -415,16 +445,27 @@ fn dse(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("dse"); group.bench_function("dead stores", |bencher| { - run(bencher, create_dead_store_cfg, DeadStoreElimination::new()); + run_bencher(bencher, create_dead_store_cfg, |context, body, scratch| { + DeadStoreElimination::new_in(scratch).run(context, body) + }); }); + group.bench_function("linear", |bencher| { - run(bencher, create_linear_cfg, DeadStoreElimination::new()); + run_bencher(bencher, create_linear_cfg, |context, body, scratch| { + DeadStoreElimination::new_in(scratch).run(context, body) + }); }); + group.bench_function("diamond", |bencher| { - run(bencher, create_diamond_cfg, DeadStoreElimination::new()); + run_bencher(bencher, create_diamond_cfg, |context, body, scratch| { + DeadStoreElimination::new_in(scratch).run(context, body) + }); }); + group.bench_function("complex", |bencher| { - run(bencher, create_complex_cfg, DeadStoreElimination::new()); + run_bencher(bencher, create_complex_cfg, |context, body, scratch| { + DeadStoreElimination::new_in(scratch).run(context, body) + }); }); } @@ -432,16 +473,28 @@ fn inst_simplify(criterion: &mut Criterion) { let mut group = 
criterion.benchmark_group("inst_simplify"); group.bench_function("foldable", |bencher| { - run(bencher, create_inst_simplify_cfg, InstSimplify::new()); + run_bencher( + bencher, + create_inst_simplify_cfg, + |context, body, scratch| InstSimplify::new_in(scratch).run(context, body), + ); }); group.bench_function("linear", |bencher| { - run(bencher, create_linear_cfg, InstSimplify::new()); + run_bencher(bencher, create_linear_cfg, |context, body, scratch| { + InstSimplify::new_in(scratch).run(context, body) + }); }); + group.bench_function("diamond", |bencher| { - run(bencher, create_diamond_cfg, InstSimplify::new()); + run_bencher(bencher, create_diamond_cfg, |context, body, scratch| { + InstSimplify::new_in(scratch).run(context, body) + }); }); + group.bench_function("complex", |bencher| { - run(bencher, create_complex_cfg, InstSimplify::new()); + run_bencher(bencher, create_complex_cfg, |context, body, scratch| { + InstSimplify::new_in(scratch).run(context, body) + }); }); } @@ -449,12 +502,10 @@ fn pipeline(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("pipeline"); group.bench_function("linear", |bencher| { - let mut scratch = Scratch::new(); - - run_bencher(bencher, create_linear_cfg, |context, body| { + run_bencher(bencher, create_linear_cfg, |context, body, scratch| { let bodies = IdSlice::from_raw_mut(core::slice::from_mut(body)); - PreInlining::new_in(&mut scratch).run( + PreInlining::new_in(scratch).run( context, &mut GlobalTransformState::new_in(bodies, context.heap), bodies, @@ -462,12 +513,10 @@ fn pipeline(criterion: &mut Criterion) { }); }); group.bench_function("diamond", |bencher| { - let mut scratch = Scratch::new(); - - run_bencher(bencher, create_diamond_cfg, |context, body| { + run_bencher(bencher, create_diamond_cfg, |context, body, scratch| { let bodies = IdSlice::from_raw_mut(core::slice::from_mut(body)); - PreInlining::new_in(&mut scratch).run( + PreInlining::new_in(scratch).run( context, &mut GlobalTransformState::new_in(bodies, context.heap), bodies, @@ -475,12 +524,10 @@ fn pipeline(criterion: &mut Criterion) { }); }); group.bench_function("complex", |bencher| { - let mut scratch = Scratch::new(); - - run_bencher(bencher, create_complex_cfg, |context, body| { + run_bencher(bencher, create_complex_cfg, |context, body, scratch| { let bodies = IdSlice::from_raw_mut(core::slice::from_mut(body)); - PreInlining::new_in(&mut scratch).run( + PreInlining::new_in(scratch).run( context, &mut GlobalTransformState::new_in(bodies, context.heap), bodies, diff --git a/libs/@local/hashql/mir/src/body/location.rs b/libs/@local/hashql/mir/src/body/location.rs index 730321c0ce8..c52af144924 100644 --- a/libs/@local/hashql/mir/src/body/location.rs +++ b/libs/@local/hashql/mir/src/body/location.rs @@ -6,7 +6,7 @@ use super::basic_block::BasicBlockId; /// /// A [`Location`] uniquely identifies a program point by specifying both the basic block /// and the statement index within that block. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Location { /// The basic block containing this location. 
 ///
diff --git a/libs/@local/hashql/mir/src/builder/body.rs b/libs/@local/hashql/mir/src/builder/body.rs
index 2f1337352a8..ea100b613c8 100644
--- a/libs/@local/hashql/mir/src/builder/body.rs
+++ b/libs/@local/hashql/mir/src/builder/body.rs
@@ -162,7 +162,7 @@ impl<'env, 'heap> Deref for BodyBuilder<'env, 'heap> {
 ///
 /// ## Header
 ///
-/// - `<source>`: Either `fn` (closure) or `thunk`
+/// - `<source>`: `fn` (closure), `thunk`, `[ctor sym::path]`, or `intrinsic`
 /// - `<def-id>`: Numeric literal for `DefId`
 /// - `<arity>`: Number of function arguments
 /// - `<return-type>`: Return type (`Int`, `Bool`, tuple `(Int, Bool)`, or custom `|t| t.foo()`)
@@ -260,7 +260,7 @@
 macro_rules! body {
     (
         $interner:ident, $env:ident;
-        $type:ident @ $id:tt / $arity:literal -> $body_type:tt {
+        $type:tt @ $id:tt / $arity:literal -> $body_type:tt {
             decl $($param:ident: $param_type:tt),*;
 
             $(@proj $($proj:ident = $proj_base:ident.$field:literal: $proj_type:tt),*;)?
@@ -339,6 +339,12 @@
     (@source fn) => {
         $crate::body::Source::Closure(hashql_hir::node::HirId::PLACEHOLDER, None)
     };
+    (@source [ctor $name:expr]) => {
+        $crate::body::Source::Ctor($name)
+    };
+    (@source intrinsic) => {
+        $crate::body::Source::Intrinsic($crate::def::DefId::PLACEHOLDER)
+    };
 }
 
 pub use body;
diff --git a/libs/@local/hashql/mir/src/def.rs b/libs/@local/hashql/mir/src/def.rs
index c20fad73b03..08036e0cf80 100644
--- a/libs/@local/hashql/mir/src/def.rs
+++ b/libs/@local/hashql/mir/src/def.rs
@@ -4,7 +4,7 @@
 //! MIR bodies, including user-defined functions, closures, constants, and
 //! built-in operations that require MIR representation.
 
-use hashql_core::id;
+use hashql_core::id::{self, Id as _};
 
 id::newtype!(
     /// A unique identifier for definitions that have a body associated with them in the HashQL MIR.
@@ -72,4 +72,5 @@ impl DefId {
     /// the original list. Used for imperative-style list manipulation
     /// where performance is critical.
     pub const LIST_PUSH_MUT: Self = Self(0xFFFF_FE07);
+    pub const PLACEHOLDER: Self = Self::MAX;
 }
diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs
index 3b8c4405a99..894d4610dd9 100644
--- a/libs/@local/hashql/mir/src/lib.rs
+++ b/libs/@local/hashql/mir/src/lib.rs
@@ -21,7 +21,9 @@
     string_from_utf8_lossy_owned,
     try_trait_v2,
     step_trait,
-    maybe_uninit_fill
+    maybe_uninit_fill,
+    binary_heap_into_iter_sorted,
+    binary_heap_drain_sorted,
 )]
 #![expect(clippy::indexing_slicing)]
 extern crate alloc;
diff --git a/libs/@local/hashql/mir/src/pass/analysis/callgraph/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/callgraph/mod.rs
index 4aa503d6122..e911f85cb8e 100644
--- a/libs/@local/hashql/mir/src/pass/analysis/callgraph/mod.rs
+++ b/libs/@local/hashql/mir/src/pass/analysis/callgraph/mod.rs
@@ -71,7 +71,7 @@ use crate::{
 /// Each edge in the [`CallGraph`] is annotated with a `CallKind` to distinguish direct call sites
 /// from other kinds of references. This enables consumers to differentiate between actual function
 /// invocations and incidental references.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
 pub enum CallKind {
     /// Direct function application at the given MIR [`Location`].
     ///
@@ -95,6 +95,14 @@ pub enum CallKind {
     Opaque,
 }
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct CallSite<C = CallKind> {
+    pub caller: DefId,
+    pub kind: C,
+
+    pub target: DefId,
+}
+
 /// A directed graph of [`DefId`] references across MIR bodies.
 ///
 /// Nodes correspond to [`DefId`]s and edges represent references from one definition to another,
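> Reviewer note: a hedged sketch of consuming `CallSite` together with the query helpers added in the next hunk, assuming the `pass::analysis` re-exports shown in this diff are public. The selection policy is illustrative only, not the inliner's actual heuristic:

```rust
use core::alloc::Allocator;

use hashql_mir::{def::DefId, pass::analysis::CallGraph};

/// Counts `Apply` call sites whose callee has a unique caller — rough
/// candidates for moving (rather than copying) a body during inlining.
fn movable_apply_sites<A: Allocator>(graph: &CallGraph<'_, A>, defs: &[DefId]) -> usize {
    defs.iter()
        .flat_map(|&def| graph.apply_callsites(def))
        .filter(|site| graph.unique_caller(site.target) == Some(site.caller))
        .count()
}
```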
@@ -146,6 +154,90 @@ impl<'heap, A: Allocator + Clone> CallGraph<'heap, A> {
     }
 }
 
+impl<A: Allocator> CallGraph<'_, A> {
+    #[inline]
+    pub fn callsites(&self, def: DefId) -> impl Iterator<Item = CallSite> {
+        let node = NodeId::new(def.as_usize());
+
+        self.inner.outgoing_edges(node).map(move |edge| CallSite {
+            caller: def,
+            kind: edge.data,
+            target: DefId::new(edge.target().as_u32()),
+        })
+    }
+
+    #[inline]
+    pub fn apply_callsites(&self, def: DefId) -> impl Iterator<Item = CallSite<Location>> {
+        let node = NodeId::new(def.as_usize());
+
+        self.inner
+            .outgoing_edges(node)
+            .filter_map(move |edge| match edge.data {
+                CallKind::Apply(location) => Some(CallSite {
+                    caller: def,
+                    kind: location,
+                    target: DefId::new(edge.target().as_u32()),
+                }),
+                CallKind::Filter(_) | CallKind::Opaque => None,
+            })
+    }
+
+    pub fn filters(&self) -> impl Iterator<Item = DefId> {
+        self.inner
+            .nodes()
+            .ids()
+            .filter(|&node| {
+                self.inner
+                    .incoming_edges(node)
+                    .any(|edge| matches!(edge.data, CallKind::Filter(_)))
+            })
+            .map(|node| DefId::new(node.as_u32()))
+    }
+
+    #[inline]
+    pub fn is_leaf(&self, def: DefId) -> bool {
+        let def = NodeId::new(def.as_usize());
+
+        self.inner.outgoing_edges(def).all(|edge| {
+            let target = self
+                .inner
+                .node(edge.target())
+                .unwrap_or_else(|| unreachable!("target must exist"));
+
+            // Leafs are functions, which can only have intrinsic edges
+            matches!(target.data, Source::Intrinsic(_))
+        })
+    }
+
+    #[inline]
+    pub fn is_single_caller(&self, caller: DefId, target: DefId) -> bool {
+        let caller = NodeId::new(caller.as_usize());
+        let target = NodeId::new(target.as_usize());
+
+        self.inner
+            .incoming_edges(target)
+            .all(|edge| matches!(edge.data, CallKind::Apply(_)) && edge.source() == caller)
+    }
+
+    #[inline]
+    pub fn unique_caller(&self, callee: DefId) -> Option<DefId> {
+        // Same as is_single_caller, but makes sure that there is exactly one edge
+        let callee = NodeId::new(callee.as_usize());
+
+        let mut incoming = self.inner.incoming_edges(callee);
+        let edge = incoming.next()?;
+
+        if incoming.next().is_some() {
+            return None;
+        }
+
+        match edge.data {
+            CallKind::Apply(_) => Some(DefId::new(edge.source().as_u32())),
+            CallKind::Filter(_) | CallKind::Opaque => None,
+        }
+    }
+}
+
 impl<A: Allocator> fmt::Display for CallGraph<'_, A> {
     fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
         for edge in self.inner.edges() {
@@ -319,7 +411,7 @@ impl<'heap, A: Allocator> Visitor<'heap> for CallGraphVisitor<'_, 'heap, A> {
             return Ok(());
         }
 
-        for argument in arguments.iter() {
+        for argument in arguments {
             self.visit_operand(location, argument)?;
         }
 
diff --git a/libs/@local/hashql/mir/src/pass/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/mod.rs
index ed904fa50ad..6ed7b5a829c 100644
--- a/libs/@local/hashql/mir/src/pass/analysis/mod.rs
+++ b/libs/@local/hashql/mir/src/pass/analysis/mod.rs
@@ -2,6 +2,6 @@ mod callgraph;
 mod data_dependency;
 pub mod dataflow;
 
 pub use self::{
-    callgraph::{CallGraph, CallGraphAnalysis, CallKind},
+    callgraph::{CallGraph, CallGraphAnalysis, CallKind, CallSite},
     data_dependency::{DataDependencyAnalysis, DataDependencyGraph, TransientDataDependencyGraph},
 };
diff --git a/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/mod.rs b/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/mod.rs
index 8117fa4d7fa..f794a012b19 100644
--- a/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/mod.rs
+++ b/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/mod.rs
@@ -58,7 +58,7 @@ use core::{alloc::Allocator, cmp, mem};
 
 use hashql_core::{
     graph::{Successors as _, Traverse as _},
-    heap::ResetAllocator,
+    heap::BumpAllocator,
 };
 
 use self::{
@@ -186,14 +186,13 @@ impl<A> AdministrativeReduction<A> {
     /// Creates a new administrative reduction pass using the given allocator.
     ///
     /// The allocator is used for temporary data structures during the pass (call graph,
-    /// reducibility tracking, scratch memory). It should implement [`ResetAllocator`] so
-    /// that memory can be efficiently reclaimed before running the pass.
+    /// reducibility tracking, scratch memory).
     pub const fn new_in(alloc: A) -> Self {
         Self { alloc }
     }
 }
 
-impl<'env, 'heap, A: ResetAllocator> GlobalTransformPass<'env, 'heap>
+impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap>
     for AdministrativeReduction<A>
 {
     fn run(
@@ -202,8 +201,6 @@
         state: &mut GlobalTransformState<'_>,
         bodies: &mut DefIdSlice<Body<'heap>>,
     ) -> Changed {
-        self.alloc.reset();
-
         let mut reducable = Reducable::new(bodies, &self.alloc);
         if reducable.is_empty() {
             return Changed::No;
diff --git a/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/visitor.rs b/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/visitor.rs
index 5a4c37c8a17..267ca619eb6 100644
--- a/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/visitor.rs
+++ b/libs/@local/hashql/mir/src/pass/transform/administrative_reduction/visitor.rs
@@ -38,6 +38,22 @@ struct Reduction<'heap> {
     args: ArgVec<Operand<'heap>, &'heap Heap>,
 }
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum ClosureEnv<'heap> {
+    Place(Place<'heap>),
+    Unit,
+}
+
+impl<'heap> ClosureEnv<'heap> {
+    const fn from_operand(operand: Operand<'heap>) -> Option<Self> {
+        match operand {
+            Operand::Place(place) => Some(ClosureEnv::Place(place)),
+            Operand::Constant(Constant::Unit) => Some(ClosureEnv::Unit),
+            Operand::Constant(_) => None,
+        }
+    }
+}
+
 /// Represents a known function or closure value assigned to a local.
 ///
 /// The visitor tracks these assignments to resolve indirect calls (calls through locals)
@@ -47,7 +63,7 @@ pub(crate) enum Callee<'heap> {
     /// A bare function pointer.
     Fn { ptr: DefId },
     /// A closure: function pointer plus captured environment.
-    Closure { ptr: DefId, env: Place<'heap> },
+    Closure { ptr: DefId, env: ClosureEnv<'heap> },
 }
 
 /// Header information for the body being transformed.
@@ -289,15 +305,20 @@ impl<'heap, A: Allocator> VisitorMut<'heap> for AdministrativeReductionVisitor<'_, 'heap, A> {
             return Ok(());
         }
 
-        let &[Operand::Constant(Constant::FnPtr(ptr)), Operand::Place(env)] =
-            &aggregate.operands[..]
-        else {
+        let &[Operand::Constant(Constant::FnPtr(ptr)), env] = &aggregate.operands[..] else {
             unreachable!(
                 "Closure must have exactly two operands, with the first being a function pointer \
                  and the second being a place to the environment."
             )
        };
 
+        let Some(env) = ClosureEnv::from_operand(env) else {
+            unreachable!(
+                "Closure must have exactly two operands, with the first being a function pointer \
+                 and the second being a place to the environment."
+            );
+        };
+
         self.callees
             .insert(self.state.lhs.local, Callee::Closure { ptr, env });
 
diff --git a/libs/@local/hashql/mir/src/pass/transform/cfg_simplify/mod.rs b/libs/@local/hashql/mir/src/pass/transform/cfg_simplify/mod.rs
index 3c986418d26..0cdf78bda75 100644
--- a/libs/@local/hashql/mir/src/pass/transform/cfg_simplify/mod.rs
+++ b/libs/@local/hashql/mir/src/pass/transform/cfg_simplify/mod.rs
@@ -48,7 +48,7 @@ use core::{iter::ExactSizeIterator as _, mem};
 use hashql_core::{
     collections::{WorkQueue, fast_hash_set_with_capacity_in},
     graph::Predecessors as _,
-    heap::{BumpAllocator, ResetAllocator, Scratch, TransferInto as _},
+    heap::{BumpAllocator, Scratch, TransferInto as _},
 };
 
 use super::{DeadBlockElimination, error::unreachable_switch_arm, ssa_repair::SsaRepair};
@@ -67,418 +67,427 @@ use crate::{
     pass::{Changed, TransformPass},
 };
 
-/// Control-flow graph simplification pass.
+/// Returns `true` if the block contains only no-op statements.
 ///
-/// Simplifies the CFG by merging blocks, constant-folding switches, and eliminating dead blocks.
-pub struct CfgSimplify<A = Scratch> {
-    alloc: A,
+/// A block with only no-ops can be safely bypassed or merged without affecting semantics.
+fn is_noop(block: &BasicBlock<'_>) -> bool {
+    block
+        .statements
+        .iter()
+        .all(|statement| matches!(statement.kind, StatementKind::Nop))
 }
 
-impl CfgSimplify {
-    /// Creates a new instance of the control-flow graph simplification pass.
-    #[must_use]
-    pub fn new() -> Self {
-        Self {
-            alloc: Scratch::new(),
-        }
+/// Attempts to simplify a `Goto` terminator by merging with its target block.
+///
+/// # Simplification Cases
+///
+/// 1. **Single predecessor**: The target block has only one predecessor (this block), so we can
+///    fully merge by moving all statements and assuming the target's terminator.
+///
+/// 2. **Multiple predecessors with no-op target**: The target block has multiple predecessors but
+///    contains only no-ops, so we can safely assume its terminator without duplicating meaningful
+///    work.
+///
+/// Self-loops (`goto` to the same block) cannot be simplified and are skipped.
+///
+/// # Merging Process
+///
+/// When merging block `A` into block `B` (where `A: goto B`):
+///
+/// 1. Generate assignments for `B`'s block parameters using the arguments from the `Goto`
+/// 2. Append all statements from `B` to `A`
+/// 3. Replace `A`'s terminator with `B`'s terminator
+///
+/// SSA invariants may be temporarily broken; the [`SsaRepair`] runs afterward to fix them.
+fn simplify_goto<'heap>(body: &mut Body<'heap>, id: BasicBlockId, goto: Goto<'heap>) -> bool {
+    // Self-loops cannot be optimized as there's no simplification possible.
+    if goto.target.block == id {
+        return false;
     }
-}
 
-impl<A> CfgSimplify<A> {
-    #[must_use]
-    pub const fn new_in(alloc: A) -> Self {
-        Self { alloc }
+    let target_predecessors_len = body.basic_blocks.predecessors(goto.target.block).len();
+
+    // With multiple predecessors, we can only merge if the target is effectively empty.
+    // Otherwise we'd duplicate statements across all predecessor paths.
+    if target_predecessors_len > 1 && !is_noop(&body.basic_blocks[goto.target.block]) {
+        return false;
     }
 
-    /// Returns `true` if the block contains only no-op statements.
-    ///
-    /// A block with only no-ops can be safely bypassed or merged without affecting semantics.
- fn is_noop(block: &BasicBlock<'_>) -> bool { - block - .statements - .iter() - .all(|statement| matches!(statement.kind, StatementKind::Nop)) + // This is the only special case, if there are multiple predecessors, and the target itself + // is a self-loop we cannot safely merge them. The reason is that in that case we wouldn't + // be able to make any progress upon expansion, as we would replace our own terminator with + // the exact same one. We could broaden the search to also check params (which would still + // be correct), this case alone leads to more code generation as we're generating a + // superfluous assignment. + // The `target_predecessors_len` check isn't 100% necessary, as this case can only happen + // iff the target is a self-loop, hence has multiple predecessors, but allows us to be a bit + // more defensive about that fact. + if target_predecessors_len > 1 + && let TerminatorKind::Goto(target_goto) = + body.basic_blocks[goto.target.block].terminator.kind + && target_goto.target.block == goto.target.block + { + return false; } - /// Attempts to simplify a `Goto` terminator by merging with its target block. - /// - /// # Simplification Cases - /// - /// 1. **Single predecessor**: The target block has only one predecessor (this block), so we can - /// fully merge by moving all statements and assuming the target's terminator. - /// - /// 2. **Multiple predecessors with no-op target**: The target block has multiple predecessors - /// but contains only no-ops, so we can safely assume its terminator without duplicating - /// meaningful work. - /// - /// Self-loops (`goto` to the same block) cannot be simplified and are skipped. - /// - /// # Merging Process - /// - /// When merging block `A` into block `B` (where `A: goto B`): - /// - /// 1. Generate assignments for `B`'s block parameters using the arguments from the `Goto` - /// 2. Append all statements from `B` to `A` - /// 3. Replace `A`'s terminator with `B`'s terminator - /// - /// SSA invariants may be temporarily broken; the [`SsaRepair`] runs afterward to fix them. - fn simplify_goto<'heap>(body: &mut Body<'heap>, id: BasicBlockId, goto: Goto<'heap>) -> bool { - // Self-loops cannot be optimized as there's no simplification possible. - if goto.target.block == id { - return false; - } + let [block, target] = body + .basic_blocks + .as_mut() + .get_disjoint_mut([id, goto.target.block]) + .unwrap_or_else(|_err| unreachable!("self-loops excluded by check above")); + + // Step 1: Assign block parameters before moving statements to maintain def-before-use. + debug_assert_eq!(target.params.len(), goto.target.args.len()); + for (¶m, &arg) in target.params.iter().zip(goto.target.args) { + block.statements.push(Statement { + span: block.terminator.span, + kind: StatementKind::Assign(Assign { + lhs: Place::local(param), + rhs: RValue::Load(arg), + }), + }); + } - let target_predecessors_len = body.basic_blocks.predecessors(goto.target.block).len(); + // Step 2: Move statements from target to current block. + // Safe even with multiple predecessors since we verified the target only has no-ops. + block.statements.append(&mut target.statements); - // With multiple predecessors, we can only merge if the target is effectively empty. - // Otherwise we'd duplicate statements across all predecessor paths. - if target_predecessors_len > 1 && !Self::is_noop(&body.basic_blocks[goto.target.block]) { - return false; - } + // Step 3: Assume the target's terminator. + // With a single predecessor we can take ownership; otherwise we must clone. 
+ let next_terminator = if target_predecessors_len == 1 { + let src = Terminator::unreachable(target.terminator.span); - // This is the only special case, if there are multiple predecessors, and the target itself - // is a self-loop we cannot safely merge them. The reason is that in that case we wouldn't - // be able to make any progress upon expansion, as we would replace our own terminator with - // the exact same one. We could broaden the search to also check params (which would still - // be correct), this case alone leads to more code generation as we're generating a - // superfluous assignment. - // The `target_predecessors_len` check isn't 100% necessary, as this case can only happen - // iff the target is a self-loop, hence has multiple predecessors, but allows us to be a bit - // more defensive about that fact. - if target_predecessors_len > 1 - && let TerminatorKind::Goto(target_goto) = - body.basic_blocks[goto.target.block].terminator.kind - && target_goto.target.block == goto.target.block - { - return false; - } + mem::replace(&mut target.terminator, src) + } else { + target.terminator.clone() + }; - let [block, target] = body - .basic_blocks - .as_mut() - .get_disjoint_mut([id, goto.target.block]) - .unwrap_or_else(|_err| unreachable!("self-loops excluded by check above")); - - // Step 1: Assign block parameters before moving statements to maintain def-before-use. - debug_assert_eq!(target.params.len(), goto.target.args.len()); - for (¶m, &arg) in target.params.iter().zip(goto.target.args) { - block.statements.push(Statement { - span: block.terminator.span, - kind: StatementKind::Assign(Assign { - lhs: Place::local(param), - rhs: RValue::Load(arg), - }), - }); - } + block.terminator = next_terminator; - // Step 2: Move statements from target to current block. - // Safe even with multiple predecessors since we verified the target only has no-ops. - block.statements.append(&mut target.statements); + true +} - // Step 3: Assume the target's terminator. - // With a single predecessor we can take ownership; otherwise we must clone. - let next_terminator = if target_predecessors_len == 1 { - let src = Terminator::unreachable(target.terminator.span); +/// Attempts to simplify a `SwitchInt` terminator. +/// +/// # Simplification Cases +/// +/// 1. **Constant discriminant**: The switch value is a compile-time constant, so we replace the +/// switch with a direct `Goto` to the matching arm (or `otherwise`). +/// +/// 2. **All targets identical**: Every arm jumps to the same block, so the switch degenerates to a +/// simple `Goto`. +/// +/// 3. **Only otherwise**: No explicit cases, just a default arm—degenerates to `Goto`. +/// +/// 4. **Redundant cases**: Cases that jump to the same target as `otherwise` are removed. +/// +/// 5. **Target promotion**: When a switch arm targets an empty block with a `Goto` terminator, we +/// can redirect the switch directly to that `Goto`'s target. +#[expect(clippy::too_many_lines, reason = "mostly documentation")] +fn simplify_switch_int<'heap>( + context: &mut MirContext<'_, 'heap>, + body: &mut Body<'heap>, + id: BasicBlockId, +) -> bool { + let terminator = &body.basic_blocks[id].terminator; + let TerminatorKind::SwitchInt(switch) = &terminator.kind else { + unreachable!() + }; + + // Case 1: Constant discriminant - select the matching arm directly. + if let Operand::Constant(Constant::Int(int)) = switch.discriminant { + let discriminant = int.as_uint(); + + // Look for an explicit case matching the discriminant. 
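+        // `values()` and `targets()` are parallel arrays, so the position of the matching
+        // value also indexes the corresponding target arm.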
+ if let Some(index) = switch + .targets + .values() + .iter() + .position(|&value| value == discriminant) + { + let target = switch.targets.targets()[index]; + body.basic_blocks.as_mut()[id].terminator.kind = TerminatorKind::Goto(Goto { target }); - mem::replace(&mut target.terminator, src) - } else { - target.terminator.clone() - }; + return true; + } - block.terminator = next_terminator; + // Fall back to the otherwise branch if present. + if let Some(otherwise) = switch.targets.otherwise() { + body.basic_blocks.as_mut()[id].terminator.kind = + TerminatorKind::Goto(Goto { target: otherwise }); - true - } + return true; + } - /// Attempts to simplify a `SwitchInt` terminator. - /// - /// # Simplification Cases - /// - /// 1. **Constant discriminant**: The switch value is a compile-time constant, so we replace the - /// switch with a direct `Goto` to the matching arm (or `otherwise`). - /// - /// 2. **All targets identical**: Every arm jumps to the same block, so the switch degenerates - /// to a simple `Goto`. - /// - /// 3. **Only otherwise**: No explicit cases, just a default arm—degenerates to `Goto`. - /// - /// 4. **Redundant cases**: Cases that jump to the same target as `otherwise` are removed. - /// - /// 5. **Target promotion**: When a switch arm targets an empty block with a `Goto` terminator, - /// we can redirect the switch directly to that `Goto`'s target. - #[expect(clippy::too_many_lines, reason = "mostly documentation")] - fn simplify_switch_int<'heap>( - context: &mut MirContext<'_, 'heap>, - body: &mut Body<'heap>, - id: BasicBlockId, - ) -> bool { - let terminator = &body.basic_blocks[id].terminator; - let TerminatorKind::SwitchInt(switch) = &terminator.kind else { - unreachable!() - }; + // No matching case and no otherwise—this violates compiler invariants. + context + .diagnostics + .push(unreachable_switch_arm(terminator.span)); + body.basic_blocks.as_mut()[id].terminator.kind = TerminatorKind::Unreachable; + return true; + } - // Case 1: Constant discriminant - select the matching arm directly. - if let Operand::Constant(Constant::Int(int)) = switch.discriminant { - let discriminant = int.as_uint(); + // Case 2: All targets are identical - degenerate to Goto. + if switch + .targets + .targets() + .array_windows() + .all(|[lhs, rhs]| lhs == rhs) + { + let target = switch.targets.targets()[0]; + body.basic_blocks.as_mut()[id].terminator.kind = TerminatorKind::Goto(Goto { target }); + + return true; + } - // Look for an explicit case matching the discriminant. - if let Some(index) = switch - .targets - .values() - .iter() - .position(|&value| value == discriminant) - { - let target = switch.targets.targets()[index]; - body.basic_blocks.as_mut()[id].terminator.kind = - TerminatorKind::Goto(Goto { target }); + // Case 3: Only an otherwise target with no explicit cases. + if switch.targets.values().is_empty() + && let Some(otherwise) = switch.targets.otherwise() + { + body.basic_blocks.as_mut()[id].terminator.kind = + TerminatorKind::Goto(Goto { target: otherwise }); - return true; - } + return true; + } - // Fall back to the otherwise branch if present. - if let Some(otherwise) = switch.targets.otherwise() { - body.basic_blocks.as_mut()[id].terminator.kind = - TerminatorKind::Goto(Goto { target: otherwise }); + // Case 4: Remove cases that are redundant with otherwise. 
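+    // This keeps the switch but shrinks its case list; on a later worklist iteration that
+    // may expose case 2 or case 3 and let the whole switch degenerate to a Goto.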
+ if let Some(otherwise) = switch.targets.otherwise() { + let redundant_values: Vec<_> = switch + .targets + .iter() + .filter_map(|(value, target)| (target == otherwise).then_some(value)) + .collect(); + + if !redundant_values.is_empty() { + let TerminatorKind::SwitchInt(switch) = + &mut body.basic_blocks.as_mut()[id].terminator.kind + else { + unreachable!() + }; - return true; + for value in redundant_values { + switch.targets.remove_target(value); } - // No matching case and no otherwise—this violates compiler invariants. - context - .diagnostics - .push(unreachable_switch_arm(terminator.span)); - body.basic_blocks.as_mut()[id].terminator.kind = TerminatorKind::Unreachable; return true; } + } - // Case 2: All targets are identical - degenerate to Goto. - if switch - .targets - .targets() - .array_windows() - .all(|[lhs, rhs]| lhs == rhs) - { - let target = switch.targets.targets()[0]; - body.basic_blocks.as_mut()[id].terminator.kind = TerminatorKind::Goto(Goto { target }); - - return true; + // Case 5: Promote targets that point to empty blocks with Goto terminators. + // This lets us skip intermediate blocks and potentially enable further simplifications. + // + // We can only promote when the target block is effectively empty (only no-ops). + // Otherwise, we'd change execution order by skipping those statements. + + // We don't use `InlineVec` or similar here, because it doesn't make sense – most of the + // time they are going to be empty. + let target_len = switch.targets.targets().len(); + let mut promotion_goto = Vec::new(); + + // To circumvent borrowing rules, and rule out modifications in most cases, we first check + // if any modification is even needed. If that is not the case, we return early. + for (index, &target) in switch.targets.targets().iter().enumerate() { + let is_last = index == target_len - 1; + let is_otherwise = switch.targets.has_otherwise() && is_last; + + // Skip self-loops. + if target.block == id { + continue; } - // Case 3: Only an otherwise target with no explicit cases. - if switch.targets.values().is_empty() - && let Some(otherwise) = switch.targets.otherwise() - { - body.basic_blocks.as_mut()[id].terminator.kind = - TerminatorKind::Goto(Goto { target: otherwise }); - - return true; + // We can only promote terminators if we don't pass any arguments. Otherwise, + // we'd need to assign parameters before the switch, which would affect all arms. + // If two arms point to the same block, this corrupts the other arm's semantics. + // We could insert an intermediate block, but that negates the optimization. + if !target.args.is_empty() { + continue; } - // Case 4: Remove cases that are redundant with otherwise. - if let Some(otherwise) = switch.targets.otherwise() { - let redundant_values: Vec<_> = switch - .targets - .iter() - .filter_map(|(value, target)| (target == otherwise).then_some(value)) - .collect(); - - if !redundant_values.is_empty() { - let TerminatorKind::SwitchInt(switch) = - &mut body.basic_blocks.as_mut()[id].terminator.kind - else { - unreachable!() - }; - - for value in redundant_values { - switch.targets.remove_target(value); - } - - return true; - } + let target_block = &body.basic_blocks[target.block]; + if !is_noop(target_block) { + continue; } - // Case 5: Promote targets that point to empty blocks with Goto terminators. - // This lets us skip intermediate blocks and potentially enable further simplifications. - // - // We can only promote when the target block is effectively empty (only no-ops). 
- // Otherwise, we'd change execution order by skipping those statements. - - // We don't use `InlineVec` or similar here, because it doesn't make sense – most of the - // time they are going to be empty. - let target_len = switch.targets.targets().len(); - let mut promotion_goto = Vec::new(); - - // To circumvent borrowing rules, and rule out modifications in most cases, we first check - // if any modification is even needed. If that is not the case, we return early. - for (index, &target) in switch.targets.targets().iter().enumerate() { - let is_last = index == target_len - 1; - let is_otherwise = switch.targets.has_otherwise() && is_last; - - // Skip self-loops. - if target.block == id { - continue; + match &target_block.terminator.kind { + TerminatorKind::Goto(_) => { + promotion_goto.push((index, target)); } - - // We can only promote terminators if we don't pass any arguments. Otherwise, - // we'd need to assign parameters before the switch, which would affect all arms. - // If two arms point to the same block, this corrupts the other arm's semantics. - // We could insert an intermediate block, but that negates the optimization. - if !target.args.is_empty() { - continue; + // SwitchInt promotion is more complex and not yet implemented. + // See: https://linear.app/hash/issue/BE-219/hashql-implement-switchint-simplification + TerminatorKind::SwitchInt(target_switch) + if !is_otherwise + && !switch.targets.has_otherwise() + && !target_switch.targets.has_otherwise() => + { + // Requires discriminant folding with arithmetic operations. } + TerminatorKind::SwitchInt(_) + | TerminatorKind::Return(_) + | TerminatorKind::GraphRead(_) + | TerminatorKind::Unreachable => {} + } + } - let target_block = &body.basic_blocks[target.block]; - if !Self::is_noop(target_block) { - continue; - } + if promotion_goto.is_empty() { + // There is not a single branch which can be promoted. + // This is the case in the majority of cases. + return false; + } - match &target_block.terminator.kind { - TerminatorKind::Goto(_) => { - promotion_goto.push((index, target)); - } - // SwitchInt promotion is more complex and not yet implemented. - // See: https://linear.app/hash/issue/BE-219/hashql-implement-switchint-simplification - TerminatorKind::SwitchInt(target_switch) - if !is_otherwise - && !switch.targets.has_otherwise() - && !target_switch.targets.has_otherwise() => - { - // Requires discriminant folding with arithmetic operations. - } - TerminatorKind::SwitchInt(_) - | TerminatorKind::Return(_) - | TerminatorKind::GraphRead(_) - | TerminatorKind::Unreachable => {} - } - } + // Apply promotions: redirect switch targets through their Goto destinations. + for (target_index, target) in promotion_goto { + let [block, target_block] = body + .basic_blocks + .as_mut() + .get_disjoint_mut([id, target.block]) + .unwrap_or_else(|_err| unreachable!("self-loops excluded above")); - if promotion_goto.is_empty() { - // There is not a single branch which can be promoted. - // This is the case in the majority of cases. - return false; - } + let TerminatorKind::SwitchInt(switch) = &mut block.terminator.kind else { + unreachable!("we're simplifying a SwitchInt") + }; - // Apply promotions: redirect switch targets through their Goto destinations. 
-        for (target_index, target) in promotion_goto {
-            let [block, target_block] = body
-                .basic_blocks
-                .as_mut()
-                .get_disjoint_mut([id, target.block])
-                .unwrap_or_else(|_err| unreachable!("self-loops excluded above"));
+        let TerminatorKind::Goto(goto) = target_block.terminator.kind else {
+            unreachable!("promotion candidates are Goto blocks")
+        };

-            let TerminatorKind::SwitchInt(switch) = &mut block.terminator.kind else {
-                unreachable!("we're simplifying a SwitchInt")
-            };
+        switch.targets.targets_mut()[target_index] = goto.target;

-            let TerminatorKind::Goto(goto) = target_block.terminator.kind else {
-                unreachable!("promotion candidates are Goto blocks")
-            };
+        // Note: We don't mark the target as unreachable here because other switch arms
+        // may still reference it.
+    }

-            switch.targets.targets_mut()[target_index] = goto.target;
+    // for (target_index, target, _) in promotion_switch {
+    // We know from the previous step that:
+    // 1. The terminator is switch int
+    // 2. The target is not ourselves (disjoint)
+    // 3. The target is not the otherwise branch
+    // 4. Both the source (us) and the target do not have an otherwise branch.
+
+    // This is a bit more complicated than the goto case, because to make it work we need
+    // to fold the discriminant, which means adding some new statements.
+    // δ = v == j
+    // idx = (1 - δ)*v + δ*(|N| + r)
+    //     = 1v + -δv + δ|N| + δr
+    //     = v + δ*(-v + |N| + r)
+    //     = v + δ*(|N| - v + r)
+    // This optimization requires access to: `BinOp::Sub`, `BinOp::Add`, `BinOp::Mul`, which
+    // aren't yet available.
+    // see: https://linear.app/hash/issue/BE-219/hashql-implement-switchint-simplification
+    // }
+
+    true
+}

-            // Note: We don't mark the target as unreachable here because other switch arms
-            // may still reference it.
+/// Dispatches to the appropriate simplification based on terminator kind.
+///
+/// After a successful simplification, marks any newly unreachable blocks as dead.
+/// Returns `true` if any simplification was applied.
+fn simplify<'heap, A: BumpAllocator>(
+    context: &mut MirContext<'_, 'heap>,
+    body: &mut Body<'heap>,
+    id: BasicBlockId,
+    alloc: &A,
+) -> bool {
+    let kind = &body.basic_blocks[id].terminator.kind;
+    match kind {
+        &TerminatorKind::Goto(_) | TerminatorKind::SwitchInt(_) => {}
+        TerminatorKind::Return(_) | TerminatorKind::GraphRead(_) | TerminatorKind::Unreachable => {
+            return false;
         }
-
-        // for (target_index, target, _) in promotion_switch {
-        // We know from the previous step that:
-        // 1. The terminator is switch int
-        // 2. The target is not ourselves (disjoint)
-        // 3. The target is not the otherwise branch
-        // 4. Both the source (us) and the target do not have an otherwise branch.
-
-        // This is a bit more complicated, than the goto case, because to make it work we need
-        // to fold the discriminant, which means adding some new statments.
-        // δ = v == j
-        // idx = (1 - δ)*v + δ*(|N| + r)
-        //     = 1v + -δv + δ|N| + δr
-        //     = v + δ*(-v + |N| + r)
-        //     = v + δ*(|N| - v + r)
-        // This optimization requires access to: `BinOp::Sub`, `BinOp::Add`, `BinOp::Mul`, which
-        // aren't yet available.
-        // see: https://linear.app/hash/issue/BE-219/hashql-implement-switchint-simplification
-        // }
-
-        true
     }

-    /// Dispatches to the appropriate simplification based on terminator kind.
-    ///
-    /// After a successful simplification, marks any newly unreachable blocks as dead.
-    /// Returns `true` if any simplification was applied.
- fn simplify<'heap>( - &self, - context: &mut MirContext<'_, 'heap>, - body: &mut Body<'heap>, - id: BasicBlockId, - ) -> bool { - let kind = &body.basic_blocks[id].terminator.kind; - match kind { - &TerminatorKind::Goto(_) | TerminatorKind::SwitchInt(_) => {} - TerminatorKind::Return(_) - | TerminatorKind::GraphRead(_) - | TerminatorKind::Unreachable => return false, + // Snapshot reachable blocks before modification to detect newly dead blocks. + // This is done *after* we check the terminator, to ensure that we don't recompute postorder + // if we don't need to. + let previous_reverse_postorder = body.basic_blocks.reverse_postorder().transfer_into(alloc); + + let changed = match kind { + &TerminatorKind::Goto(goto) => simplify_goto(body, id, goto), + TerminatorKind::SwitchInt(_) => simplify_switch_int(context, body, id), + TerminatorKind::Return(_) | TerminatorKind::GraphRead(_) | TerminatorKind::Unreachable => { + unreachable!() } + }; - // Snapshot reachable blocks before modification to detect newly dead blocks. - // This is done *after* we check the terminator, to ensure that we don't recompute postorder - // if we don't need to. - let previous_reverse_postorder = body - .basic_blocks - .reverse_postorder() - .transfer_into(&self.alloc); + if changed { + mark_dead_blocks(body, previous_reverse_postorder, alloc); + } - let changed = match kind { - &TerminatorKind::Goto(goto) => Self::simplify_goto(body, id, goto), - TerminatorKind::SwitchInt(_) => Self::simplify_switch_int(context, body, id), - TerminatorKind::Return(_) - | TerminatorKind::GraphRead(_) - | TerminatorKind::Unreachable => unreachable!(), - }; + changed +} - if changed { - self.mark_dead_blocks(body, previous_reverse_postorder); +/// Marks blocks that became unreachable after a simplification. +/// +/// Compares the current reachable blocks (via reverse postorder) against the snapshot taken +/// before simplification. Any block that was previously reachable but is no longer in the +/// traversal is marked with an `Unreachable` terminator. +/// +/// This enables cascading optimizations: marking a block dead removes it from predecessor +/// counts, potentially allowing previously blocked merges to proceed. +fn mark_dead_blocks( + body: &mut Body<'_>, + previous_reverse_postorder: &[BasicBlockId], + alloc: &A, +) { + let mut reverse_postorder = fast_hash_set_with_capacity_in(body.basic_blocks.len(), alloc); + + #[expect(unsafe_code)] + for &block in body.basic_blocks.reverse_postorder() { + // SAFETY: Reverse postorder contains each block at most once. + unsafe { + reverse_postorder.insert_unique_unchecked(block); } - - changed } - /// Marks blocks that became unreachable after a simplification. - /// - /// Compares the current reachable blocks (via reverse postorder) against the snapshot taken - /// before simplification. Any block that was previously reachable but is no longer in the - /// traversal is marked with an `Unreachable` terminator. - /// - /// This enables cascading optimizations: marking a block dead removes it from predecessor - /// counts, potentially allowing previously blocked merges to proceed. - fn mark_dead_blocks(&self, body: &mut Body<'_>, previous_reverse_postorder: &[BasicBlockId]) { - let mut reverse_postorder = - fast_hash_set_with_capacity_in(body.basic_blocks.len(), &self.alloc); - - #[expect(unsafe_code)] - for &block in body.basic_blocks.reverse_postorder() { - // SAFETY: Reverse postorder contains each block at most once. 
-            unsafe {
-                reverse_postorder.insert_unique_unchecked(block);
-            }
+    // Mark blocks that disappeared from the reachable set.
+    for &block in previous_reverse_postorder {
+        if !reverse_postorder.contains(&block) {
+            body.basic_blocks.as_mut()[block].terminator.kind = TerminatorKind::Unreachable;
         }
+    }
+}

-        // Mark blocks that disappeared from the reachable set.
-        for &block in previous_reverse_postorder {
-            if !reverse_postorder.contains(&block) {
-                body.basic_blocks.as_mut()[block].terminator.kind = TerminatorKind::Unreachable;
-            }
+/// Control-flow graph simplification pass.
+///
+/// Simplifies the CFG by merging blocks, constant-folding switches, and eliminating dead blocks.
+pub struct CfgSimplify<A = Scratch> {
+    alloc: A,
+}
+
+impl CfgSimplify {
+    /// Creates a new instance of the control-flow graph simplification pass.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            alloc: Scratch::new(),
         }
     }
 }

+impl<A: BumpAllocator> CfgSimplify<A> {
+    #[must_use]
+    pub const fn new_in(alloc: A) -> Self {
+        Self { alloc }
+    }
+
+    fn simplify<'heap>(
+        &mut self,
+        context: &mut MirContext<'_, 'heap>,
+        body: &mut Body<'heap>,
+        id: BasicBlockId,
+    ) -> bool {
+        self.alloc
+            .scoped(|alloc| simplify(context, body, id, &alloc))
+    }
+}
+
 impl Default for CfgSimplify {
     fn default() -> Self {
         Self::new()
     }
 }

-impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for CfgSimplify<A> {
+impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for CfgSimplify<A> {
     /// Runs the CFG simplification pass on the given body.
     ///
     /// Uses a worklist algorithm that processes blocks in reverse postorder and re-enqueues
@@ -498,7 +507,6 @@ impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for CfgSimplify<
         // Repeatedly simplify until no more changes—catches cascading opportunities
         // like SwitchInt → Goto → inline.
         loop {
-            self.alloc.reset();
             if !self.simplify(context, body, block) {
                 break;
             }
@@ -524,11 +532,14 @@ impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for CfgSimplify<
         }

         // Unreachable blocks will be dead, therefore must be removed
-        let mut dbe = DeadBlockElimination::new_in(&mut self.alloc);
-        let _: Changed = dbe.run(context, body);
+        let _: Changed = self
+            .alloc
+            .scoped(|alloc| DeadBlockElimination::new_in(alloc).run(context, body));

         // Simplifications may break SSA (e.g., merged blocks with conflicting definitions).
-        let _: Changed = SsaRepair::new_in(&mut self.alloc).run(context, body);
+        let _: Changed = self
+            .alloc
+            .scoped(|alloc| SsaRepair::new_in(alloc).run(context, body));

         // We ignore the `Changed` results of the sub-passes above, because we **know** that we
         // already modified the body; whether the sub-passes changed anything as well doesn't matter.
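The recurring change in these transform passes replaces manual `reset()` calls with lexically scoped scratch allocation. A minimal self-contained sketch of the pattern (the `ScopedScratch` type below is an illustrative stand-in, not the actual `hashql_core::heap` API):

```rust
/// Illustrative stand-in for a resettable scratch arena.
struct ScopedScratch {
    buffer: Vec<u8>, // stands in for bump-allocated memory
}

impl ScopedScratch {
    fn new() -> Self {
        Self { buffer: Vec::new() }
    }

    /// Runs `f` with access to the arena, then reclaims everything the closure
    /// allocated. Reclamation is tied to the scope, so a caller can no longer
    /// forget (or misplace) a manual `reset()` between sub-passes.
    fn scoped<R>(&mut self, f: impl FnOnce(&mut Vec<u8>) -> R) -> R {
        let start = self.buffer.len();
        let result = f(&mut self.buffer);
        self.buffer.truncate(start); // drop scope-local allocations
        result
    }
}

fn main() {
    let mut scratch = ScopedScratch::new();
    // Each sub-pass gets a fresh scope; its temporaries vanish on return.
    let sum: u32 = scratch.scoped(|buf| {
        buf.extend_from_slice(&[1, 2, 3]);
        buf.iter().map(|&byte| u32::from(byte)).sum()
    });
    assert_eq!(sum, 6);
    assert!(scratch.buffer.is_empty()); // reclaimed at scope exit
}
```

This mirrors why the diff can delete the loop-body `self.alloc.reset()`: each `scoped` call reclaims its own temporaries, while allocations that must outlive a pass (such as the `Scratch` returned by the compiletest suites) simply live outside the scope.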
diff --git a/libs/@local/hashql/mir/src/pass/transform/copy_propagation/mod.rs b/libs/@local/hashql/mir/src/pass/transform/copy_propagation/mod.rs index 3570242e3e8..55e866ff43f 100644 --- a/libs/@local/hashql/mir/src/pass/transform/copy_propagation/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/copy_propagation/mod.rs @@ -52,7 +52,7 @@ use core::{alloc::Allocator, convert::Infallible}; use hashql_core::{ graph::Predecessors as _, - heap::{BumpAllocator, ResetAllocator, Scratch, TransferInto as _}, + heap::{BumpAllocator, Scratch, TransferInto as _}, id::IdVec, }; @@ -224,10 +224,8 @@ impl CopyPropagation { } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for CopyPropagation { +impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for CopyPropagation { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); - let mut visitor = CopyPropagationVisitor { interner: context.interner, values: IdVec::with_capacity_in(body.local_decls.len(), &self.alloc), diff --git a/libs/@local/hashql/mir/src/pass/transform/dbe/mod.rs b/libs/@local/hashql/mir/src/pass/transform/dbe/mod.rs index 89d649aff7d..ad7a427af02 100644 --- a/libs/@local/hashql/mir/src/pass/transform/dbe/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/dbe/mod.rs @@ -26,11 +26,7 @@ mod tests; use core::convert::Infallible; -use hashql_core::{ - collections::fast_hash_set_with_capacity_in, - heap::{BumpAllocator, ResetAllocator}, - id::Id as _, -}; +use hashql_core::{collections::fast_hash_set_with_capacity_in, heap::BumpAllocator, id::Id as _}; use crate::{ body::{ @@ -59,10 +55,8 @@ impl DeadBlockElimination { } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for DeadBlockElimination { +impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for DeadBlockElimination { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); - let mut reachable = fast_hash_set_with_capacity_in( body.basic_blocks.reverse_postorder().len(), &self.alloc, diff --git a/libs/@local/hashql/mir/src/pass/transform/dle/mod.rs b/libs/@local/hashql/mir/src/pass/transform/dle/mod.rs index 3d89123f4ea..29f0be76ceb 100644 --- a/libs/@local/hashql/mir/src/pass/transform/dle/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/dle/mod.rs @@ -53,7 +53,7 @@ mod tests; use core::convert::Infallible; use hashql_core::{ - heap::{BumpAllocator, ResetAllocator}, + heap::BumpAllocator, id::{Id as _, bit_vec::DenseBitSet}, }; @@ -103,10 +103,8 @@ impl DeadLocalElimination { } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for DeadLocalElimination { +impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for DeadLocalElimination { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); - let mut dead = if let Some(dead) = self.dead.take() { dead } else { diff --git a/libs/@local/hashql/mir/src/pass/transform/dse/mod.rs b/libs/@local/hashql/mir/src/pass/transform/dse/mod.rs index 6df8f23246a..1569d61d809 100644 --- a/libs/@local/hashql/mir/src/pass/transform/dse/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/dse/mod.rs @@ -50,7 +50,7 @@ use core::{alloc::Allocator, convert::Infallible}; use hashql_core::{ collections::WorkQueue, - heap::{BumpAllocator, ResetAllocator, Scratch}, + heap::{BumpAllocator, Scratch}, id::{ Id as _, bit_vec::{BitMatrix, DenseBitSet}, @@ -106,8 +106,8 @@ impl 
DeadStoreElimination { /// Uses a backwards "mark-live" algorithm: starting from root uses (observable uses like /// return values and branch conditions), propagates liveness through the dependency graph. /// Returns the complement (all locals not marked live). - fn dead_locals(&self, body: &Body<'_>) -> DenseBitSet { - let mut dependencies = DependencyVisitor::new_in(body, &self.alloc); + fn dead_locals(body: &Body<'_>, scratch: &S) -> DenseBitSet { + let mut dependencies = DependencyVisitor::new_in(body, scratch); dependencies.visit_body(body); let DependencyVisitor { @@ -137,35 +137,38 @@ impl Default for DeadStoreElimination { } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for DeadStoreElimination { +impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for DeadStoreElimination { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); - let dead = self.dead_locals(body); + let dead = self.alloc.scoped(|alloc| Self::dead_locals(body, &alloc)); if dead.is_empty() { return Changed::No; } - let mut visitor = EliminationVisitor { - dead: &dead, - params: BasicBlockVec::from_fn_in( - body.basic_blocks.len(), - |block| body.basic_blocks[block].params, - &self.alloc, - ), - interner: context.interner, - changed: false, - scratch_locals: Vec::new_in(&self.alloc), - scratch_operands: Vec::new_in(&self.alloc), - }; - - Ok(()) = visitor.visit_body_preserving_cfg(body); - - let mut changed = Changed::from(visitor.changed); - drop(visitor); + let mut changed = self.alloc.scoped(|alloc| { + let mut visitor = EliminationVisitor { + dead: &dead, + params: BasicBlockVec::from_fn_in( + body.basic_blocks.len(), + |block| body.basic_blocks[block].params, + &alloc, + ), + interner: context.interner, + changed: false, + scratch_locals: Vec::new_in(&alloc), + scratch_operands: Vec::new_in(&alloc), + }; + + Ok(()) = visitor.visit_body_preserving_cfg(body); + + Changed::from(visitor.changed) + }); - let mut dle = DeadLocalElimination::new_in(&mut self.alloc).with_dead(dead); - changed = changed.max(dle.run(context, body)); + changed |= self.alloc.scoped(|alloc| { + DeadLocalElimination::new_in(alloc) + .with_dead(dead) + .run(context, body) + }); changed } diff --git a/libs/@local/hashql/mir/src/pass/transform/error.rs b/libs/@local/hashql/mir/src/pass/transform/error.rs index 99b48237172..4869911ee1b 100644 --- a/libs/@local/hashql/mir/src/pass/transform/error.rs +++ b/libs/@local/hashql/mir/src/pass/transform/error.rs @@ -15,9 +15,15 @@ const UNREACHABLE_SWITCH_ARM: TerminalDiagnosticCategory = TerminalDiagnosticCat name: "Unreachable switch arm", }; +const EXCESSIVE_INLINING_DEPTH: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "excessive-inlining-depth", + name: "Excessive inlining depth", +}; + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum TransformationDiagnosticCategory { UnreachableSwitchArm, + ExcessiveInliningDepth, } impl DiagnosticCategory for TransformationDiagnosticCategory { @@ -32,6 +38,7 @@ impl DiagnosticCategory for TransformationDiagnosticCategory { fn subcategory(&self) -> Option<&dyn DiagnosticCategory> { match *self { Self::UnreachableSwitchArm => Some(&UNREACHABLE_SWITCH_ARM), + Self::ExcessiveInliningDepth => Some(&EXCESSIVE_INLINING_DEPTH), } } } @@ -64,3 +71,31 @@ pub fn unreachable_switch_arm(span: SpanId) -> MirDiagnostic { diagnostic } + +/// Creates a diagnostic warning when aggressive inlining reaches its iteration limit. 
+/// +/// This indicates the filter function has deeply nested call chains that couldn't be fully +/// inlined within the configured cutoff. The code will still work correctly, but some +/// call overhead may remain. +pub fn excessive_inlining_depth(span: SpanId, cutoff: usize) -> MirDiagnostic { + let mut diagnostic = Diagnostic::new( + MirDiagnosticCategory::Transformation( + TransformationDiagnosticCategory::ExcessiveInliningDepth, + ), + Severity::Warning, + ) + .primary(Label::new( + span, + "filter has deeply nested calls that could not be fully inlined", + )); + + diagnostic.add_message(Message::note(format!( + "aggressive inlining stopped after {cutoff} iterations" + ))); + + diagnostic.add_message(Message::help( + "consider refactoring to reduce call chain depth", + )); + + diagnostic +} diff --git a/libs/@local/hashql/mir/src/pass/transform/forward_substitution.rs b/libs/@local/hashql/mir/src/pass/transform/forward_substitution.rs index f1e763b534b..145b0c6c70e 100644 --- a/libs/@local/hashql/mir/src/pass/transform/forward_substitution.rs +++ b/libs/@local/hashql/mir/src/pass/transform/forward_substitution.rs @@ -66,10 +66,9 @@ //! [`DataDependencyAnalysis`]: crate::pass::analysis::DataDependencyAnalysis //! [`CfgSimplify`]: super::CfgSimplify +use alloc::alloc::Global; use core::{alloc::Allocator, convert::Infallible}; -use hashql_core::heap::{BumpAllocator, ResetAllocator, Scratch}; - use crate::{ body::{Body, location::Location, operand::Operand}, context::MirContext, @@ -120,30 +119,26 @@ impl<'heap, A: Allocator + Clone> VisitorMut<'heap> for PlaceVisitor<'_, 'heap, /// projections, assignments, and block parameters. This enables downstream passes to work with /// simplified operands and, when combined with dead store elimination, achieves SROA-like /// decomposition of aggregates. -pub struct ForwardSubstitution { +pub struct ForwardSubstitution { alloc: A, } impl ForwardSubstitution { #[must_use] - pub fn new() -> Self { - Self { - alloc: Scratch::new(), - } + pub const fn new() -> Self { + Self { alloc: Global } } } -impl ForwardSubstitution { +impl ForwardSubstitution { #[must_use] pub const fn new_in(alloc: A) -> Self { Self { alloc } } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for ForwardSubstitution { +impl<'env, 'heap, A: Allocator> TransformPass<'env, 'heap> for ForwardSubstitution { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); - let mut analysis = DataDependencyAnalysis::new_in(&self.alloc); analysis.run(context, body); let analysis = analysis.finish(); diff --git a/libs/@local/hashql/mir/src/pass/transform/inline/analysis.rs b/libs/@local/hashql/mir/src/pass/transform/inline/analysis.rs new file mode 100644 index 00000000000..747c70bdc01 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/transform/inline/analysis.rs @@ -0,0 +1,351 @@ +//! Cost estimation and body analysis for the inline pass. +//! +//! This module computes properties of each function body that inform inlining decisions: +//! - **Cost**: A scalar approximating MIR size/complexity, used by heuristics. +//! - **Directive**: Whether the function should always/never be inlined or use heuristics. +//! - **Loop blocks**: Which basic blocks are inside loops (for callsite scoring). +//! +//! # Cost Model +//! +//! The cost model converts MIR into a single scalar value per function body. Higher costs +//! make functions less likely to be inlined. The cost is computed by summing weighted +//! 
contributions from: +//! +//! - Each basic block (control flow overhead) +//! - Each rvalue (computation complexity) +//! - Each terminator (control flow and I/O operations) +//! +//! # Example Costs +//! +//! With default weights: +//! - Simple helper (1 block, 2 loads, 1 binary, 1 return): ~6 +//! - Medium function (3 blocks, 5 loads, 2 aggregates, 1 graph read): ~21 +//! - Complex filter (5 blocks, 10 loads, 3 switches, 2 applies): ~29 + +use core::{alloc::Allocator, f32}; + +use hashql_core::{ + graph::{ + Successors as _, + algorithms::{ + Tarjan, + tarjan::{Metadata, SccId}, + }, + }, + id::{IdVec, bit_vec::DenseBitSet}, +}; + +use crate::{ + body::{ + Body, Source, + basic_block::{BasicBlock, BasicBlockId}, + location::Location, + rvalue::RValue, + terminator::{Terminator, TerminatorKind}, + }, + def::{DefIdSlice, DefIdVec}, + pass::analysis::CallGraph, + visit::{self, Visitor}, +}; + +/// Controls whether a function should be inlined and how. +/// +/// The directive is determined by the function's [`Source`]: +/// - [`Source::Ctor`] → [`Always`](Self::Always): Constructors are always inlined. +/// - [`Source::Closure`] / [`Source::Thunk`] → [`Heuristic`](Self::Heuristic): Use scoring. +/// - [`Source::Intrinsic`] → [`Never`](Self::Never): Intrinsics cannot be inlined. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub(super) enum InlineDirective { + /// Always inline this function regardless of cost or budget. + /// + /// Used for constructors which are typically trivial and benefit from inlining. + Always, + /// Use heuristic scoring to decide whether to inline. + /// + /// The score considers cost, bonuses (loop, leaf, single caller), and budget. + Heuristic, + /// Never inline this function. + /// + /// Used for intrinsics which cannot be meaningfully inlined. + Never, +} + +/// Properties of a function body relevant to inlining decisions. +#[derive(Debug, Copy, Clone, PartialEq)] +pub(super) struct BodyProperties { + /// How this function should be treated for inlining. + pub directive: InlineDirective, + /// Estimated cost/complexity of the function body. + /// + /// This value is updated during inlining: when a callee is inlined into a caller, + /// the caller's cost increases by the callee's cost (minus the `Apply` cost). + pub cost: f32, + /// Whether this function has no outgoing calls (except to intrinsics). + /// + /// Leaf functions receive a bonus during scoring because inlining them doesn't + /// trigger further inlining cascades. + pub is_leaf: bool, +} + +/// Maps each function to its set of basic blocks that are inside loops. +/// +/// Used during scoring to detect callsites in loops, which receive bonus points +/// since inlining hot code is more beneficial. +pub(super) type BasicBlockLoopVec = DefIdVec>, A>; + +/// Tarjan metadata that counts members in each SCC. +/// +/// Used to detect loops: an SCC with >1 member, or a single node with a self-edge, +/// indicates a loop. +struct MemberCount; + +impl Metadata for MemberCount { + type Annotation = u32; + + fn annotate_node(&mut self, _: N) -> Self::Annotation { + 1 + } + + fn annotate_scc(&mut self, _: S, _: N) -> Self::Annotation { + 0 + } + + fn merge_into_scc(&mut self, lhs: &mut Self::Annotation, other: Self::Annotation) { + *lhs += other; + } + + fn merge_reachable(&mut self, _: &mut Self::Annotation, _: &Self::Annotation) {} +} + +/// Configuration for cost estimation weights. +/// +/// Each field specifies the cost contribution for a particular MIR construct. 
+/// Higher weights make functions containing those constructs less likely to be inlined. +/// +/// # Design Principles +/// +/// - **Cheap operations** (`load`, `goto`, `return`): Low weights (~1.0) since they add minimal +/// complexity. +/// - **Computation** (`binary`, `unary`, `aggregate`): Moderate weights (2.0-3.0) reflecting actual +/// work performed. +/// - **External effects** (`input`, `graph_read`): High weights (5.0-7.0) because these represent +/// expensive I/O operations that shouldn't be duplicated carelessly. +/// - **Calls** (`apply`): Moderate-high weight (4.0) since each call is a potential inline site +/// itself. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct InlineCostEstimationConfig { + /// Cost of loading a value from a place. + pub rvalue_load: f32, + /// Cost of a binary operation (+, -, *, /, etc). + pub rvalue_binary: f32, + /// Cost of a unary operation (!, -). + pub rvalue_unary: f32, + /// Cost of constructing an aggregate (tuple, struct). + pub rvalue_aggregate: f32, + /// Cost of accessing function input parameters. + pub rvalue_input: f32, + /// Cost of a function application (call). + pub rvalue_apply: f32, + + /// Base cost of a `SwitchInt` terminator. + pub terminator_switch_int_base: f32, + /// Additional cost per branch in a `SwitchInt`. + /// + /// Total switch cost = `base + multiplier × num_targets`. + pub terminator_switch_int_branch_multiplier: f32, + /// Cost of a graph database read operation. + /// + /// Set high (7.0) because graph reads are expensive I/O operations. + pub terminator_graph_read: f32, + /// Cost of an unconditional jump. + pub terminator_goto: f32, + /// Cost of a function return. + pub terminator_return: f32, + /// Cost of unreachable code (dead code, no runtime cost). + pub terminator_unreachable: f32, + + /// Base cost per basic block (control flow overhead). + pub basic_block: f32, +} + +impl Default for InlineCostEstimationConfig { + fn default() -> Self { + Self { + rvalue_load: 1.0, + rvalue_binary: 2.0, + rvalue_unary: 2.0, + rvalue_aggregate: 3.0, + rvalue_input: 5.0, + rvalue_apply: 4.0, + + terminator_switch_int_base: 1.0, + terminator_switch_int_branch_multiplier: 0.5, + terminator_graph_read: 7.0, + terminator_goto: 1.0, + terminator_return: 1.0, + terminator_unreachable: 0.0, + + basic_block: 1.0, + } + } +} + +/// Results from body analysis, consumed by the inline pass. +pub(crate) struct CostEstimationResidual { + /// Properties for each function body. + pub properties: DefIdVec, + /// For each function, which basic blocks are inside loops. + pub loops: BasicBlockLoopVec, +} + +/// Analyzes all function bodies to compute inlining-relevant properties. +/// +/// For each body, computes: +/// - [`InlineDirective`] based on the function's source. +/// - Cost using [`InlineCostEstimationConfig`] weights. +/// - Which basic blocks are inside loops (for callsite scoring). +/// - Whether the function is a leaf (no outgoing calls). 
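+///
+/// Expected usage (a sketch of the API below): construct with [`BodyAnalysis::new`], call
+/// [`run`](Self::run) once per body, then [`finish`](Self::finish) to obtain the
+/// [`CostEstimationResidual`]. With the default weights, `run` prices a body of one block
+/// containing a single load and a return at `1.0 + 1.0 + 1.0 = 3.0`.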
+pub(crate) struct BodyAnalysis<'ctx, 'heap, A: Allocator> { + alloc: A, + config: InlineCostEstimationConfig, + + properties: DefIdVec, + loops: BasicBlockLoopVec, + graph: &'ctx CallGraph<'heap, A>, +} + +impl<'ctx, 'heap, A: Allocator> BodyAnalysis<'ctx, 'heap, A> { + pub(crate) fn new( + graph: &'ctx CallGraph<'heap, A>, + bodies: &'ctx DefIdSlice>, + config: InlineCostEstimationConfig, + + alloc: A, + ) -> Self + where + A: Clone, + { + let properties = DefIdVec::from_domain_in( + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 0.0, + is_leaf: true, + }, + bodies, + alloc.clone(), + ); + let loops = IdVec::new_in(alloc.clone()); + + Self { + alloc, + config, + properties, + loops, + graph, + } + } + + pub(crate) fn finish(self) -> CostEstimationResidual { + CostEstimationResidual { + properties: self.properties, + loops: self.loops, + } + } + + /// Analyze a single function body. + /// + /// Computes: + /// 1. The inline directive based on [`Body::source`]. + /// 2. Loop detection using Tarjan's algorithm on the CFG. + /// 3. Cost by visiting all rvalues and terminators. + pub(crate) fn run(&mut self, body: &Body<'heap>) { + let inline = match body.source { + Source::Ctor(_) => InlineDirective::Always, + Source::Closure(_, _) | Source::Thunk(_, _) => InlineDirective::Heuristic, + Source::Intrinsic(_) => InlineDirective::Never, + }; + + // Detect loops using SCC analysis on the CFG. + // A block is in a loop if its SCC has >1 member or it has a self-edge. + let tarjan: Tarjan<_, _, SccId, _, _> = + Tarjan::new_with_metadata_in(&body.basic_blocks, MemberCount, &self.alloc); + let scc = tarjan.run(); + + let mut bitset = None; + for id in body.basic_blocks.ids() { + let component = scc.scc(id); + + if *scc.annotation(component) > 1 + || body.basic_blocks.successors(id).any(|succ| succ == id) + { + let bitset = + bitset.get_or_insert_with(|| DenseBitSet::new_empty(body.basic_blocks.len())); + bitset.insert(id); + } + } + + let mut visitor = CostEstimationVisitor { + config: self.config, + total: 0.0, + }; + visitor.visit_body(body); + + self.properties[body.id] = BodyProperties { + directive: inline, + cost: visitor.total, + is_leaf: self.graph.is_leaf(body.id), + }; + + if let Some(bitset) = bitset { + self.loops.insert(body.id, bitset); + } + } +} + +/// Visitor that sums up cost contributions from all MIR constructs. 
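+///
+/// Every construct contributes a fixed weight, except `SwitchInt`, whose cost scales with
+/// its arm count as `base + branch_multiplier × targets` (see `visit_terminator` below).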
+struct CostEstimationVisitor { + config: InlineCostEstimationConfig, + total: f32, +} + +#[expect(clippy::cast_precision_loss)] +impl<'heap> Visitor<'heap> for CostEstimationVisitor { + type Result = Result<(), !>; + + fn visit_rvalue(&mut self, _: Location, rvalue: &RValue<'heap>) -> Self::Result { + let cost = match rvalue { + RValue::Load(_) => self.config.rvalue_load, + RValue::Binary(_) => self.config.rvalue_binary, + RValue::Unary(_) => self.config.rvalue_unary, + RValue::Aggregate(_) => self.config.rvalue_aggregate, + RValue::Input(_) => self.config.rvalue_input, + RValue::Apply(_) => self.config.rvalue_apply, + }; + + self.total += cost; + Ok(()) + } + + fn visit_terminator(&mut self, _: Location, terminator: &Terminator<'heap>) -> Self::Result { + let cost = match &terminator.kind { + TerminatorKind::SwitchInt(switch_int) => (switch_int.targets.targets().len() as f32) + .mul_add( + self.config.terminator_switch_int_branch_multiplier, + self.config.terminator_switch_int_base, + ), + TerminatorKind::GraphRead(_) => self.config.terminator_graph_read, + TerminatorKind::Goto(_) => self.config.terminator_goto, + TerminatorKind::Return(_) => self.config.terminator_return, + TerminatorKind::Unreachable => self.config.terminator_unreachable, + }; + + self.total += cost; + Ok(()) + } + + fn visit_basic_block(&mut self, id: BasicBlockId, block: &BasicBlock<'heap>) -> Self::Result { + self.total += self.config.basic_block; + + visit::r#ref::walk_basic_block(self, id, block) + } +} diff --git a/libs/@local/hashql/mir/src/pass/transform/inline/find.rs b/libs/@local/hashql/mir/src/pass/transform/inline/find.rs new file mode 100644 index 00000000000..c2f2e16f423 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/transform/inline/find.rs @@ -0,0 +1,71 @@ +//! Callsite discovery for aggressive inlining. +//! +//! This module provides [`FindCallsiteVisitor`], which scans a function body to find +//! callsites that are eligible for aggressive inlining during the filter inlining phase. +//! +//! Unlike the normal phase which uses the call graph, the aggressive phase needs to +//! re-discover callsites after each inlining iteration because the body has changed. + +use core::alloc::Allocator; + +use super::{InlineState, InlineStateMemory}; +use crate::{ + body::{constant::Constant, location::Location, operand::Operand, rvalue::Apply}, + def::DefId, + pass::analysis::CallSite, + visit::Visitor, +}; + +/// Visitor that finds eligible callsites for aggressive inlining. +/// +/// Used during the aggressive inlining phase to discover new callsites in filter +/// functions after previous inlining iterations have modified the body. +/// +/// A callsite is eligible if: +/// - It's a direct call (function is a constant `FnPtr`). +/// - Its target SCC has not already been inlined into this caller. +/// +/// The SCC check prevents cycles: once we've inlined a function (or any function +/// in its SCC) into a filter, we won't inline it again. +pub(crate) struct FindCallsiteVisitor<'ctx, 'state, 'env, 'heap, A: Allocator> { + /// The filter function we're finding callsites in. + pub caller: DefId, + + /// Shared inlining state for SCC and inlined-set lookups. + pub state: &'ctx InlineState<'ctx, 'state, 'env, 'heap, A>, + /// Memory to collect discovered callsites into. 
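+    /// Reusing one buffer across discovery passes avoids reallocating each iteration.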
+ pub mem: &'ctx mut InlineStateMemory, +} + +impl<'heap, A: Allocator> Visitor<'heap> for FindCallsiteVisitor<'_, '_, '_, 'heap, A> { + type Result = Result<(), !>; + + fn visit_rvalue_apply( + &mut self, + location: Location, + Apply { + function, + arguments: _, + }: &Apply<'heap>, + ) -> Self::Result { + // Only handle direct calls (constant function pointers). + let &Operand::Constant(Constant::FnPtr(ptr)) = function else { + return Ok(()); + }; + + let target_component = self.state.components.scc(ptr); + + // Skip if we've already inlined this SCC into this caller. + if self.state.inlined.contains(self.caller, target_component) { + return Ok(()); + } + + self.mem.callsites.push(CallSite { + caller: self.caller, + kind: location, + target: ptr, + }); + + Ok(()) + } +} diff --git a/libs/@local/hashql/mir/src/pass/transform/inline/heuristics.rs b/libs/@local/hashql/mir/src/pass/transform/inline/heuristics.rs new file mode 100644 index 00000000000..2449e6d21fc --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/transform/inline/heuristics.rs @@ -0,0 +1,234 @@ +//! Heuristic scoring for inline candidates. +//! +//! This module implements the scoring function that determines whether a callsite +//! should be inlined. The scoring balances the benefits of inlining (reduced call +//! overhead, optimization opportunities) against the costs (code size increase). +//! +//! # Scoring Algorithm +//! +//! For each callsite, the score is computed as: +//! +//! ```text +//! 1. Check InlineDirective: +//! - Always → +∞ (unconditional inline) +//! - Never → -∞ (never inline) +//! +//! 2. Check always_inline threshold: +//! - If cost < always_inline → +∞ +//! +//! 3. Check max threshold: +//! - max_cost = max × (max_loop_multiplier if in_loop else 1.0) +//! - If cost > max_cost → -∞ +//! +//! 4. Compute score: +//! score = loop_bonus + leaf_bonus + single_caller_bonus + unique_callsite_bonus +//! - cost × size_penalty_factor +//! ``` +//! +//! # Score Interpretation +//! +//! - `+∞`: Always inline, bypasses budget. +//! - `> 0`: Candidate for inlining, consumes budget. +//! - `≤ 0`: Not inlined. +//! - `-∞`: Never inline. + +use core::alloc::Allocator; + +use super::analysis::{BasicBlockLoopVec, BodyProperties, InlineDirective}; +use crate::{ + body::location::Location, + def::DefIdSlice, + pass::analysis::{CallGraph, CallSite}, +}; + +/// Configuration for inline heuristics. +/// +/// Controls thresholds, bonuses, and penalties that determine which callsites +/// are selected for inlining. +/// +/// # Thresholds +/// +/// - [`always_inline`](Self::always_inline): Functions below this cost always inline. +/// - [`max`](Self::max): Functions above this cost never inline (via heuristics). +/// - [`max_loop_multiplier`](Self::max_loop_multiplier): Raises the max threshold for callsites +/// inside loops. +/// +/// # Bonuses +/// +/// Bonuses are added to the score for beneficial callsite properties: +/// - [`loop_bonus`](Self::loop_bonus): Callsite is inside a loop (hot code). +/// - [`leaf_bonus`](Self::leaf_bonus): Target has no outgoing calls. +/// - [`single_caller_bonus`](Self::single_caller_bonus): Only one caller calls this target. +/// - [`unique_callsite_bonus`](Self::unique_callsite_bonus): Exactly one callsite to target. +/// +/// # Penalty +/// +/// - [`size_penalty_factor`](Self::size_penalty_factor): Multiplier for cost when subtracting from +/// score. Values > 1.0 bias against larger functions. 
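+///
+/// # Threshold Example
+///
+/// With the defaults below (`always_inline = 10`, `max = 60`, `max_loop_multiplier = 1.5`),
+/// a callee of cost `8` is inlined unconditionally; a callee of cost `70` is rejected
+/// outside loops but still scored at a loop callsite (effective max `90`); and a callee of
+/// cost `95` is never inlined via heuristics.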
+#[derive(Debug, Copy, Clone, PartialEq)] +pub struct InlineHeuristicsConfig { + /// Cost threshold below which functions are always inlined. + /// + /// Functions with `cost < always_inline` get score `+∞` and bypass the budget + /// entirely. This ensures trivial helpers are always inlined regardless of + /// call frequency. + /// + /// Default: `10.0` (~1-2 blocks with a few operations). + pub always_inline: f32, + + /// Maximum cost for a function to be considered for inlining. + /// + /// Functions with `cost > max` (or `max × max_loop_multiplier` in loops) get + /// score `-∞` and are never inlined via heuristics. + /// + /// Also used as the base for computing per-caller budget: + /// `budget = max × budget_multiplier`. + /// + /// Default: `60.0` (~6-8 moderate blocks). + pub max: f32, + + /// Multiplier for `max` when the callsite is inside a loop. + /// + /// Allows slightly larger functions to be inlined in hot paths. + /// Effective max in loops = `max × max_loop_multiplier`. + /// + /// Default: `1.5` (raises ceiling from 60 to 90 in loops). + pub max_loop_multiplier: f32, + + /// Bonus for callsites inside loops. + /// + /// Loop bodies execute many times, so inlining amortizes call overhead + /// and enables loop-specific optimizations. + /// + /// Default: `20.0`. + pub loop_bonus: f32, + + /// Bonus for leaf functions (no outgoing calls except intrinsics). + /// + /// Leaf functions are simpler and won't trigger further inlining cascades, + /// making them safer to inline. + /// + /// Default: `10.0`. + pub leaf_bonus: f32, + + /// Bonus when this caller is the only function that calls the target. + /// + /// Single-caller functions are good candidates because the code exists + /// only for this caller anyway. + /// + /// Default: `5.0`. + pub single_caller_bonus: f32, + + /// Bonus when there is exactly one callsite to the target in the entire program. + /// + /// This implies both single caller and single callsite, meaning inlining + /// causes zero code duplication. + /// + /// Default: `12.0`. + pub unique_callsite_bonus: f32, + + /// Multiplier for target cost when computing the size penalty. + /// + /// The penalty subtracted from the score is `cost × size_penalty_factor`. + /// Values > 1.0 bias against larger functions, requiring more bonuses to + /// achieve a positive score. + /// + /// Default: `1.1` (mild bias against size). + pub size_penalty_factor: f32, +} + +impl Default for InlineHeuristicsConfig { + fn default() -> Self { + Self { + always_inline: 10.0, + max: 60.0, + max_loop_multiplier: 1.5, + + loop_bonus: 20.0, + leaf_bonus: 10.0, + single_caller_bonus: 5.0, + unique_callsite_bonus: 12.0, + size_penalty_factor: 1.1, + } + } +} + +/// Scores callsites to determine inlining desirability. +/// +/// Uses [`InlineHeuristicsConfig`] along with call graph and body properties +/// to compute a score for each callsite. +pub(crate) struct InlineHeuristics<'ctx, 'heap, A: Allocator> { + pub config: InlineHeuristicsConfig, + pub graph: &'ctx CallGraph<'heap, A>, + pub loops: &'ctx BasicBlockLoopVec, + pub properties: &'ctx DefIdSlice, +} + +impl InlineHeuristics<'_, '_, A> { + /// Compute the inlining score for a callsite. + /// + /// Returns: + /// - `+∞` for unconditional inlining (directive or below `always_inline`). + /// - `-∞` for functions that should never be inlined. + /// - A finite score otherwise, where positive means "candidate for inlining". 
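+    ///
+    /// For example, with the default configuration a leaf callee of cost `30`, called from
+    /// inside a loop by its only caller at the program's only callsite, scores
+    /// `20 + 10 + 5 + 12 - 30 × 1.1 = 14`; the same callsite outside the loop scores
+    /// `10 + 5 + 12 - 33 = -6` and is not inlined.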
+ #[expect(clippy::float_arithmetic)] + pub(crate) fn score( + &self, + CallSite { + caller, + kind: location, + target, + }: CallSite, + ) -> f32 { + // Check directive first: Always/Never override all heuristics. + match self.properties[target].directive { + InlineDirective::Always => return f32::INFINITY, + InlineDirective::Heuristic => {} + InlineDirective::Never => return f32::NEG_INFINITY, + } + + let target_cost = self.properties[target].cost; + + // Trivially small functions bypass scoring and budget. + if target_cost < self.config.always_inline { + return f32::INFINITY; + } + + let call_in_loop = self + .loops + .lookup(caller) + .is_some_and(|set| set.contains(location.block)); + + // Loops get a higher max threshold to allow larger functions. + let max_multiplier = if call_in_loop { + self.config.max_loop_multiplier + } else { + 1.0 + }; + + let max_cost = self.config.max * max_multiplier; + if target_cost > max_cost { + return f32::NEG_INFINITY; + } + + // Accumulate bonuses for beneficial properties. + let mut score = 0.0; + if call_in_loop { + score += self.config.loop_bonus; + } + if self.properties[target].is_leaf { + score += self.config.leaf_bonus; + } + if self.graph.is_single_caller(caller, target) { + score += self.config.single_caller_bonus; + } + if self.graph.unique_caller(target) == Some(caller) { + score += self.config.unique_callsite_bonus; + } + + // Subtract size penalty: larger functions need more bonuses to be profitable. + score -= target_cost * self.config.size_penalty_factor; + + score + } +} diff --git a/libs/@local/hashql/mir/src/pass/transform/inline/mod.rs b/libs/@local/hashql/mir/src/pass/transform/inline/mod.rs new file mode 100644 index 00000000000..985d597236d --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/transform/inline/mod.rs @@ -0,0 +1,636 @@ +//! Function inlining pass for MIR. +//! +//! This pass inlines function calls to reduce call overhead and enable further optimizations. +//! It operates in two phases: +//! +//! 1. **Normal phase**: Processes all functions using heuristic scoring and budget constraints. +//! 2. **Aggressive phase**: For filter closures only, inlines until fixpoint or cutoff. +//! +//! # Architecture +//! +//! The inliner uses several key components: +//! +//! - [`BodyAnalysis`]: Computes cost, directive, and loop information for each function. +//! - [`InlineHeuristics`]: Scores callsites based on cost, bonuses, and penalties. +//! - [`InlineState`]: Tracks inlining state including SCC membership and budget. +//! +//! # Normal Phase +//! +//! For non-filter functions, the normal phase: +//! 1. Processes SCCs in dependency order (callees before callers). +//! 2. For each callsite, computes a score using [`InlineHeuristics::score`]. +//! 3. Selects candidates with positive scores, limited by per-caller budget. +//! 4. Updates caller costs after inlining to prevent cascade explosions. +//! +//! Recursive calls (same SCC) are never inlined to prevent infinite expansion. +//! +//! # Aggressive Phase +//! +//! Filter closures (used in graph read pipelines) bypass normal heuristics and get +//! aggressive inlining to fully flatten the filter logic. The aggressive phase: +//! 1. Iterates up to `aggressive_inline_cutoff` times per filter. +//! 2. On each iteration, inlines all eligible callsites found in the filter. +//! 3. Tracks which SCCs have been inlined to prevent cycles. +//! 4. Emits a diagnostic if the cutoff is reached. +//! +//! # Budget System +//! +//! 
Each caller has a budget of `max × budget_multiplier` cost units. When selecting +//! candidates: +//! - Candidates are sorted by score (highest first). +//! - Each inlined callee consumes its cost from the budget. +//! - Callsites with infinite score (directive or `always_inline`) bypass budget entirely. +//! +//! After inlining, the caller's cost is updated: the `Apply` cost is removed and the +//! callee's cost is added. This ensures subsequent inlining decisions see the true +//! accumulated cost. + +#![expect(clippy::float_arithmetic)] +use alloc::collections::BinaryHeap; +use core::{alloc::Allocator, cmp, mem}; + +use hashql_core::{ + graph::{ + DirectedGraph as _, + algorithms::{ + Tarjan, + tarjan::{SccId, StronglyConnectedComponents}, + }, + }, + heap::{BumpAllocator, Heap}, + id::{ + Id as _, IdSlice, + bit_vec::{DenseBitSet, SparseBitMatrix}, + }, + span::SpanId, +}; + +pub use self::{analysis::InlineCostEstimationConfig, heuristics::InlineHeuristicsConfig}; +use self::{ + analysis::{BasicBlockLoopVec, BodyAnalysis, BodyProperties}, + find::FindCallsiteVisitor, + heuristics::InlineHeuristics, + rename::RenameVisitor, +}; +use crate::{ + body::{ + Body, + basic_block::{BasicBlock, BasicBlockId}, + local::{Local, LocalDecl}, + location::Location, + operand::Operand, + place::Place, + rvalue::RValue, + statement::{Assign, Statement, StatementKind}, + terminator::{Goto, Target, Terminator, TerminatorKind}, + }, + context::MirContext, + def::{DefId, DefIdSlice, DefIdVec}, + intern::Interner, + pass::{ + Changed, GlobalTransformPass, GlobalTransformState, + analysis::{CallGraph, CallSite}, + transform::error, + }, + visit::{Visitor as _, VisitorMut as _}, +}; + +mod analysis; +mod find; +mod heuristics; +mod rename; + +#[cfg(test)] +mod tests; + +/// A candidate callsite for inlining, with its computed score. +struct Candidate { + score: f32, + callsite: CallSite, +} + +impl PartialEq for Candidate { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == cmp::Ordering::Equal + } +} + +impl Eq for Candidate {} + +impl PartialOrd for Candidate { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Candidate { + fn cmp(&self, other: &Self) -> cmp::Ordering { + let Self { score, callsite: _ } = self; + + // Reverse ordering: higher scores come first (max-heap behavior). + score.total_cmp(&other.score) + } +} + +/// Top-level configuration for the inline pass. +/// +/// Combines cost estimation, heuristics, and pass-level parameters. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct InlineConfig { + /// Weights for computing function body costs. + pub cost: InlineCostEstimationConfig, + /// Thresholds and bonuses for scoring callsites. + pub heuristics: InlineHeuristicsConfig, + /// Multiplier for computing per-caller budget. + /// + /// Budget = `heuristics.max × budget_multiplier`. + /// Limits how much code can be inlined into a single function. + /// + /// Default: `2.0` (budget of 120 with default max of 60). + pub budget_multiplier: f32, + /// Maximum iterations for aggressive filter inlining. + /// + /// The aggressive phase runs up to this many iterations per filter, + /// inlining all eligible callsites each iteration. If the limit is + /// reached, a diagnostic is emitted. + /// + /// Default: `16` (generous for deep pipelines). 
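+    ///
+    /// Because every iteration inlines all eligible callsites it finds, the cutoff
+    /// roughly bounds the call-chain depth that can be fully flattened.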
+    pub aggressive_inline_cutoff: usize,
+}
+
+impl Default for InlineConfig {
+    fn default() -> Self {
+        Self {
+            cost: InlineCostEstimationConfig::default(),
+            heuristics: InlineHeuristicsConfig::default(),
+            budget_multiplier: 2.0,
+            aggressive_inline_cutoff: 16,
+        }
+    }
+}
+
+/// Reusable memory for callsite collection during inlining.
+struct InlineStateMemory<A: Allocator> {
+    /// Collected callsites to inline.
+    callsites: Vec<CallSite, A>,
+    /// Priority queue of candidates sorted by score.
+    candidates: BinaryHeap<Candidate, A>,
+}
+
+impl<A: Allocator> InlineStateMemory<A> {
+    fn new(alloc: A) -> Self
+    where
+        A: Clone,
+    {
+        Self {
+            callsites: Vec::new_in(alloc.clone()),
+            candidates: BinaryHeap::new_in(alloc),
+        }
+    }
+}
+
+/// State maintained during the inlining process.
+struct InlineState<'ctx, 'state, 'env, 'heap, A: Allocator> {
+    config: InlineConfig,
+    interner: &'env Interner<'heap>,
+
+    graph: CallGraph<'heap, A>,
+
+    /// Functions that require aggressive inlining (filter closures).
+    filters: DenseBitSet<DefId>,
+    /// Tracks which SCCs have been inlined into each function.
+    ///
+    /// Used to prevent cycles during aggressive inlining: once an SCC
+    /// has been inlined into a filter, it won't be inlined again.
+    inlined: SparseBitMatrix<DefId, SccId>,
+
+    /// Body properties for each function.
+    properties: DefIdVec<BodyProperties, A>,
+    /// For each function, which basic blocks are inside loops.
+    loops: BasicBlockLoopVec<A>,
+    /// SCC membership for cycle detection.
+    components: StronglyConnectedComponents<DefId, SccId>,
+
+    global: &'ctx mut GlobalTransformState<'state>,
+}
+
+impl<'heap, A: Allocator> InlineState<'_, '_, '_, 'heap, A> {
+    /// Collect all non-recursive callsites for aggressive inlining.
+    ///
+    /// Used for filter functions which bypass normal heuristics.
+    /// Records inlined SCCs to prevent cycles in subsequent iterations.
+    fn collect_all_callsites(&mut self, body: DefId, mem: &mut InlineStateMemory<A>) {
+        let component = self.components.scc(body);
+
+        self.graph
+            .apply_callsites(body)
+            .filter(|callsite| self.components.scc(callsite.target) != component)
+            .collect_into(&mut mem.callsites);
+
+        self.inlined.insert(body, component);
+        for callsite in &mem.callsites {
+            self.inlined
+                .insert(body, self.components.scc(callsite.target));
+        }
+    }
+
+    /// Collect callsites using heuristic scoring and budget.
+    ///
+    /// For filter functions, delegates to [`collect_all_callsites`](Self::collect_all_callsites).
+    /// For normal functions:
+    /// 1. Scores each callsite using [`InlineHeuristics`].
+    /// 2. Skips negative scores (not beneficial) and recursive calls (same SCC).
+    /// 3. Infinite scores bypass budget; finite positive scores are ranked.
+    /// 4. Selects candidates in score order until budget is exhausted.
+    /// 5. Updates caller cost to reflect inlined code.
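+    ///
+    /// For example, with the defaults the budget is `max` 60 × `budget_multiplier`
+    /// 2.0 = 120 cost units: two callees of cost 50 fit, a third is skipped, while
+    /// a callee whose cost is below `always_inline` still bypasses the budget.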
+    #[expect(clippy::cast_precision_loss)]
+    fn collect_callsites(&mut self, body: DefId, mem: &mut InlineStateMemory<A>) {
+        if self.filters.contains(body) {
+            return self.collect_all_callsites(body, mem);
+        }
+
+        let component = self.components.scc(body);
+        let scorer = InlineHeuristics {
+            config: self.config.heuristics,
+            graph: &self.graph,
+            loops: &self.loops,
+            properties: &self.properties,
+        };
+
+        let targets = &mut mem.callsites;
+        let candidates = &mut mem.candidates;
+
+        for callsite in self.graph.apply_callsites(body) {
+            if self.components.scc(callsite.target) == component {
+                continue;
+            }
+
+            let score = scorer.score(callsite);
+            if score.is_sign_negative() {
+                continue;
+            }
+
+            if score.is_infinite() {
+                targets.push(callsite);
+                continue;
+            }
+
+            candidates.push(Candidate { score, callsite });
+        }
+
+        let mut remaining_budget = self.config.heuristics.max * self.config.budget_multiplier;
+
+        for candidate in candidates.drain_sorted() {
+            let target_cost = self.properties[candidate.callsite.target].cost;
+
+            if remaining_budget >= target_cost {
+                remaining_budget -= target_cost;
+                targets.push(candidate.callsite);
+            }
+        }
+
+        // Update caller cost: remove Apply costs and add callee costs.
+        // This ensures subsequent callers see the true accumulated cost.
+        self.properties[body].cost -= (targets.len() as f32) * self.config.cost.rvalue_apply;
+        for target in targets {
+            debug_assert_eq!(target.caller, body);
+
+            let Ok([caller, target]) = self
+                .properties
+                .get_disjoint_mut([target.caller, target.target])
+            else {
+                unreachable!("`inlinable_callsites` should have filtered out self-calls")
+            };
+
+            caller.cost += target.cost;
+        }
+    }
+
+    /// Perform the actual inlining of a callsite.
+    ///
+    /// This involves:
+    /// 1. Splitting the caller's basic block at the call statement.
+    /// 2. Creating a continuation block for code after the call.
+    /// 3. Copying the callee's basic blocks and locals into the caller.
+    /// 4. Renaming all references to account for the new offsets.
+    /// 5. Redirecting the call to jump into the inlined code.
+    fn inline(
+        &self,
+        bodies: &mut IdSlice<DefId, Body<'heap>>,
+
+        CallSite {
+            caller,
+            kind: location,
+            target,
+        }: CallSite,
+    ) {
+        let Ok([source, target]) = bodies.get_disjoint_mut([caller, target]) else {
+            unreachable!("`inlinable_callsites` should have filtered out self-calls")
+        };
+        // Downgrade to shared ref to prevent accidental modification.
+        let target = &*target;
+
+        let bb_offset = source.basic_blocks.len();
+
+        let block = &mut source.basic_blocks.as_mut()[location.block];
+
+        debug_assert!(
+            target.basic_blocks[BasicBlockId::START].params.is_empty(),
+            "function entry block must have no params"
+        );
+        // Replace the block's terminator with a goto to the inlined entry block.
+        // +1 because we push the continuation block first (see `apply`).
+        let terminator = mem::replace(
+            &mut block.terminator,
+            Terminator {
+                span: SpanId::SYNTHETIC,
+                kind: TerminatorKind::Goto(Goto {
+                    target: Target::block(BasicBlockId::START.plus(bb_offset + 1)),
+                }),
+            },
+        );
+        debug_assert!(
+            location.statement_index > 0,
+            "callsite location must point to a statement, not block params"
+        );
+        // statement_index is 1-based (0 = block params), so split_off gives statements
+        // after the call, and pop removes the call statement itself.
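+        // E.g. statements = [a, b = apply f, c] with statement_index = 2:
+        // split_off(2) leaves [a, b = apply f] and returns after = [c];
+        // the subsequent pop() then extracts `b = apply f` itself.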
+        let mut after = block.statements.split_off(location.statement_index);
+        let callsite = block.statements.pop().unwrap_or_else(|| unreachable!());
+
+        block.terminator.span = callsite.span;
+
+        let StatementKind::Assign(Assign {
+            lhs,
+            rhs: RValue::Apply(apply),
+        }) = callsite.kind
+        else {
+            unreachable!("`inlinable_callsites` should only point to apply statements")
+        };
+
+        // Determine where to store the return value.
+        // If lhs has projections (e.g., `foo.bar = call()`), we can't use it directly as a
+        // block param. Create a temp local and prepend an assignment to write it back.
+        let result = if lhs.projections.is_empty() {
+            lhs.local
+        } else {
+            let type_id = lhs.type_id(&source.local_decls);
+            let local = source.local_decls.push(LocalDecl {
+                span: callsite.span,
+                r#type: type_id,
+                name: None,
+            });
+
+            // Prepend assignment to write the result back to the projected place.
+            after.insert(
+                0,
+                Statement {
+                    span: callsite.span,
+                    kind: StatementKind::Assign(Assign {
+                        lhs,
+                        rhs: RValue::Load(Operand::Place(Place::local(local))),
+                    }),
+                },
+            );
+
+            local
+        };
+
+        let local_offset = source.local_decls.len();
+
+        // Assign arguments to the callee's parameter locals.
+        debug_assert_eq!(apply.arguments.len(), target.args);
+        for (index, arg) in apply.arguments.into_iter().enumerate() {
+            block.statements.push(Statement {
+                span: callsite.span,
+                kind: StatementKind::Assign(Assign {
+                    lhs: Place::local(Local::new(local_offset + index)),
+                    rhs: RValue::Load(arg),
+                }),
+            });
+        }
+
+        self.apply(source, target, result, after, terminator);
+    }
+
+    /// Apply the callee's code to the caller body.
+    ///
+    /// Creates the continuation block, copies callee blocks and locals,
+    /// and renames all references to use the new offsets.
+    fn apply(
+        &self,
+        source: &mut Body<'heap>,
+        callee: &Body<'heap>,
+
+        result: Local,
+        statements: Vec<Statement<'heap>, &'heap Heap>,
+        terminator: Terminator<'heap>,
+    ) {
+        // Create continuation block first. The inlined code's returns will jump here,
+        // passing the return value as a block argument to `result`.
+        let continuation = source.basic_blocks.as_mut().push(BasicBlock {
+            params: self.interner.locals.intern_slice(&[result]),
+            statements,
+            terminator,
+        });
+
+        // Record offsets before extending - these are used to rename all references.
+        let bb_offset = source.basic_blocks.bound();
+        // This must match the `local_offset` used in `inline` for argument assignments.
+        let local_offset = source.local_decls.len();
+
+        // Copy callee's blocks and locals into caller.
+        source
+            .basic_blocks
+            .as_mut()
+            .extend(callee.basic_blocks.iter().cloned());
+
+        source
+            .local_decls
+            .extend(callee.local_decls.iter().copied());
+
+        // Rename all references in the copied blocks to use the new offsets.
+        let mut visitor = RenameVisitor {
+            local_offset,
+            bb_offset: bb_offset.as_usize(),
+            continuation,
+            interner: self.interner,
+        };
+
+        for (index, block) in source.basic_blocks.as_mut()[bb_offset..]
+            .iter_mut()
+            .enumerate()
+        {
+            visitor.visit_basic_block(bb_offset.plus(index), block);
+        }
+    }
+
+    /// Process a single function: collect callsites and inline them.
+    fn run(
+        &mut self,
+        bodies: &mut DefIdSlice<Body<'heap>>,
+        body: DefId,
+        mem: &mut InlineStateMemory<A>,
+    ) -> Changed {
+        self.collect_callsites(body, mem);
+        // Sort in reverse order so later callsites are processed first.
+        // This avoids index shifting issues when modifying the body.
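+        // E.g. with callsites at statements 2 and 5 of one block, inlining at 5
+        // first leaves statement 2's recorded location valid; the opposite order
+        // would split the block before statement 5 and invalidate its location.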
+        mem.callsites
+            .sort_unstable_by(|lhs, rhs| lhs.kind.cmp(&rhs.kind).reverse());
+
+        if mem.callsites.is_empty() {
+            return Changed::No;
+        }
+
+        for callsite in mem.callsites.drain(..) {
+            self.inline(bodies, callsite);
+        }
+
+        Changed::Yes
+    }
+}
+
+/// The main inline pass.
+///
+/// Inlines function calls to reduce overhead and enable optimizations.
+pub struct Inline<A> {
+    alloc: A,
+
+    config: InlineConfig,
+}
+
+impl<A: BumpAllocator> Inline<A> {
+    pub const fn new_in(config: InlineConfig, alloc: A) -> Self {
+        Self { alloc, config }
+    }
+
+    /// Build initial state by analyzing all bodies.
+    fn state<'ctx, 'state, 'env, 'heap>(
+        &self,
+        state: &'ctx mut GlobalTransformState<'state>,
+        interner: &'env Interner<'heap>,
+        bodies: &DefIdSlice<Body<'heap>>,
+    ) -> InlineState<'ctx, 'state, 'env, 'heap, &A> {
+        let graph = CallGraph::analyze_in(bodies, &self.alloc);
+        let mut analysis = BodyAnalysis::new(&graph, bodies, self.config.cost, &self.alloc);
+
+        for body in bodies {
+            analysis.run(body);
+        }
+
+        let mut filters = DenseBitSet::new_empty(bodies.len());
+        for filter in graph.filters() {
+            filters.insert(filter);
+        }
+
+        let costs = analysis.finish();
+
+        let tarjan = Tarjan::new_in(&graph, &self.alloc);
+        let components = tarjan.run();
+
+        InlineState {
+            config: self.config,
+            filters,
+            inlined: SparseBitMatrix::new_in(components.node_count(), &self.alloc),
+            interner,
+            graph,
+            properties: costs.properties,
+            loops: costs.loops,
+            components,
+            global: state,
+        }
+    }
+
+    /// Run the normal inlining phase.
+    ///
+    /// Processes SCCs in dependency order (callees before callers) so that
+    /// cost updates propagate correctly.
+    fn normal<'heap, 'alloc>(
+        &self,
+        state: &mut InlineState<'_, '_, '_, 'heap, &'alloc A>,
+        bodies: &mut IdSlice<DefId, Body<'heap>>,
+        mem: &mut InlineStateMemory<&'alloc A>,
+    ) -> Changed {
+        let members = state.components.members_in(&self.alloc);
+
+        let mut any_changed = Changed::No;
+        for scc in members.sccs() {
+            for &id in members.of(scc) {
+                let changed = state.run(bodies, id, mem);
+                any_changed |= changed;
+                state.global.mark(id, changed);
+            }
+        }
+        any_changed
+    }
+
+    /// Run the aggressive inlining phase for filter functions.
+    ///
+    /// For each filter, iteratively inlines all eligible callsites until
+    /// no more are found or the cutoff is reached.
+    fn aggressive<'heap, 'alloc>(
+        &self,
+        context: &mut MirContext<'_, 'heap>,
+        state: &mut InlineState<'_, '_, '_, 'heap, &'alloc A>,
+        bodies: &mut IdSlice<DefId, Body<'heap>>,
+        mem: &mut InlineStateMemory<&'alloc A>,
+    ) -> Changed {
+        let mut any_changed = Changed::No;
+
+        for filter in &state.filters {
+            let mut iteration = 0;
+            while iteration < self.config.aggressive_inline_cutoff {
+                let mut visitor = FindCallsiteVisitor {
+                    caller: filter,
+                    state,
+                    mem,
+                };
+                visitor.visit_body(&bodies[filter]);
+
+                if mem.callsites.is_empty() {
+                    break;
+                }
+
+                any_changed = Changed::Yes;
+                state.global.mark(filter, Changed::Yes);
+
+                mem.callsites
+                    .sort_unstable_by(|lhs, rhs| lhs.kind.cmp(&rhs.kind).reverse());
+                for callsite in mem.callsites.drain(..) {
+                    let target_component = state.components.scc(callsite.target);
+                    state.inlined.insert(filter, target_component);
+
+                    state.inline(bodies, callsite);
+                }
+
+                iteration += 1;
+            }
+
+            if iteration == self.config.aggressive_inline_cutoff {
+                context.diagnostics.push(error::excessive_inlining_depth(
+                    bodies[filter].span,
+                    self.config.aggressive_inline_cutoff,
+                ));
+            }
+        }
+
+        any_changed
+    }
+}
+
+impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap> for Inline<A> {
+    fn run(
+        &mut self,
+        context: &mut MirContext<'env, 'heap>,
+        state: &mut GlobalTransformState<'_>,
+        bodies: &mut DefIdSlice<Body<'heap>>,
+    ) -> Changed {
+        let mut state = self.state(state, context.interner, bodies);
+        let mut mem = InlineStateMemory::new(&self.alloc);
+
+        let mut changed = Changed::No;
+        changed |= self.normal(&mut state, bodies, &mut mem);
+        changed |= self.aggressive(context, &mut state, bodies, &mut mem);
+        changed
+    }
+}
diff --git a/libs/@local/hashql/mir/src/pass/transform/inline/rename.rs b/libs/@local/hashql/mir/src/pass/transform/inline/rename.rs
new file mode 100644
index 00000000000..d3914910145
--- /dev/null
+++ b/libs/@local/hashql/mir/src/pass/transform/inline/rename.rs
@@ -0,0 +1,101 @@
+//! Reference renaming for inlined code.
+//!
+//! When inlining a callee into a caller, all references in the callee's code must be
+//! adjusted to account for:
+//!
+//! - **Local offset**: The callee's locals are appended after the caller's locals.
+//! - **Basic block offset**: The callee's blocks are appended after the caller's blocks.
+//! - **Return transformation**: The callee's `Return` terminators become `Goto` to the continuation
+//!   block, passing the return value as a block argument.
+//!
+//! [`RenameVisitor`] performs these transformations on the inlined code.
+
+use core::convert::Infallible;
+
+use hashql_core::id::Id as _;
+
+use crate::{
+    body::{
+        basic_block::BasicBlockId,
+        local::Local,
+        location::Location,
+        place::PlaceContext,
+        terminator::{Goto, Return, Target, Terminator, TerminatorKind},
+    },
+    intern::Interner,
+    visit::{self, VisitorMut, r#mut::filter},
+};
+
+/// Visitor that renames references in inlined code.
+///
+/// After copying a callee's basic blocks and locals into a caller, this visitor
+/// adjusts all references so they point to the correct locations in the combined body.
+pub(crate) struct RenameVisitor<'env, 'heap> {
+    /// Offset to add to all local indices.
+    ///
+    /// The callee's `Local(0)` becomes `Local(local_offset)` in the caller.
+    pub local_offset: usize,
+    /// Offset to add to all basic block indices.
+    ///
+    /// The callee's `BasicBlockId(0)` becomes `BasicBlockId(bb_offset)` in the caller.
+    pub bb_offset: usize,
+    /// The continuation block to jump to instead of returning.
+    ///
+    /// When the callee would return, we instead jump to this block, passing
+    /// the return value as a block argument.
+    pub continuation: BasicBlockId,
+    /// Interner for creating new interned slices (e.g., block arguments).
+    pub interner: &'env Interner<'heap>,
+}
+
+impl<'heap> VisitorMut<'heap> for RenameVisitor<'_, 'heap> {
+    type Filter = filter::Deep;
+    type Residual = Result<Infallible, Infallible>;
+    type Result<T>
+        = Result<T, Infallible>
+    where
+        T: 'heap;
+
+    fn interner(&self) -> &Interner<'heap> {
+        self.interner
+    }
+
+    /// Rename a local reference by adding the local offset.
+    fn visit_local(&mut self, _: Location, _: PlaceContext, local: &mut Local) -> Self::Result<()> {
+        local.increment_by(self.local_offset);
+        Ok(())
+    }
+
+    /// Rename a basic block reference by adding the block offset.
+    fn visit_basic_block_id(
+        &mut self,
+        _: Location,
+        basic_block_id: &mut BasicBlockId,
+    ) -> Self::Result<()> {
+        basic_block_id.increment_by(self.bb_offset);
+        Ok(())
+    }
+
+    /// Transform terminators, converting `Return` to `Goto` continuation.
+    ///
+    /// First walks the terminator to rename any nested references, then checks
+    /// if it's a `Return` and converts it to a `Goto` to the continuation block.
+    fn visit_terminator(
+        &mut self,
+        location: Location,
+        terminator: &mut Terminator<'heap>,
+    ) -> Self::Result<()> {
+        let Ok(()) = visit::r#mut::walk_terminator(self, location, terminator);
+
+        if let TerminatorKind::Return(Return { value }) = terminator.kind {
+            terminator.kind = TerminatorKind::Goto(Goto {
+                target: Target {
+                    block: self.continuation,
+                    args: self.interner.operands.intern_slice(&[value]),
+                },
+            });
+        }
+
+        Ok(())
+    }
+}
diff --git a/libs/@local/hashql/mir/src/pass/transform/inline/tests.rs b/libs/@local/hashql/mir/src/pass/transform/inline/tests.rs
new file mode 100644
index 00000000000..a56106b909e
--- /dev/null
+++ b/libs/@local/hashql/mir/src/pass/transform/inline/tests.rs
@@ -0,0 +1,1089 @@
+#![expect(clippy::min_ident_chars, clippy::similar_names, reason = "tests")]
+
+use alloc::{alloc::Global, collections::BinaryHeap, vec};
+use core::{f32, fmt::Write as _};
+use std::path::PathBuf;
+
+use bstr::ByteVec as _;
+use hashql_core::{
+    heap::Heap,
+    pretty::Formatter,
+    symbol::sym,
+    r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment},
+};
+use hashql_diagnostics::DiagnosticIssues;
+use insta::{Settings, assert_snapshot};
+
+use super::{
+    BodyAnalysis, Inline, InlineConfig, InlineCostEstimationConfig, InlineHeuristicsConfig,
+};
+use crate::{
+    body::{Body, Source, basic_block::BasicBlockId, location::Location},
+    builder::body,
+    context::MirContext,
+    def::{DefId, DefIdSlice, DefIdVec},
+    intern::Interner,
+    pass::{
+        Changed, GlobalTransformPass as _, OwnedGlobalTransformState,
+        analysis::{CallGraph, CallSite},
+        transform::inline::{
+            BodyProperties, Candidate, analysis::InlineDirective, heuristics::InlineHeuristics,
+        },
+    },
+    pretty::TextFormat,
+};
+
+/// Creates an identity function: `fn(x: Int) -> Int { return x; }`.
+fn identity_callee<'heap>(
+    interner: &Interner<'heap>,
+    env: &Environment<'heap>,
+    id: DefId,
+) -> Body<'heap> {
+    body!(interner, env; fn@id/1 -> Int {
+        decl x: Int;
+        bb0() { return x; }
+    })
+}
+
+/// Creates a simple caller thunk that calls the given callee with one argument.
+fn simple_caller<'heap>(
+    interner: &Interner<'heap>,
+    env: &Environment<'heap>,
+    id: DefId,
+    callee_id: DefId,
+) -> Body<'heap> {
+    body!(interner, env; thunk@id/0 -> Int {
+        decl out: Int;
+        bb0() {
+            out = apply (callee_id), 1;
+            return out;
+        }
+    })
+}
+
+/// Creates a caller/callee pair for heuristics testing.
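+///
+/// The callee receives `DefId` 0 and the caller `DefId` 1, matching the ids
+/// assumed by `default_callsite`.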
+fn callee_caller_pair<'heap>( + interner: &Interner<'heap>, + env: &Environment<'heap>, +) -> (Body<'heap>, Body<'heap>) { + let callee_id = DefId::new(0); + let caller_id = DefId::new(1); + ( + identity_callee(interner, env, callee_id), + simple_caller(interner, env, caller_id, callee_id), + ) +} + +fn format_bodies<'heap>( + bodies: &DefIdSlice>, + context: &MirContext<'_, 'heap>, +) -> String { + let formatter = Formatter::new(context.heap); + let mut formatter = TypeFormatter::new( + &formatter, + context.env, + TypeFormatterOptions::terse().with_qualified_opaque_names(true), + ); + let mut text_format = TextFormat { + writer: Vec::new(), + indent: 4, + sources: (), + types: &mut formatter, + }; + + text_format + .format(bodies, &[]) + .expect("should be able to write bodies"); + + text_format.writer.into_string_lossy() +} + +#[track_caller] +fn assert_inline_pass<'heap>( + name: &'static str, + bodies: &mut [Body<'heap>], + context: &mut MirContext<'_, 'heap>, + config: InlineConfig, +) { + let bodies = DefIdSlice::from_raw_mut(bodies); + let before = format_bodies(bodies, context); + + let mut heap = Heap::new(); + let mut pass = Inline::new_in(config, &mut heap); + let _: Changed = pass.run( + context, + &mut OwnedGlobalTransformState::new_in(bodies, Global).as_mut(), + bodies, + ); + + let after = format_bodies(bodies, context); + + let mut output = before; + write!(output, "\n\n{:=^50}\n\n", " After Inlining ").expect("infallible"); + output.push_str(&after); + + let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let mut settings = Settings::clone_current(); + settings.set_snapshot_path(dir.join("tests/ui/pass/inline")); + settings.set_prepend_module_to_snapshot(false); + + let _drop = settings.bind_to_scope(); + assert_snapshot!(name, output); +} + +fn default_callsite() -> CallSite { + CallSite { + caller: DefId::new(1), + kind: Location { + block: BasicBlockId::new(0), + statement_index: 1, + }, + target: DefId::new(0), + } +} + +#[test] +fn inline_simple_leaf() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee = body!(interner, env; fn@0/1 -> Bool { + decl x: Int, result: Bool; + bb0() { + result = bin.== x x; + return result; + } + }); + + let caller = body!(interner, env; thunk@1/0 -> Bool { + decl out: Bool; + bb0() { + out = apply (callee.id), 21; + return out; + } + }); + + let mut bodies = [callee, caller]; + + assert_inline_pass( + "inline_simple_leaf", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +#[test] +fn inline_multiple_args() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let compare3 = body!(interner, env; fn@0/3 -> Bool { + decl a: Int, b: Int, c: Int, tmp1: Bool, tmp2: Bool, result: Bool; + bb0() { + tmp1 = bin.== a b; + tmp2 = bin.== b c; + result = bin.& tmp1 tmp2; + return result; + } + }); + + let caller = body!(interner, env; thunk@1/0 -> Bool { + decl out: Bool; + bb0() { + out = apply (compare3.id), 1, 2, 3; + return out; + } + }); + + let mut bodies = [compare3, caller]; + + assert_inline_pass( + "inline_multiple_args", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +#[test] +fn inline_multiple_blocks() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = 
Environment::new(&heap); + + let callee = body!(interner, env; fn@0/2 -> Int { + decl a: Int, b: Int, cond: Bool, result: Int; + bb0() { + cond = bin.> a b; + if cond then bb1() else bb2(); + }, + bb1() { goto bb3(a); }, + bb2() { goto bb3(b); }, + bb3(result) { return result; } + }); + + let caller = body!(interner, env; thunk@1/0 -> Int { + decl out: Int; + bb0() { + out = apply (callee.id), 10, 20; + return out; + } + }); + + let mut bodies = [callee, caller]; + + assert_inline_pass( + "inline_multiple_blocks", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +#[test] +fn inline_continuation_terminator() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee_id = DefId::new(0); + let callee = identity_callee(&interner, &env, callee_id); + + let caller = body!(interner, env; thunk@1/0 -> Bool { + decl tmp: Int, out: Bool; + bb0() { + tmp = apply (callee_id), 5; + out = bin.== tmp 10; + return out; + } + }); + + let mut bodies = [callee, caller]; + + assert_inline_pass( + "inline_continuation_terminator", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +#[test] +fn inline_chained_calls() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let c_id = DefId::new(0); + let b_id = DefId::new(1); + let a_id = DefId::new(2); + + let c = identity_callee(&interner, &env, c_id); + + let b = body!(interner, env; fn@b_id/1 -> Int { + decl y: Int, tmp: Int; + bb0() { + tmp = apply (c_id), y; + return tmp; + } + }); + + let a = simple_caller(&interner, &env, a_id, b_id); + + let mut bodies = [c, b, a]; + + assert_inline_pass( + "inline_chained_calls", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +/// Tests that inlining correctly handles assignment to projections. 
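+///
+/// A call result assigned to a projected place (here `tup.0 = apply ...`) cannot
+/// become a block param directly, so the inliner introduces a temp local and
+/// prepends a write-back statement to the continuation block.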
+#[test] +fn inline_projection_assignment() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee_id = DefId::new(0); + let callee = identity_callee(&interner, &env, callee_id); + + // Caller assigns apply result directly to a projection + let caller = body!(interner, env; thunk@1/0 -> Int { + decl tup: (Int, Int), out: Int; + @proj tup_0 = tup.0: Int; + bb0() { + tup = tuple 0, 0; + tup_0 = apply (callee_id), 5; + out = load tup_0; + return out; + } + }); + + let mut bodies = [callee, caller]; + + assert_inline_pass( + "inline_projection_assignment", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +#[test] +fn inline_recursive_not_inlined() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let recursive_id = DefId::new(0); + let caller_id = DefId::new(1); + + let recursive = body!(interner, env; fn@recursive_id/1 -> Int { + decl n: Int, cond: Bool, result: Int, sub_result: Int; + bb0() { + cond = bin.== n 0; + if cond then bb1() else bb2(); + }, + bb1() { goto bb3(n); }, + bb2() { + sub_result = apply (recursive_id), n; + goto bb3(sub_result); + }, + bb3(result) { return result; } + }); + + let caller = simple_caller(&interner, &env, caller_id, recursive_id); + + let mut bodies = [recursive, caller]; + + assert_inline_pass( + "inline_recursive_not_inlined", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + InlineConfig::default(), + ); +} + +#[test] +fn inline_budget_exhaustion() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee = body!(interner, env; fn@0/1 -> Bool { + decl x: Int, a: Bool, b: Bool, c: Bool, d: Bool, e: Bool; + bb0() { + a = bin.== x 1; + b = bin.> x 2; + c = bin.< x 3; + d = bin.== x 4; + e = bin.> x 5; + return e; + } + }); + + let caller = body!(interner, env; thunk@1/0 -> Bool { + decl o1: Bool, o2: Bool, o3: Bool, o4: Bool, o5: Bool, o6: Bool, result: Bool; + bb0() { + o1 = apply (callee.id), 1; + o2 = apply (callee.id), 2; + o3 = apply (callee.id), 3; + o4 = apply (callee.id), 4; + o5 = apply (callee.id), 5; + o6 = apply (callee.id), 6; + result = bin.== o1 o2; + return result; + } + }); + + let mut bodies = [callee, caller]; + + let config = InlineConfig { + heuristics: InlineHeuristicsConfig { + always_inline: 5.0, + max: 100.0, + ..InlineHeuristicsConfig::default() + }, + budget_multiplier: 0.5, + ..InlineConfig::default() + }; + + assert_inline_pass( + "inline_budget_exhaustion", + &mut bodies, + &mut MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + config, + ); +} + +/// Tests that candidates are processed in max-heap order (highest score first). +/// +/// This is a regression test for a bug where the ordering was accidentally reversed. 
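+///
+/// `BinaryHeap` is a max-heap, so `Candidate`'s natural `total_cmp` ordering
+/// already yields the highest score first from `drain_sorted`; no `.reverse()`
+/// is needed in `Ord::cmp`.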
+#[test]
+#[expect(clippy::float_cmp)]
+fn candidates_ordered_by_descending_score() {
+    // Create callsites with different scores
+    let callsite_low = CallSite {
+        caller: DefId::new(1),
+        kind: Location {
+            block: BasicBlockId::new(0),
+            statement_index: 0,
+        },
+        target: DefId::new(0),
+    };
+    let callsite_mid = CallSite {
+        caller: DefId::new(1),
+        kind: Location {
+            block: BasicBlockId::new(0),
+            statement_index: 1,
+        },
+        target: DefId::new(0),
+    };
+    let callsite_high = CallSite {
+        caller: DefId::new(1),
+        kind: Location {
+            block: BasicBlockId::new(0),
+            statement_index: 2,
+        },
+        target: DefId::new(0),
+    };
+
+    let mut candidates: BinaryHeap<Candidate> = BinaryHeap::new();
+    candidates.push(Candidate {
+        score: 10.0,
+        callsite: callsite_low,
+    });
+    candidates.push(Candidate {
+        score: 50.0,
+        callsite: callsite_high,
+    });
+    candidates.push(Candidate {
+        score: 30.0,
+        callsite: callsite_mid,
+    });
+
+    let drained: Vec<_> = candidates.drain_sorted().collect();
+
+    assert_eq!(drained.len(), 3);
+    assert!(
+        drained[0].score > drained[1].score && drained[1].score > drained[2].score,
+        "Expected descending order: {:?}",
+        drained.iter().map(|c| c.score).collect::<Vec<_>>()
+    );
+    assert_eq!(drained[0].score, 50.0, "Highest score should be first");
+    assert_eq!(drained[1].score, 30.0, "Middle score should be second");
+    assert_eq!(drained[2].score, 10.0, "Lowest score should be last");
+}
+
+#[test]
+fn analysis_directives_by_source() {
+    let heap = Heap::new();
+    let interner = Interner::new(&heap);
+    let env = Environment::new(&heap);
+
+    // Create one body and clone it for different sources
+    let mut closure_body = identity_callee(&interner, &env, DefId::new(0));
+
+    let mut ctor_body = closure_body.clone();
+    ctor_body.id = DefId::new(1);
+    ctor_body.source = Source::Ctor(sym::lexical::Some);
+
+    let mut intrinsic_body = closure_body.clone();
+    intrinsic_body.id = DefId::new(2);
+    intrinsic_body.source = Source::Intrinsic(DefId::PLACEHOLDER);
+
+    // Fix closure_body id to be 0
+    closure_body.id = DefId::new(0);
+
+    let bodies = [closure_body, ctor_body, intrinsic_body];
+    let bodies_slice = DefIdSlice::from_raw(&bodies);
+
+    let graph = CallGraph::analyze_in(bodies_slice, &heap);
+    let mut analysis = BodyAnalysis::new(
+        &graph,
+        bodies_slice,
+        InlineCostEstimationConfig::default(),
+        &heap,
+    );
+
+    for body in &bodies {
+        analysis.run(body);
+    }
+    let result = analysis.finish();
+
+    assert_eq!(
+        result.properties[DefId::new(0)].directive,
+        InlineDirective::Heuristic
+    );
+    assert_eq!(
+        result.properties[DefId::new(1)].directive,
+        InlineDirective::Always
+    );
+    assert_eq!(
+        result.properties[DefId::new(2)].directive,
+        InlineDirective::Never
+    );
+}
+
+#[test]
+fn analysis_cost_estimation() {
+    let heap = Heap::new();
+    let interner = Interner::new(&heap);
+    let env = Environment::new(&heap);
+
+    let body = body!(interner, env; fn@0/1 -> Int {
+        decl x: Int, y: Bool;
+        bb0() {
+            y = bin.== x x;
+            return y;
+        }
+    });
+
+    let bodies = [body];
+    let bodies_slice = DefIdSlice::from_raw(&bodies);
+
+    let config = InlineCostEstimationConfig::default();
+    let graph = CallGraph::analyze_in(bodies_slice, &heap);
+    let mut analysis = BodyAnalysis::new(&graph, bodies_slice, config, &heap);
+
+    analysis.run(&bodies[0]);
+    let result = analysis.finish();
+
+    let expected = config.rvalue_binary + config.basic_block + config.terminator_return;
+    assert!(
+        (result.properties[DefId::new(0)].cost - expected).abs() < f32::EPSILON,
+        "expected cost {expected}, got {}",
+        result.properties[DefId::new(0)].cost
+ ); +} + +#[test] +fn analysis_is_leaf() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let bodies: [_; 2] = callee_caller_pair(&interner, &env).into(); + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let mut analysis = BodyAnalysis::new( + &graph, + bodies_slice, + InlineCostEstimationConfig::default(), + &heap, + ); + + for body in &bodies { + analysis.run(body); + } + let result = analysis.finish(); + + assert!(result.properties[DefId::new(0)].is_leaf); + assert!(!result.properties[DefId::new(1)].is_leaf); +} + +#[test] +fn analysis_loop_detection() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/1 -> Int { + decl i: Int, cond: Bool; + bb0() { + cond = bin.== i 10; + goto bb1(); + }, + bb1() { if cond then bb2() else bb0(); }, + bb2() { return i; } + }); + + let bodies = [body]; + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let mut analysis = BodyAnalysis::new( + &graph, + bodies_slice, + InlineCostEstimationConfig::default(), + &heap, + ); + + analysis.run(&bodies[0]); + let result = analysis.finish(); + + let loop_blocks = result + .loops + .lookup(DefId::new(0)) + .expect("should detect loop"); + assert!(loop_blocks.contains(BasicBlockId::new(0))); + assert!(loop_blocks.contains(BasicBlockId::new(1))); + assert!(!loop_blocks.contains(BasicBlockId::new(2))); +} + +#[test] +fn heuristics_directive_scores() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let bodies: [_; 2] = callee_caller_pair(&interner, &env).into(); + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let loops = DefIdVec::new_in(&heap); + let config = InlineHeuristicsConfig::default(); + let callsite = default_callsite(); + + // Test Always -> +∞ + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Always, + cost: 100.0, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 10.0, + is_leaf: false, + }, + ]); + let heuristics = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: properties.as_slice(), + }; + let score = heuristics.score(callsite); + assert!(score.is_infinite() && score.is_sign_positive()); + + // Test Never -> -∞ + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Never, + cost: 5.0, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 10.0, + is_leaf: false, + }, + ]); + let heuristics = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: properties.as_slice(), + }; + let score = heuristics.score(callsite); + assert!(score.is_infinite() && score.is_sign_negative()); +} + +#[test] +fn heuristics_cost_thresholds() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let bodies: [_; 2] = callee_caller_pair(&interner, &env).into(); + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let loops = DefIdVec::new_in(&heap); + let config = InlineHeuristicsConfig::default(); + let callsite = default_callsite(); + + // Below always_inline -> +∞ + let properties = DefIdVec::from_raw(vec![ + 
BodyProperties { + directive: InlineDirective::Heuristic, + cost: config.always_inline - 1.0, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 10.0, + is_leaf: false, + }, + ]); + let heuristics = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: properties.as_slice(), + }; + assert!( + heuristics.score(callsite).is_infinite() && heuristics.score(callsite).is_sign_positive() + ); + + // Above max -> -∞ + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost: config.max + 1.0, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 10.0, + is_leaf: false, + }, + ]); + let heuristics = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: properties.as_slice(), + }; + assert!( + heuristics.score(callsite).is_infinite() && heuristics.score(callsite).is_sign_negative() + ); +} + +#[test] +fn heuristics_leaf_bonus() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let bodies: [_; 2] = callee_caller_pair(&interner, &env).into(); + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let loops = DefIdVec::new_in(&heap); + let config = InlineHeuristicsConfig::default(); + let cost = 50.0; + let callsite = default_callsite(); + + let props_leaf = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 10.0, + is_leaf: false, + }, + ]); + let props_non_leaf = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost, + is_leaf: false, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 10.0, + is_leaf: false, + }, + ]); + + let h_leaf = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: props_leaf.as_slice(), + }; + let h_non_leaf = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: props_non_leaf.as_slice(), + }; + + let diff = h_leaf.score(callsite) - h_non_leaf.score(callsite); + assert!((diff - config.leaf_bonus).abs() < f32::EPSILON); +} + +#[test] +fn heuristics_loop_bonus() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee_id = DefId::new(0); + let caller_id = DefId::new(1); + + let callee = identity_callee(&interner, &env, callee_id); + // Caller with loop: bb0 loops back to itself + let caller = body!(interner, env; thunk@caller_id/0 -> Int { + decl out: Int, cond: Bool; + bb0() { + out = apply (callee_id), 1; + cond = bin.== out 10; + if cond then bb1() else bb0(); + }, + bb1() { return out; } + }); + + let bodies = [callee, caller]; + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let config = InlineHeuristicsConfig::default(); + let cost = 30.0; + + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 50.0, + is_leaf: false, + }, + ]); + + // Run analysis to detect loops + let mut analysis = BodyAnalysis::new( + &graph, + bodies_slice, + InlineCostEstimationConfig::default(), + &heap, + ); + for body in &bodies { + analysis.run(body); + } + let result = analysis.finish(); + + let 
empty_loops = DefIdVec::new_in(&heap); + let callsite = default_callsite(); + + let h_with_loops = InlineHeuristics { + config, + graph: &graph, + loops: &result.loops, + properties: properties.as_slice(), + }; + let h_no_loops = InlineHeuristics { + config, + graph: &graph, + loops: &empty_loops, + properties: properties.as_slice(), + }; + + let diff = h_with_loops.score(callsite) - h_no_loops.score(callsite); + assert!((diff - config.loop_bonus).abs() < f32::EPSILON); +} + +#[test] +fn heuristics_max_loop_multiplier() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee_id = DefId::new(0); + let caller_id = DefId::new(1); + + let callee = identity_callee(&interner, &env, callee_id); + let caller = body!(interner, env; thunk@caller_id/0 -> Int { + decl out: Int, cond: Bool; + bb0() { + out = apply (callee_id), 1; + cond = bin.== out 10; + if cond then bb1() else bb0(); + }, + bb1() { return out; } + }); + + let bodies = [callee, caller]; + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let config = InlineHeuristicsConfig::default(); + let cost = config.max + 5.0; // Between max and max * multiplier + + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 50.0, + is_leaf: false, + }, + ]); + + let mut analysis = BodyAnalysis::new( + &graph, + bodies_slice, + InlineCostEstimationConfig::default(), + &heap, + ); + for body in &bodies { + analysis.run(body); + } + let result = analysis.finish(); + + let empty_loops = DefIdVec::new_in(&heap); + let callsite = default_callsite(); + + let h_with_loops = InlineHeuristics { + config, + graph: &graph, + loops: &result.loops, + properties: properties.as_slice(), + }; + let h_no_loops = InlineHeuristics { + config, + graph: &graph, + loops: &empty_loops, + properties: properties.as_slice(), + }; + + // In loop: allowed (finite score) + assert!(h_with_loops.score(callsite).is_finite()); + // Not in loop: rejected (-∞) + assert!( + h_no_loops.score(callsite).is_infinite() && h_no_loops.score(callsite).is_sign_negative() + ); +} + +#[test] +fn heuristics_caller_bonuses() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let bodies: [_; 2] = callee_caller_pair(&interner, &env).into(); + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let loops = DefIdVec::new_in(&heap); + let config = InlineHeuristicsConfig::default(); + let cost = 30.0; + + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 50.0, + is_leaf: false, + }, + ]); + + let heuristics = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: properties.as_slice(), + }; + + let expected = config.leaf_bonus + config.single_caller_bonus + config.unique_callsite_bonus + - cost * config.size_penalty_factor; + + assert!((heuristics.score(default_callsite()) - expected).abs() < f32::EPSILON); +} + +#[test] +fn heuristics_no_unique_callsite_bonus_multiple_calls() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee_id = DefId::new(0); + let callee = identity_callee(&interner, 
&env, callee_id); + + // Two calls to same callee + let caller = body!(interner, env; thunk@1/0 -> Bool { + decl out1: Int, out2: Int, result: Bool; + bb0() { + out1 = apply (callee_id), 1; + out2 = apply (callee_id), 2; + result = bin.== out1 out2; + return result; + } + }); + + let bodies = [callee, caller]; + let bodies_slice = DefIdSlice::from_raw(&bodies); + + let graph = CallGraph::analyze_in(bodies_slice, &heap); + let loops = DefIdVec::new_in(&heap); + let config = InlineHeuristicsConfig::default(); + let cost = 30.0; + + let properties = DefIdVec::from_raw(vec![ + BodyProperties { + directive: InlineDirective::Heuristic, + cost, + is_leaf: true, + }, + BodyProperties { + directive: InlineDirective::Heuristic, + cost: 50.0, + is_leaf: false, + }, + ]); + + let heuristics = InlineHeuristics { + config, + graph: &graph, + loops: &loops, + properties: properties.as_slice(), + }; + + // No unique_callsite_bonus because 2 callsites + let expected = + config.leaf_bonus + config.single_caller_bonus - cost * config.size_penalty_factor; + + assert!((heuristics.score(default_callsite()) - expected).abs() < f32::EPSILON); +} diff --git a/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs b/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs index 7994038d535..ae485e57db4 100644 --- a/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs @@ -92,7 +92,7 @@ mod tests; use core::{alloc::Allocator, convert::Infallible}; use hashql_core::{ - heap::{BumpAllocator, ResetAllocator, Scratch, TransferInto as _}, + heap::{BumpAllocator, Scratch, TransferInto as _}, id::IdVec, r#type::{environment::Environment, kind::PrimitiveType}, }; @@ -171,10 +171,8 @@ impl InstSimplify { } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for InstSimplify { +impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for InstSimplify { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); - let mut visitor = InstSimplifyVisitor { env: context.env, interner: context.interner, diff --git a/libs/@local/hashql/mir/src/pass/transform/mod.rs b/libs/@local/hashql/mir/src/pass/transform/mod.rs index 8fd04d8dde9..d404f7c0a43 100644 --- a/libs/@local/hashql/mir/src/pass/transform/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/mod.rs @@ -6,13 +6,21 @@ mod dle; mod dse; pub mod error; mod forward_substitution; +mod inline; mod inst_simplify; mod pre_inlining; mod ssa_repair; pub use self::{ - administrative_reduction::AdministrativeReduction, cfg_simplify::CfgSimplify, - copy_propagation::CopyPropagation, dbe::DeadBlockElimination, dle::DeadLocalElimination, - dse::DeadStoreElimination, forward_substitution::ForwardSubstitution, - inst_simplify::InstSimplify, pre_inlining::PreInlining, ssa_repair::SsaRepair, + administrative_reduction::AdministrativeReduction, + cfg_simplify::CfgSimplify, + copy_propagation::CopyPropagation, + dbe::DeadBlockElimination, + dle::DeadLocalElimination, + dse::DeadStoreElimination, + forward_substitution::ForwardSubstitution, + inline::{Inline, InlineConfig, InlineCostEstimationConfig, InlineHeuristicsConfig}, + inst_simplify::InstSimplify, + pre_inlining::PreInlining, + ssa_repair::SsaRepair, }; diff --git a/libs/@local/hashql/mir/src/pass/transform/pre_inlining.rs b/libs/@local/hashql/mir/src/pass/transform/pre_inlining.rs index 6f179200c6c..77e0e176f73 100644 --- 
a/libs/@local/hashql/mir/src/pass/transform/pre_inlining.rs +++ b/libs/@local/hashql/mir/src/pass/transform/pre_inlining.rs @@ -3,10 +3,9 @@ //! This module contains the [`PreInlining`] pass, which runs a fixpoint loop of local and global //! transformations to optimize MIR bodies before inlining occurs. -use alloc::alloc::Global; use core::alloc::Allocator; -use hashql_core::{heap::ResetAllocator, id::bit_vec::DenseBitSet}; +use hashql_core::{heap::BumpAllocator, id::bit_vec::DenseBitSet}; use super::{ AdministrativeReduction, CfgSimplify, DeadStoreElimination, ForwardSubstitution, InstSimplify, @@ -14,7 +13,7 @@ use super::{ use crate::{ body::Body, context::MirContext, - def::{DefId, DefIdSlice, DefIdVec}, + def::{DefId, DefIdSlice}, pass::{ Changed, GlobalTransformPass, GlobalTransformState, TransformPass, transform::CopyPropagation, @@ -50,7 +49,7 @@ pub struct PreInlining { alloc: A, } -impl PreInlining { +impl PreInlining { /// Creates a new pre-inlining pass with the given allocator. /// /// The allocator is used for temporary data structures within sub-passes and is reset @@ -107,8 +106,10 @@ impl PreInlining { unstable: &DenseBitSet, state: &mut DefIdSlice, ) -> Changed { - let pass = CopyPropagation::new_in(&mut self.alloc); - Self::run_local_pass(context, bodies, pass, unstable, state) + self.alloc.scoped(|alloc| { + let pass = CopyPropagation::new_in(alloc); + Self::run_local_pass(context, bodies, pass, unstable, state) + }) } fn cfg_simplify<'heap>( @@ -118,8 +119,10 @@ impl PreInlining { unstable: &DenseBitSet, state: &mut DefIdSlice, ) -> Changed { - let pass = CfgSimplify::new_in(&mut self.alloc); - Self::run_local_pass(context, bodies, pass, unstable, state) + self.alloc.scoped(|alloc| { + let pass = CfgSimplify::new_in(alloc); + Self::run_local_pass(context, bodies, pass, unstable, state) + }) } fn inst_simplify<'heap>( @@ -129,8 +132,10 @@ impl PreInlining { unstable: &DenseBitSet, state: &mut DefIdSlice, ) -> Changed { - let pass = InstSimplify::new_in(&mut self.alloc); - Self::run_local_pass(context, bodies, pass, unstable, state) + self.alloc.scoped(|alloc| { + let pass = InstSimplify::new_in(alloc); + Self::run_local_pass(context, bodies, pass, unstable, state) + }) } fn forward_substitution<'heap>( @@ -140,8 +145,10 @@ impl PreInlining { unstable: &DenseBitSet, state: &mut DefIdSlice, ) -> Changed { - let pass = ForwardSubstitution::new_in(&mut self.alloc); - Self::run_local_pass(context, bodies, pass, unstable, state) + self.alloc.scoped(|alloc| { + let pass = ForwardSubstitution::new_in(alloc); + Self::run_local_pass(context, bodies, pass, unstable, state) + }) } fn administrative_reduction<'heap>( @@ -151,8 +158,10 @@ impl PreInlining { state: &mut DefIdSlice, ) -> Changed { - let pass = AdministrativeReduction::new_in(&mut self.alloc); - Self::run_global_pass(context, bodies, pass, state) + self.alloc.scoped(|alloc| { + let pass = AdministrativeReduction::new_in(alloc); + Self::run_global_pass(context, bodies, pass, state) + }) } fn dse<'heap>( @@ -162,14 +171,16 @@ impl PreInlining { unstable: &DenseBitSet, state: &mut DefIdSlice, ) -> Changed { - let pass = DeadStoreElimination::new_in(&mut self.alloc); - Self::run_local_pass(context, bodies, pass, unstable, state) + self.alloc.scoped(|alloc| { + let pass = DeadStoreElimination::new_in(alloc); + Self::run_local_pass(context, bodies, pass, unstable, state) + }) } } const MAX_ITERATIONS: usize = 16; -impl<'env, 'heap, A: ResetAllocator> GlobalTransformPass<'env, 'heap> for PreInlining { +impl<'env, 'heap, A: 
BumpAllocator> GlobalTransformPass<'env, 'heap> for PreInlining { #[expect(clippy::integer_division_remainder_used)] fn run( &mut self, @@ -177,11 +188,16 @@ impl<'env, 'heap, A: ResetAllocator> GlobalTransformPass<'env, 'heap> for PreInl _: &mut GlobalTransformState<'_>, bodies: &mut DefIdSlice>, ) -> Changed { - self.alloc.reset(); - - // We would be able to move this to the scratch space, if we only had proper checkpointing - // support. - let mut state = DefIdVec::from_domain_in(Changed::No, bodies, Global); + // We allocate state on the heap rather than scratch because bump scopes require + // `&mut` access across iterations, and our generic allocator can't express the + // necessary lifetime bounds cleanly (limitation of the underlying bump-scope crate). + // Acceptable since this meta-pass runs once and the data is a single byte per body. + let state = { + let uninit = context.heap.allocate_slice_uninit(bodies.len()); + let init = uninit.write_filled(Changed::No); + + DefIdSlice::from_raw_mut(init) + }; let mut unstable = DenseBitSet::new_filled(bodies.len()); // Pre-pass: run CP + CFG once before the fixpoint loop. @@ -191,8 +207,8 @@ impl<'env, 'heap, A: ResetAllocator> GlobalTransformPass<'env, 'heap> for PreInl // merges straight-line code. This shrinks the MIR upfront so more expensive passes // run on smaller, cleaner bodies. let mut global_changed = Changed::No; - global_changed |= self.copy_propagation(context, bodies, &unstable, &mut state); - global_changed |= self.cfg_simplify(context, bodies, &unstable, &mut state); + global_changed |= self.copy_propagation(context, bodies, &unstable, state); + global_changed |= self.cfg_simplify(context, bodies, &unstable, state); let mut iter = 0; loop { @@ -217,8 +233,8 @@ impl<'env, 'heap, A: ResetAllocator> GlobalTransformPass<'env, 'heap> for PreInl // producing a minimal CFG that maximizes the next iteration's effectiveness. let mut changed = Changed::No; - changed |= self.administrative_reduction(context, bodies, &mut state); - changed |= self.inst_simplify(context, bodies, &unstable, &mut state); + changed |= self.administrative_reduction(context, bodies, state); + changed |= self.inst_simplify(context, bodies, &unstable, state); // FS vs CP strategy: ForwardSubstitution is more powerful but expensive; // CopyPropagation is cheaper but weaker. We start with FS (iter=0) to @@ -226,13 +242,13 @@ impl<'env, 'heap, A: ResetAllocator> GlobalTransformPass<'env, 'heap> for PreInl // redundancy. Subsequent iterations alternate: CP maintains propagation // cheaply, while periodic FS picks up deeper opportunities. 
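            // Schedule: iter 0 -> FS, iter 1 -> CP, iter 2 -> FS, and so on.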
changed |= if iter % 2 == 0 { - self.forward_substitution(context, bodies, &unstable, &mut state) + self.forward_substitution(context, bodies, &unstable, state) } else { - self.copy_propagation(context, bodies, &unstable, &mut state) + self.copy_propagation(context, bodies, &unstable, state) }; - changed |= self.dse(context, bodies, &unstable, &mut state); - changed |= self.cfg_simplify(context, bodies, &unstable, &mut state); + changed |= self.dse(context, bodies, &unstable, state); + changed |= self.cfg_simplify(context, bodies, &unstable, state); global_changed |= changed; if changed == Changed::No { diff --git a/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs b/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs index fbf17cd46f1..91c91265a55 100644 --- a/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs @@ -40,7 +40,7 @@ use hashql_core::{ Predecessors as _, algorithms::{IteratedDominanceFrontier, dominance_frontiers, iterated_dominance_frontier}, }, - heap::{BumpAllocator, Heap, ResetAllocator, Scratch}, + heap::{BumpAllocator, Heap, Scratch}, intern::Interned, }; @@ -161,9 +161,8 @@ impl Default for SsaRepair { } } -impl<'env, 'heap, A: ResetAllocator> TransformPass<'env, 'heap> for SsaRepair { +impl<'env, 'heap, A: BumpAllocator> TransformPass<'env, 'heap> for SsaRepair { fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - self.alloc.reset(); let mut sites = DefSites::new_in(body, &self.alloc); sites.visit_body(body); diff --git a/libs/@local/hashql/mir/src/pretty/text.rs b/libs/@local/hashql/mir/src/pretty/text.rs index d4d1d726ea3..9a775868953 100644 --- a/libs/@local/hashql/mir/src/pretty/text.rs +++ b/libs/@local/hashql/mir/src/pretty/text.rs @@ -711,7 +711,7 @@ where self.writer.write_all(b"apply ")?; self.format_part(*function)?; - for argument in arguments.iter() { + for argument in arguments { self.writer.write_all(b" ")?; self.format_part(*argument)?; } diff --git a/libs/@local/hashql/mir/src/visit/ref.rs b/libs/@local/hashql/mir/src/visit/ref.rs index 9c7c5886309..16ed463c647 100644 --- a/libs/@local/hashql/mir/src/visit/ref.rs +++ b/libs/@local/hashql/mir/src/visit/ref.rs @@ -630,7 +630,7 @@ pub fn walk_rvalue_aggregate<'heap, T: Visitor<'heap> + ?Sized>( | AggregateKind::Closure => {} } - for operand in operands.iter() { + for operand in operands { visitor.visit_operand(location, operand)?; } @@ -656,7 +656,7 @@ pub fn walk_rvalue_apply<'heap, T: Visitor<'heap> + ?Sized>( ) -> T::Result { visitor.visit_operand(location, function)?; - for argument in arguments.iter() { + for argument in arguments { visitor.visit_operand(location, argument)?; } diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/.spec.toml b/libs/@local/hashql/mir/tests/ui/pass/inline/.spec.toml new file mode 100644 index 00000000000..b7b57c53123 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/.spec.toml @@ -0,0 +1 @@ +suite = "mir/pass/transform/inline" diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/closure-inline.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/closure-inline.jsonc new file mode 100644 index 00000000000..ad27820f419 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/closure-inline.jsonc @@ -0,0 +1,14 @@ +//@ run: pass +//@ description: Closure with input side effect requires main inline pass (not pre-inlined) +[ + "let", + "get_axis", + [ + "fn", + { "#tuple": [] }, + { "#struct": {} }, + 
"::graph::QueryTemporalAxes", + ["input", "axis", "::graph::QueryTemporalAxes"] + ], + ["get_axis"] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/closure-inline.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/closure-inline.stdout new file mode 100644 index 00000000000..b6c1f3a900d --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/closure-inline.stdout @@ -0,0 +1,119 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +fn {closure#3}(%0: ()) -> ::graph::TimeAxis { + let %1: ::graph::TimeAxis + + bb0(): { + %1 = input LOAD axis + + return %1 + } +} + +thunk get_axis:0() -> () -> ::graph::TimeAxis { + let %0: () -> ::graph::TimeAxis + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#3} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#2}() -> ::graph::TimeAxis { + let %0: () -> ::graph::TimeAxis + let %1: ::graph::TimeAxis + + bb0(): { + %0 = apply (get_axis:0 as FnPtr) + %1 = apply %0.0 %0.1 + + return %1 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +fn {closure#3}(%0: ()) -> ::graph::TimeAxis { + let %1: ::graph::TimeAxis + + bb0(): { + %1 = input LOAD axis + + return %1 + } +} + +thunk get_axis:0() -> () -> ::graph::TimeAxis { + let %0: () -> ::graph::TimeAxis + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#3} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#2}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + let %1: () + + bb0(): { + %1 = () + %0 = apply ({closure#3} as FnPtr) %1 + + return %0 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +fn {closure#3}(%0: ()) -> ::graph::TimeAxis { + let %1: ::graph::TimeAxis + + bb0(): { + %1 = input LOAD axis + + return %1 + } +} + +thunk get_axis:0() -> () -> ::graph::TimeAxis { + let %0: () -> ::graph::TimeAxis + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#3} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#2}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + let %1: () + let %2: () + let %3: ::graph::TimeAxis + + bb0(): { + %1 = () + %2 = %1 + + goto -> bb2() + } + + bb1(%0): { + return %0 + } + + bb2(): { + %3 = input LOAD axis + + goto -> bb1(%3) + } +} \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.jsonc new file mode 100644 index 00000000000..fff9af5cfe4 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.jsonc @@ -0,0 +1,70 @@ +//@ run: pass +//@ description: Deep call chain of heavy functions in filter exceeds aggressive_inline_cutoff +//@ suite#aggressive-inline-cutoff: 1 +//@ suite#rvalue-input-cost: 120.0 +//@ suite#max-cost: 20.0 +//@ suite#skip-output: true +// With rvalue_input=120 and max=20, 1 input = cost 120 (far exceeds max). +// Chain of 4 functions triggers depth cutoff warning. 
+[ + "::graph::tail::collect", + [ + "::graph::body::filter", + [ + "::graph::head::entities", + ["input", "axis", "::graph::QueryTemporalAxes"] + ], + [ + "fn", + { "#tuple": [] }, + { "#struct": { "vertex": "_" } }, + "_", + [ + "let", + "f4", + [ + "fn", + { "#tuple": [] }, + { "#struct": { "x": "Boolean" } }, + "_", + ["input", "a", "Boolean"] + ], + [ + "let", + "f3", + [ + "fn", + { "#tuple": [] }, + { "#struct": { "x": "Boolean" } }, + "_", + ["&&", ["f4", "x"], ["input", "b", "Boolean"]] + ], + [ + "let", + "f2", + [ + "fn", + { "#tuple": [] }, + { "#struct": { "x": "Boolean" } }, + "_", + ["&&", ["f3", "x"], ["input", "c", "Boolean"]] + ], + [ + "let", + "f1", + [ + "fn", + { "#tuple": [] }, + { "#struct": { "x": "Boolean" } }, + "_", + ["&&", ["f2", "x"], ["input", "d", "Boolean"]] + ], + ["f1", ["==", "vertex.link_data", { "#literal": null }]] + ] + ] + ] + ] + ] + //~^ WARNING Excessive inlining depth + ] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.stderr b/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.stderr new file mode 100644 index 00000000000..ca873735088 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.stderr @@ -0,0 +1,12 @@ +warning[mir::transform::excessive-inlining-depth]: Excessive inlining depth + ╭▸ +17 │ ┏ [ +18 │ ┃ "fn", +19 │ ┃ { "#tuple": [] }, +20 │ ┃ { "#struct": { "vertex": "_" } }, + ‡ ┃ +67 │ ┃ ] + │ ┗━━━━━┛ filter has deeply nested calls that could not be fully inlined + │ + ├ note: aggressive inlining stopped after 1 iterations + ╰ help: consider refactoring to reduce call chain depth \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.stdout new file mode 100644 index 00000000000..eb22dfe3cae --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/excessive-depth.stdout @@ -0,0 +1 @@ +[output intentionally skipped] \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/filter-aggressive.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-aggressive.jsonc new file mode 100644 index 00000000000..db62c30004d --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-aggressive.jsonc @@ -0,0 +1,19 @@ +//@ run: pass +//@ description: Filter body (marked *) triggers aggressive inlining of thunks with side effects +[ + "::graph::tail::collect", + [ + "::graph::body::filter", + [ + "::graph::head::entities", + ["input", "axis", "::graph::QueryTemporalAxes"] + ], + [ + "fn", + { "#tuple": [] }, + { "#struct": { "vertex": "_" } }, + "_", + ["==", "vertex.link_data", { "#literal": null }] + ] + ] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/filter-aggressive.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-aggressive.stdout new file mode 100644 index 00000000000..1132d09c95f --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-aggressive.stdout @@ -0,0 +1,132 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +thunk {thunk#1}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + + bb0(): { + %0 = input LOAD axis + + return %0 + } +} + +fn {closure@7}(%0: (), %1: ::graph::types::knowledge::entity::Entity) -> Boolean { + let %2: Boolean + + bb0(): { + %2 = %1.link_data == null + + return %2 + } +} + +*thunk {thunk#3}() -> List<::graph::types::knowledge::entity::Entity> { + let %0: ::graph::TimeAxis + let %1: 
List<::graph::types::knowledge::entity::Entity> + let %2: () + + bb0(): { + %0 = apply ({thunk#1} as FnPtr) + %2 = () + + graph read entities(%0) + |> filter({closure@7}, %2) + |> collect -> bb1(_) + } + + bb1(%1): { + return %1 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +thunk {thunk#1}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + + bb0(): { + %0 = input LOAD axis + + return %0 + } +} + +fn {closure@7}(%0: (), %1: ::graph::types::knowledge::entity::Entity) -> Boolean { + let %2: Boolean + + bb0(): { + %2 = %1.link_data == null + + return %2 + } +} + +*thunk {thunk#3}() -> List<::graph::types::knowledge::entity::Entity> { + let %0: ::graph::TimeAxis + let %1: List<::graph::types::knowledge::entity::Entity> + let %2: () + + bb0(): { + %0 = apply ({thunk#1} as FnPtr) + %2 = () + + graph read entities(%0) + |> filter({closure@7}, %2) + |> collect -> bb1(_) + } + + bb1(%1): { + return %1 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +thunk {thunk#1}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + + bb0(): { + %0 = input LOAD axis + + return %0 + } +} + +fn {closure@7}(%0: (), %1: ::graph::types::knowledge::entity::Entity) -> Boolean { + let %2: Boolean + + bb0(): { + %2 = %1.link_data == null + + return %2 + } +} + +*thunk {thunk#3}() -> List<::graph::types::knowledge::entity::Entity> { + let %0: ::graph::TimeAxis + let %1: List<::graph::types::knowledge::entity::Entity> + let %2: () + let %3: ::graph::TimeAxis + + bb0(): { + goto -> bb3() + } + + bb1(%1): { + return %1 + } + + bb2(%0): { + %2 = () + + graph read entities(%0) + |> filter({closure@7}, %2) + |> collect -> bb1(_) + } + + bb3(): { + %3 = input LOAD axis + + goto -> bb2(%3) + } +} \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/filter-with-ctor.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-with-ctor.jsonc new file mode 100644 index 00000000000..ca5c3ce43f9 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-with-ctor.jsonc @@ -0,0 +1,29 @@ +//@ run: pass +//@ description: Filter with ctor: ctors inlined by AR, thunk with input LOAD by aggressive inline +[ + "::graph::tail::collect", + [ + "::graph::body::filter", + [ + "::graph::head::entities", + ["input", "axis", "::graph::QueryTemporalAxes"] + ], + [ + "fn", + { "#tuple": [] }, + { "#struct": { "vertex": "_" } }, + "_", + [ + "==", + "vertex.id.entity_id.entity_uuid", + [ + "::graph::types::knowledge::entity::EntityUuid", + [ + "::core::uuid::Uuid", + { "#literal": "e2851dbb-7376-4959-9bca-f72cafc4448f" } + ] + ] + ] + ] + ] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/filter-with-ctor.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-with-ctor.stdout new file mode 100644 index 00000000000..0c0ffba04ae --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/filter-with-ctor.stdout @@ -0,0 +1,332 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +thunk {thunk#1}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + + bb0(): { + %0 = input LOAD axis + + return %0 + } +} + +fn {ctor#::core::uuid::Uuid}(%0: (), %1: String) -> ::core::uuid::Uuid { + let %2: ::core::uuid::Uuid + + bb0(): { + %2 = opaque(::core::uuid::Uuid, %1) + + return %2 + } +} + +thunk {thunk#2}() -> (String) -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + + bb0(): { + %0 = closure(({ctor#::core::uuid::Uuid} as FnPtr), ()) + + return %0 + } +} + 
+thunk {thunk#3}() -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + let %1: ::core::uuid::Uuid + + bb0(): { + %0 = apply ({thunk#2} as FnPtr) + %1 = apply %0.0 %0.1 "e2851dbb-7376-4959-9bca-f72cafc4448f" + + return %1 + } +} + +fn {ctor#::graph::types::knowledge::entity::EntityUuid}(%0: (), %1: ::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %2 = opaque(::graph::types::knowledge::entity::EntityUuid, %1) + + return %2 + } +} + +thunk {thunk#4}() -> (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %0: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = closure(({ctor#::graph::types::knowledge::entity::EntityUuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#5}() -> ::graph::types::knowledge::entity::EntityUuid { + let %0: ::core::uuid::Uuid + let %1: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = apply ({thunk#3} as FnPtr) + %1 = apply ({thunk#4} as FnPtr) + %2 = apply %1.0 %1.1 %0 + + return %2 + } +} + +fn {closure@7}(%0: (), %1: ::graph::types::knowledge::entity::Entity) -> Boolean { + let %2: ::graph::types::knowledge::entity::EntityUuid + let %3: Boolean + + bb0(): { + %2 = apply ({thunk#5} as FnPtr) + %3 = %1.id.entity_id.entity_uuid == %2 + + return %3 + } +} + +*thunk {thunk#7}() -> List<::graph::types::knowledge::entity::Entity> { + let %0: ::graph::TimeAxis + let %1: List<::graph::types::knowledge::entity::Entity> + let %2: () + + bb0(): { + %0 = apply ({thunk#1} as FnPtr) + %2 = () + + graph read entities(%0) + |> filter({closure@7}, %2) + |> collect -> bb1(_) + } + + bb1(%1): { + return %1 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +thunk {thunk#1}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + + bb0(): { + %0 = input LOAD axis + + return %0 + } +} + +fn {ctor#::core::uuid::Uuid}(%0: (), %1: String) -> ::core::uuid::Uuid { + let %2: ::core::uuid::Uuid + + bb0(): { + %2 = opaque(::core::uuid::Uuid, %1) + + return %2 + } +} + +thunk {thunk#2}() -> (String) -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + + bb0(): { + %0 = closure(({ctor#::core::uuid::Uuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#3}() -> ::core::uuid::Uuid { + let %0: ::core::uuid::Uuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + + return %0 + } +} + +fn {ctor#::graph::types::knowledge::entity::EntityUuid}(%0: (), %1: ::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %2 = opaque(::graph::types::knowledge::entity::EntityUuid, %1) + + return %2 + } +} + +thunk {thunk#4}() -> (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %0: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = closure(({ctor#::graph::types::knowledge::entity::EntityUuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#5}() -> ::graph::types::knowledge::entity::EntityUuid { + let %0: ::core::uuid::Uuid + let %1: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %1 = opaque(::graph::types::knowledge::entity::EntityUuid, %0) + + return %1 + } +} + +fn {closure@7}(%0: (), %1: 
::graph::types::knowledge::entity::Entity) -> Boolean { + let %2: Boolean + let %3: ::core::uuid::Uuid + let %4: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %3 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %4 = opaque(::graph::types::knowledge::entity::EntityUuid, %3) + %2 = %1.id.entity_id.entity_uuid == %4 + + return %2 + } +} + +*thunk {thunk#7}() -> List<::graph::types::knowledge::entity::Entity> { + let %0: ::graph::TimeAxis + let %1: List<::graph::types::knowledge::entity::Entity> + let %2: () + + bb0(): { + %0 = apply ({thunk#1} as FnPtr) + %2 = () + + graph read entities(%0) + |> filter({closure@7}, %2) + |> collect -> bb1(_) + } + + bb1(%1): { + return %1 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +thunk {thunk#1}() -> ::graph::TimeAxis { + let %0: ::graph::TimeAxis + + bb0(): { + %0 = input LOAD axis + + return %0 + } +} + +fn {ctor#::core::uuid::Uuid}(%0: (), %1: String) -> ::core::uuid::Uuid { + let %2: ::core::uuid::Uuid + + bb0(): { + %2 = opaque(::core::uuid::Uuid, %1) + + return %2 + } +} + +thunk {thunk#2}() -> (String) -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + + bb0(): { + %0 = closure(({ctor#::core::uuid::Uuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#3}() -> ::core::uuid::Uuid { + let %0: ::core::uuid::Uuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + + return %0 + } +} + +fn {ctor#::graph::types::knowledge::entity::EntityUuid}(%0: (), %1: ::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %2 = opaque(::graph::types::knowledge::entity::EntityUuid, %1) + + return %2 + } +} + +thunk {thunk#4}() -> (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %0: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = closure(({ctor#::graph::types::knowledge::entity::EntityUuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#5}() -> ::graph::types::knowledge::entity::EntityUuid { + let %0: ::core::uuid::Uuid + let %1: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %1 = opaque(::graph::types::knowledge::entity::EntityUuid, %0) + + return %1 + } +} + +fn {closure@7}(%0: (), %1: ::graph::types::knowledge::entity::Entity) -> Boolean { + let %2: Boolean + let %3: ::core::uuid::Uuid + let %4: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %3 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %4 = opaque(::graph::types::knowledge::entity::EntityUuid, %3) + %2 = %1.id.entity_id.entity_uuid == %4 + + return %2 + } +} + +*thunk {thunk#7}() -> List<::graph::types::knowledge::entity::Entity> { + let %0: ::graph::TimeAxis + let %1: List<::graph::types::knowledge::entity::Entity> + let %2: () + let %3: ::graph::TimeAxis + + bb0(): { + goto -> bb3() + } + + bb1(%1): { + return %1 + } + + bb2(%0): { + %2 = () + + graph read entities(%0) + |> filter({closure@7}, %2) + |> collect -> bb1(_) + } + + bb3(): { + %3 = input LOAD axis + + goto -> bb2(%3) + } +} \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/heuristic-inline.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/heuristic-inline.jsonc new file mode 100644 index 00000000000..87353a22eab --- /dev/null +++ 
b/libs/@local/hashql/mir/tests/ui/pass/inline/heuristic-inline.jsonc @@ -0,0 +1,25 @@ +//@ run: pass +//@ description: Small function with side effect (input) gets inlined via normal heuristics (cost < 15) +[ + "let", + "load_flag", + [ + "fn", + { "#tuple": [] }, + { "#struct": {} }, + "Boolean", + ["input", "flag", "Boolean"] + ], + [ + "let", + "use_flag", + [ + "fn", + { "#tuple": [] }, + { "#struct": {} }, + "Boolean", + ["&&", ["load_flag"], ["load_flag"]] + ], + ["use_flag"] + ] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/heuristic-inline.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/heuristic-inline.stdout new file mode 100644 index 00000000000..21c4c182f62 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/heuristic-inline.stdout @@ -0,0 +1,247 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +fn {closure#7}(%0: ()) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = input LOAD flag + + return %1 + } +} + +thunk load_flag:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#7} as FnPtr), %1) + + return %0 + } +} + +fn {closure#10}(%0: ()) -> Boolean { + let %1: () -> Boolean + let %2: Boolean + let %3: Boolean + let %4: () -> Boolean + let %5: Boolean + + bb0(): { + %1 = apply (load_flag:0 as FnPtr) + %2 = apply %1.0 %1.1 + + switchInt(%2) -> [0: bb2(), 1: bb1()] + } + + bb1(): { + %4 = apply (load_flag:0 as FnPtr) + %5 = apply %4.0 %4.1 + + goto -> bb3(%5) + } + + bb2(): { + goto -> bb3(0) + } + + bb3(%3): { + return %3 + } +} + +thunk use_flag:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#10} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#6}() -> Boolean { + let %0: () -> Boolean + let %1: Boolean + + bb0(): { + %0 = apply (use_flag:0 as FnPtr) + %1 = apply %0.0 %0.1 + + return %1 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +fn {closure#7}(%0: ()) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = input LOAD flag + + return %1 + } +} + +thunk load_flag:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#7} as FnPtr), %1) + + return %0 + } +} + +fn {closure#10}(%0: ()) -> Boolean { + let %1: Boolean + let %2: Boolean + let %3: () + let %4: () + + bb0(): { + %3 = () + %1 = apply ({closure#7} as FnPtr) %3 + + switchInt(%1) -> [0: bb2(), 1: bb1()] + } + + bb1(): { + %4 = () + %2 = apply ({closure#7} as FnPtr) %4 + + return %2 + } + + bb2(): { + return 0 + } +} + +thunk use_flag:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#10} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#6}() -> Boolean { + let %0: Boolean + let %1: () + + bb0(): { + %1 = () + %0 = apply ({closure#10} as FnPtr) %1 + + return %0 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +fn {closure#7}(%0: ()) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = input LOAD flag + + return %1 + } +} + +thunk load_flag:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#7} as FnPtr), %1) + + return %0 + } +} + +fn {closure#10}(%0: ()) -> Boolean { + let %1: Boolean + let %2: Boolean + let %3: () + let %4: () + let %5: () + let %6: Boolean + let %7: () + let %8: Boolean + + bb0(): { + %3 = () + %7 = %3 + + goto -> bb6() + } + + bb1(): { + %4 = () + %5 = %4 + + goto -> 
bb4() + } + + bb2(): { + return 0 + } + + bb3(%2): { + return %2 + } + + bb4(): { + %6 = input LOAD flag + + goto -> bb3(%6) + } + + bb5(%1): { + switchInt(%1) -> [0: bb2(), 1: bb1()] + } + + bb6(): { + %8 = input LOAD flag + + goto -> bb5(%8) + } +} + +thunk use_flag:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#10} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#6}() -> Boolean { + let %0: Boolean + let %1: () + + bb0(): { + %1 = () + %0 = apply ({closure#10} as FnPtr) %1 + + return %0 + } +} \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_budget_exhaustion.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_budget_exhaustion.snap new file mode 100644 index 00000000000..cdca541715d --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_budget_exhaustion.snap @@ -0,0 +1,169 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer) -> Boolean { + let %1: Boolean + let %2: Boolean + let %3: Boolean + let %4: Boolean + let %5: Boolean + + bb0(): { + %1 = %0 == 1 + %2 = %0 > 2 + %3 = %0 < 3 + %4 = %0 == 4 + %5 = %0 > 5 + + return %5 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Boolean + let %1: Boolean + let %2: Boolean + let %3: Boolean + let %4: Boolean + let %5: Boolean + let %6: Boolean + + bb0(): { + %0 = apply ({def@0} as FnPtr) 1 + %1 = apply ({def@0} as FnPtr) 2 + %2 = apply ({def@0} as FnPtr) 3 + %3 = apply ({def@0} as FnPtr) 4 + %4 = apply ({def@0} as FnPtr) 5 + %5 = apply ({def@0} as FnPtr) 6 + %6 = %0 == %1 + + return %6 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer) -> Boolean { + let %1: Boolean + let %2: Boolean + let %3: Boolean + let %4: Boolean + let %5: Boolean + + bb0(): { + %1 = %0 == 1 + %2 = %0 > 2 + %3 = %0 < 3 + %4 = %0 == 4 + %5 = %0 > 5 + + return %5 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Boolean + let %1: Boolean + let %2: Boolean + let %3: Boolean + let %4: Boolean + let %5: Boolean + let %6: Boolean + let %7: Integer + let %8: Boolean + let %9: Boolean + let %10: Boolean + let %11: Boolean + let %12: Boolean + let %13: Integer + let %14: Boolean + let %15: Boolean + let %16: Boolean + let %17: Boolean + let %18: Boolean + let %19: Integer + let %20: Boolean + let %21: Boolean + let %22: Boolean + let %23: Boolean + let %24: Boolean + let %25: Integer + let %26: Boolean + let %27: Boolean + let %28: Boolean + let %29: Boolean + let %30: Boolean + + bb0(): { + %25 = 1 + + goto -> bb8() + } + + bb1(%5): { + %6 = %0 == %1 + + return %6 + } + + bb2(): { + %8 = %7 == 1 + %9 = %7 > 2 + %10 = %7 < 3 + %11 = %7 == 4 + %12 = %7 > 5 + + goto -> bb1(%12) + } + + bb3(%3): { + %4 = apply ({def@0} as FnPtr) 5 + %7 = 6 + + goto -> bb2() + } + + bb4(): { + %14 = %13 == 1 + %15 = %13 > 2 + %16 = %13 < 3 + %17 = %13 == 4 + %18 = %13 > 5 + + goto -> bb3(%18) + } + + bb5(%1): { + %2 = apply ({def@0} as FnPtr) 3 + %13 = 4 + + goto -> bb4() + } + + bb6(): { + %20 = %19 == 1 + %21 = %19 > 2 + %22 = %19 < 3 + %23 = %19 == 4 + %24 = %19 > 5 + + goto -> bb5(%24) + } + + bb7(%0): { + %19 = 2 + + goto -> bb6() + } + + bb8(): { + %26 = %25 == 1 + %27 = %25 > 2 + %28 = %25 < 3 + %29 = %25 == 4 + %30 = %25 > 5 + + goto -> bb7(%30) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_chained_calls.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_chained_calls.snap new 
file mode 100644 index 00000000000..10bf067c678 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_chained_calls.snap @@ -0,0 +1,87 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer) -> Integer { + bb0(): { + return %0 + } +} + +fn {closure@4294967040}(%0: Integer) -> Integer { + let %1: Integer + + bb0(): { + %1 = apply ({def@0} as FnPtr) %0 + + return %1 + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: Integer + + bb0(): { + %0 = apply ({def@1} as FnPtr) 1 + + return %0 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer) -> Integer { + bb0(): { + return %0 + } +} + +fn {closure@4294967040}(%0: Integer) -> Integer { + let %1: Integer + let %2: Integer + + bb0(): { + %2 = %0 + + goto -> bb2() + } + + bb1(%1): { + return %1 + } + + bb2(): { + goto -> bb1(%2) + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: Integer + let %1: Integer + let %2: Integer + let %3: Integer + + bb0(): { + %1 = 1 + + goto -> bb2() + } + + bb1(%0): { + return %0 + } + + bb2(): { + %3 = %1 + + goto -> bb4() + } + + bb3(%2): { + goto -> bb1(%2) + } + + bb4(): { + goto -> bb3(%3) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_continuation_terminator.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_continuation_terminator.snap new file mode 100644 index 00000000000..4f54de0d4d6 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_continuation_terminator.snap @@ -0,0 +1,51 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer) -> Integer { + bb0(): { + return %0 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Integer + let %1: Boolean + + bb0(): { + %0 = apply ({def@0} as FnPtr) 5 + %1 = %0 == 10 + + return %1 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer) -> Integer { + bb0(): { + return %0 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Integer + let %1: Boolean + let %2: Integer + + bb0(): { + %2 = 5 + + goto -> bb2() + } + + bb1(%0): { + %1 = %0 == 10 + + return %1 + } + + bb2(): { + goto -> bb1(%2) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_multiple_args.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_multiple_args.snap new file mode 100644 index 00000000000..206f3dfef35 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_multiple_args.snap @@ -0,0 +1,73 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer, %1: Integer, %2: Integer) -> Boolean { + let %3: Boolean + let %4: Boolean + let %5: Boolean + + bb0(): { + %3 = %0 == %1 + %4 = %1 == %2 + %5 = %3 & %4 + + return %5 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Boolean + + bb0(): { + %0 = apply ({def@0} as FnPtr) 1 2 3 + + return %0 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer, %1: Integer, %2: Integer) -> Boolean { + let %3: Boolean + let %4: Boolean + let %5: Boolean + + bb0(): { + %3 = %0 == %1 + %4 = %1 == %2 + %5 = %3 & %4 + + return %5 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Boolean + let %1: Integer + let %2: Integer + let %3: Integer + let %4: Boolean + let %5: Boolean + let %6: Boolean + + bb0(): { + %1 = 1 + %2 = 2 + %3 = 3 + + goto -> bb2() + 
} + + bb1(%0): { + return %0 + } + + bb2(): { + %4 = %1 == %2 + %5 = %2 == %3 + %6 = %4 & %5 + + goto -> bb1(%6) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_multiple_blocks.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_multiple_blocks.snap new file mode 100644 index 00000000000..45b0dadb8c8 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_multiple_blocks.snap @@ -0,0 +1,98 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer, %1: Integer) -> Integer { + let %2: Boolean + let %3: Integer + + bb0(): { + %2 = %0 > %1 + + switchInt(%2) -> [0: bb2(), 1: bb1()] + } + + bb1(): { + goto -> bb3(%0) + } + + bb2(): { + goto -> bb3(%1) + } + + bb3(%3): { + return %3 + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: Integer + + bb0(): { + %0 = apply ({def@0} as FnPtr) 10 20 + + return %0 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer, %1: Integer) -> Integer { + let %2: Boolean + let %3: Integer + + bb0(): { + %2 = %0 > %1 + + switchInt(%2) -> [0: bb2(), 1: bb1()] + } + + bb1(): { + goto -> bb3(%0) + } + + bb2(): { + goto -> bb3(%1) + } + + bb3(%3): { + return %3 + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: Integer + let %1: Integer + let %2: Integer + let %3: Boolean + let %4: Integer + + bb0(): { + %1 = 10 + %2 = 20 + + goto -> bb2() + } + + bb1(%0): { + return %0 + } + + bb2(): { + %3 = %1 > %2 + + switchInt(%3) -> [0: bb4(), 1: bb3()] + } + + bb3(): { + goto -> bb5(%1) + } + + bb4(): { + goto -> bb5(%2) + } + + bb5(%4): { + goto -> bb1(%4) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_projection_assignment.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_projection_assignment.snap new file mode 100644 index 00000000000..a1cb9e96b5c --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_projection_assignment.snap @@ -0,0 +1,55 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer) -> Integer { + bb0(): { + return %0 + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: (Integer, Integer) + let %1: Integer + + bb0(): { + %0 = (0, 0) + %0.0 = apply ({def@0} as FnPtr) 5 + %1 = %0.0 + + return %1 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer) -> Integer { + bb0(): { + return %0 + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: (Integer, Integer) + let %1: Integer + let %2: Integer + let %3: Integer + + bb0(): { + %0 = (0, 0) + %3 = 5 + + goto -> bb2() + } + + bb1(%2): { + %0.0 = %2 + %1 = %0.0 + + return %1 + } + + bb2(): { + goto -> bb1(%3) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_recursive_not_inlined.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_recursive_not_inlined.snap new file mode 100644 index 00000000000..adf8c4d42bc --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_recursive_not_inlined.snap @@ -0,0 +1,77 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer) -> Integer { + let %1: Boolean + let %2: Integer + let %3: Integer + + bb0(): { + %1 = %0 == 0 + + switchInt(%1) -> [0: bb2(), 1: bb1()] + } + + bb1(): { + goto -> bb3(%0) + } + + bb2(): { + %3 = apply ({def@0} as FnPtr) %0 + + goto -> bb3(%3) + } + + bb3(%2): { + return %2 + } 
+} + +thunk {thunk@4294967040}() -> Integer { + let %0: Integer + + bb0(): { + %0 = apply ({def@0} as FnPtr) 1 + + return %0 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer) -> Integer { + let %1: Boolean + let %2: Integer + let %3: Integer + + bb0(): { + %1 = %0 == 0 + + switchInt(%1) -> [0: bb2(), 1: bb1()] + } + + bb1(): { + goto -> bb3(%0) + } + + bb2(): { + %3 = apply ({def@0} as FnPtr) %0 + + goto -> bb3(%3) + } + + bb3(%2): { + return %2 + } +} + +thunk {thunk@4294967040}() -> Integer { + let %0: Integer + + bb0(): { + %0 = apply ({def@0} as FnPtr) 1 + + return %0 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/inline_simple_leaf.snap b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_simple_leaf.snap new file mode 100644 index 00000000000..c273e3a30ef --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/inline_simple_leaf.snap @@ -0,0 +1,57 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/inline/tests.rs +expression: output +--- +fn {closure@4294967040}(%0: Integer) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = %0 == %0 + + return %1 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Boolean + + bb0(): { + %0 = apply ({def@0} as FnPtr) 21 + + return %0 + } +} + +================= After Inlining ================= + +fn {closure@4294967040}(%0: Integer) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = %0 == %0 + + return %1 + } +} + +thunk {thunk@4294967040}() -> Boolean { + let %0: Boolean + let %1: Integer + let %2: Boolean + + bb0(): { + %1 = 21 + + goto -> bb2() + } + + bb1(%0): { + return %0 + } + + bb2(): { + %2 = %1 == %1 + + goto -> bb1(%2) + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/nested-ctor.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/nested-ctor.jsonc new file mode 100644 index 00000000000..762f09ffced --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/nested-ctor.jsonc @@ -0,0 +1,6 @@ +//@ run: pass +//@ description: Nested constructors are both inlined (via AR pre-inlining since trivial) +[ + "::graph::types::knowledge::entity::EntityUuid", + ["::core::uuid::Uuid", { "#literal": "e2851dbb-7376-4959-9bca-f72cafc4448f" }] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/nested-ctor.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/nested-ctor.stdout new file mode 100644 index 00000000000..e5c4b5eb2bc --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/nested-ctor.stdout @@ -0,0 +1,195 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +fn {ctor#::core::uuid::Uuid}(%0: (), %1: String) -> ::core::uuid::Uuid { + let %2: ::core::uuid::Uuid + + bb0(): { + %2 = opaque(::core::uuid::Uuid, %1) + + return %2 + } +} + +thunk {thunk#0}() -> (String) -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + + bb0(): { + %0 = closure(({ctor#::core::uuid::Uuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#1}() -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + let %1: ::core::uuid::Uuid + + bb0(): { + %0 = apply ({thunk#0} as FnPtr) + %1 = apply %0.0 %0.1 "e2851dbb-7376-4959-9bca-f72cafc4448f" + + return %1 + } +} + +fn {ctor#::graph::types::knowledge::entity::EntityUuid}(%0: (), %1: ::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %2 = opaque(::graph::types::knowledge::entity::EntityUuid, %1) + + return %2 + } +} + +thunk {thunk#2}() -> 
(::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %0: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = closure(({ctor#::graph::types::knowledge::entity::EntityUuid} as FnPtr), ()) + + return %0 + } +} + +*thunk {thunk#3}() -> ::graph::types::knowledge::entity::EntityUuid { + let %0: ::core::uuid::Uuid + let %1: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = apply ({thunk#1} as FnPtr) + %1 = apply ({thunk#2} as FnPtr) + %2 = apply %1.0 %1.1 %0 + + return %2 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +fn {ctor#::core::uuid::Uuid}(%0: (), %1: String) -> ::core::uuid::Uuid { + let %2: ::core::uuid::Uuid + + bb0(): { + %2 = opaque(::core::uuid::Uuid, %1) + + return %2 + } +} + +thunk {thunk#0}() -> (String) -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + + bb0(): { + %0 = closure(({ctor#::core::uuid::Uuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#1}() -> ::core::uuid::Uuid { + let %0: ::core::uuid::Uuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + + return %0 + } +} + +fn {ctor#::graph::types::knowledge::entity::EntityUuid}(%0: (), %1: ::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %2 = opaque(::graph::types::knowledge::entity::EntityUuid, %1) + + return %2 + } +} + +thunk {thunk#2}() -> (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %0: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = closure(({ctor#::graph::types::knowledge::entity::EntityUuid} as FnPtr), ()) + + return %0 + } +} + +*thunk {thunk#3}() -> ::graph::types::knowledge::entity::EntityUuid { + let %0: ::core::uuid::Uuid + let %1: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %1 = opaque(::graph::types::knowledge::entity::EntityUuid, %0) + + return %1 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +fn {ctor#::core::uuid::Uuid}(%0: (), %1: String) -> ::core::uuid::Uuid { + let %2: ::core::uuid::Uuid + + bb0(): { + %2 = opaque(::core::uuid::Uuid, %1) + + return %2 + } +} + +thunk {thunk#0}() -> (String) -> ::core::uuid::Uuid { + let %0: (String) -> ::core::uuid::Uuid + + bb0(): { + %0 = closure(({ctor#::core::uuid::Uuid} as FnPtr), ()) + + return %0 + } +} + +thunk {thunk#1}() -> ::core::uuid::Uuid { + let %0: ::core::uuid::Uuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + + return %0 + } +} + +fn {ctor#::graph::types::knowledge::entity::EntityUuid}(%0: (), %1: ::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %2: ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %2 = opaque(::graph::types::knowledge::entity::EntityUuid, %1) + + return %2 + } +} + +thunk {thunk#2}() -> (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid { + let %0: (::core::uuid::Uuid) -> ::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = closure(({ctor#::graph::types::knowledge::entity::EntityUuid} as FnPtr), ()) + + return %0 + } +} + +*thunk {thunk#3}() -> ::graph::types::knowledge::entity::EntityUuid { + let %0: ::core::uuid::Uuid + let %1: 
::graph::types::knowledge::entity::EntityUuid + + bb0(): { + %0 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %1 = opaque(::graph::types::knowledge::entity::EntityUuid, %0) + + return %1 + } +} \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/simple-ctor.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/simple-ctor.jsonc new file mode 100644 index 00000000000..0271408e3ba --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/simple-ctor.jsonc @@ -0,0 +1,3 @@ +//@ run: pass +//@ description: Simple constructor is inlined (via AR pre-inlining since trivial) +["Some", { "#literal": 42 }] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/simple-ctor.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/simple-ctor.stdout new file mode 100644 index 00000000000..71370dab2da --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/simple-ctor.stdout @@ -0,0 +1,97 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +fn {ctor#::core::option::Some}(%0: (), %1: Integer) -> ::core::option::Some { + let %2: ::core::option::Some + + bb0(): { + %2 = opaque(::core::option::Some, %1) + + return %2 + } +} + +thunk {thunk#0}() -> (Integer) -> ::core::option::Some { + let %0: (Integer) -> ::core::option::Some + + bb0(): { + %0 = closure(({ctor#::core::option::Some} as FnPtr), ()) + + return %0 + } +} + +*thunk {thunk#1}() -> ::core::option::Some { + let %0: (Integer) -> ::core::option::Some + let %1: ::core::option::Some + + bb0(): { + %0 = apply ({thunk#0} as FnPtr) + %1 = apply %0.0 %0.1 42 + + return %1 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +fn {ctor#::core::option::Some}(%0: (), %1: Integer) -> ::core::option::Some { + let %2: ::core::option::Some + + bb0(): { + %2 = opaque(::core::option::Some, %1) + + return %2 + } +} + +thunk {thunk#0}() -> (Integer) -> ::core::option::Some { + let %0: (Integer) -> ::core::option::Some + + bb0(): { + %0 = closure(({ctor#::core::option::Some} as FnPtr), ()) + + return %0 + } +} + +*thunk {thunk#1}() -> ::core::option::Some { + let %0: ::core::option::Some + + bb0(): { + %0 = opaque(::core::option::Some, 42) + + return %0 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +fn {ctor#::core::option::Some}(%0: (), %1: Integer) -> ::core::option::Some { + let %2: ::core::option::Some + + bb0(): { + %2 = opaque(::core::option::Some, %1) + + return %2 + } +} + +thunk {thunk#0}() -> (Integer) -> ::core::option::Some { + let %0: (Integer) -> ::core::option::Some + + bb0(): { + %0 = closure(({ctor#::core::option::Some} as FnPtr), ()) + + return %0 + } +} + +*thunk {thunk#1}() -> ::core::option::Some { + let %0: ::core::option::Some + + bb0(): { + %0 = opaque(::core::option::Some, 42) + + return %0 + } +} \ No newline at end of file diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/too-large-to-inline.jsonc b/libs/@local/hashql/mir/tests/ui/pass/inline/too-large-to-inline.jsonc new file mode 100644 index 00000000000..9bea991afbd --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/too-large-to-inline.jsonc @@ -0,0 +1,22 @@ +//@ run: pass +//@ description: Function with cost > max is not inlined by normal heuristics +//@ suite#rvalue-input-cost: 100.0 +//@ suite#max-cost: 50.0 +// With rvalue_input=100 and max=50, 1 input = cost 100 (exceeds max of 50). 
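+// Expected outcome: `big_func` bottoms out in a single `input` load, so its
+// body is costed at 100 and the main inline pass refuses it; the "Inlined
+// MIR" section of the expected stdout below matches the pre-inlining output
+// exactly, with the `apply` of {closure#5} left in place.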
+[ + "let", + "big_func", + [ + "fn", + { "#tuple": [] }, + { "#struct": {} }, + "Boolean", + ["input", "v", "Boolean"] + ], + [ + "let", + "caller", + ["fn", { "#tuple": [] }, { "#struct": {} }, "Boolean", ["big_func"]], + ["caller"] + ] +] diff --git a/libs/@local/hashql/mir/tests/ui/pass/inline/too-large-to-inline.stdout b/libs/@local/hashql/mir/tests/ui/pass/inline/too-large-to-inline.stdout new file mode 100644 index 00000000000..f67b05b55cd --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/inline/too-large-to-inline.stdout @@ -0,0 +1,179 @@ +════ Initial MIR ═══════════════════════════════════════════════════════════════ + +fn {closure#5}(%0: ()) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = input LOAD v + + return %1 + } +} + +thunk big_func:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#5} as FnPtr), %1) + + return %0 + } +} + +fn {closure#7}(%0: ()) -> Boolean { + let %1: () -> Boolean + let %2: Boolean + + bb0(): { + %1 = apply (big_func:0 as FnPtr) + %2 = apply %1.0 %1.1 + + return %2 + } +} + +thunk caller:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#7} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#4}() -> Boolean { + let %0: () -> Boolean + let %1: Boolean + + bb0(): { + %0 = apply (caller:0 as FnPtr) + %1 = apply %0.0 %0.1 + + return %1 + } +} + +════ Pre-inlining MIR ══════════════════════════════════════════════════════════ + +fn {closure#5}(%0: ()) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = input LOAD v + + return %1 + } +} + +thunk big_func:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#5} as FnPtr), %1) + + return %0 + } +} + +fn {closure#7}(%0: ()) -> Boolean { + let %1: Boolean + let %2: () + + bb0(): { + %2 = () + %1 = apply ({closure#5} as FnPtr) %2 + + return %1 + } +} + +thunk caller:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#7} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#4}() -> Boolean { + let %0: Boolean + let %1: () + + bb0(): { + %1 = () + %0 = apply ({closure#5} as FnPtr) %1 + + return %0 + } +} + +════ Inlined MIR ═══════════════════════════════════════════════════════════════ + +fn {closure#5}(%0: ()) -> Boolean { + let %1: Boolean + + bb0(): { + %1 = input LOAD v + + return %1 + } +} + +thunk big_func:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#5} as FnPtr), %1) + + return %0 + } +} + +fn {closure#7}(%0: ()) -> Boolean { + let %1: Boolean + let %2: () + + bb0(): { + %2 = () + %1 = apply ({closure#5} as FnPtr) %2 + + return %1 + } +} + +thunk caller:0() -> () -> Boolean { + let %0: () -> Boolean + let %1: () + + bb0(): { + %1 = () + %0 = closure(({closure#7} as FnPtr), %1) + + return %0 + } +} + +*thunk {thunk#4}() -> Boolean { + let %0: Boolean + let %1: () + + bb0(): { + %1 = () + %0 = apply ({closure#5} as FnPtr) %1 + + return %0 + } +} \ No newline at end of file
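
A note on reading the "After Inlining" bodies in the snapshots above: the pass consistently splits the call site, copies the arguments into fresh locals, appends the callee's blocks with shifted ids, and rewrites the callee's `return` into a `goto` to a continuation block carrying the result (the resulting `goto` chains are presumably collapsed later by `cfg_simplify`). The sketch below is a deliberately minimal toy model of that stitching; `Block`, `Terminator`, and `stitch` are invented for illustration and are not the `hashql_mir` API.

```rust
// Toy model (invented types; not the hashql_mir API) of the block stitching
// the "After Inlining" snapshots exhibit: the callee's blocks are appended
// with their ids shifted, and its `return` becomes a `goto` to a
// continuation block in the caller.

#[derive(Clone, Debug)]
enum Terminator {
    Goto(usize), // goto -> bb<target>()
    Return,      // return from the body this block belongs to
}

#[derive(Clone, Debug)]
struct Block {
    stmts: Vec<String>, // statements kept as opaque text in this model
    term: Terminator,
}

/// Append the callee's blocks to the caller, shifting intra-callee `goto`
/// targets by the offset and redirecting the callee's `return` to
/// `continuation`. Returns the index of the callee's (shifted) entry block.
fn stitch(caller: &mut Vec<Block>, callee: &[Block], continuation: usize) -> usize {
    let offset = caller.len();
    for block in callee {
        let term = match block.term {
            Terminator::Goto(target) => Terminator::Goto(target + offset),
            Terminator::Return => Terminator::Goto(continuation),
        };
        caller.push(Block { stmts: block.stmts.clone(), term });
    }
    offset
}

fn main() {
    // Caller shaped like inline_continuation_terminator.snap: bb0 sets up
    // the argument, bb1 is the continuation that consumes the result.
    let mut caller = vec![
        Block { stmts: vec!["%2 = 5".into()], term: Terminator::Goto(0) },
        Block { stmts: vec!["%1 = %0 == 10".into()], term: Terminator::Return },
    ];
    // Callee `fn(%0) { return %0 }`, here a single block that just returns.
    let callee = vec![Block { stmts: vec![], term: Terminator::Return }];

    let entry = stitch(&mut caller, &callee, 1);
    caller[0].term = Terminator::Goto(entry); // bb0 now falls into the inlined body

    for (index, block) in caller.iter().enumerate() {
        println!("bb{index}: {:?} -> {:?}", block.stmts, block.term);
    }
}
```

Running the sketch prints a caller whose control flow follows the same bb0 -> bb2 -> bb1 shape as `inline_continuation_terminator.snap`: the entry block jumps into the stitched callee body, which in turn jumps to the continuation that held the original post-call code.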