diff --git a/.gitignore b/.gitignore
index a804ce4..968cf5b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,10 @@
 # Rust build artifacts
 /target
 
+# cargo-mutants output
+/mutants.out/
+/mutants.out.old/
+
 # Editor / OS noise
 *.rs.bk
 *.swp
diff --git a/crates/cu-profiler-cli/src/commands/baseline.rs b/crates/cu-profiler-cli/src/commands/baseline.rs
index ea2bd7d..309c726 100644
--- a/crates/cu-profiler-cli/src/commands/baseline.rs
+++ b/crates/cu-profiler-cli/src/commands/baseline.rs
@@ -67,3 +67,51 @@ pub fn approve(args: &BaselineApproveArgs, quiet: bool) -> Result<ExitCode> {
         )))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::args::{BaselineSaveArgs, CommonRun};
+
+    #[test]
+    fn save_records_simulated_scenarios_and_skips_unsimulated() {
+        let root = std::env::temp_dir().join(format!("cu-bl-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let logs_dir = root.join(".cu").join("logs");
+        std::fs::create_dir_all(&logs_dir).unwrap();
+        // The config declares two scenarios but only `good` gets a log file, so
+        // `missing` simulates to Unknown and must be left out of the baseline.
+        std::fs::write(
+            root.join("cu-profiler.toml"),
+            "[project]\nname=\"t\"\n[scenario.good]\n[scenario.missing]\n",
+        )
+        .unwrap();
+        std::fs::write(
+            logs_dir.join("good.log"),
+            "Program P invoke [1]\nProgram P consumed 1000 of 200000 compute units\nProgram P success",
+        )
+        .unwrap();
+        let baseline_path = root.join("baseline.json");
+        let args = BaselineSaveArgs {
+            common: CommonRun {
+                config: root.join("cu-profiler.toml"),
+                logs_dir,
+                scenarios: vec![],
+                tags: vec![],
+                samples: None,
+            },
+            baseline: baseline_path.clone(),
+        };
+        save(&args, true).expect("baseline save");
+        let store = BaselineStore::load(&baseline_path).unwrap();
+        let _ = std::fs::remove_dir_all(&root);
+        assert!(
+            store.get("good").is_some(),
+            "simulated scenario must be recorded"
+        );
+        assert!(
+            store.get("missing").is_none(),
+            "unsimulated (Unknown) scenario must be skipped"
+        );
+    }
+}
diff --git a/crates/cu-profiler-cli/src/commands/bench.rs b/crates/cu-profiler-cli/src/commands/bench.rs
index 119a940..bdc799d 100644
--- a/crates/cu-profiler-cli/src/commands/bench.rs
+++ b/crates/cu-profiler-cli/src/commands/bench.rs
@@ -1,6 +1,6 @@
 //! `cu-profiler bench` — turnkey real-CU path.
 //!
-//! `bench` validates a declarative [`BenchPlan`](cu_profiler_core::bench::BenchPlan),
+//! `bench` validates a declarative [`BenchPlan`],
 //! optionally builds the program with `cargo build-sbf`, then **delegates the real
 //! Mollusk measurement** to the Linux-only `cu-profiler-bench` executor, found over
 //! `PATH` (a runtime sibling, never a build dependency — so the main CLI keeps the
diff --git a/crates/cu-profiler-cli/src/commands/ci.rs b/crates/cu-profiler-cli/src/commands/ci.rs
index 436a9b6..566c617 100644
--- a/crates/cu-profiler-cli/src/commands/ci.rs
+++ b/crates/cu-profiler-cli/src/commands/ci.rs
@@ -58,3 +58,143 @@ fn write_artifact(path: &std::path::Path, contents: &str) -> Result<()> {
     std::fs::write(path, contents)?;
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::args::CommonRun;
+    use std::path::PathBuf;
+
+    fn temp_project(config_toml: &str, scenario: &str, log: &str) -> (PathBuf, RunArgs) {
+        let root = std::env::temp_dir().join(format!("cu-ci-{}-{scenario}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let logs_dir = root.join(".cu").join("logs");
+        std::fs::create_dir_all(&logs_dir).unwrap();
+        std::fs::write(logs_dir.join(format!("{scenario}.log")), log).unwrap();
+        std::fs::write(root.join("cu-profiler.toml"), config_toml).unwrap();
+        let run_args = RunArgs {
+            common: CommonRun {
+                config: root.join("cu-profiler.toml"),
+                logs_dir,
+                scenarios: vec![],
+                tags: vec![],
+                samples: None,
+            },
+            format: None,
+            output: None,
+            baseline: None,
+            strict: false,
+            fail_on_budget: false,
+            fail_on_regression: false,
+            fail_on_low_confidence: false,
+        };
+        (root, run_args)
+    }
+
+    #[test]
+    fn ci_config_default_enforces_budget() {
+        let log_blob = "Program P invoke [1]\n\
+                        Program P consumed 120000 of 200000 compute units\nProgram P success";
+        let config_toml = "[project]\nname=\"t\"\n[defaults]\nfail_on_budget=true\n\
+                           [scenario.over]\nbudget=100000\n";
+        let (root, args) = temp_project(config_toml, "over", log_blob);
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::BudgetOrRegression);
+    }
+
+    #[test]
+    fn ci_strict_fails_low_confidence() {
+        let log_blob = "Program P invoke [1]\nProgram log: CU_PROFILER_BEGIN name=x\n\
+                        Program P consumed 1000 of 200000 compute units\nProgram P success";
+        let config_toml = "[project]\nname=\"t\"\n[scenario.shaky]\n";
+        let (root, mut args) = temp_project(config_toml, "shaky", log_blob);
+        args.strict = true;
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::LowConfidence);
+    }
+
+    #[test]
+    fn ci_config_default_enforces_regression() {
+        // Save a baseline at 90k CU, then re-run `ci` against a 110k CU log. No CLI
+        // flag is set: `fail_on_regression` is enabled by the config default only, so
+        // the flag-or-config `||` must hold — an `||`→`&&` flip would hide the failure.
+        use crate::args::BaselineSaveArgs;
+
+        let config_toml = "[project]\nname=\"t\"\n[defaults]\n\
+                           fail_on_regression=true\nmax_regression_pct=5\n[scenario.s]\n";
+        let (root, mut args) = temp_project(
+            config_toml,
+            "s",
+            "Program P invoke [1]\nProgram P consumed 90000 of 200000 compute units\nProgram P success",
+        );
+        let baseline_path = root.join("baseline.json");
+
+        // Step 1: capture the 90k CU run as the baseline.
+        let save_args = BaselineSaveArgs {
+            baseline: baseline_path.clone(),
+            common: args.common.clone(),
+        };
+        crate::commands::baseline_save(&save_args, true).expect("baseline save");
+
+        // Step 2: overwrite the log with a 110k CU run (+22%, above the 5% allowance).
+        std::fs::write(
+            args.common.logs_dir.join("s.log"),
+            "Program P invoke [1]\nProgram P consumed 110000 of 200000 compute units\nProgram P success",
+        )
+        .unwrap();
+
+        // Step 3: `ci` against that baseline must fail on the config default alone.
+        args.baseline = Some(baseline_path);
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::BudgetOrRegression);
+    }
+
+    #[test]
+    fn ci_writes_configured_artifacts_into_nested_dirs() {
+        // The configured json_path lives in a directory that does not exist yet and
+        // must still be written — covers `write_artifact` and its parent-creation
+        // branch. The path is absolute (forward slashes are valid TOML everywhere),
+        // so the process CWD is never touched — safe under parallel test execution.
+        let root = std::env::temp_dir().join(format!("cu-ci-art-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let logs_dir = root.join(".cu").join("logs");
+        std::fs::create_dir_all(&logs_dir).unwrap();
+        let report_path = root.join("artifacts").join("report.json"); // parent dir is absent
+        let report_toml = report_path.to_string_lossy().replace('\\', "/");
+        let config_toml = format!(
+            "[project]\nname=\"t\"\n\
+             [output]\ndefault_format=\"table\"\njson_path=\"{report_toml}\"\n\
+             [scenario.s]\n"
+        );
+        std::fs::write(root.join("cu-profiler.toml"), &config_toml).unwrap();
+        std::fs::write(
+            logs_dir.join("s.log"),
+            "Program P invoke [1]\nProgram P consumed 1000 of 200000 compute units\nProgram P success",
+        )
+        .unwrap();
+        let args = RunArgs {
+            common: CommonRun {
+                config: root.join("cu-profiler.toml"),
+                logs_dir,
+                scenarios: vec![],
+                tags: vec![],
+                samples: None,
+            },
+            format: None,
+            output: None,
+            baseline: None,
+            strict: false,
+            fail_on_budget: false,
+            fail_on_regression: false,
+            fail_on_low_confidence: false,
+        };
+        let outcome = run(&args, true);
+        let written = report_path.exists();
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::Success);
+        assert!(written, "ci did not write the configured json artifact");
+    }
+}
diff --git a/crates/cu-profiler-cli/src/commands/comment.rs b/crates/cu-profiler-cli/src/commands/comment.rs
index 0e58bb7..0874ca7 100644
--- a/crates/cu-profiler-cli/src/commands/comment.rs
+++ b/crates/cu-profiler-cli/src/commands/comment.rs
@@ -359,4 +359,39 @@ mod tests {
         assert_eq!(find_in_page(&comments, "cu-profiler-report"), Some(2));
         assert_eq!(find_in_page(&comments, "other-marker"), None);
     }
+
+    fn comment_args() -> CommentArgs {
+        let common = crate::args::CommonRun {
+            config: "cu-profiler.toml".into(),
+            logs_dir: ".cu/logs".into(),
+            scenarios: vec![],
+            tags: vec![],
+            samples: None,
+        };
+        CommentArgs {
+            common,
+            input: None,
+            pr: None,
+            repo: None,
+            marker: "cu-profiler-report".into(),
+            dry_run: false,
+        }
+    }
+
+    #[test]
+    fn resolve_repo_uses_the_explicit_slug() {
+        let args = CommentArgs {
+            repo: Some("my-org/my-repo".into()),
+            ..comment_args()
+        };
+        let (owner, name) = resolve_repo(&args).unwrap();
+        assert_eq!((owner.as_str(), name.as_str()), ("my-org", "my-repo"));
+    }
+
+    #[test]
+    fn resolve_pr_prefers_the_explicit_flag() {
+        let mut explicit = comment_args();
+        explicit.pr = Some(42);
+        assert_eq!(resolve_pr(&explicit), Some(42));
+    }
 }
diff --git a/crates/cu-profiler-cli/src/commands/explain.rs b/crates/cu-profiler-cli/src/commands/explain.rs
index 034d6b2..0ce8f22 100644
--- a/crates/cu-profiler-cli/src/commands/explain.rs
+++ b/crates/cu-profiler-cli/src/commands/explain.rs
@@ -129,4 +129,66 @@ mod tests {
         assert!(text.contains("Confidence:"));
         assert!(text.contains("near_budget_limit") || text.contains("near its compute budget"));
     }
+
+    #[test]
+    fn explain_text_quantifies_a_scope_with_a_cu_snapshot() {
+        // When a scope has both a CU estimate and a percentage, the output must
+        // use the quantified form ("… CU (…%, …)") instead of "CU unknown".
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "swap",
+            "Program User111 invoke [1]\n\
+             Program log: CU_PROFILER_BEGIN name=validate cu=200000\n\
+             Program log: CU_PROFILER_END name=validate cu=188000\n\
+             Program User111 consumed 96000 of 100000 compute units\n\
+             Program User111 success",
+            true,
+        );
+        let profiled = Profiler::new().run(
+            &recorded,
+            &[Scenario::new("swap")],
+            None,
+            RunMetadata::recorded("0.1.0"),
+        );
+        let text = explain_text(&profiled.scenarios[0]);
+        assert!(text.contains("CU ("), "scope CU/percentage missing: {text}");
+        assert!(!text.contains("validate (parent: -) — CU unknown"));
+    }
+
+    #[test]
+    fn run_finds_and_explains_the_requested_scenario() {
+        // End-to-end through `run()` to exercise the `s.name == args.scenario`
+        // lookup: the narrowed run profiles only the requested scenario, so an
+        // `==`→`!=` flip finds nothing and yields an error rather than Success.
+        use crate::args::CommonRun;
+
+        let root = std::env::temp_dir().join(format!("cu-explain-run-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let logs_dir = root.join(".cu").join("logs");
+        std::fs::create_dir_all(&logs_dir).unwrap();
+        // The config lists two scenarios, so selection must be by name, not position.
+        std::fs::write(
+            root.join("cu-profiler.toml"),
+            "[project]\nname=\"t\"\n[scenario.a]\n[scenario.b]\n",
+        )
+        .unwrap();
+        std::fs::write(
+            logs_dir.join("b.log"),
+            "Program P invoke [1]\nProgram P consumed 1000 of 200000 compute units\nProgram P success",
+        )
+        .unwrap();
+        let args = ExplainArgs {
+            scenario: "b".into(),
+            common: CommonRun {
+                config: root.join("cu-profiler.toml"),
+                logs_dir,
+                scenarios: vec![],
+                tags: vec![],
+                samples: None,
+            },
+        };
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::Success);
+    }
 }
diff --git a/crates/cu-profiler-cli/src/commands/import.rs b/crates/cu-profiler-cli/src/commands/import.rs
index ffe812b..4ac2477 100644
--- a/crates/cu-profiler-cli/src/commands/import.rs
+++ b/crates/cu-profiler-cli/src/commands/import.rs
@@ -280,4 +280,40 @@ mod tests {
         let err = logs_from_response(&v, "SIG", "rpc").unwrap_err();
         assert!(err.to_string().contains("Invalid param"));
     }
+
+    #[cfg(feature = "remote")]
+    #[test]
+    fn max_rpc_bytes_is_thirty_two_mib() {
+        assert_eq!(MAX_RPC_BYTES, 32 * 1024 * 1024); // = 33_554_432 bytes
+    }
+
+    #[test]
+    fn import_from_file_writes_log_into_a_nested_logs_dir() {
+        use crate::args::ImportArgs;
+        let base = std::env::temp_dir().join(format!("cu-import-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&base);
+        std::fs::create_dir_all(&base).unwrap();
+        let json = base.join("tx.json");
+        std::fs::write(
+            &json,
+            r#"{"result":{"meta":{"logMessages":["Program P invoke [1]","Program P success"]}}}"#,
+        )
+        .unwrap();
+        // logs_dir has a parent component that does not exist yet → exercises the
+        // create_dir_all branch.
+        let logs_dir = base.join("nested").join("logs");
+        let args = ImportArgs {
+            file: Some(json),
+            signature: None,
+            rpc: "unused".into(),
+            commitment: "confirmed".into(),
+            name: Some("mytx".into()),
+            logs_dir: logs_dir.clone(),
+        };
+        let code = run(&args, true);
+        let written = std::fs::read_to_string(logs_dir.join("mytx.log"));
+        let _ = std::fs::remove_dir_all(&base); // before asserting, so failures can't leak it
+        assert_eq!(code.expect("import from file"), ExitCode::Success);
+        assert!(written.unwrap().contains("Program P invoke [1]"));
+    }
 }
diff --git a/crates/cu-profiler-cli/src/commands/init.rs b/crates/cu-profiler-cli/src/commands/init.rs
index d753ad0..86c001b 100644
--- a/crates/cu-profiler-cli/src/commands/init.rs
+++ b/crates/cu-profiler-cli/src/commands/init.rs
@@ -201,3 +201,37 @@ fn write_file(path: &Path, contents: &str, force: bool, quiet: bool) -> Result<(
     }
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn write_file_respects_the_force_flag() {
+        let root = std::env::temp_dir().join(format!("cu-init-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        std::fs::create_dir_all(&root).unwrap();
+        let target = root.join("keep.txt");
+        std::fs::write(&target, "original").unwrap();
+
+        // Without `force`, an existing file keeps its contents.
+        write_file(&target, "replacement", false, true).unwrap();
+        assert_eq!(std::fs::read_to_string(&target).unwrap(), "original");
+
+        // With `force`, the existing file is replaced.
+        write_file(&target, "replacement", true, true).unwrap();
+        assert_eq!(std::fs::read_to_string(&target).unwrap(), "replacement");
+
+        let _ = std::fs::remove_dir_all(&root);
+    }
+
+    #[test]
+    fn write_file_creates_missing_parents() {
+        let root = std::env::temp_dir().join(format!("cu-init-p-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let target = root.join("a").join("b").join("new.txt");
+        write_file(&target, "hi", false, true).unwrap();
+        assert_eq!(std::fs::read_to_string(&target).unwrap(), "hi");
+        let _ = std::fs::remove_dir_all(&root);
+    }
+}
diff --git a/crates/cu-profiler-cli/src/commands/mod.rs b/crates/cu-profiler-cli/src/commands/mod.rs
index b0369a8..986f204 100644
--- a/crates/cu-profiler-cli/src/commands/mod.rs
+++ b/crates/cu-profiler-cli/src/commands/mod.rs
@@ -325,8 +325,47 @@ mod tests {
         let path = std::env::temp_dir().join(format!("cu-cap-{}.txt", std::process::id()));
         std::fs::write(&path, b"0123456789").unwrap();
         assert!(read_to_string_capped(&path, 100).is_ok());
+        // Boundary: a file exactly at the limit is allowed (`len > max`, not `>=`).
+        assert!(
+            read_to_string_capped(&path, 10).is_ok(),
+            "a file exactly at the cap must be allowed"
+        );
         let err = read_to_string_capped(&path, 4).unwrap_err();
         assert!(err.to_string().contains("limit"), "{err}");
         let _ = std::fs::remove_file(&path);
     }
+
+    #[test]
+    fn max_log_bytes_is_sixty_four_mib() {
+        assert_eq!(MAX_LOG_BYTES, 64 * 1024 * 1024); // = 67_108_864 bytes
+    }
+
+    #[test]
+    fn build_registry_includes_builtins_and_config_labels() {
+        let cfg = Config::from_toml(
+            "[project]\nname = \"x\"\n[program_labels]\n\
+             \"MyProg111111111111111111111111111111111111\" = \"My Program\"\n",
+        )
+        .unwrap();
+        let registry = build_registry(&cfg);
+        assert!(
+            registry.label("11111111111111111111111111111111").is_some(),
+            "builtins missing (registry was Default-empty?)"
+        );
+        assert_eq!(
+            registry.label("MyProg111111111111111111111111111111111111"),
+            Some("My Program"),
+            "config label missing (registry was empty?)"
+        );
+    }
+
+    #[test]
+    fn emit_creates_missing_parent_directories() {
+        let root = std::env::temp_dir().join(format!("cu-emit-{}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let target = root.join("nested").join("report.txt");
+        emit("hello", Some(&target), true).expect("emit should create parents and write");
+        assert_eq!(std::fs::read_to_string(&target).unwrap(), "hello");
+        let _ = std::fs::remove_dir_all(&root);
+    }
 }
diff --git a/crates/cu-profiler-cli/src/commands/run.rs b/crates/cu-profiler-cli/src/commands/run.rs
index e4016a4..7a6e864 100644
--- a/crates/cu-profiler-cli/src/commands/run.rs
+++ b/crates/cu-profiler-cli/src/commands/run.rs
@@ -28,3 +28,105 @@ pub fn run(args: &RunArgs, quiet: bool) -> Result<ExitCode> {
     };
     Ok(code_for_report(&report, flags))
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::args::CommonRun;
+    use crate::exit::ExitCode;
+    use std::path::PathBuf;
+
+    /// Build a throwaway project on disk — a config file plus a single scenario
+    /// log — and return its base dir (for cleanup) with `RunArgs` pointing at it.
+    fn temp_project(config_toml: &str, scenario: &str, log: &str) -> (PathBuf, RunArgs) {
+        let root = std::env::temp_dir().join(format!("cu-run-{}-{scenario}", std::process::id()));
+        let _ = std::fs::remove_dir_all(&root);
+        let logs_dir = root.join(".cu").join("logs");
+        std::fs::create_dir_all(&logs_dir).unwrap();
+        std::fs::write(root.join("cu-profiler.toml"), config_toml).unwrap();
+        std::fs::write(logs_dir.join(format!("{scenario}.log")), log).unwrap();
+        let common = CommonRun {
+            config: root.join("cu-profiler.toml"),
+            logs_dir,
+            scenarios: vec![],
+            tags: vec![],
+            samples: None,
+        };
+        let args = RunArgs {
+            common,
+            format: None,
+            output: None,
+            baseline: None,
+            strict: false,
+            fail_on_budget: false,
+            fail_on_regression: false,
+            fail_on_low_confidence: false,
+        };
+        (root, args)
+    }
+
+    #[test]
+    fn config_default_enforces_budget_without_a_cli_flag() {
+        // No CLI flag is set; `fail_on_budget` is enabled by the config default only
+        // and the over-budget run must still fail. Guards the flag-or-config `||`.
+        let log_blob = "Program P invoke [1]\n\
+                        Program P consumed 120000 of 200000 compute units\nProgram P success";
+        let config_toml = "[project]\nname=\"t\"\n[defaults]\nfail_on_budget=true\n\
+                           [scenario.over]\nbudget=100000\n";
+        let (root, args) = temp_project(config_toml, "over", log_blob);
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::BudgetOrRegression);
+    }
+
+    #[test]
+    fn strict_flag_fails_a_low_confidence_run() {
+        // The log has an unbalanced scope marker → parser warning → low confidence.
+        // Under `--strict` that must fail. Guards the `||` between flag and `strict`.
+        let log_blob = "Program P invoke [1]\nProgram log: CU_PROFILER_BEGIN name=x\n\
+                        Program P consumed 1000 of 200000 compute units\nProgram P success";
+        let config_toml = "[project]\nname=\"t\"\n[scenario.shaky]\n";
+        let (root, mut args) = temp_project(config_toml, "shaky", log_blob);
+        args.strict = true;
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::LowConfidence);
+    }
+
+    #[test]
+    fn config_default_enforces_regression_without_a_cli_flag() {
+        // Record a baseline, then run again against a log that consumes more CU.
+        // The regression must be enforced by the config default, not a CLI flag.
+        // Guards the flag-or-config `||` of the regression decision.
+        use crate::args::BaselineSaveArgs;
+
+        let config_toml = "[project]\nname=\"t\"\n[defaults]\n\
+                           fail_on_regression=true\nmax_regression_pct=5\n[scenario.s]\n";
+        let (root, mut args) = temp_project(
+            config_toml,
+            "s",
+            "Program P invoke [1]\nProgram P consumed 90000 of 200000 compute units\nProgram P success",
+        );
+        let baseline_path = root.join("baseline.json");
+
+        // Step 1: capture the 90k CU run as the baseline.
+        let save_args = BaselineSaveArgs {
+            baseline: baseline_path.clone(),
+            common: args.common.clone(),
+        };
+        crate::commands::baseline_save(&save_args, true).expect("baseline save");
+
+        // Step 2: overwrite the log with a 110k CU run (+22%, above the 5% allowance).
+        std::fs::write(
+            args.common.logs_dir.join("s.log"),
+            "Program P invoke [1]\nProgram P consumed 110000 of 200000 compute units\nProgram P success",
+        )
+        .unwrap();
+
+        // Step 3: run against that baseline; only the config enables the failure.
+        args.baseline = Some(baseline_path);
+        let outcome = run(&args, true);
+        let _ = std::fs::remove_dir_all(&root);
+        assert_eq!(outcome.unwrap(), ExitCode::BudgetOrRegression);
+    }
+}
diff --git a/crates/cu-profiler-cli/src/exit.rs b/crates/cu-profiler-cli/src/exit.rs
index 22b9b0f..9fc3140 100644
--- a/crates/cu-profiler-cli/src/exit.rs
+++ b/crates/cu-profiler-cli/src/exit.rs
@@ -165,4 +165,157 @@ mod tests {
             ExitCode::Simulation
         );
     }
+
+    #[test]
+    fn error_codes_map_each_category() {
+        // Pins the `Error` → `ExitCode` mapping of `code_for_error`, one row per
+        // category, so a deleted or swapped match arm changes an observable code.
+        use cu_profiler_core::Error;
+
+        // NOTE: `Error::Parse` and `Error::Io` both map to `ParserReport`, which is
+        // also the catch-all `_` arm. Deleting either arm is an *equivalent mutant*
+        // (no behavioural change), so it cannot — and need not — be killed.
+        let parse_error = Error::Parse {
+            what: "w".into(),
+            index: 0,
+            reason: "r".into(),
+        };
+
+        // One `(error, expected exit code)` row per error category; rows are
+        // checked in declaration order and the first mismatch fails the test.
+        let expectations = [
+            (Error::Config("x".into()), ExitCode::Config),
+            (Error::Toml("x".into()), ExitCode::Config),
+            (Error::Simulation("x".into()), ExitCode::Simulation),
+            (Error::BackendUnimplemented("x".into()), ExitCode::Simulation),
+            (Error::Baseline("x".into()), ExitCode::Baseline),
+            (parse_error, ExitCode::ParserReport),
+        ];
+        for (error, expected) in &expectations {
+            assert_eq!(&code_for_error(error), expected);
+        }
+    }
+
+    /// Profile scenario `s` against a baseline record whose fingerprint does NOT
+    /// match: the comparison exists but is stale (`matched == false`), which also
+    /// drops confidence to Low. A single report that carries both soft signals.
+    fn report_with_stale_baseline() -> Report {
+        use cu_profiler_core::baseline::{BaselineRecord, BaselineStore, Fingerprint};
+        use cu_profiler_core::confidence::ConfidenceLevel;
+        use cu_profiler_core::metadata::InstrumentationMode;
+        let stale_record = BaselineRecord {
+            scenario: "s".into(),
+            actual_units: 50_000,
+            budget: None,
+            timestamp: None,
+            git_commit: None,
+            fingerprint: Fingerprint::new("OTHER", "OTHER", "OTHER", None),
+            solana_versions: Vec::new(),
+            profiler_version: "0.1.0".into(),
+            instrumentation: InstrumentationMode::Off,
+            confidence: ConfidenceLevel::High,
+            approved: false,
+        };
+        let mut baselines = BaselineStore::new();
+        baselines.insert(stale_record);
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "s",
+            "Program P invoke [1]\nProgram P consumed 50000 of 200000 compute units\nProgram P success",
+            true,
+        );
+        Profiler::new().run(
+            &recorded,
+            &[Scenario::new("s")],
+            Some(&baselines),
+            RunMetadata::recorded("0.1.0"),
+        )
+    }
+
+    #[test]
+    fn soft_signals_do_not_fail_when_their_flags_are_off() {
+        // The report carries both soft signals (stale baseline, low confidence),
+        // yet with every flag left at its default neither may change the exit
+        // code. Guards the `&&` in both decisions.
+        let report = report_with_stale_baseline();
+        let no_flags = DecisionFlags::default();
+        let code = code_for_report(&report, no_flags);
+        assert_eq!(code, ExitCode::Success);
+    }
+
+    #[test]
+    fn stale_baseline_exits_four_when_enabled() {
+        let enabled = DecisionFlags {
+            fail_on_stale_baseline: true,
+            ..DecisionFlags::default()
+        };
+        let report = report_with_stale_baseline();
+        assert_eq!(code_for_report(&report, enabled), ExitCode::Baseline);
+    }
+
+    #[test]
+    fn low_confidence_exits_six_when_enabled() {
+        let enabled = DecisionFlags {
+            fail_on_low_confidence: true,
+            ..DecisionFlags::default()
+        };
+        let report = report_with_stale_baseline();
+        assert_eq!(code_for_report(&report, enabled), ExitCode::LowConfidence);
+    }
+
+    #[test]
+    fn regression_ignored_when_flag_off() {
+        // The baseline matches and the run really regresses, yet `fail_on_regression`
+        // is off, so the regression branch must stay silent. Guards its `&&`.
+        use cu_profiler_core::baseline::{BaselineRecord, BaselineStore};
+        use cu_profiler_core::confidence::ConfidenceLevel;
+        use cu_profiler_core::metadata::InstrumentationMode;
+
+        let mut scenario = Scenario::new("s");
+        scenario.budget = BudgetPolicy {
+            max_regression_pct: Some(5.0),
+            ..Default::default()
+        };
+        let profiler = Profiler::new();
+        let fingerprint = profiler.fingerprint(&scenario);
+        let mut baselines = BaselineStore::new();
+        baselines.insert(BaselineRecord {
+            scenario: "s".into(),
+            actual_units: 90_000, // the 110k run is +22% over this → regression FAIL
+            budget: None,
+            timestamp: None,
+            git_commit: None,
+            fingerprint, // same as the current one → non-stale, regression is evaluated
+            solana_versions: Vec::new(),
+            profiler_version: "0.1.0".into(),
+            instrumentation: InstrumentationMode::Off,
+            confidence: ConfidenceLevel::High,
+            approved: false,
+        });
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "s",
+            "Program P invoke [1]\nProgram P consumed 110000 of 200000 compute units\nProgram P success",
+            true,
+        );
+        let report = profiler.run(
+            &recorded,
+            &[scenario],
+            Some(&baselines),
+            RunMetadata::recorded("0.1.0"),
+        );
+        assert_eq!(
+            code_for_report(&report, DecisionFlags::default()),
+            ExitCode::Success
+        );
+        // Sanity check: once the flag is on, the very same report *does* fail.
+        let enforcing = DecisionFlags {
+            fail_on_regression: true,
+            ..Default::default()
+        };
+        assert_eq!(
+            code_for_report(&report, enforcing),
+            ExitCode::BudgetOrRegression
+        );
+    }
 }
diff --git a/crates/cu-profiler-core/src/baseline/mod.rs b/crates/cu-profiler-core/src/baseline/mod.rs
index 82c51ee..0f37454 100644
--- a/crates/cu-profiler-core/src/baseline/mod.rs
+++ b/crates/cu-profiler-core/src/baseline/mod.rs
@@ -171,4 +171,45 @@ mod tests {
         let back = BaselineStore::from_json(&json).unwrap();
         assert_eq!(store, back);
     }
+
+    #[test]
+    fn new_store_is_at_schema_version_one() {
+        assert_eq!(BaselineStore::new().version, 1); // pins the `version` of a fresh store
+    }
+
+    #[cfg(feature = "json")]
+    #[test]
+    fn save_then_load_round_trips_through_disk() {
+        let mut store = BaselineStore::new();
+        store.insert(record("swap", 4242));
+        let path = std::env::temp_dir().join(format!("cu-bl-rt-{}.json", std::process::id()));
+        let _ = std::fs::remove_file(&path); // per-pid name, so concurrent runs don't collide
+        store.save(&path).expect("save");
+        let loaded = BaselineStore::load(&path).expect("load");
+        let _ = std::fs::remove_file(&path); // clean up before the assertions can panic
+        assert_eq!(loaded.get("swap").map(|r| r.actual_units), Some(4242));
+        assert_eq!(loaded, store);
+    }
+
+    #[cfg(feature = "json")]
+    #[test]
+    fn load_missing_file_returns_an_empty_store() {
+        let gone = std::env::temp_dir().join("cu_profiler_baseline_definitely_absent_xyzzy.json");
+        let _ = std::fs::remove_file(&gone);
+        let loaded = BaselineStore::load(&gone).expect("a missing baseline is not an error");
+        assert!(loaded.records.is_empty());
+    }
+
+    #[cfg(feature = "json")]
+    #[test]
+    fn load_propagates_non_notfound_errors() {
+        // A file that exists but is not valid UTF-8 yields an InvalidData (non-NotFound)
+        // read error, which must propagate rather than be swallowed as an empty store.
+        // The name is per-pid so a concurrent test process cannot delete it mid-test.
+        let path = std::env::temp_dir().join(format!("cu-bl-utf8-{}.json", std::process::id()));
+        std::fs::write(&path, [0xff, 0xfe, 0xff]).expect("write bytes");
+        let result = BaselineStore::load(&path);
+        let _ = std::fs::remove_file(&path);
+        assert!(result.is_err());
+    }
 }
diff --git a/crates/cu-profiler-core/src/budget/mod.rs b/crates/cu-profiler-core/src/budget/mod.rs
index 19f8fdf..6118f6f 100644
--- a/crates/cu-profiler-core/src/budget/mod.rs
+++ b/crates/cu-profiler-core/src/budget/mod.rs
@@ -323,4 +323,355 @@ mod tests {
         let results = evaluate(&m, &policy, Some(49_500));
         assert_eq!(overall_status(&results), PolicyStatus::Pass);
     }
+
+    // ---- Per-clause boundary & remediation coverage ----------------------------
+    // The integration-style tests above exercise `evaluate` end-to-end but never
+    // pin the exact-at-limit boundary or the "remediation only on failure" rule for
+    // each clause. These tests assert those laws in Rust so a boundary flip
+    // (`<=`→`>`, `>`→`>=`) or a misplaced remediation cannot pass the suite. Each
+    // calls the clause function directly.
+
+    #[test]
+    fn absolute_exactly_at_budget_passes() {
+        let verdict = eval_absolute(100, 100); // equal arguments: the at-limit case
+        assert_eq!(verdict.status, PolicyStatus::Pass);
+        assert!(verdict.remediation.is_none());
+    }
+
+    #[test]
+    fn absolute_over_budget_fails_with_remediation() {
+        let verdict = eval_absolute(101, 100); // first argument one past the second
+        assert_eq!(verdict.status, PolicyStatus::Fail);
+        assert!(verdict.remediation.is_some());
+    }
+
+    #[test]
+    fn warn_threshold_fires_at_full_budget() {
+        // Full usage (actual == max) is still within budget, so the warning must
+        // fire rather than being dropped by the `actual > max` early return.
+        let warning = eval_warn_threshold(100, 100, 90.0).expect("100% used warns");
+        assert_eq!(warning.status, PolicyStatus::Warn);
+    }
+
+    #[test]
+    fn warn_threshold_fires_exactly_at_threshold() {
+        // Usage equal to warn_pct must warn (the comparison is `<`, not `<=`). 1 of 2
+        // is 50.0, exactly representable, so the boundary is pinned without drift.
+        let warning = eval_warn_threshold(1, 2, 50.0).expect("exactly at threshold warns");
+        assert_eq!(warning.status, PolicyStatus::Warn);
+    }
+
+    #[test]
+    fn warn_threshold_silent_below_threshold() {
+        assert!(eval_warn_threshold(50, 100, 90.0).is_none()); // 50 of 100 vs a 90.0 threshold
+    }
+
+    #[test]
+    fn warn_threshold_silent_over_budget() {
+        assert!(eval_warn_threshold(120, 100, 90.0).is_none()); // 120 of 100: over budget
+    }
+
+    #[test]
+    fn min_margin_reports_exact_margin() {
+        // 1 of 2 used leaves a 50% margin; the exact figure kills `+`/`/` mutants.
+        let outcome = eval_min_margin(1, 2, 40.0);
+        assert_eq!(outcome.actual, Some(50.0));
+        assert_eq!(outcome.status, PolicyStatus::Pass);
+        assert!(outcome.remediation.is_none());
+    }
+
+    #[test]
+    fn min_margin_exactly_at_minimum_passes() {
+        // A 50% margin against a 50% minimum passes (the comparison is `>=`).
+        let outcome = eval_min_margin(1, 2, 50.0);
+        assert_eq!(outcome.status, PolicyStatus::Pass);
+    }
+
+    #[test]
+    fn min_margin_thin_warns_with_remediation() {
+        // 3 of 4 used leaves a 25% margin, under the 40% minimum.
+        let outcome = eval_min_margin(3, 4, 40.0);
+        assert_eq!(outcome.status, PolicyStatus::Warn);
+        assert!(outcome.remediation.is_some());
+    }
+
+    #[test]
+    fn regression_pct_pass_has_no_remediation() {
+        let outcome = eval_regression_pct(100, 100, 5.0); // no change: 0% regression
+        assert_eq!(outcome.status, PolicyStatus::Pass);
+        assert!(outcome.remediation.is_none());
+    }
+
+    #[test]
+    fn regression_pct_fail_has_remediation() {
+        let outcome = eval_regression_pct(110, 100, 5.0); // +10% against a 5% allowance
+        assert_eq!(outcome.status, PolicyStatus::Fail);
+        assert!(outcome.remediation.is_some());
+    }
+
+    #[test]
+    fn regression_units_reports_signed_delta() {
+        // A +20 CU delta; the exact figure kills `+`/`/` mutants of the subtraction.
+        let outcome = eval_regression_units(120, 100, 30);
+        assert_eq!(outcome.actual, Some(20.0));
+    }
+
+    #[test]
+    fn regression_units_exactly_at_allowance_passes() {
+        let outcome = eval_regression_units(130, 100, 30); // delta 30 equals the allowance
+        assert_eq!(outcome.status, PolicyStatus::Pass);
+        assert!(outcome.remediation.is_none());
+    }
+
+    #[test]
+    fn regression_units_over_allowance_fails_with_remediation() {
+        let outcome = eval_regression_units(131, 100, 30); // delta 31 exceeds 30
+        assert_eq!(outcome.status, PolicyStatus::Fail);
+        assert!(outcome.remediation.is_some());
+    }
+
+    #[test]
+    fn regression_units_improvement_never_fails() {
+        let outcome = eval_regression_units(80, 100, 30); // delta is -20
+        assert_eq!(outcome.status, PolicyStatus::Pass);
+        assert_eq!(outcome.actual, Some(-20.0));
+    }
+
+    #[test]
+    fn cpi_count_exactly_at_limit_passes() {
+        let verdict = eval_cpi_count(5, 5); // equal arguments: the at-limit case
+        assert_eq!(verdict.status, PolicyStatus::Pass);
+        assert!(verdict.remediation.is_none());
+    }
+
+    #[test]
+    fn cpi_count_over_limit_fails_with_remediation() {
+        let verdict = eval_cpi_count(6, 5); // first argument one past the second
+        assert_eq!(verdict.status, PolicyStatus::Fail);
+        assert!(verdict.remediation.is_some());
+    }
+
+    #[test]
+    fn cpi_depth_exactly_at_limit_passes() {
+        let verdict = eval_cpi_depth(4, 4); // equal arguments: the at-limit case
+        assert_eq!(verdict.status, PolicyStatus::Pass);
+        assert!(verdict.remediation.is_none());
+    }
+
+    #[test]
+    fn cpi_depth_over_limit_fails_with_remediation() {
+        let verdict = eval_cpi_depth(5, 4); // first argument one past the second
+        assert_eq!(verdict.status, PolicyStatus::Fail);
+        assert!(verdict.remediation.is_some());
+    }
+
+    #[test]
+    fn unattributed_exactly_at_limit_passes() {
+        let verdict = eval_unattributed(10.0, 10.0); // equal arguments: the at-limit case
+        assert_eq!(verdict.status, PolicyStatus::Pass);
+        assert!(verdict.remediation.is_none());
+    }
+
+    #[test]
+    fn unattributed_over_limit_warns_with_remediation() {
+        let verdict = eval_unattributed(11.0, 10.0); // 11.0 against a 10.0 limit
+        assert_eq!(verdict.status, PolicyStatus::Warn);
+        assert!(verdict.remediation.is_some());
+    }
+
+    #[test]
+    fn overhead_exactly_at_limit_passes() {
+        let verdict = eval_overhead(5.0, 5.0); // equal arguments: the at-limit case
+        assert_eq!(verdict.status, PolicyStatus::Pass);
+        assert!(verdict.remediation.is_none());
+    }
+
+    #[test]
+    fn overhead_over_limit_warns_with_remediation() {
+        let verdict = eval_overhead(6.0, 5.0); // 6.0 against a 5.0 limit
+        assert_eq!(verdict.status, PolicyStatus::Warn);
+        assert!(verdict.remediation.is_some());
+    }
+
+    // ----------------------------------------------------------------------
+    // Property / law tests for the five floating-point clauses.
+    //
+    // The integer/ordinal clauses are decidable exhaustively (∀ over the
+    // domain), but the float math in these five is not. Instead we pin their
+    // *safety laws* over a dense deterministic grid — no RNG, so the suite stays
+    // reproducible — plus the exact-at-threshold case probed from both sides.
+    // The laws: boundary correctness (`<=`/`>=`, never strict),
+    // status/severity/remediation consistency, improvement-never-fails
+    // (regression), and monotonicity (a worse input can never improve the
+    // outcome).
+    // ----------------------------------------------------------------------
+
+    /// Budget-ceiling grid from 1 CU to 1.4M CU (presumably the per-transaction cap — TODO confirm).
+    const MAXES: [u64; 5] = [1, 100, 1_000, 200_000, 1_400_000];
+
+    /// Returns `i` percent of `max` (i in 0..=100), rounded down; the product is widened to u128.
+    fn at_pct(max: u64, i: u64) -> u64 {
+        (u128::from(i) * u128::from(max) / 100) as u64
+    }
+
+    /// Shared invariant for every clause that splits Pass from not-Pass: status,
+    /// severity and remediation must move together.
+    ///
+    /// * Pass ⟺ `Severity::Info` ⟺ no remediation.
+    /// * Warn/Fail carries a Warning/Error severity AND a remediation.
+    /// * `actual` and `expected` are always populated for the renderer.
+    fn assert_consistent(r: &PolicyResult) {
+        // Remediation presence is checked on both sides of the split.
+        let has_fix = r.remediation.is_some();
+        match r.status {
+            PolicyStatus::Pass => {
+                // Pass: informational only, nothing to remediate.
+                assert_eq!(r.severity, Severity::Info, "Pass must be Info: {r:?}");
+                assert!(!has_fix, "Pass carries remediation: {r:?}");
+            }
+            PolicyStatus::Warn | PolicyStatus::Fail => {
+                // Not Pass: the severity escalates and a fix must be offered.
+                let escalated = matches!(r.severity, Severity::Warning | Severity::Error);
+                assert!(escalated, "non-Pass must be Warning/Error: {r:?}");
+                assert!(has_fix, "non-Pass must carry remediation: {r:?}");
+            }
+        }
+        // Whatever the status, both numbers must be present.
+        let populated = r.actual.is_some() && r.expected.is_some();
+        assert!(populated, "actual/expected must be set: {r:?}");
+    }
+
+    #[test]
+    fn min_margin_is_monotonic_and_boundary_correct() {
+        // Usage up means margin down, so a sweep may go Pass → Warn but never back.
+        for max in MAXES {
+            for min_margin in [0.0_f64, 1.0, 5.0, 10.0, 50.0, 100.0] {
+                let mut seen_warn = false;
+                for i in 0..=100_u64 {
+                    let verdict = eval_min_margin(at_pct(max, i), max, min_margin);
+                    if verdict.status == PolicyStatus::Warn {
+                        seen_warn = true;
+                    } else {
+                        assert!(
+                            !seen_warn,
+                            "margin status recovered: max={max} min={min_margin} i={i}"
+                        );
+                    }
+                    assert_consistent(&verdict);
+                }
+            }
+        }
+        // Exact boundary: max=100, actual=90 → margin = 10.0 exactly.
+        assert_eq!(eval_min_margin(90, 100, 10.0).status, PolicyStatus::Pass); // == → Pass
+        assert_eq!(
+            eval_min_margin(90, 100, 10.000_1).status,
+            PolicyStatus::Warn
+        ); // just over
+        assert_eq!(eval_min_margin(90, 100, 9.999_9).status, PolicyStatus::Pass); // just under
+    }
+
+    #[test]
+    fn warn_threshold_within_budget_is_monotonic_and_boundary_correct() {
+        // While usage stays within budget, more usage may switch the warning on
+        // but must never switch it back off.
+        for max in MAXES {
+            for warn_pct in [0.0_f64, 1.0, 50.0, 90.0, 100.0] {
+                let mut seen_warn = false;
+                for i in 0..=100_u64 {
+                    if let Some(warning) = eval_warn_threshold(at_pct(max, i), max, warn_pct) {
+                        seen_warn = true;
+                        assert_eq!(warning.status, PolicyStatus::Warn);
+                        assert_consistent(&warning);
+                    } else {
+                        assert!(
+                            !seen_warn,
+                            "warning cleared as usage rose: max={max} warn={warn_pct} i={i}"
+                        );
+                    }
+                }
+            }
+            // Over budget is the absolute clause's job, never this one's.
+            assert!(eval_warn_threshold(max + 1, max, 0.0).is_none());
+        }
+        // Exact boundary: max=100, actual=90 → used = 90.0 exactly.
+        assert!(eval_warn_threshold(90, 100, 90.0).is_some()); // == → warn
+        assert!(eval_warn_threshold(90, 100, 90.000_1).is_none()); // just under → silent
+        assert!(eval_warn_threshold(90, 100, 89.999_9).is_some()); // just over → warn
+    }
+
+    #[test]
+    fn regression_pct_improvement_never_fails() {
+        // A regression tool's worst mistake is calling an improvement a regression:
+        // any measurement at or below baseline must Pass for every non-negative
+        // allowance. (This is the float counterpart of the integer-clause law.)
+        for base in [1_u64, 100, 50_000, 200_000, 1_400_000] {
+            for i in 0..=100_u64 {
+                let actual = at_pct(base, i); // sweeps 0..=base
+                for max_pct in [0.0_f64, 1.0, 5.0, 25.0] {
+                    let verdict = eval_regression_pct(actual, base, max_pct);
+                    assert_eq!(
+                        verdict.status,
+                        PolicyStatus::Pass,
+                        "improvement flagged as regression: actual={actual} base={base} allow={max_pct}"
+                    );
+                    assert_consistent(&verdict);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn regression_pct_boundary_and_zero_baseline() {
+        // With base=100 and actual=110 the delta is exactly 10.0 percent.
+        assert_eq!(
+            eval_regression_pct(110, 100, 10.0).status,
+            PolicyStatus::Pass
+        ); // allowance == delta → Pass
+        assert_eq!(
+            eval_regression_pct(110, 100, 9.999_9).status,
+            PolicyStatus::Fail
+        ); // allowance a hair below the delta
+        assert_eq!(
+            eval_regression_pct(110, 100, 10.000_1).status,
+            PolicyStatus::Pass
+        ); // allowance a hair above the delta
+        // A zero baseline short-circuits delta_pct to 0.0 → never a regression.
+        for max_pct in [0.0_f64, 1.0, 100.0] {
+            assert_eq!(
+                eval_regression_pct(999_999, 0, max_pct).status,
+                PolicyStatus::Pass
+            );
+        }
+    }
+
+    #[test]
+    fn unattributed_and_overhead_obey_the_threshold_law() {
+        // Both clauses share the shape `ok = actual_pct <= max_pct` (Pass/Warn).
+        let pair: [fn(f64, f64) -> PolicyResult; 2] = [eval_unattributed, eval_overhead];
+        for clause in pair {
+            for max_pct in [0.0_f64, 1.0, 5.0, 50.0, 100.0] {
+                // Probe the exact boundary from both sides.
+                assert_eq!(clause(max_pct, max_pct).status, PolicyStatus::Pass); // == → Pass
+                assert_eq!(clause(max_pct + 0.000_1, max_pct).status, PolicyStatus::Warn); // over
+                if max_pct > 0.0 {
+                    assert_eq!(clause(max_pct - 0.000_1, max_pct).status, PolicyStatus::Pass);
+                }
+                // Monotonicity: after the first Warn, higher usage must keep warning.
+                let mut seen_warn = false;
+                let mut a = 0.0_f64;
+                while a <= max_pct + 10.0 {
+                    let verdict = clause(a, max_pct);
+                    match verdict.status {
+                        PolicyStatus::Warn => seen_warn = true,
+                        _ => assert!(
+                            !seen_warn,
+                            "status recovered after warning: a={a} max={max_pct}"
+                        ),
+                    }
+                    assert_consistent(&verdict);
+                    a += 0.5;
+                }
+            }
+        }
+    }
 }
diff --git a/crates/cu-profiler-core/src/confidence.rs b/crates/cu-profiler-core/src/confidence.rs
index 279e14e..f882007 100644
--- a/crates/cu-profiler-core/src/confidence.rs
+++ b/crates/cu-profiler-core/src/confidence.rs
@@ -226,4 +226,54 @@ mod tests {
         assert!(ConfidenceLevel::High > ConfidenceLevel::Low);
         assert!(ConfidenceLevel::Medium > ConfidenceLevel::Unknown);
     }
+
+    #[test]
+    fn level_labels_are_stable() {
+        assert_eq!(ConfidenceLevel::High.label(), "High"); // pins each label's exact spelling
+        assert_eq!(ConfidenceLevel::Medium.label(), "Medium");
+        assert_eq!(ConfidenceLevel::Low.label(), "Low");
+        assert_eq!(ConfidenceLevel::Unknown.label(), "Unknown");
+    }
+
+    #[test]
+    fn parser_warnings_demote_to_medium_with_reason() {
+        let factors = ConfidenceFactors {
+            parser_warnings: 1,
+            metadata_available: true,
+            ..Default::default()
+        };
+        let scored = score(&factors);
+        assert_eq!(scored.level, ConfidenceLevel::Medium);
+        assert!(scored.reasons.iter().any(|why| why.contains("1 parser warning")));
+    }
+
+    #[test]
+    fn scope_markers_add_a_reason_when_present() {
+        let factors = ConfidenceFactors {
+            scope_markers: 2,
+            metadata_available: true,
+            ..Default::default()
+        };
+        assert!(
+            score(&factors)
+                .reasons
+                .iter()
+                .any(|why| why.contains("2 scope markers detected"))
+        );
+    }
+
+    #[test]
+    fn no_scope_markers_adds_no_marker_reason() {
+        let factors = ConfidenceFactors {
+            scope_markers: 0,
+            metadata_available: true,
+            ..Default::default()
+        };
+        assert!(
+            !score(&factors)
+                .reasons
+                .iter()
+                .any(|why| why.contains("scope markers detected"))
+        );
+    }
 }
diff --git a/crates/cu-profiler-core/src/config.rs b/crates/cu-profiler-core/src/config.rs
index 36c9e5e..cb79cee 100644
--- a/crates/cu-profiler-core/src/config.rs
+++ b/crates/cu-profiler-core/src/config.rs
@@ -324,4 +324,34 @@ critical = true
         assert_eq!(scenarios.len(), 2);
         assert!(scenarios.iter().any(|s| s.name == "swap_exact_in"));
     }
+
+    #[test]
+    fn non_recorded_mode_is_not_recorded() {
+        let config = Config::from_toml(SAMPLE).unwrap(); // SAMPLE has mode = "program-test"
+        assert!(!config.mode_is_recorded());
+    }
+
+    #[test]
+    fn default_policy_carries_the_defaults() {
+        let config = Config::from_toml(SAMPLE).unwrap();
+        let defaults = config.default_policy();
+        assert_eq!(defaults.warn_at_budget_pct, Some(90.0));
+        assert_eq!(defaults.max_regression_pct, Some(5.0));
+    }
+
+    #[test]
+    fn per_scenario_warn_overrides_the_default() {
+        // The scenario-level 50 has to beat the default 90; were the per-scenario
+        // warn field dropped, the effective value would fall back to 90.
+        let source = r#"
+[project]
+name = "x"
+[defaults]
+warn_at_budget_pct = 90
+[scenario.foo]
+warn_at_budget_pct = 50
+"#;
+        let config = Config::from_toml(source).unwrap();
+        assert_eq!(config.effective_policy("foo").warn_at_budget_pct, Some(50.0));
+    }
 }
diff --git a/crates/cu-profiler-core/src/metadata.rs b/crates/cu-profiler-core/src/metadata.rs
index 7b7a843..f2e677e 100644
--- a/crates/cu-profiler-core/src/metadata.rs
+++ b/crates/cu-profiler-core/src/metadata.rs
@@ -76,3 +76,25 @@ impl RunMetadata {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{BackendKind, InstrumentationMode, RunMetadata};
+
+    #[test]
+    fn backend_kind_as_str_is_stable() {
+        assert_eq!(BackendKind::Recorded.as_str(), "recorded"); // pins every variant's string
+        assert_eq!(BackendKind::ProgramTest.as_str(), "program-test");
+        assert_eq!(BackendKind::BanksClient.as_str(), "banks-client");
+        assert_eq!(BackendKind::Mollusk.as_str(), "mollusk");
+        assert_eq!(BackendKind::RpcSimulation.as_str(), "rpc-simulation");
+    }
+
+    #[test]
+    fn recorded_metadata_uses_the_recorded_backend() {
+        let meta = RunMetadata::recorded("1.2.3");
+        assert_eq!(meta.backend, BackendKind::Recorded);
+        assert_eq!(meta.instrumentation, InstrumentationMode::Off);
+        assert_eq!(meta.profiler_version, "1.2.3"); // the argument is stored verbatim
+    }
+}
diff --git a/crates/cu-profiler-core/src/model.rs b/crates/cu-profiler-core/src/model.rs
index 1553613..6c8e0ee 100644
--- a/crates/cu-profiler-core/src/model.rs
+++ b/crates/cu-profiler-core/src/model.rs
@@ -354,4 +354,31 @@ mod tests {
         assert_eq!(r.summary.total_cu, 600);
         assert!(r.has_failures());
     }
+
+    #[test]
+    fn status_labels_are_stable() {
+        assert_eq!(Status::Pass.label(), "PASS"); // pins each label's exact upper-case spelling
+        assert_eq!(Status::Warn.label(), "WARN");
+        assert_eq!(Status::Fail.label(), "FAIL");
+        assert_eq!(Status::Unknown.label(), "UNKNOWN");
+    }
+
+    #[test]
+    fn two_samples_compute_even_median() {
+        // Two samples sit right on the `len < 2` boundary and must still yield
+        // stats; with an even count the median averages the middle pair.
+        let stats = SampleStats::from_samples(&[10, 20]).expect("two samples compute");
+        assert_eq!(stats.count, 2);
+        assert_eq!(stats.median, 15); // (10 + 20) / 2
+    }
+
+    #[test]
+    fn report_without_failures_is_clean() {
+        let clean = Report::new(
+            vec![scenario("ok", Status::Pass, 100)],
+            RunMetadata::recorded("0.1.0"),
+        );
+        assert_eq!(clean.summary.failed, 0);
+        assert!(!clean.has_failures());
+    }
 }
diff --git a/crates/cu-profiler-core/src/parser/mod.rs b/crates/cu-profiler-core/src/parser/mod.rs
index 7d3c899..03ea0a2 100644
--- a/crates/cu-profiler-core/src/parser/mod.rs
+++ b/crates/cu-profiler-core/src/parser/mod.rs
@@ -390,4 +390,69 @@ mod tests {
         assert!(!a.simulation_success);
         assert_eq!(a.total_cu, 8000);
     }
+
+    #[test]
+    fn cpi_nesting_at_the_limit_does_not_warn() {
+        // `build` seeds a synthetic root, so N nested invokes reach structural depth
+        // N+1; MAX_DEPTH-1 invokes land exactly on MAX_DEPTH and must not warn.
+        let invokes: Vec<String> = (1..=cpi_tree::MAX_DEPTH - 1)
+            .map(|d| format!("Program User111 invoke [{d}]"))
+            .collect();
+        let analysis = analyze(&invokes, &ProgramRegistry::with_builtins());
+        assert!(
+            !analysis.warnings.iter().any(|msg| msg.contains("nesting exceeded")),
+            "did not expect a flattening warning at the limit: {:?}",
+            analysis.warnings
+        );
+    }
+
+    #[test]
+    fn excessive_cpi_nesting_emits_a_flattening_warning() {
+        // Past MAX_DEPTH the tree is flattened and analyze() warns; this covers
+        // structural_depth and the depth guard on the over-limit side.
+        let invokes: Vec<String> = (1..=cpi_tree::MAX_DEPTH + 3)
+            .map(|d| format!("Program User111 invoke [{d}]"))
+            .collect();
+        let analysis = analyze(&invokes, &ProgramRegistry::with_builtins());
+        assert!(
+            analysis.warnings.iter().any(|msg| msg.contains("nesting exceeded")),
+            "expected a flattening warning beyond the limit: {:?}",
+            analysis.warnings
+        );
+    }
+
+    #[test]
+    fn scope_point_marker_is_counted() {
+        let transcript = lines(&[
+            "Program User111 invoke [1]",
+            "Program log: CU_PROFILER_POINT name=checkpoint",
+            "Program User111 consumed 1000 of 200000 compute units",
+            "Program User111 success",
+        ]);
+        let analysis = analyze(&transcript, &ProgramRegistry::with_builtins());
+        assert_eq!(analysis.scope_marker_count, 1);
+    }
+
+    #[test]
+    fn zero_total_with_zero_delta_scope_withholds_percentage_without_warning() {
+        // With total_cu == 0 and a zero-CU scope delta the percentage is withheld and
+        // no "exceeds the measured total" warning appears (this guards the
+        // `total_cu > 0` and `units > total_cu` boundaries: no div-by-zero, no noise).
+        let transcript = lines(&[
+            "Program User111 invoke [1]",
+            "Program log: CU_PROFILER_BEGIN name=noop cu=100",
+            "Program log: CU_PROFILER_END name=noop cu=100",
+            "Program User111 consumed 0 of 200000 compute units",
+            "Program User111 success",
+        ]);
+        let analysis = analyze(&transcript, &ProgramRegistry::with_builtins());
+        assert_eq!(analysis.total_cu, 0);
+        assert_eq!(analysis.scopes[0].units_estimated, Some(0));
+        assert_eq!(analysis.scopes[0].percentage_of_total, None);
+        assert!(
+            !analysis.warnings
+                .iter()
+                .any(|msg| msg.contains("exceeds the measured total"))
+        );
+    }
 }
diff --git a/crates/cu-profiler-report/src/html.rs b/crates/cu-profiler-report/src/html.rs
index c57e7f5..6d249ec 100644
--- a/crates/cu-profiler-report/src/html.rs
+++ b/crates/cu-profiler-report/src/html.rs
@@ -229,4 +229,59 @@ mod tests {
         assert!(!html.contains("<script>evil"));
         assert!(html.contains("&lt;script&gt;evil&lt;/script&gt;"));
     }
+
+    /// Builds a report meant to hit every conditional section: a near-budget Warn
+    /// (→ diagnostic + confidence reasons) and a scope with a CU-snapshot delta.
+    fn rich_report() -> Report {
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "swap",
+            "Program User111 invoke [1]\n\
+             Program log: CU_PROFILER_BEGIN name=validate cu=200000\n\
+             Program log: CU_PROFILER_END name=validate cu=188000\n\
+             Program User111 consumed 96000 of 100000 compute units\n\
+             Program User111 success",
+            true,
+        );
+        let mut near_budget = Scenario::new("swap");
+        near_budget.budget = BudgetPolicy {
+            absolute_max_cu: Some(100_000),
+            warn_at_budget_pct: Some(90.0),
+            ..Default::default()
+        };
+        Profiler::new().run(&recorded, &[near_budget], None, RunMetadata::recorded("0.1.0"))
+    }
+
+    #[test]
+    fn renders_every_section_and_status_class() {
+        let page = render(&rich_report());
+        assert!(
+            page.contains("class=\"overview\""),
+            "overview table missing"
+        );
+        assert!(
+            page.contains("<th>Scenario</th>"),
+            "overview header missing"
+        );
+        // The summary line hardcodes a bare `class="warn"`, so only the combined
+        // `class="warn">WARN` form (status_class output followed by the status
+        // label) actually pins status_class.
+        assert!(
+            page.contains("class=\"warn\">WARN"),
+            "status_class output missing"
+        );
+        assert!(
+            page.contains("<ul class=\"reasons\">"),
+            "confidence reasons missing"
+        );
+        assert!(page.contains("<h3>Scopes</h3>"), "scopes section missing");
+        assert!(
+            page.contains("CU (") && page.contains("%)"),
+            "scope CU/percentage missing"
+        );
+        assert!(
+            page.contains("<h3>Diagnostics</h3>"),
+            "diagnostics section missing"
+        );
+    }
 }
diff --git a/crates/cu-profiler-report/src/junit.rs b/crates/cu-profiler-report/src/junit.rs
index 5152b0f..39a92d2 100644
--- a/crates/cu-profiler-report/src/junit.rs
+++ b/crates/cu-profiler-report/src/junit.rs
@@ -106,4 +106,32 @@ mod tests {
     fn escapes_special_characters() {
         assert_eq!(escape("a<b>&\"'"), "a&lt;b&gt;&amp;&quot;&apos;");
     }
+
+    #[test]
+    fn failure_case_carries_message_and_detail() {
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "swap",
+            "Program P invoke [1]\nProgram P consumed 120000 of 200000 compute units\nProgram P success",
+            true,
+        );
+        let mut over_budget = Scenario::new("swap");
+        over_budget.budget = BudgetPolicy {
+            absolute_max_cu: Some(100_000),
+            ..Default::default()
+        };
+        let failing =
+            Profiler::new().run(&recorded, &[over_budget], None, RunMetadata::recorded("0.1.0"));
+        let xml = render(&failing);
+        // The failure message is the failing policy's message; the detail has status + CU.
+        assert!(
+            xml.contains("absolute maximum"),
+            "policy message missing: {xml}"
+        );
+        assert!(xml.contains("status=FAIL"), "detail status missing: {xml}");
+        assert!(
+            xml.contains("total_cu=120000"),
+            "detail total_cu missing: {xml}"
+        );
+    }
 }
diff --git a/crates/cu-profiler-report/src/markdown.rs b/crates/cu-profiler-report/src/markdown.rs
index 91e9ce1..2a3c8c8 100644
--- a/crates/cu-profiler-report/src/markdown.rs
+++ b/crates/cu-profiler-report/src/markdown.rs
@@ -142,4 +142,32 @@ mod tests {
         );
         assert!(row.contains("evil\\|name'"), "name not sanitised: {row}");
     }
+
+    #[test]
+    fn renders_diagnostics_section_and_status_emoji() {
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "swap",
+            "Program User111 invoke [1]\n\
+             Program log: CU_PROFILER_BEGIN name=validate cu=200000\n\
+             Program log: CU_PROFILER_END name=validate cu=188000\n\
+             Program User111 consumed 96000 of 100000 compute units\n\
+             Program User111 success",
+            true,
+        );
+        let mut near_budget = Scenario::new("swap");
+        near_budget.budget = cu_profiler_core::budget::BudgetPolicy {
+            absolute_max_cu: Some(100_000),
+            warn_at_budget_pct: Some(90.0),
+            ..Default::default()
+        };
+        let warned =
+            Profiler::new().run(&recorded, &[near_budget], None, RunMetadata::recorded("0.1.0"));
+        let md = render(&warned);
+        assert!(
+            md.contains("### Diagnostics"),
+            "diagnostics section missing: {md}"
+        );
+        assert!(md.contains('🟡'), "warn status emoji missing: {md}");
+    }
 }
diff --git a/crates/cu-profiler-report/src/model.rs b/crates/cu-profiler-report/src/model.rs
index b46e137..bc5ee26 100644
--- a/crates/cu-profiler-report/src/model.rs
+++ b/crates/cu-profiler-report/src/model.rs
@@ -54,4 +54,48 @@ mod tests {
         assert_eq!(thousands(1_000_000), "1,000,000");
         assert_eq!(thousands(999), "999");
     }
+
+    #[test]
+    fn scenario_delta_pct_reads_the_baseline_comparison() {
+        use cu_profiler_core::Profiler;
+        use cu_profiler_core::backend::RecordedLogsBackend;
+        use cu_profiler_core::baseline::{BaselineRecord, BaselineStore, Fingerprint};
+        use cu_profiler_core::confidence::ConfidenceLevel;
+        use cu_profiler_core::metadata::{InstrumentationMode, RunMetadata};
+        use cu_profiler_core::scenario::Scenario;
+
+        let mut recorded = RecordedLogsBackend::new();
+        recorded.insert_blob(
+            "swap",
+            "Program P invoke [1]\nProgram P consumed 1000 of 200000 compute units\nProgram P success",
+            true,
+        );
+        let mut baselines = BaselineStore::new();
+        baselines.insert(BaselineRecord {
+            scenario: "swap".into(),
+            actual_units: 800,
+            budget: None,
+            timestamp: None,
+            git_commit: None,
+            fingerprint: Fingerprint::new("swap", "fix", "cfg", None),
+            solana_versions: Vec::new(),
+            profiler_version: "0.1.0".into(),
+            instrumentation: InstrumentationMode::Off,
+            confidence: ConfidenceLevel::High,
+            approved: false,
+        });
+        let run = Profiler::new().run(
+            &recorded,
+            &[Scenario::new("swap")],
+            Some(&baselines),
+            RunMetadata::recorded("0.1.0"),
+        );
+        // Measured 1000 against a stored 800: (1000 - 800) / 800 = +25%.
+        let delta = scenario_delta_pct(&run.scenarios[0]);
+        assert!(
+            delta.is_some(),
+            "delta should be present when a baseline matched"
+        );
+        assert!((delta.unwrap() - 25.0).abs() < 0.01, "delta = {delta:?}");
+    }
 }