43 changes: 36 additions & 7 deletions cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
@@ -81,22 +81,45 @@ access(all) contract FlowYieldVaultsAutoBalancers {
return nil
}

/// Checks if an AutoBalancer has at least one active (Scheduled) transaction.
/// Checks if an AutoBalancer has at least one active internally-managed transaction.
/// Used by Supervisor to detect stuck yield vaults that need recovery.
///
/// A transaction is considered active when it is:
/// - still `Scheduled`, or
/// - already marked `Executed` by FlowTransactionScheduler and still within a bounded
/// grace period after its scheduled timestamp.
///
/// The second case matters because FlowTransactionScheduler flips status to `Executed`
/// before the handler actually runs. Without treating that in-flight window as active,
/// the Supervisor can falsely classify healthy vaults as stuck and recover them twice.
/// But that window must be bounded: if the handler panics after the optimistic status
/// update, the vault must eventually become recoverable instead of remaining "active"
/// forever.
///
Member commented on lines +87 to +98:
Could you explain this?
I am not sure I understand.
The status can be Executed even though it is not executed?

Contributor Author replied:

Yes, `Executed` can be set before the handler actually runs. In FlowTransactionScheduler, the scheduler marks a transaction as `Executed` optimistically, before the handler logic has finished. The contract says this directly here:

https://github.com/onflow/flow-core-contracts/blob/27e0eb625ebe056c78cf42d6feaa6ce00a8e06c9/contracts/FlowTransactionScheduler.cdc#L1169-L1186
https://github.com/onflow/flow-core-contracts/blob/27e0eb625ebe056c78cf42d6feaa6ce00a8e06c9/contracts/FlowTransactionScheduler.cdc#L250-L264

/// @param id: The yield vault/AutoBalancer ID
/// @return Bool: true if there's at least one Scheduled transaction, false otherwise
/// @return Bool: true if there's at least one active internally-managed transaction, false otherwise
///
access(all) fun hasActiveSchedule(id: UInt64): Bool {
let autoBalancer = self.borrowAutoBalancer(id: id)
if autoBalancer == nil {
return false
}

let currentTimestamp = getCurrentBlock().timestamp
let optimisticExecutionGracePeriod: UFix64 = 10.0
let txnIDs = autoBalancer!.getScheduledTransactionIDs()
for txnID in txnIDs {
if autoBalancer!.borrowScheduledTransaction(id: txnID)?.status() == FlowTransactionScheduler.Status.Scheduled {
return true
if let scheduledTxn = autoBalancer!.borrowScheduledTransaction(id: txnID) {
if let status = scheduledTxn.status() {
if status == FlowTransactionScheduler.Status.Scheduled {
return true
}

if status == FlowTransactionScheduler.Status.Executed
Member commented:
This is the same issue we faced in onflow/FlowYieldVaultsEVM#70
The problem with the fix is that if the transaction panics, lastRebalanceTimestamp is not updated. This leaves the rebalancer permanently stuck, because its status is Executed but lastRebalanceTimestamp was never updated.
You might want to consider a grace-period-based fix.

&& currentTimestamp <= scheduledTxn.timestamp + optimisticExecutionGracePeriod {
return true
}
}
}
}
return false
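The in-flight window described above can be modeled outside Cadence. The following Python sketch is illustrative only: the `Status` enum and `ScheduledTxn` shape are hypothetical stand-ins for the FlowTransactionScheduler types, and `GRACE_PERIOD` mirrors the 10-second `optimisticExecutionGracePeriod` constant:

```python
from dataclasses import dataclass
from enum import Enum

class Status(Enum):
    SCHEDULED = 1
    EXECUTED = 2
    CANCELED = 3

@dataclass
class ScheduledTxn:
    status: Status
    timestamp: float  # the transaction's scheduled execution time

# Bounded window during which an optimistically-Executed txn still counts as active.
GRACE_PERIOD = 10.0

def is_active(txn: ScheduledTxn, now: float) -> bool:
    if txn.status == Status.SCHEDULED:
        return True
    # The scheduler flips status to Executed before the handler runs, so a
    # recently-Executed txn is treated as in flight for a bounded window only.
    if txn.status == Status.EXECUTED:
        return now <= txn.timestamp + GRACE_PERIOD
    return False

def has_active_schedule(txns: list[ScheduledTxn], now: float) -> bool:
    return any(is_active(t, now) for t in txns)
```

With this model, a transaction stamped `Executed` at t=100 still reads as active at t=105 but not at t=111, which is the bounded behavior the doc comment asks for: healthy vaults are not double-recovered, yet a handler that panics after the optimistic status flip eventually becomes recoverable.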
@@ -123,7 +146,7 @@ access(all) contract FlowYieldVaultsAutoBalancers {
return false // Not configured for recurring, can't be "stuck"
}

// Check if there's an active schedule
// Check if there's an active schedule or an in-flight due execution
if self.hasActiveSchedule(id: id) {
return false // Has active schedule, not stuck
}
@@ -226,8 +249,14 @@ access(all) contract FlowYieldVaultsAutoBalancers {
let scheduleCap = self.account.capabilities.storage
.issue<auth(DeFiActions.Schedule) &DeFiActions.AutoBalancer>(storagePath)

// Register yield vault in registry for global mapping of live yield vault IDs
FlowYieldVaultsSchedulerRegistry.register(yieldVaultID: uniqueID.id, handlerCap: handlerCap, scheduleCap: scheduleCap)
// Register the yield vault in the global scheduler registry.
// Only recurring vaults participate in the Supervisor's stuck-scan ordering.
FlowYieldVaultsSchedulerRegistry.register(
yieldVaultID: uniqueID.id,
handlerCap: handlerCap,
scheduleCap: scheduleCap,
participatesInStuckScan: recurringConfig != nil
Member commented:
The contract's doc comment describes it as:
A registry of all yield vault IDs that participate in scheduled rebalancing

Would we ever have an instance where this is not the case?
To me the better approach is to ensure SchedulerRegistry only holds vaults that are actually getting scheduled, and not to add the ones that aren't.

Contributor Author replied:

Yes, we could do that. It would make the semantics cleaner: SchedulerRegistry would contain only vaults that are currently recurring/scheduled.

I kept this PR narrower because that would be a broader refactor. Today registration follows the vault lifecycle, not the recurring-config lifecycle, so changing the global registry to recurring-only would mean adding/removing entries whenever recurring config is enabled/disabled, and updating the related admin/recovery flows to match.

So I agree your approach is valid, but I’d treat it as a separate design change. In this PR I only made the stuck-scan ordering recurring-only. I’ll update the comments to make that distinction explicit.

Contributor Author replied:

Check this commit: 9284c7e

)

// Start the native AutoBalancer self-scheduling chain if recurringConfig was provided
// This schedules the first rebalance; subsequent ones are scheduled automatically
61 changes: 46 additions & 15 deletions cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc
@@ -5,11 +5,12 @@ import "UInt64LinkedList"

/// FlowYieldVaultsSchedulerRegistry
///
/// Stores registry of YieldVault IDs and their handler capabilities for scheduling.
/// Stores the global registry of live YieldVault IDs and their scheduling capabilities.
/// This contract maintains:
/// - A registry of all yield vault IDs that participate in scheduled rebalancing
/// - A registry of all live yield vault IDs known to the scheduler infrastructure
/// - Handler capabilities (AutoBalancer capabilities) for each yield vault
/// - A pending queue for yield vaults that need initial seeding or re-seeding
/// - A recurring-only stuck-scan ordering used by the Supervisor
/// - The global Supervisor capability for recovery operations
///
access(all) contract FlowYieldVaultsSchedulerRegistry {
@@ -48,7 +49,8 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {

/* --- STATE --- */

/// Registry of all yield vault IDs that participate in scheduling
/// Registry of all live yield vault IDs known to the scheduler infrastructure.
/// This is broader than the recurring-only stuck-scan ordering.
access(self) var yieldVaultRegistry: {UInt64: Bool}

/// Handler capabilities (AutoBalancer) for each yield vault - keyed by yield vault ID
@@ -75,10 +77,12 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
/* --- ACCOUNT-LEVEL FUNCTIONS --- */

/// Register a YieldVault and store its handler and schedule capabilities (idempotent)
/// `participatesInStuckScan` should be true only for vaults that currently have recurring config.
access(account) fun register(
yieldVaultID: UInt64,
handlerCap: Capability<auth(FlowTransactionScheduler.Execute) &{FlowTransactionScheduler.TransactionHandler}>,
scheduleCap: Capability<auth(DeFiActions.Schedule) &DeFiActions.AutoBalancer>
scheduleCap: Capability<auth(DeFiActions.Schedule) &DeFiActions.AutoBalancer>,
participatesInStuckScan: Bool
) {
pre {
handlerCap.check(): "Invalid handler capability provided for yieldVaultID \(yieldVaultID)"
@@ -87,24 +91,28 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
self.yieldVaultRegistry[yieldVaultID] = true
self.handlerCaps[yieldVaultID] = handlerCap
self.scheduleCaps[yieldVaultID] = scheduleCap
// New vaults go to the head; they haven't executed yet but are freshly registered.

// The registry tracks all live yield vaults, but only recurring vaults
// participate in the Supervisor's stuck-scan ordering.
// If already in the list (idempotent re-register), remove first to avoid duplicates.
let list = self._list()
if list.contains(id: yieldVaultID) {
let _ = list.remove(id: yieldVaultID)
}
list.insertAtHead(id: yieldVaultID)
if participatesInStuckScan {
list.insertAtHead(id: yieldVaultID)
}
emit YieldVaultRegistered(yieldVaultID: yieldVaultID)
}

/// Called on every execution. Moves yieldVaultID to the head (most recently executed)
/// so the Supervisor scans from the tail (least recently executed) for stuck detection — O(1).
/// If the list entry is unexpectedly missing, reinsert it to restore the ordering structure.
/// Called on every execution. Moves scan-participating yieldVaultID to the head
/// (most recently executed) so the Supervisor scans recurring participants from the tail
/// (least recently executed) for stuck detection — O(1).
access(account) fun reportExecution(yieldVaultID: UInt64) {
if !(self.yieldVaultRegistry[yieldVaultID] ?? false) {
let list = self._list()
if !(self.yieldVaultRegistry[yieldVaultID] ?? false) || !list.contains(id: yieldVaultID) {
return
}
let list = self._list()
let _ = list.remove(id: yieldVaultID)
list.insertAtHead(id: yieldVaultID)
}
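The move-to-head behavior of `reportExecution` can be sketched with a plain Python list. This is a simplification, not the contract's implementation: the real contract uses the `UInt64LinkedList` doubly linked list for O(1) removal, whereas `list.remove` here is O(n), and the class and method names are hypothetical:

```python
class ExecutionOrder:
    """Most-recently-executed vault IDs sit at the head; the Supervisor
    scans from the tail, i.e. the least recently executed participants."""

    def __init__(self) -> None:
        self.ids: list[int] = []  # index 0 is the head

    def insert_at_head(self, vault_id: int) -> None:
        self.ids.insert(0, vault_id)

    def report_execution(self, vault_id: int, registered: set[int]) -> None:
        # Mirror the contract's guard: ignore unregistered vaults and vaults
        # that do not participate in the stuck-scan ordering.
        if vault_id not in registered or vault_id not in self.ids:
            return
        self.ids.remove(vault_id)
        self.ids.insert(0, vault_id)

    def tail_walk(self, limit: int) -> list[int]:
        # Least recently executed first, up to `limit` entries.
        return list(reversed(self.ids[-limit:]))
```

The design point this models: because every execution moves its vault to the head, anything lingering near the tail has gone longest without executing, so the Supervisor only needs to inspect a bounded batch from the tail to find stuck candidates.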
@@ -211,13 +219,36 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
return self.pendingQueue.length
}

/// Returns up to `limit` vault IDs starting from the tail (least recently executed).
/// Returns up to `limit` recurring scan participants starting from the tail
/// (least recently executed among recurring participants).
/// Stale entries whose recurring config has been removed are pruned lazily as the walk proceeds.
/// Supervisor should only scan these for stuck detection instead of all registered vaults.
/// @param limit: Maximum number of IDs to return (caller typically passes MAX_BATCH_SIZE)
access(all) fun getStuckScanCandidates(limit: UInt): [UInt64] {
return self.account.storage
.borrow<&UInt64LinkedList.List>(from: self.executionListStoragePath)!
.tailWalk(limit: limit)
let list = self._list()
var result: [UInt64] = []
var current = list.tail
while UInt(result.length) < limit {
if let id = current {
let previous = list.nodes[id]?.prev
let scheduleCap = self.scheduleCaps[id]
let isRecurringParticipant =
scheduleCap != nil
&& scheduleCap!.check()
&& scheduleCap!.borrow()?.getRecurringConfig() != nil

if isRecurringParticipant {
result.append(id)
} else {
self.dequeuePending(yieldVaultID: id)
let _ = list.remove(id: id)
}
current = previous
} else {
break
}
}
return result
}
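A minimal Python sketch of the lazy-pruning tail walk above, under stated assumptions: a plain list stands in for `UInt64LinkedList` (head at index 0, tail at the end), and a dict stands in for the `scheduleCap.borrow()?.getRecurringConfig()` lookup:

```python
def stuck_scan_candidates(order: list[int],
                          recurring_config: dict[int, object],
                          limit: int) -> list[int]:
    """Walk from the tail, collecting recurring participants and lazily
    pruning entries whose recurring config has been removed."""
    result: list[int] = []
    i = len(order) - 1  # start at the tail (least recently executed)
    while i >= 0 and len(result) < limit:
        vault_id = order[i]
        if recurring_config.get(vault_id) is not None:
            result.append(vault_id)
        else:
            # Stale entry: drop it from the ordering so future scans skip it.
            del order[i]
        i -= 1
    return result
```

Note that pruned entries do not consume the caller's `limit` budget in this sketch: the walk keeps going past stale IDs until it has gathered `limit` genuine recurring participants or exhausts the list.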

/// Get global Supervisor capability, if set
/// Get global Supervisor capability, if set
5 changes: 3 additions & 2 deletions cadence/contracts/FlowYieldVaultsSchedulerV1.cdc
@@ -160,7 +160,8 @@ access(all) contract FlowYieldVaultsSchedulerV1 {
/// Detects and recovers stuck yield vaults by directly calling their scheduleNextRebalance().
///
/// Detection methods:
/// 1. State-based: Scans for registered yield vaults with no active schedule that are overdue
/// 1. State-based: Scans recurring yield vaults in stuck-scan order for candidates with
/// no active schedule that are overdue
///
/// Recovery method:
/// - Uses Schedule capability to call AutoBalancer.scheduleNextRebalance() directly
@@ -172,7 +173,7 @@ access(all) contract FlowYieldVaultsSchedulerV1 {
/// "priority": UInt8 (0=High,1=Medium,2=Low) - for Supervisor self-rescheduling
/// "executionEffort": UInt64 - for Supervisor self-rescheduling
/// "recurringInterval": UFix64 (for Supervisor self-rescheduling)
/// "scanForStuck": Bool (default true - scan up to MAX_BATCH_SIZE least-recently-executed vaults for stuck ones)
/// "scanForStuck": Bool (default true - scan up to MAX_BATCH_SIZE least-recently-executed recurring scan participants for stuck ones)
/// }
access(FlowTransactionScheduler.Execute) fun executeTransaction(id: UInt64, data: AnyStruct?) {
let cfg = data as? {String: AnyStruct} ?? {}
8 changes: 4 additions & 4 deletions cadence/scripts/flow-yield-vaults/has_active_schedule.cdc
@@ -1,12 +1,12 @@
import "FlowYieldVaultsAutoBalancers"

/// Returns true if the yield vault/AutoBalancer has at least one active (Scheduled) transaction.
/// Used to verify that healthy yield vaults maintain their scheduling chain.
/// Returns true if the yield vault/AutoBalancer has at least one active internally-managed
/// transaction. Active includes `Scheduled`, plus a recently `Executed` transaction still
/// within the optimistic-execution grace period.
///
/// @param yieldVaultID: The YieldVault/AutoBalancer ID
/// @return Bool: true if there's at least one Scheduled transaction, false otherwise
/// @return Bool: true if there's at least one active internally-managed transaction, false otherwise
///
access(all) fun main(yieldVaultID: UInt64): Bool {
return FlowYieldVaultsAutoBalancers.hasActiveSchedule(id: yieldVaultID)
}

105 changes: 96 additions & 9 deletions cadence/tests/scheduled_supervisor_test.cdc
@@ -599,6 +599,25 @@ fun testStuckYieldVaultDetectionLogic() {
log("PASS: Stuck yield vault detection correctly identifies healthy yield vaults")
}

/// Returns per-yield-vault recovery event counts from YieldVaultRecovered events.
///
/// This is used by stress tests to distinguish "all vaults recovered at least once"
/// from "lots of recovery events happened", which can otherwise hide duplicate
/// recovery churn for the same vault IDs.
///
/// Example: 240 recovery events for 200 vaults can look healthy if the test only
/// checks `events.length >= 200`, even though 40 of those events may be repeats.
access(all)
fun getRecoveredYieldVaultCounts(): {UInt64: Int} {
let counts: {UInt64: Int} = {}
let recoveredEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>())
for recoveredAny in recoveredEvents {
let recovered = recoveredAny as! FlowYieldVaultsSchedulerV1.YieldVaultRecovered
counts[recovered.yieldVaultID] = (counts[recovered.yieldVaultID] ?? 0) + 1
}
return counts
}
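The per-vault classification this helper enables can be modeled in Python. `classify_recoveries` is a hypothetical name, not part of the test suite; `events` stands in for the yieldVaultIDs extracted from `YieldVaultRecovered` events in emission order:

```python
from collections import Counter

def classify_recoveries(events: list[int],
                        expected_ids: list[int]) -> tuple[list[int], dict[int, int]]:
    """Split recovery events into missing vaults (never recovered) and
    duplicate counts (recovered more than once) per vault ID."""
    counts = Counter(events)
    missing = [v for v in expected_ids if counts[v] == 0]
    duplicates = {v: c - 1 for v, c in counts.items() if c > 1}
    return missing, duplicates
```

This makes the failure mode from the doc comment concrete: 240 events over 200 vaults satisfies a bare `len(events) >= 200` check, yet `missing` can still be non-empty if some vaults recovered repeatedly while others never did.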

/// COMPREHENSIVE TEST: Insufficient Funds -> Failure -> Recovery
///
/// This test validates the COMPLETE failure and recovery cycle:
@@ -939,9 +958,21 @@ fun testInsufficientFundsAndRecovery() {
///
/// Flow: create 200 yield vaults, run 2 scheduling rounds, drain FLOW so executions fail,
/// wait for vaults to be marked stuck, refund FLOW, schedule the supervisor, then advance
/// time for ceil(200/MAX_BATCH_SIZE)+10 supervisor ticks. Asserts all 200 vaults are
/// recovered (YieldVaultRecovered events), none still stuck, and all have active schedules.
/// The +10 extra ticks are a buffer so every vault is processed despite scheduler timing.
/// time for enough supervisor ticks to recover all unique vault IDs, plus a short
/// stabilization window. This asserts:
/// - every one of the 200 vault IDs is recovered at least once,
/// - no recovery failures occur,
/// - no vault emits more than one recovery event,
/// - once all vaults are recovered and healthy, extra supervisor ticks do not emit
/// additional recovery events,
/// - none remain stuck, and all have active schedules.
///
/// Why this was tightened:
/// the earlier version only checked `YieldVaultRecovered.length >= n` after
/// `ceil(n / MAX_BATCH_SIZE) + 10` supervisor ticks. That allowed the test to pass
/// even when some vaults recovered more than once while others had not yet been
/// uniquely validated. This version keeps the same tick budget, but uses it as a
/// timeout ceiling instead of treating it as proof that the recovery set is clean.
access(all)
fun testSupervisorHandlesManyStuckVaults() {
let n = 200
@@ -1024,19 +1055,75 @@ fun testSupervisorHandlesManyStuckVaults() {
)
Test.expect(schedSupRes, Test.beSucceeded())

// 7. Advance time for supervisor ticks (ceil(n/MAX_BATCH_SIZE)+10); each tick processes a batch
// 7. Advance time until every target vault has emitted at least one recovery event.
//
// We still compute ceil(n / MAX_BATCH_SIZE) + 10, but it now acts as a maximum
// allowed budget for supervisor ticks. The loop stops early once all 200 vault IDs
// have been seen at least once. This makes the assertion sensitive to duplicate
// recoveries: repeated events for the same vault no longer help the test finish.
//
// After all unique IDs are observed, run a short stabilization window to check
// that a healthy supervisor does not continue to emit recovery events.
let supervisorRunsNeeded = (UInt(n) + UInt(maxBatchSize) - 1) / UInt(maxBatchSize)
let maxSupervisorTicks = supervisorRunsNeeded + 10
var run = 0 as UInt
while run < supervisorRunsNeeded + 10 {
var recoveredCounts = getRecoveredYieldVaultCounts()
while run < maxSupervisorTicks && recoveredCounts.length < n {
Test.moveTime(by: 60.0 * 10.0 + 10.0)
Test.commitBlock()
run = run + 1
recoveredCounts = getRecoveredYieldVaultCounts()
}
log("testSupervisorHandlesManyStuckVaults: ran \(run.toString()) supervisor ticks to reach \(recoveredCounts.length.toString()) unique recovered vaults")
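The bounded-tick loop can be sketched in Python; `tick` and `recovered_count` are hypothetical callables standing in for `Test.moveTime`/`Test.commitBlock` and the unique-recovery query, and the budget arithmetic mirrors `ceil(n / MAX_BATCH_SIZE) + 10`:

```python
import math
from typing import Callable

def run_until_all_recovered(n: int, max_batch_size: int,
                            tick: Callable[[], None],
                            recovered_count: Callable[[], int]) -> int:
    """Advance supervisor ticks until all n vaults are recovered, using
    ceil(n / batch) + 10 as a timeout ceiling rather than as proof of success."""
    budget = math.ceil(n / max_batch_size) + 10
    ticks = 0
    while ticks < budget and recovered_count() < n:
        tick()  # advance past one supervisor interval and commit a block
        ticks += 1
    return ticks
```

Stopping as soon as all unique IDs are seen is what makes the assertion sensitive to churn: duplicate recovery events for the same vault no longer help the loop terminate.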

let recoveryFailures = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecoveryFailed>())
Test.assertEqual(0, recoveryFailures.length)

// Split the validation into:
// - missing recoveries: some vaults never emitted YieldVaultRecovered at all
// - duplicate recoveries: some vaults emitted YieldVaultRecovered more than once
//
// Both matter. Missing recoveries means the supervisor did not cover the whole set.
// Duplicate recoveries means event volume was inflated by churn, which the old
// `recoveredEvents.length >= n` assertion could not distinguish from success.
var missingRecoveries = 0
var duplicatedRecoveries = 0
var duplicatedVaults = 0
for yieldVaultID in yieldVaultIDs {
let recoveryCount = recoveredCounts[yieldVaultID] ?? 0
if recoveryCount == 0 {
missingRecoveries = missingRecoveries + 1
} else if recoveryCount > 1 {
duplicatedRecoveries = duplicatedRecoveries + (recoveryCount - 1)
duplicatedVaults = duplicatedVaults + 1
}
}
log("testSupervisorHandlesManyStuckVaults: ran \((supervisorRunsNeeded + 10).toString()) supervisor ticks")
Test.assert(
missingRecoveries == 0,
message: "expected every vault to recover at least once, but \(missingRecoveries.toString()) vaults emitted no YieldVaultRecovered event"
)
Test.assert(
duplicatedRecoveries == 0,
message: "expected exactly one recovery per vault, but saw \(duplicatedRecoveries.toString()) duplicate recoveries across \(duplicatedVaults.toString()) vaults"
)

let recoveredEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>())
Test.assert(recoveredEvents.length >= n, message: "expected at least \(n.toString()) recovered, got \(recoveredEvents.length.toString())")
log("testSupervisorHandlesManyStuckVaults: recovered \(recoveredEvents.length.toString()) vaults")
// This second guard catches late churn that may start only after the full unique
// set has already been recovered. The duplicate-per-vault check above inspects the
// state up to this point; the stabilization window verifies the system stays quiet
// once recovery should be complete.
let recoveredEventsBeforeStabilization = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>()).length
var stabilizationTick = 0
while stabilizationTick < 2 {
Test.moveTime(by: 60.0 * 10.0 + 10.0)
Test.commitBlock()
stabilizationTick = stabilizationTick + 1
}
let recoveredEventsAfterStabilization = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>()).length
Test.assert(
recoveredEventsAfterStabilization == recoveredEventsBeforeStabilization,
message: "expected no additional recovery churn after all vaults were recovered; before stabilization: \(recoveredEventsBeforeStabilization.toString()), after: \(recoveredEventsAfterStabilization.toString())"
)
log("testSupervisorHandlesManyStuckVaults: stable recovery set of \(recoveredCounts.length.toString()) unique vaults with \(recoveredEventsAfterStabilization.toString()) total recovery events")

// 8. Health check: none stuck, all have active schedules
var stillStuck = 0