From 418443bbbfcdcf1d8e623a6bfdc201c97bbd0fe5 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Sun, 21 Jun 2026 17:30:56 +0300 Subject: [PATCH] feat(playwright): unwrap report.json suite + show reasoning in screen capsule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent-browser-driver adapter read .agentResult/.turns, but the driver's canonical on-disk artifact is a TestSuiteResult ({ results: TestResult[] }) — so spansFromPlaywrightResult(report.json) returned zero spans and rendered an empty video. Accept the suite shape and map results[0] (first case; multi-case selection stays a CLI concern). The driver records per-turn reasoning on attributes.reasoning, but the screen capsule dropped it. Thread it through ScreenStep into the caption as a 💭 thinking line, so a browser run renders as a reasoning-driven screencast. --- src/adapters.test.ts | 21 +++++++++++++++++++++ src/adapters/playwright.ts | 20 +++++++++++++++++--- src/renderers/screen-capsule.ts | 9 ++++++++- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/adapters.test.ts b/src/adapters.test.ts index 704a8a3..974dde2 100644 --- a/src/adapters.test.ts +++ b/src/adapters.test.ts @@ -25,6 +25,27 @@ describe('per-surface adapters', () => { expect(screen).toHaveLength(2) expect(screen[0]?.image).toMatch(/^data:image\/jpeg;base64,/) expect(screen[0]?.url).toBe('https://app') + expect(screen[0]?.reasoning).toBe('open the app') + }) + + it('playwright TestSuiteResult (report.json) → unwraps results[0].agentResult', () => { + const spans = spansFromPlaywrightResult({ + schemaVersion: '1', + results: [ + { + agentResult: { + turns: [ + { action: { action: 'navigate', url: 'https://app' }, state: { url: 'https://app', screenshot: 'QkFTRTY0' }, reasoning: 'open' }, + { action: { action: 'click', selector: '.go' }, state: { url: 'https://app/x', screenshot: 'QkFTRTY0Mg==' } }, + ], + }, + }, + ], + } as unknown as Parameters<typeof spansFromPlaywrightResult>[0]) 
expect(spans).toHaveLength(2) + const screen = screenStepsFromSpans(spans) + expect(screen[0]?.image).toMatch(/^data:image\/jpeg;base64,/) + expect(screen[0]?.url).toBe('https://app') }) it('computer-use steps → computer spans for the screen capsule', () => { diff --git a/src/adapters/playwright.ts b/src/adapters/playwright.ts index a03aa82..0fcca9c 100644 --- a/src/adapters/playwright.ts +++ b/src/adapters/playwright.ts @@ -32,9 +32,20 @@ interface PwTurn { verified?: boolean error?: string } -/** Accepts the full TestResult, its `agentResult`, or a bare `{ turns }`. */ +interface PwSingleResult { + agentResult?: { turns?: PwTurn[]; result?: string } + turns?: PwTurn[] +} +/** + * Accepts every shape the driver writes: + * - a bare `Turn[]`, + * - a single `TestResult` (`{ agentResult }`) or its `agentResult` (`{ turns }`), + * - the canonical on-disk `report.json`, which is a `TestSuiteResult` + * (`{ results: TestResult[] }`). For a suite we map the first case by default; + * selecting among multiple cases is a caller/CLI concern, not the adapter's. + */ type PlaywrightResultLike = - | { agentResult?: { turns?: PwTurn[]; result?: string }; turns?: PwTurn[] } + | (PwSingleResult & { results?: PwSingleResult[] }) | PwTurn[] function asDataUri(b64OrUrl: string | undefined, mime: string): string | undefined { @@ -44,9 +55,12 @@ function asDataUri(b64OrUrl: string | undefined, mime: string): string | undefin } export function spansFromPlaywrightResult(result: PlaywrightResultLike, runId = 'playwright'): Span[] { + const single: PwSingleResult = Array.isArray(result) + ? {} + : (result.results?.[0] ?? result) const turns: PwTurn[] = Array.isArray(result) ? result - : (result.agentResult?.turns ?? result.turns ?? []) + : (single.agentResult?.turns ?? single.turns ?? 
[]) const spans: Span[] = [] let t = 1000 diff --git a/src/renderers/screen-capsule.ts b/src/renderers/screen-capsule.ts index b2cbd80..8190b23 100644 --- a/src/renderers/screen-capsule.ts +++ b/src/renderers/screen-capsule.ts @@ -14,6 +14,9 @@ export interface ScreenStep { url?: string /** data: URI or http(s) URL of the frame, if captured. */ image?: string + /** The agent's reasoning for this step, if the trace carried one + * (`attributes.reasoning`). Surfaced as the caption's "thinking" line. */ + reasoning?: string } function str(v: unknown): string | undefined { @@ -41,10 +44,12 @@ export function screenStepsFromSpans(spans: readonly Span[]): ScreenStep[] { const image = str(attrs?.screenshot) ?? str(attrs?.screenshotUrl) ?? str(attrs?.image) ?? str(attrs?.frame) ?? str(result?.screenshot) ?? str(result?.image) + const reasoning = str(attrs?.reasoning) ?? str(attrs?.thought) ?? str(result?.reasoning) out.push({ label: str(a?.action) ?? str(a?.selector) ?? tool.toolName, url: str(a?.url) ?? str(attrs?.url), image: isImg(image) ? image : undefined, + reasoning: reasoning ? 
reasoning.replace(/\s+/g, ' ').trim().slice(0, 280) : undefined, }) } return out @@ -88,6 +93,8 @@ export function renderScreenCapsuleHtml( .placeholder{display:flex;flex-direction:column;align-items:center;gap:14px;color:#5b6b7d} .placeholder .big{font-size:2.4rem} .caption{position:absolute;left:0;right:0;bottom:0;padding:14px 22px;background:linear-gradient(transparent,rgba(5,8,13,.92));font-family:ui-sans-serif,system-ui,sans-serif} + .caption .think{color:#cbd6e2;font-size:.95rem;font-style:italic;margin-bottom:7px;max-width:80%;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden} + .caption .think::before{content:"💭 ";font-style:normal} .caption .act{font-size:1.05rem;font-weight:600} .caption .url{color:#58a6ff;font-size:.85rem;word-break:break-all} footer{display:flex;align-items:center;gap:14px;padding:9px 16px;background:#111a29;border-top:1px solid #1e2a3a;font-size:.82rem;color:#9aa7b5} @@ -110,7 +117,7 @@ export function renderScreenCapsuleHtml( STEPS.forEach(function(s,k){ var f=document.createElement('div'); f.className='frame'; f.dataset.k=k; var inner = s.image ? '' : '
🖥️
'+esc(s.label)+'
'; - f.innerHTML=inner+'
'+esc(s.label)+'
'+(s.url?'
'+esc(s.url)+'
':'')+'
'; + f.innerHTML=inner+'
'+(s.reasoning?'
'+esc(s.reasoning)+'
':'')+'
'+esc(s.label)+'
'+(s.url?'
'+esc(s.url)+'
':'')+'
'; stage.appendChild(f); }); }