diff --git a/CLAUDE.md b/CLAUDE.md index c97a167..0afd300 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -48,6 +48,39 @@ pnpm typecheck # Type check 2. Register in `src/bin.ts` and update `src/utils/help-json.ts` command registry 3. Include JSON mode tests in spec file +## Telemetry Wiring for New Commands + +All commands auto-emit a `command` telemetry event with name, duration, and success/failure. How you register the command determines whether this is automatic: + +**Subcommands via `registerSubcommand()`** → auto-wired. Telemetry happens for free. + +```typescript +.command('user', 'Manage users', (yargs) => { + registerSubcommand(yargs, 'reset-password', '...', (y) => y, + async (argv) => { await runResetPassword(argv); }, // auto-wrapped + ); +}) +``` + +**Top-level `.command()` with inline handler** → MUST manually wrap with `wrapCommandHandler()`: + +```typescript +.command( + 'migrate', + 'Migrate from another provider', + (yargs) => yargs.options({...}), + wrapCommandHandler(async (argv) => { // <-- REQUIRED + await runMigrate(argv); + }), +) +``` + +If you forget `wrapCommandHandler`, the command still emits a telemetry event (queued by middleware), but duration will be `0` and success will always be `true` -- misleading data in dashboards. + +**Skip list**: commands in `SKIP_TELEMETRY_COMMANDS` (`command-telemetry.ts`) are excluded from command-level telemetry because they have their own session-based telemetry. Currently: `install`, `dashboard`, `root` (the default `$0` handler). Add to this set if you're building another installer entry point. + +**Aliases**: if you register a command with multiple names (e.g., `['organization', 'org']`), add the alias to `src/lib/command-aliases.ts` so metrics don't fragment across `org.list` and `organization.list`. + ## Do / Don't **Do:** diff --git a/src/bin.ts b/src/bin.ts index 169811b..44cfed8 100644 --- a/src/bin.ts +++ b/src/bin.ts @@ -31,6 +31,27 @@ import { isNonInteractiveEnvironment } from './utils/environment.js'; import { resolveOutputMode, setOutputMode, isJsonMode, outputJson, exitWithError } from './utils/output.js'; import clack from './utils/clack.js'; import { registerSubcommand } from './utils/register-subcommand.js'; +import { COMMAND_ALIASES } from './lib/command-aliases.js'; +import { installCrashReporter } from './utils/crash-reporter.js'; +import { installStoreForward, recoverPendingEvents } from './utils/telemetry-store-forward.js'; +import { commandTelemetryMiddleware, wrapCommandHandler } from './utils/command-telemetry.js'; +import { analytics } from './utils/analytics.js'; + +// Enable debug logging for all commands via env var. +// Subsumes the installer's --debug flag for non-installer commands. +if (process.env.WORKOS_DEBUG === '1') { + const { enableDebugLogs } = await import('./utils/debug.js'); + enableDebugLogs(); +} + +// Telemetry infrastructure: crash reporter, store-forward, and gateway init. +// Must be before yargs so crashes during startup are captured. +installCrashReporter(); +installStoreForward(); +analytics.initForNonInstaller(); +// Fire-and-forget: recover events from previous crashes/exits. +// NO await — must not block startup (flush timeout is 3s). +recoverPendingEvents(); // Resolve output mode early from raw argv (before yargs parses) const rawArgs = hideBin(process.argv); @@ -40,9 +61,8 @@ setOutputMode(resolveOutputMode(hasJsonFlag)); // Intercept --help --json before yargs parses (yargs exits on --help) if (hasJsonFlag && (rawArgs.includes('--help') || rawArgs.includes('-h'))) { const { buildCommandTree } = await import('./utils/help-json.js'); - const commandAliases: Record = { org: 'organization' }; const rawCommand = rawArgs.find((a) => !a.startsWith('-')); - const command = rawCommand ? (commandAliases[rawCommand] ?? rawCommand) : undefined; + const command = rawCommand ? (COMMAND_ALIASES[rawCommand] ?? rawCommand) : undefined; outputJson(buildCommandTree(command)); process.exit(0); } @@ -183,6 +203,7 @@ yargs(rawArgs) describe: 'Output results as JSON (auto-enabled in non-TTY)', global: true, }) + .middleware(commandTelemetryMiddleware(rawArgs)) .middleware(async (argv) => { // Warn about unclaimed environments before management commands. // Excluded: auth/claim/install/dashboard handle their own credential flows; @@ -344,10 +365,10 @@ yargs(rawArgs) description: 'Copy report to clipboard', }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { const { handleDoctor } = await import('./commands/doctor.js'); await handleDoctor(argv); - }, + }), ) // NOTE: When adding commands here, also update src/utils/help-json.ts .command('env', 'Manage environment configurations (API keys, endpoints, active environment)', (yargs) => { @@ -2008,6 +2029,10 @@ yargs(rawArgs) return yargs.demandCommand(1, 'Please specify an org-domain subcommand').strict(); }) // --- Workflow Commands --- + // NOTE: Top-level `.command()` registrations with inline handlers MUST wrap + // the handler with `wrapCommandHandler()` for correct command telemetry. + // Subcommands registered via `registerSubcommand()` are auto-wrapped. + // See CLAUDE.md "Telemetry Wiring for New Commands". .command( 'seed', 'Seed WorkOS environment from a YAML config file', @@ -2019,7 +2044,7 @@ yargs(rawArgs) clean: { type: 'boolean', default: false, describe: 'Tear down seeded resources' }, init: { type: 'boolean', default: false, describe: 'Create an example workos-seed.yml file' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runSeed } = await import('./commands/seed.js'); @@ -2028,7 +2053,7 @@ yargs(rawArgs) resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl(), ); - }, + }), ) .command( 'setup-org ', @@ -2040,7 +2065,7 @@ yargs(rawArgs) domain: { type: 'string', describe: 'Domain to add and verify' }, roles: { type: 'string', describe: 'Comma-separated role slugs to create' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runSetupOrg } = await import('./commands/setup-org.js'); @@ -2049,7 +2074,7 @@ yargs(rawArgs) resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl(), ); - }, + }), ) .command( 'onboard-user ', @@ -2062,7 +2087,7 @@ yargs(rawArgs) role: { type: 'string', describe: 'Role slug to assign' }, wait: { type: 'boolean', default: false, describe: 'Wait for invitation acceptance' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runOnboardUser } = await import('./commands/onboard-user.js'); @@ -2071,7 +2096,7 @@ yargs(rawArgs) resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl(), ); - }, + }), ) .command( 'debug-sso ', @@ -2081,12 +2106,12 @@ yargs(rawArgs) ...insecureStorageOption, 'api-key': { type: 'string' as const, describe: 'WorkOS API key' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runDebugSso } = await import('./commands/debug-sso.js'); await runDebugSso(argv.connectionId, resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl()); - }, + }), ) .command( 'debug-sync ', @@ -2096,12 +2121,12 @@ yargs(rawArgs) ...insecureStorageOption, 'api-key': { type: 'string' as const, describe: 'WorkOS API key' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runDebugSync } = await import('./commands/debug-sync.js'); await runDebugSync(argv.directoryId, resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl()); - }, + }), ) // Alias — canonical command is `workos env claim` .command( @@ -2111,11 +2136,11 @@ yargs(rawArgs) yargs.options({ ...insecureStorageOption, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { runClaim } = await import('./commands/claim.js'); await runClaim(); - }, + }), ) .command( 'install', @@ -2136,10 +2161,10 @@ yargs(rawArgs) port: { type: 'number', default: 4100, describe: 'Port to listen on' }, seed: { type: 'string', describe: 'Path to seed config file (YAML or JSON)' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { const { runEmulate } = await import('./commands/emulate.js'); await runEmulate({ port: argv.port, seed: argv.seed, json: argv.json as boolean }); - }, + }), ) .command( 'dev', @@ -2149,14 +2174,14 @@ yargs(rawArgs) port: { type: 'number', default: 4100, describe: 'Emulator port' }, seed: { type: 'string', describe: 'Path to seed config file' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { const { runDev } = await import('./commands/dev.js'); await runDev({ port: argv.port, seed: argv.seed, '--': argv['--'] as string[] | undefined, }); - }, + }), ) .command('debug', false, (yargs) => { yargs.options(insecureStorageOption); diff --git a/src/commands/debug.ts b/src/commands/debug.ts index c523d40..a711e44 100644 --- a/src/commands/debug.ts +++ b/src/commands/debug.ts @@ -321,6 +321,7 @@ interface EnvVarInfo { } const ENV_VAR_CATALOG: { name: string; effect: string }[] = [ + { name: 'WORKOS_DEBUG', effect: 'Set to "1" to enable verbose debug logging for all commands' }, { name: 'WORKOS_API_KEY', effect: 'Bypasses credential resolution — used directly for API calls' }, { name: 'WORKOS_FORCE_TTY', effect: 'Forces human (non-JSON) output mode, even when piped' }, { name: 'WORKOS_NO_PROMPT', effect: 'Forces non-interactive/JSON mode' }, diff --git a/src/lib/command-aliases.ts b/src/lib/command-aliases.ts new file mode 100644 index 0000000..7a8632e --- /dev/null +++ b/src/lib/command-aliases.ts @@ -0,0 +1,11 @@ +/** + * Shared canonical command alias map. + * Single source of truth for both telemetry and help-json. + * + * Keys are user-facing aliases, values are canonical command names. + * Adding an alias here updates both metrics aggregation and --help --json output. + */ +export const COMMAND_ALIASES: Record = { + org: 'organization', + claim: 'env.claim', +}; diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index 97dc2a4..24bf863 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -5,6 +5,7 @@ const mockSetGatewayUrl = vi.fn(); const mockSetAccessToken = vi.fn(); const mockQueueEvent = vi.fn(); const mockFlush = vi.fn().mockResolvedValue(undefined); +const mockReplaceLastEventOfType = vi.fn(); vi.mock('./telemetry-client.js', () => ({ telemetryClient: { @@ -12,6 +13,7 @@ vi.mock('./telemetry-client.js', () => ({ setAccessToken: mockSetAccessToken, queueEvent: mockQueueEvent, flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), }, })); @@ -25,6 +27,27 @@ vi.mock('uuid', () => ({ v4: () => 'test-session-id-123', })); +// Mock settings for initForNonInstaller +const mockGetLlmGatewayUrl = vi.fn(() => 'https://api.workos.com/llm-gateway'); +const mockSettingsConfig = { + nodeVersion: '>=18', + logging: { debugMode: false }, + telemetry: { enabled: true, eventName: 'installer_interaction' }, + documentation: { workosDocsUrl: 'https://workos.com/docs', dashboardUrl: 'https://dashboard.workos.com', issuesUrl: 'https://github.com' }, + legacy: { oauthPort: 3000 }, +}; +vi.mock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => mockGetLlmGatewayUrl(), + getConfig: () => mockSettingsConfig, + getVersion: () => '0.12.1', +})); + +// Mock credentials for initForNonInstaller +const mockGetCredentials = vi.fn(); +vi.mock('../lib/credentials.js', () => ({ + getCredentials: () => mockGetCredentials(), +})); + describe('Analytics', () => { // Need to handle WORKOS_TELEMETRY_ENABLED which is evaluated at import time const originalEnv = process.env.WORKOS_TELEMETRY; @@ -56,8 +79,17 @@ describe('Analytics', () => { setAccessToken: mockSetAccessToken, queueEvent: mockQueueEvent, flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), }, })); + vi.doMock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => mockGetLlmGatewayUrl(), + getConfig: () => mockSettingsConfig, + getVersion: () => '0.12.1', + })); + vi.doMock('../lib/credentials.js', () => ({ + getCredentials: () => mockGetCredentials(), + })); const module = await import('./analytics.js'); Analytics = module.Analytics; analytics = new Analytics(); @@ -206,6 +238,17 @@ describe('Analytics', () => { }), ); }); + + it('includes environment fingerprint fields', () => { + analytics.sessionStart('cli', '1.0.0'); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'session.start')[0]; + expect(event.attributes).toHaveProperty('env.os'); + expect(event.attributes).toHaveProperty('env.os_version'); + expect(event.attributes).toHaveProperty('env.node_version'); + expect(event.attributes).toHaveProperty('env.shell'); + expect(typeof event.attributes['env.ci']).toBe('boolean'); + }); }); describe('shutdown', () => { @@ -260,6 +303,21 @@ describe('Analytics', () => { }), ); }); + + it('includes env fingerprint and installer.mode', async () => { + analytics.sessionStart('tui', '1.0.0'); + mockQueueEvent.mockClear(); + + await analytics.shutdown('success'); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'session.end')[0]; + expect(event.attributes).toHaveProperty('env.os'); + expect(event.attributes).toHaveProperty('env.os_version'); + expect(event.attributes).toHaveProperty('env.node_version'); + expect(event.attributes).toHaveProperty('env.shell'); + expect(typeof event.attributes['env.ci']).toBe('boolean'); + expect(event.attributes['installer.mode']).toBe('tui'); + }); }); describe('getFeatureFlag', () => { @@ -306,6 +364,14 @@ describe('Analytics', () => { const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'step')[0]; expect(event.error).toBeUndefined(); }); + + it('includes startTimestamp as valid ISO 8601', () => { + analytics.stepCompleted('detect_framework', 150, true); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'step')[0]; + expect(event.startTimestamp).toBeDefined(); + expect(new Date(event.startTimestamp).toISOString()).toBe(event.startTimestamp); + }); }); describe('toolCalled', () => { @@ -334,6 +400,14 @@ describe('Analytics', () => { }), ); }); + + it('includes startTimestamp as valid ISO 8601', () => { + analytics.toolCalled('Write', 50, true); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'agent.tool')[0]; + expect(event.startTimestamp).toBeDefined(); + expect(new Date(event.startTimestamp).toISOString()).toBe(event.startTimestamp); + }); }); describe('llmRequest', () => { @@ -351,6 +425,13 @@ describe('Analytics', () => { ); }); + it('does NOT include startTimestamp (point-in-time marker)', () => { + analytics.llmRequest('claude-sonnet-4-20250514', 1000, 500); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'agent.llm')[0]; + expect(event.startTimestamp).toBeUndefined(); + }); + it('accumulates tokens for session.end', async () => { analytics.llmRequest('claude-sonnet-4-20250514', 1000, 500); analytics.llmRequest('claude-sonnet-4-20250514', 800, 300); @@ -375,6 +456,137 @@ describe('Analytics', () => { expect(sessionEnd.attributes['installer.agent.iterations']).toBe(3); }); }); + + describe('commandExecuted', () => { + it('queues a command event with correct attributes', () => { + analytics.commandExecuted('org.list', 200, true); + + expect(mockQueueEvent).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'command', + attributes: expect.objectContaining({ + 'command.name': 'org.list', + 'command.duration_ms': 200, + 'command.success': true, + 'env.os': expect.any(String), + 'env.node_version': expect.any(String), + }), + }), + ); + }); + + it('includes error info when provided', () => { + const error = new TypeError('Not found'); + analytics.commandExecuted('org.get', 50, false, { error }); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'command')[0]; + expect(event.attributes['command.error_type']).toBe('TypeError'); + expect(event.attributes['command.error_message']).toBe('Not found'); + }); + + it('includes flags as comma-separated names', () => { + analytics.commandExecuted('org.list', 100, true, { flags: ['json', 'limit'] }); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'command')[0]; + expect(event.attributes['command.flags']).toBe('json,limit'); + }); + }); + + describe('captureUnhandledCrash', () => { + it('queues a crash event with error details', () => { + const error = new Error('Unexpected failure'); + error.stack = 'Error: Unexpected failure\n at foo.ts:1'; + analytics.captureUnhandledCrash(error, { command: 'install', version: '1.0.0' }); + + expect(mockQueueEvent).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'crash', + attributes: expect.objectContaining({ + 'crash.error_type': 'Error', + 'crash.error_message': 'Unexpected failure', + 'crash.stack': 'Error: Unexpected failure\n at foo.ts:1', + 'crash.command': 'install', + 'cli.version': '1.0.0', + 'env.os': expect.any(String), + 'env.node_version': expect.any(String), + }), + }), + ); + }); + + it('truncates stack traces to 4KB with a truncation marker', () => { + const error = new Error('Big stack'); + error.stack = 'x'.repeat(5000); + analytics.captureUnhandledCrash(error); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; + // sanitizeStack truncates at 4096 and appends '\n...[truncated]' + expect(event.attributes['crash.stack']).toMatch(/\n\.\.\.\[truncated\]$/); + expect(event.attributes['crash.stack'].startsWith('x'.repeat(4096))).toBe(true); + }); + + it('falls back to package version when not explicitly provided', () => { + analytics.captureUnhandledCrash(new Error('test')); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; + // Falls back to getVersion() which reads from package.json — any real version string + expect(event.attributes['cli.version']).toEqual(expect.any(String)); + expect(event.attributes['cli.version']).not.toBe(''); + }); + }); + + describe('initForNonInstaller', () => { + it('sets gatewayUrl from default config', () => { + mockGetLlmGatewayUrl.mockReturnValue('https://api.workos.com/llm-gateway'); + analytics.initForNonInstaller(); + + expect(mockSetGatewayUrl).toHaveBeenCalledWith('https://api.workos.com/llm-gateway'); + }); + + it('sets access token from stored credentials', () => { + mockGetCredentials.mockReturnValue({ accessToken: 'stored-jwt-token' }); + analytics.initForNonInstaller(); + + expect(mockSetAccessToken).toHaveBeenCalledWith('stored-jwt-token'); + }); + + it('skips access token when no credentials stored', () => { + mockGetCredentials.mockReturnValue(null); + analytics.initForNonInstaller(); + + expect(mockSetAccessToken).not.toHaveBeenCalled(); + }); + }); + + describe('replaceLastCommandEvent', () => { + it('removes last command event and queues a new one', () => { + analytics.replaceLastCommandEvent('organization.list', 150, true, { flags: ['json'] }); + + expect(mockReplaceLastEventOfType).toHaveBeenCalledWith('command'); + expect(mockQueueEvent).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'command', + attributes: expect.objectContaining({ + 'command.name': 'organization.list', + 'command.duration_ms': 150, + 'command.success': true, + 'command.flags': 'json', + }), + }), + ); + }); + + it('includes error info on failure', () => { + const error = new Error('oops'); + error.name = 'CommandError'; + analytics.replaceLastCommandEvent('auth.login', 50, false, { error }); + + const event = mockQueueEvent.mock.calls[0][0]; + expect(event.attributes['command.success']).toBe(false); + expect(event.attributes['command.error_type']).toBe('CommandError'); + expect(event.attributes['command.error_message']).toBe('oops'); + }); + }); }); describe('with telemetry disabled', () => { @@ -387,8 +599,17 @@ describe('Analytics', () => { setAccessToken: mockSetAccessToken, queueEvent: mockQueueEvent, flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), }, })); + vi.doMock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => mockGetLlmGatewayUrl(), + getConfig: () => mockSettingsConfig, + getVersion: () => '0.12.1', + })); + vi.doMock('../lib/credentials.js', () => ({ + getCredentials: () => mockGetCredentials(), + })); }); it('capture does nothing', async () => { @@ -454,5 +675,42 @@ describe('Analytics', () => { expect(mockQueueEvent).not.toHaveBeenCalled(); }); + + it('commandExecuted does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.commandExecuted('org.list', 100, true); + + expect(mockQueueEvent).not.toHaveBeenCalled(); + }); + + it('captureUnhandledCrash does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.captureUnhandledCrash(new Error('test')); + + expect(mockQueueEvent).not.toHaveBeenCalled(); + }); + + it('initForNonInstaller does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.initForNonInstaller(); + + expect(mockSetGatewayUrl).not.toHaveBeenCalled(); + }); + + it('replaceLastCommandEvent does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.replaceLastCommandEvent('org.list', 100, true); + + expect(mockReplaceLastEventOfType).not.toHaveBeenCalled(); + expect(mockQueueEvent).not.toHaveBeenCalled(); + }); }); }); diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index c0f348e..a66a61e 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -1,3 +1,4 @@ +import os from 'node:os'; import { v4 as uuidv4 } from 'uuid'; import { debug } from './debug.js'; import { telemetryClient } from './telemetry-client.js'; @@ -7,14 +8,21 @@ import type { StepEvent, AgentToolEvent, AgentLLMEvent, + CommandEvent, + CrashEvent, } from './telemetry-types.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; +import { getLlmGatewayUrl, getVersion } from '../lib/settings.js'; +import { getCredentials } from '../lib/credentials.js'; +import { getActiveEnvironment, isUnclaimedEnvironment } from '../lib/config-store.js'; +import { sanitizeMessage, sanitizeStack } from './crash-reporter.js'; export class Analytics { private tags: Record = {}; private sessionId: string; private sessionStartTime: Date; private distinctId?: string; + private mode?: 'cli' | 'tui' | 'headless'; // Agent metrics tracking private totalInputTokens = 0; @@ -39,6 +47,47 @@ export class Analytics { telemetryClient.setGatewayUrl(url); } + /** + * Initialize telemetry for non-installer commands. + * Sets gatewayUrl from default config and loads auth credentials. + * + * Auth priority: + * 1. Stored JWT (Bearer) — logged-in users + * 2. Claim token — unclaimed environments + * 3. None — API-key-only users (telemetry silently dropped by API guard) + * + * Also captures the user/environment identifier for per-user analytics. + * The installer flow sets these itself in run-with-core.ts; this covers + * management commands like `org list`, `auth login`, etc. + */ + initForNonInstaller(): void { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const gatewayUrl = getLlmGatewayUrl(); + telemetryClient.setGatewayUrl(gatewayUrl); + + const creds = getCredentials(); + if (creds?.accessToken) { + telemetryClient.setAccessToken(creds.accessToken); + } + if (creds?.userId) { + this.distinctId = creds.userId; + } + + // Check for unclaimed environment — fall back to claim-token auth + // so unclaimed users' telemetry still reaches the backend. + try { + const env = getActiveEnvironment(); + if (env && isUnclaimedEnvironment(env)) { + telemetryClient.setClaimTokenAuth(env.clientId, env.claimToken); + // Tag distinctId so unclaimed sessions are identifiable in analytics + this.distinctId = this.distinctId ?? `unclaimed:${env.clientId}`; + } + } catch { + // Config-store failure is non-fatal for telemetry + } + } + setTag(key: string, value: string | boolean | number | null | undefined) { this.tags[key] = value; } @@ -62,8 +111,9 @@ export class Analytics { if (!WORKOS_TELEMETRY_ENABLED) return; debug('[Analytics] captureException:', error.message, properties); - this.tags['error.type'] = error.name; - this.tags['error.message'] = error.message; + const { type, message } = this.extractErrorFields(error); + this.tags['error.type'] = type; + this.tags['error.message'] = message; } async getFeatureFlag(_flagKey: string): Promise { @@ -71,9 +121,48 @@ export class Analytics { return undefined; } + /** All capture methods that record error details MUST go through this. */ + private extractErrorFields(error: Error): { type: string; message: string } { + return { + type: error.name, + message: sanitizeMessage(error.message), + }; + } + + private detectCiProvider(): string | undefined { + if (process.env.GITHUB_ACTIONS) return 'github-actions'; + if (process.env.BUILDKITE) return 'buildkite'; + if (process.env.CIRCLECI) return 'circleci'; + if (process.env.GITLAB_CI) return 'gitlab-ci'; + if (process.env.JENKINS_URL) return 'jenkins'; + return undefined; + } + + private getEnvFingerprint() { + let osVersion: string; + try { + osVersion = os.release(); + } catch { + osVersion = 'unknown'; + } + + const ciProvider = this.detectCiProvider(); + + return { + 'env.os': process.platform, + 'env.os_version': osVersion, + 'env.node_version': process.version, + 'env.shell': process.env.SHELL ?? process.env.COMSPEC ?? 'unknown', + 'env.ci': Boolean(process.env.CI || process.env.GITHUB_ACTIONS || process.env.BUILDKITE), + ...(ciProvider ? { 'env.ci_provider': ciProvider } : {}), + }; + } + sessionStart(mode: 'cli' | 'tui' | 'headless', version: string) { if (!WORKOS_TELEMETRY_ENABLED) return; + this.mode = mode; + const event: SessionStartEvent = { type: 'session.start', sessionId: this.sessionId, @@ -82,6 +171,7 @@ export class Analytics { 'installer.version': version, 'installer.mode': mode, 'workos.user_id': this.distinctId, + ...this.getEnvFingerprint(), }, }; @@ -96,9 +186,10 @@ export class Analytics { sessionId: this.sessionId, timestamp: new Date().toISOString(), name, + startTimestamp: new Date(Date.now() - durationMs).toISOString(), durationMs, success, - error: error ? { type: error.name, message: error.message } : undefined, + error: error ? this.extractErrorFields(error) : undefined, }; telemetryClient.queueEvent(event); @@ -112,6 +203,7 @@ export class Analytics { sessionId: this.sessionId, timestamp: new Date().toISOString(), toolName, + startTimestamp: new Date(Date.now() - durationMs).toISOString(), durationMs, success, }; @@ -141,6 +233,83 @@ export class Analytics { this.agentIterations++; } + commandExecuted( + name: string, + durationMs: number, + success: boolean, + options?: { error?: Error; flags?: string[] }, + ) { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const errorFields = options?.error ? this.extractErrorFields(options.error) : undefined; + + const event: CommandEvent = { + type: 'command', + sessionId: this.sessionId, + timestamp: new Date().toISOString(), + attributes: { + 'command.name': name, + 'command.duration_ms': durationMs, + 'command.success': success, + 'cli.version': getVersion(), + ...(this.distinctId ? { 'workos.user_id': this.distinctId } : {}), + ...(errorFields + ? { + 'command.error_type': errorFields.type, + 'command.error_message': errorFields.message, + } + : {}), + ...(options?.flags?.length + ? { 'command.flags': options.flags.join(',') } + : {}), + ...this.getEnvFingerprint(), + }, + }; + + telemetryClient.queueEvent(event); + } + + /** + * Replace the last queued command event with updated data. + * Used by the command handler wrapper to swap the provisional event + * (queued by middleware) with the real one after the handler completes. + */ + replaceLastCommandEvent( + name: string, + durationMs: number, + success: boolean, + options?: { error?: Error; flags?: string[] }, + ) { + if (!WORKOS_TELEMETRY_ENABLED) return; + + telemetryClient.replaceLastEventOfType('command'); + + this.commandExecuted(name, durationMs, success, options); + } + + captureUnhandledCrash(error: Error, options?: { command?: string; version?: string }) { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const { type, message } = this.extractErrorFields(error); + + const event: CrashEvent = { + type: 'crash', + sessionId: this.sessionId, + timestamp: new Date().toISOString(), + attributes: { + 'crash.error_type': type, + 'crash.error_message': message, + 'crash.stack': sanitizeStack(error.stack), + ...(options?.command ? { 'crash.command': options.command } : {}), + 'cli.version': options?.version ?? getVersion(), + ...(this.distinctId ? { 'workos.user_id': this.distinctId } : {}), + ...this.getEnvFingerprint(), + }, + }; + + telemetryClient.queueEvent(event); + } + async shutdown(status: 'success' | 'error' | 'cancelled') { if (!WORKOS_TELEMETRY_ENABLED) return; @@ -152,6 +321,8 @@ export class Analytics { string | number | boolean >; + const envFingerprint = this.getEnvFingerprint(); + const event: SessionEndEvent = { type: 'session.end', sessionId: this.sessionId, @@ -162,6 +333,8 @@ export class Analytics { 'installer.agent.iterations': this.agentIterations, 'installer.agent.tokens.input': this.totalInputTokens, 'installer.agent.tokens.output': this.totalOutputTokens, + ...envFingerprint, + ...(this.mode ? { 'installer.mode': this.mode } : {}), ...extraAttributes, }, }; diff --git a/src/utils/command-telemetry.spec.ts b/src/utils/command-telemetry.spec.ts new file mode 100644 index 0000000..d0095fe --- /dev/null +++ b/src/utils/command-telemetry.spec.ts @@ -0,0 +1,159 @@ +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { resolveCanonicalName, extractUserFlags, commandTelemetryMiddleware, wrapCommandHandler } from './command-telemetry.js'; + +const mockCommandExecuted = vi.fn(); +const mockReplaceLastCommandEvent = vi.fn(); + +vi.mock('./analytics.js', () => ({ + analytics: { + commandExecuted: (...args: unknown[]) => mockCommandExecuted(...args), + replaceLastCommandEvent: (...args: unknown[]) => mockReplaceLastCommandEvent(...args), + }, +})); + +vi.mock('../lib/constants.js', () => ({ + WORKOS_TELEMETRY_ENABLED: true, +})); + +describe('command-telemetry', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('resolveCanonicalName', () => { + it('resolves aliased commands', () => { + expect(resolveCanonicalName(['org', 'list'])).toBe('organization.list'); + }); + + it('passes through non-aliased commands', () => { + expect(resolveCanonicalName(['auth', 'login'])).toBe('auth.login'); + }); + + it('returns root for empty parts', () => { + expect(resolveCanonicalName([])).toBe('root'); + }); + + it('handles single-part commands', () => { + expect(resolveCanonicalName(['install'])).toBe('install'); + }); + + it('only aliases the first part', () => { + expect(resolveCanonicalName(['org', 'org'])).toBe('organization.org'); + }); + }); + + describe('extractUserFlags', () => { + it('extracts long flags', () => { + expect(extractUserFlags(['org', 'list', '--json'])).toEqual(['json']); + }); + + it('extracts short flags', () => { + expect(extractUserFlags(['-v'])).toEqual(['v']); + }); + + it('handles flags with values', () => { + expect(extractUserFlags(['--env=staging'])).toEqual(['env']); + }); + + it('deduplicates flags', () => { + expect(extractUserFlags(['--json', '--json'])).toEqual(['json']); + }); + + it('ignores positionals', () => { + expect(extractUserFlags(['org', 'list', 'my-org'])).toEqual([]); + }); + + it('ignores multi-char short flags (not real flags)', () => { + expect(extractUserFlags(['-abc'])).toEqual([]); + }); + }); + + describe('commandTelemetryMiddleware', () => { + it('queues provisional event with duration=0', async () => { + const middleware = commandTelemetryMiddleware(['org', 'list', '--json']); + const argv: Record = { _: ['org', 'list'] }; + + await middleware(argv); + + expect(mockCommandExecuted).toHaveBeenCalledWith('organization.list', 0, true, { + flags: ['json'], + }); + }); + + it('stores telemetry metadata on argv', async () => { + const middleware = commandTelemetryMiddleware(['auth', 'login']); + const argv: Record = { _: ['auth', 'login'] }; + + await middleware(argv); + + expect(argv.__telemetryCommandName).toBe('auth.login'); + expect(argv.__telemetryStartTime).toBeTypeOf('number'); + expect(argv.__telemetryFlags).toEqual([]); + }); + }); + + describe('wrapCommandHandler', () => { + it('replaces provisional event on success', async () => { + const handler = vi.fn().mockResolvedValue(undefined); + const wrapped = wrapCommandHandler(handler); + const argv = { + __telemetryCommandName: 'organization.list', + __telemetryStartTime: Date.now() - 100, + __telemetryFlags: ['json'], + }; + + await wrapped(argv); + + expect(handler).toHaveBeenCalledWith(argv); + expect(mockReplaceLastCommandEvent).toHaveBeenCalledWith( + 'organization.list', + expect.any(Number), + true, + { flags: ['json'] }, + ); + const duration = mockReplaceLastCommandEvent.mock.calls[0][1] as number; + expect(duration).toBeGreaterThanOrEqual(100); + }); + + it('replaces provisional event on failure with error', async () => { + const error = new Error('command failed'); + const handler = vi.fn().mockRejectedValue(error); + const wrapped = wrapCommandHandler(handler); + const argv = { + __telemetryCommandName: 'organization.list', + __telemetryStartTime: Date.now(), + __telemetryFlags: [], + }; + + await expect(wrapped(argv)).rejects.toThrow('command failed'); + + expect(mockReplaceLastCommandEvent).toHaveBeenCalledWith( + 'organization.list', + expect.any(Number), + false, + { error, flags: [] }, + ); + }); + + it('re-throws the original error', async () => { + const error = new Error('original'); + const handler = vi.fn().mockRejectedValue(error); + const wrapped = wrapCommandHandler(handler); + + await expect(wrapped({ __telemetryStartTime: Date.now() })).rejects.toBe(error); + }); + + it('handles non-Error throws', async () => { + const handler = vi.fn().mockRejectedValue('string error'); + const wrapped = wrapCommandHandler(handler); + + await expect( + wrapped({ __telemetryCommandName: 'test', __telemetryStartTime: Date.now(), __telemetryFlags: [] }), + ).rejects.toBe('string error'); + + const errorArg = mockReplaceLastCommandEvent.mock.calls[0][3].error; + expect(errorArg).toBeInstanceOf(Error); + expect(errorArg.message).toBe('string error'); + }); + }); +}); diff --git a/src/utils/command-telemetry.ts b/src/utils/command-telemetry.ts new file mode 100644 index 0000000..facd812 --- /dev/null +++ b/src/utils/command-telemetry.ts @@ -0,0 +1,99 @@ +import { analytics } from './analytics.js'; +import { telemetryClient } from './telemetry-client.js'; +import { COMMAND_ALIASES } from '../lib/command-aliases.js'; +import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; + +/** Commands that have their own telemetry (e.g., installer session events). + * 'root' is the default $0 handler which prompts to run the installer. */ +const SKIP_TELEMETRY_COMMANDS = new Set(['install', 'dashboard', 'root']); + +/** + * Resolve user-typed command parts to their canonical name. + * Applies alias mapping to the top-level command only. + * + * Examples: + * ['org', 'list'] -> 'organization.list' + * ['auth', 'login'] -> 'auth.login' + * [] -> 'root' + */ +export function resolveCanonicalName(parts: string[]): string { + if (parts.length === 0) return 'root'; + const resolved = [...parts]; + resolved[0] = COMMAND_ALIASES[resolved[0]] ?? resolved[0]; + return resolved.join('.'); +} + +/** + * Extract only user-supplied flags (not positionals, not defaults). + * Parses rawArgs directly instead of argv to avoid positionals and camelCase dupes. + */ +export function extractUserFlags(rawArgs: string[]): string[] { + const passedFlags = rawArgs + .filter((arg) => arg.startsWith('--') || (arg.startsWith('-') && arg.length === 2)) + .map((arg) => arg.replace(/^-+/, '').split('=')[0]); + return [...new Set(passedFlags)]; +} + +/** + * Yargs middleware that queues a PROVISIONAL command event immediately. + * This ensures there's always an event to persist via store-forward, + * even if the handler calls process.exit() before returning. + * + * The provisional event has success=true and duration=0. It gets + * updated by the handler wrapper on normal completion/failure. + */ +export function commandTelemetryMiddleware(rawArgs: string[]) { + return async (argv: Record) => { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const commandParts = (argv._ as string[]) || []; + const commandName = resolveCanonicalName(commandParts); + const flags = extractUserFlags(rawArgs); + const startTime = Date.now(); + + // Store metadata for the handler wrapper to update later + argv.__telemetryCommandName = commandName; + argv.__telemetryStartTime = startTime; + argv.__telemetryFlags = flags; + + // Skip provisional event for commands with their own telemetry (e.g., install) + const topLevelCommand = commandParts[0] ?? ''; + if (SKIP_TELEMETRY_COMMANDS.has(topLevelCommand)) return; + + // Queue provisional event NOW, before the handler runs. + // If the handler calls process.exit(), store-forward persists this. + analytics.commandExecuted(commandName, 0, true, { flags }); + }; +} + +/** + * Wraps a yargs command handler to UPDATE the provisional event + * with actual duration and success/failure on completion. + * Designed to be called inside registerSubcommand(), not at each call site. + */ +export function wrapCommandHandler( + handler: (argv: any) => Promise, +): (argv: any) => Promise { + return async (argv) => { + const commandName = String(argv.__telemetryCommandName ?? 'unknown'); + const startTime = Number(argv.__telemetryStartTime ?? Date.now()); + const flags = (argv.__telemetryFlags as string[]) ?? []; + + try { + await handler(argv); + // Replace the provisional event with the real one + analytics.replaceLastCommandEvent(commandName, Date.now() - startTime, true, { flags }); + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + analytics.replaceLastCommandEvent(commandName, Date.now() - startTime, false, { + error: err, + flags, + }); + throw error; + } finally { + // Flush in-process so events are sent immediately, not deferred to next invocation. + // If flush fails, store-forward persists on exit. + await telemetryClient.flush().catch(() => {}); + } + }; +} diff --git a/src/utils/crash-reporter.spec.ts b/src/utils/crash-reporter.spec.ts new file mode 100644 index 0000000..a36b918 --- /dev/null +++ b/src/utils/crash-reporter.spec.ts @@ -0,0 +1,149 @@ +import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest'; +import os from 'node:os'; + +const mockCaptureUnhandledCrash = vi.fn(); + +vi.mock('./analytics.js', () => ({ + analytics: { + captureUnhandledCrash: (...args: unknown[]) => mockCaptureUnhandledCrash(...args), + }, +})); + +describe('crash-reporter', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.resetModules(); + }); + + describe('sanitizeStack', () => { + let sanitizeStack: typeof import('./crash-reporter.js').sanitizeStack; + + beforeEach(async () => { + const mod = await import('./crash-reporter.js'); + sanitizeStack = mod.sanitizeStack; + }); + + it('returns empty string for undefined', () => { + expect(sanitizeStack(undefined)).toBe(''); + }); + + it('replaces home directory with ~', () => { + const home = os.homedir(); + const stack = `Error: test\n at ${home}/project/src/index.ts:1:1`; + expect(sanitizeStack(stack)).toContain('~'); + expect(sanitizeStack(stack)).not.toContain(home); + }); + + it('strips absolute paths to node_modules/dist/src', () => { + const stack = 'Error\n at /long/absolute/path/to/src/file.ts:1:1'; + const result = sanitizeStack(stack); + expect(result).toContain('src/'); + expect(result).not.toContain('/long/absolute/path/to/'); + }); + + it('truncates stacks longer than 4096 chars', () => { + const longStack = 'Error: test\n' + 'x'.repeat(5000); + const result = sanitizeStack(longStack); + expect(result.length).toBeLessThanOrEqual(4096 + '\n...[truncated]'.length); + expect(result).toContain('...[truncated]'); + }); + + it('does not truncate short stacks', () => { + const shortStack = 'Error: test\n at file.ts:1:1'; + const result = sanitizeStack(shortStack); + expect(result).toBe(shortStack); + expect(result).not.toContain('truncated'); + }); + }); + + describe('installCrashReporter', () => { + let processOnSpy: ReturnType; + let processExitSpy: ReturnType; + + beforeEach(() => { + processOnSpy = vi.spyOn(process, 'on'); + processExitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never); + }); + + afterEach(() => { + processOnSpy.mockRestore(); + processExitSpy.mockRestore(); + }); + + it('registers uncaughtException and unhandledRejection handlers', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const eventNames = processOnSpy.mock.calls.map((call) => call[0]); + expect(eventNames).toContain('uncaughtException'); + expect(eventNames).toContain('unhandledRejection'); + }); + + it('uncaughtException handler queues crash event and exits', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const uncaughtHandler = processOnSpy.mock.calls.find((c) => c[0] === 'uncaughtException')?.[1] as ( + err: Error, + ) => void; + + const error = new Error('boom'); + uncaughtHandler(error); + + expect(mockCaptureUnhandledCrash).toHaveBeenCalledTimes(1); + const capturedError = mockCaptureUnhandledCrash.mock.calls[0][0]; + expect(capturedError.message).toBe('boom'); + expect(processExitSpy).toHaveBeenCalledWith(1); + }); + + it('unhandledRejection handler wraps non-Error reasons', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const rejectionHandler = processOnSpy.mock.calls.find((c) => c[0] === 'unhandledRejection')?.[1] as ( + reason: unknown, + ) => void; + + rejectionHandler('string reason'); + + expect(mockCaptureUnhandledCrash).toHaveBeenCalledTimes(1); + const capturedError = mockCaptureUnhandledCrash.mock.calls[0][0]; + expect(capturedError.message).toBe('string reason'); + }); + + it('isCrashing guard prevents recursive handling', async () => { + // Simulate the crash handler being called, then itself crashing + mockCaptureUnhandledCrash.mockImplementationOnce(() => { + // First call succeeds + }); + + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const uncaughtHandler = processOnSpy.mock.calls.find((c) => c[0] === 'uncaughtException')?.[1] as ( + err: Error, + ) => void; + + // First call sets isCrashing + uncaughtHandler(new Error('first')); + // Second call should be guarded (module-level isCrashing = true) + uncaughtHandler(new Error('second')); + + // Only first call should have reached analytics + expect(mockCaptureUnhandledCrash).toHaveBeenCalledTimes(1); + }); + + it('handlers are synchronous (no async in the critical path)', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const uncaughtHandler = processOnSpy.mock.calls.find((c) => c[0] === 'uncaughtException')?.[1] as ( + err: Error, + ) => void; + + // Verify the handler returns void, not a Promise + const result = uncaughtHandler(new Error('sync test')); + expect(result).toBeUndefined(); + }); + }); +}); diff --git a/src/utils/crash-reporter.ts b/src/utils/crash-reporter.ts new file mode 100644 index 0000000..36381fd --- /dev/null +++ b/src/utils/crash-reporter.ts @@ -0,0 +1,72 @@ +import { analytics } from './analytics.js'; +import { homedir } from 'node:os'; + +const MAX_STACK_LENGTH = 4096; +const MAX_MESSAGE_LENGTH = 1024; +const HOME = homedir(); +let isCrashing = false; + +/** + * Redact known credential patterns (Bearer tokens, sk_test_/sk_live_ keys, + * raw JWTs). Shared by sanitizeStack and sanitizeMessage because Node echoes + * `.message` into the leading `Error.stack` line, so secrets in messages also + * surface in stacks. + */ +function redactSecrets(s: string): string { + return s + .replace(/Bearer\s+[A-Za-z0-9._-]+/g, 'Bearer ') + .replace(/\bsk_(test|live)_[A-Za-z0-9]+/g, 'sk_') + .replace(/\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g, ''); +} + +/** Sanitize stack trace for telemetry: homedir, absolute-path collapse, secrets, truncation. */ +export function sanitizeStack(stack: string | undefined): string { + if (!stack) return ''; + let sanitized = stack.replaceAll(HOME, '~'); + sanitized = sanitized.replace(/\/[^\s:]+\/(node_modules|dist|src)\//g, '$1/'); + sanitized = redactSecrets(sanitized); + return sanitized.length > MAX_STACK_LENGTH + ? sanitized.slice(0, MAX_STACK_LENGTH) + '\n...[truncated]' + : sanitized; +} + +/** Sanitize an error message for telemetry (homedir, secrets, truncation). */ +export function sanitizeMessage(msg: string | undefined): string { + if (!msg) return ''; + const sanitized = redactSecrets(msg.replaceAll(HOME, '~')); + return sanitized.length > MAX_MESSAGE_LENGTH + ? sanitized.slice(0, MAX_MESSAGE_LENGTH) + '...[truncated]' + : sanitized; +} + +/** + * Register global handlers for uncaughtException and unhandledRejection + * that capture crash details before the process exits. + * + * Handlers are SYNCHRONOUS. Node does NOT await async uncaughtException handlers. + * We queue the event synchronously; store-forward's process.on('exit') handler + * persists it to disk. The next CLI invocation recovers and sends. + */ +export function installCrashReporter(): void { + process.on('uncaughtException', (error) => { + reportCrashSync(error); + process.exit(1); + }); + + process.on('unhandledRejection', (reason) => { + const error = reason instanceof Error ? reason : new Error(String(reason)); + reportCrashSync(error); + process.exit(1); + }); +} + +function reportCrashSync(error: Error): void { + if (isCrashing) return; + isCrashing = true; + try { + // captureUnhandledCrash sanitizes both message and stack at the analytics boundary. + analytics.captureUnhandledCrash(error); + } catch { + // Telemetry must never prevent exit + } +} diff --git a/src/utils/register-subcommand.ts b/src/utils/register-subcommand.ts index 39cb8b2..610f4f3 100644 --- a/src/utils/register-subcommand.ts +++ b/src/utils/register-subcommand.ts @@ -1,5 +1,7 @@ import yargs from 'yargs'; import type { Argv } from 'yargs'; +import { wrapCommandHandler } from './command-telemetry.js'; +import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; interface YargsOptions { demandedOptions: Record; @@ -43,5 +45,6 @@ export function registerSubcommand( // Builder threw during probe — fall back to unenriched description } - return parentYargs.command(usage, enrichedDescription, builder, handler); + const telemetryHandler = WORKOS_TELEMETRY_ENABLED ? wrapCommandHandler(handler) : handler; + return parentYargs.command(usage, enrichedDescription, builder, telemetryHandler); } diff --git a/src/utils/telemetry-client.spec.ts b/src/utils/telemetry-client.spec.ts index c4781e5..4746f59 100644 --- a/src/utils/telemetry-client.spec.ts +++ b/src/utils/telemetry-client.spec.ts @@ -15,6 +15,18 @@ vi.mock('../lib/credentials.js', () => ({ getCredentials: () => mockGetCredentials(), })); +// Mock fs for persistToFile tests +const mockMkdirSync = vi.fn(); +const mockWriteFileSync = vi.fn(); +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + mkdirSync: (...args: unknown[]) => mockMkdirSync(...args), + writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args), + }; +}); + // Import after mocks are set up const { TelemetryClient } = await import('./telemetry-client.js'); @@ -136,33 +148,55 @@ describe('TelemetryClient', () => { expect(mockFetch).toHaveBeenCalledTimes(1); }); - it('clears events even if flush fails', async () => { + it('retains events when flush fails (for store-forward)', async () => { mockFetch.mockRejectedValueOnce(new Error('Network error')); client.setGatewayUrl('http://localhost:8000'); client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); - await client.flush(); // Should not throw - await client.flush(); // Should be no-op + await client.flush(); // Should not throw, events retained + await client.flush(); // Should retry since events are still queued - expect(mockFetch).toHaveBeenCalledTimes(1); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + + it('retains events on non-ok response (for store-forward)', async () => { + mockFetch.mockResolvedValueOnce({ ok: false, status: 500 }); + client.setGatewayUrl('http://localhost:8000'); + client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); + + await client.flush(); // Events retained on 500 + await client.flush(); // Should retry + + expect(mockFetch).toHaveBeenCalledTimes(2); }); - it('handles network errors silently', async () => { + it('returns false on network errors (retryable)', async () => { mockFetch.mockRejectedValueOnce(new Error('Network error')); client.setGatewayUrl('http://localhost:8000'); client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); - // Should not throw - await expect(client.flush()).resolves.toBeUndefined(); + await expect(client.flush()).resolves.toBe(false); }); - it('handles non-ok responses silently', async () => { + it('returns false on 5xx (retryable, events retained)', async () => { mockFetch.mockResolvedValueOnce({ ok: false, status: 500 }); client.setGatewayUrl('http://localhost:8000'); client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); - // Should not throw - await expect(client.flush()).resolves.toBeUndefined(); + await expect(client.flush()).resolves.toBe(false); + }); + + it('drops events on 4xx and returns true (permanent failure)', async () => { + mockFetch.mockResolvedValueOnce({ ok: false, status: 401 }); + client.setGatewayUrl('http://localhost:8000'); + client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); + + const result = await client.flush(); + expect(result).toBe(true); + // Verify events were cleared — second flush should be a no-op + mockFetch.mockClear(); + await client.flush(); + expect(mockFetch).not.toHaveBeenCalled(); }); it('sends correct Content-Type header', async () => { @@ -182,4 +216,79 @@ describe('TelemetryClient', () => { ); }); }); + + describe('replaceLastEventOfType', () => { + it('removes the last event of the specified type', async () => { + client.setGatewayUrl('http://localhost:8000'); + client.queueEvent({ type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:01Z' }); + client.queueEvent({ type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:02Z' }); + + client.replaceLastEventOfType('command'); + + await client.flush(); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.events).toHaveLength(2); + expect(body.events[0].type).toBe('command'); + expect(body.events[1].type).toBe('session.start'); + }); + + it('does nothing if no event of that type exists', () => { + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + // Should not throw + client.replaceLastEventOfType('command'); + }); + }); + + describe('queueEvents', () => { + it('queues multiple events at once', async () => { + client.setGatewayUrl('http://localhost:8000'); + client.queueEvents([ + { type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }, + { type: 'crash', sessionId: '1', timestamp: '2024-01-01T00:00:01Z' }, + ]); + + await client.flush(); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.events).toHaveLength(2); + }); + }); + + describe('persistToFile', () => { + beforeEach(() => { + mockMkdirSync.mockReset(); + mockWriteFileSync.mockReset(); + }); + + it('writes events to file and clears queue', async () => { + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + client.persistToFile('/tmp/test-persist.json'); + + expect(mockMkdirSync).toHaveBeenCalledWith('/tmp', { recursive: true }); + expect(mockWriteFileSync).toHaveBeenCalledWith( + '/tmp/test-persist.json', + expect.stringContaining('session.start'), + 'utf-8', + ); + + // Queue should be empty after persist + client.setGatewayUrl('http://localhost:8000'); + await client.flush(); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + it('does nothing when no events queued', () => { + client.persistToFile('/tmp/test-persist.json'); + expect(mockWriteFileSync).not.toHaveBeenCalled(); + }); + + it('fails silently on write error', () => { + mockMkdirSync.mockImplementation(() => { + throw new Error('EACCES'); + }); + + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + expect(() => client.persistToFile('/tmp/test-persist.json')).not.toThrow(); + }); + }); }); diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index f85aea1..3df750c 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -1,3 +1,5 @@ +import { mkdirSync, writeFileSync } from 'node:fs'; +import { dirname } from 'node:path'; import { debug } from './debug.js'; import type { TelemetryEvent, TelemetryRequest } from './telemetry-types.js'; import { getCredentials } from '../lib/credentials.js'; @@ -9,6 +11,8 @@ import { getCredentials } from '../lib/credentials.js'; export class TelemetryClient { private events: TelemetryEvent[] = []; private accessToken: string | null = null; + private claimToken: string | null = null; + private clientId: string | null = null; private gatewayUrl: string | null = null; setGatewayUrl(url: string) { @@ -19,19 +23,55 @@ export class TelemetryClient { this.accessToken = token; } + /** + * Set claim-token auth for unclaimed environments. + * The API's LlmGatewayGuard accepts either a JWT (Bearer) or claim token + * (x-workos-claim-token + x-workos-client-id headers). + */ + setClaimTokenAuth(clientId: string, claimToken: string) { + this.clientId = clientId; + this.claimToken = claimToken; + } + queueEvent(event: TelemetryEvent) { this.events.push(event); } - async flush(): Promise { - if (this.events.length === 0) return; + /** + * Remove the last queued event of a given type. + * Used to swap a provisional event with an updated one. + */ + replaceLastEventOfType(type: TelemetryEvent['type']): void { + for (let i = this.events.length - 1; i >= 0; i--) { + if (this.events[i].type === type) { + this.events.splice(i, 1); + return; + } + } + } + + /** + * Queue multiple pre-formed events (used by store-forward recovery). + */ + queueEvents(events: TelemetryEvent[]): void { + this.events.push(...events); + } + + /** + * Flush queued events. Returns true if events were sent or intentionally + * dropped (4xx), false if they should be retried (5xx/network error). + * Uses splice to only remove the events that were in the snapshot, + * protecting any events queued concurrently during the fetch. + */ + async flush(): Promise { + if (this.events.length === 0) return true; if (!this.gatewayUrl) { debug('[Telemetry] No gateway URL configured, skipping flush'); - return; + return false; } - const payload: TelemetryRequest = { events: [...this.events] }; - this.events = []; + const count = this.events.length; + const payload: TelemetryRequest = { events: this.events.slice(0, count) }; const headers: Record = { 'Content-Type': 'application/json', @@ -41,13 +81,30 @@ export class TelemetryClient { const token = freshCreds?.accessToken ?? this.accessToken; if (token) { headers['Authorization'] = `Bearer ${token}`; + } else if (this.claimToken && this.clientId) { + // Unclaimed environment auth path — guard accepts this instead of JWT + headers['x-workos-claim-token'] = this.claimToken; + headers['x-workos-client-id'] = this.clientId; } const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), 3000); try { - debug(`[Telemetry] Sending ${payload.events.length} events to ${this.gatewayUrl}/telemetry`); + const eventSummary = payload.events.map((e) => { + const attrs = e.attributes ?? {}; + switch (e.type) { + case 'session.start': return `session.start(mode=${attrs['installer.mode']}, os=${attrs['env.os']})`; + case 'session.end': return `session.end(outcome=${attrs['installer.outcome']}, duration=${attrs['installer.duration_ms']}ms)`; + case 'step': return `step(${(e as any).name}, ${(e as any).durationMs}ms, success=${(e as any).success})`; + case 'agent.tool': return `agent.tool(${(e as any).toolName}, ${(e as any).durationMs}ms)`; + case 'agent.llm': return `agent.llm(${(e as any).model}, in=${(e as any).inputTokens}, out=${(e as any).outputTokens})`; + case 'command': return `command(${attrs['command.name']}, ${attrs['command.duration_ms']}ms, success=${attrs['command.success']})`; + case 'crash': return `crash(${attrs['crash.error_type']}: ${attrs['crash.error_message']})`; + default: return e.type; + } + }).join('\n '); + debug(`[Telemetry] Sending ${payload.events.length} events to ${this.gatewayUrl}/telemetry:\n ${eventSummary}`); const response = await fetch(`${this.gatewayUrl}/telemetry`, { method: 'POST', @@ -56,15 +113,42 @@ export class TelemetryClient { signal: controller.signal, }); - if (!response.ok) { + if (response.ok) { + this.events.splice(0, count); + return true; + } else { debug(`[Telemetry] Failed to send: ${response.status}`); + // Drop on 4xx (permanent failures like 401/403 won't succeed on retry). + // Retain on 5xx (transient server errors) for store-forward. + if (response.status >= 400 && response.status < 500) { + this.events.splice(0, count); + return true; // intentionally dropped + } + return false; } } catch (error) { debug(`[Telemetry] Error sending events: ${error}`); + // Events remain in queue for store-forward to persist + return false; } finally { clearTimeout(timeout); } } + + /** + * Synchronously write pending events to a file. + * Used as last resort in process.on('exit') handler. + */ + persistToFile(filePath: string): void { + if (this.events.length === 0) return; + try { + mkdirSync(dirname(filePath), { recursive: true }); + writeFileSync(filePath, JSON.stringify(this.events), 'utf-8'); + this.events = []; + } catch { + // Silent failure — telemetry must never block exit + } + } } export const telemetryClient = new TelemetryClient(); diff --git a/src/utils/telemetry-sanitize.spec.ts b/src/utils/telemetry-sanitize.spec.ts new file mode 100644 index 0000000..9724626 --- /dev/null +++ b/src/utils/telemetry-sanitize.spec.ts @@ -0,0 +1,235 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { homedir } from 'node:os'; +import { sanitizeMessage } from './crash-reporter.js'; + +// Mock telemetry client so we can inspect queued events without HTTP. +// Use vi.hoisted so these are available when the hoisted vi.mock factory runs +// (importing sanitizeMessage transitively loads analytics.ts which loads telemetry-client.ts). +const { + mockSetGatewayUrl, + mockSetAccessToken, + mockQueueEvent, + mockFlush, + mockReplaceLastEventOfType, +} = vi.hoisted(() => ({ + mockSetGatewayUrl: vi.fn(), + mockSetAccessToken: vi.fn(), + mockQueueEvent: vi.fn(), + mockFlush: vi.fn().mockResolvedValue(undefined), + mockReplaceLastEventOfType: vi.fn(), +})); + +vi.mock('./telemetry-client.js', () => ({ + telemetryClient: { + setGatewayUrl: mockSetGatewayUrl, + setAccessToken: mockSetAccessToken, + queueEvent: mockQueueEvent, + flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + }, +})); + +vi.mock('./debug.js', () => ({ + debug: vi.fn(), +})); + +vi.mock('uuid', () => ({ + v4: () => 'test-session-id-sanitize', +})); + +vi.mock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => 'https://api.workos.com/llm-gateway', + getConfig: () => ({ + nodeVersion: '>=18', + logging: { debugMode: false }, + telemetry: { enabled: true, eventName: 'installer_interaction' }, + documentation: { + workosDocsUrl: 'https://workos.com/docs', + dashboardUrl: 'https://dashboard.workos.com', + issuesUrl: 'https://github.com', + }, + legacy: { oauthPort: 3000 }, + }), + getVersion: () => '0.0.0-test', +})); + +vi.mock('../lib/credentials.js', () => ({ + getCredentials: vi.fn(), +})); + +describe('sanitizeMessage', () => { + it('strips the home directory', () => { + const home = homedir(); + const input = `ENOENT: no such file or directory, open '${home}/.workos/credentials.json'`; + const out = sanitizeMessage(input); + expect(out).not.toContain(home); + expect(out).toContain('~/.workos/credentials.json'); + }); + + it('redacts Bearer tokens', () => { + const out = sanitizeMessage('401 Unauthorized: Bearer abc123.def456.ghi789 invalid'); + expect(out).not.toContain('abc123.def456.ghi789'); + expect(out).toContain('Bearer '); + }); + + it('redacts sk_live_ keys', () => { + const out = sanitizeMessage('Authentication failed for sk_live_xyzABC123'); + expect(out).not.toContain('sk_live_xyzABC123'); + expect(out).toContain('sk_'); + }); + + it('redacts sk_test_ keys', () => { + const out = sanitizeMessage('Bad key sk_test_qrsTUV456 in request'); + expect(out).not.toContain('sk_test_qrsTUV456'); + expect(out).toContain('sk_'); + }); + + it('redacts raw JWTs', () => { + const jwt = 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value'; + const out = sanitizeMessage(`Token ${jwt} expired`); + expect(out).not.toContain(jwt); + expect(out).toContain(''); + }); + + it('truncates messages longer than 1024 chars with marker', () => { + const long = 'a'.repeat(2000); + const out = sanitizeMessage(long); + expect(out.length).toBe(1024 + '...[truncated]'.length); + expect(out.endsWith('...[truncated]')).toBe(true); + }); + + it('redacts before truncating so secrets near the boundary are not partially preserved', () => { + // Place a JWT at position 1010 so its tail would fall past the 1024 cap. + const padding = 'x'.repeat(1010); + const jwt = 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value'; + const out = sanitizeMessage(padding + jwt); + expect(out).not.toContain('signature_value'); + expect(out).not.toContain('eyJhbGciOiJIUzI1NiJ9'); + }); + + it('returns empty string for undefined or empty input', () => { + expect(sanitizeMessage(undefined)).toBe(''); + expect(sanitizeMessage('')).toBe(''); + }); + + it('redacts all marker types in a single string', () => { + const home = homedir(); + const input = `${home}/x Bearer abc.def.ghi sk_live_ABC123 eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value`; + const out = sanitizeMessage(input); + expect(out).not.toContain(home); + expect(out).not.toContain('abc.def.ghi'); + expect(out).not.toContain('sk_live_ABC123'); + expect(out).not.toContain('eyJhbGciOiJIUzI1NiJ9'); + expect(out).toContain('~/x'); + expect(out).toContain('Bearer '); + expect(out).toContain('sk_'); + expect(out).toContain(''); + }); +}); + +describe('Analytics: no PII or secrets in queued events', () => { + const home = homedir(); + const POISON_BEARER = 'abc123.def456.ghi789'; + const POISON_SK = 'sk_live_xyzABC123'; + const POISON_JWT = 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value'; + const POISON_PATH = `${home}/.workos/credentials.json`; + const POISON_MESSAGE = `ENOENT at ${POISON_PATH} Bearer ${POISON_BEARER} key=${POISON_SK} jwt=${POISON_JWT}`; + + let Analytics: typeof import('./analytics.js').Analytics; + let analytics: InstanceType; + const originalEnv = process.env.WORKOS_TELEMETRY; + + beforeEach(async () => { + vi.clearAllMocks(); + delete process.env.WORKOS_TELEMETRY; + vi.resetModules(); + vi.doMock('./telemetry-client.js', () => ({ + telemetryClient: { + setGatewayUrl: mockSetGatewayUrl, + setAccessToken: mockSetAccessToken, + queueEvent: mockQueueEvent, + flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + }, + })); + vi.doMock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => 'https://api.workos.com/llm-gateway', + getConfig: () => ({ + nodeVersion: '>=18', + logging: { debugMode: false }, + telemetry: { enabled: true, eventName: 'installer_interaction' }, + documentation: { + workosDocsUrl: 'https://workos.com/docs', + dashboardUrl: 'https://dashboard.workos.com', + issuesUrl: 'https://github.com', + }, + legacy: { oauthPort: 3000 }, + }), + getVersion: () => '0.0.0-test', + })); + vi.doMock('../lib/credentials.js', () => ({ + getCredentials: vi.fn(), + })); + const module = await import('./analytics.js'); + Analytics = module.Analytics; + analytics = new Analytics(); + }); + + afterEach(() => { + if (originalEnv !== undefined) { + process.env.WORKOS_TELEMETRY = originalEnv; + } else { + delete process.env.WORKOS_TELEMETRY; + } + }); + + function assertCleanQueue() { + const serialized = JSON.stringify(mockQueueEvent.mock.calls); + expect(serialized).not.toContain(home); + expect(serialized).not.toContain(POISON_BEARER); + expect(serialized).not.toContain(POISON_SK); + expect(serialized).not.toContain(POISON_JWT); + } + + it('stepCompleted: poisoned error.message does not leak markers', () => { + const err = new Error(POISON_MESSAGE); + analytics.stepCompleted('test-step', 100, false, err); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('commandExecuted: poisoned error.message does not leak markers', () => { + const err = new Error(POISON_MESSAGE); + analytics.commandExecuted('test-command', 100, false, { error: err }); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('captureUnhandledCrash: poisoned error.message does not leak markers', () => { + const err = new Error(POISON_MESSAGE); + analytics.captureUnhandledCrash(err); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('captureException: poisoned error.message does not leak via session.end tags', async () => { + const err = new Error(POISON_MESSAGE); + analytics.captureException(err); + // captureException stores into tags; tags flow into session.end at shutdown. + await analytics.shutdown('error'); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('replaceLastCommandEvent: inherits sanitization via commandExecuted', () => { + const err = new Error(POISON_MESSAGE); + // First queue a provisional event (would normally happen in middleware). + analytics.commandExecuted('test-command', 0, true); + mockQueueEvent.mockClear(); + // Then replace it with the real one carrying the poisoned error. + analytics.replaceLastCommandEvent('test-command', 100, false, { error: err }); + expect(mockReplaceLastEventOfType).toHaveBeenCalledWith('command'); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); +}); diff --git a/src/utils/telemetry-schema.spec.ts b/src/utils/telemetry-schema.spec.ts new file mode 100644 index 0000000..45c27d3 --- /dev/null +++ b/src/utils/telemetry-schema.spec.ts @@ -0,0 +1,258 @@ +import { describe, it, expect } from 'vitest'; +import { z } from 'zod'; + +/** + * Mirror of the API's Zod discriminated union schema. + * These tests validate that the schema shape preserves top-level fields + * (not stripped by safeParse) and accepts all 7 event types. + * This ensures CLI and API stay in sync. + */ + +const attributesSchema = z + .record(z.string(), z.union([z.string(), z.number(), z.boolean()])) + .optional(); + +const baseFields = { + sessionId: z.string(), + timestamp: z.string(), + attributes: attributesSchema, +}; + +const SessionStartSchema = z + .object({ type: z.literal('session.start'), ...baseFields }) + .passthrough(); + +const SessionEndSchema = z + .object({ type: z.literal('session.end'), ...baseFields }) + .passthrough(); + +const StepSchema = z + .object({ + type: z.literal('step'), + ...baseFields, + name: z.string(), + startTimestamp: z.string().optional(), + durationMs: z.number(), + success: z.boolean(), + error: z.object({ type: z.string(), message: z.string() }).optional(), + }) + .passthrough(); + +const AgentToolSchema = z + .object({ + type: z.literal('agent.tool'), + ...baseFields, + toolName: z.string(), + startTimestamp: z.string().optional(), + durationMs: z.number(), + success: z.boolean(), + }) + .passthrough(); + +const AgentLlmSchema = z + .object({ + type: z.literal('agent.llm'), + ...baseFields, + model: z.string(), + inputTokens: z.number(), + outputTokens: z.number(), + }) + .passthrough(); + +const CommandSchema = z + .object({ type: z.literal('command'), ...baseFields }) + .passthrough(); + +const CrashSchema = z + .object({ type: z.literal('crash'), ...baseFields }) + .passthrough(); + +const TelemetryEventSchema = z.discriminatedUnion('type', [ + SessionStartSchema, + SessionEndSchema, + StepSchema, + AgentToolSchema, + AgentLlmSchema, + CommandSchema, + CrashSchema, +]); + +describe('TelemetryEventSchema (discriminated union)', () => { + const base = { sessionId: 'sess-1', timestamp: '2024-01-01T00:00:00Z' }; + + describe('accepts all 7 event types', () => { + it('accepts session.start', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'session.start', + ...base, + attributes: { 'installer.version': '1.0.0' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts session.end', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'session.end', + ...base, + attributes: { 'installer.outcome': 'success' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts step', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + startTimestamp: '2024-01-01T00:00:00Z', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + }); + + it('accepts agent.tool', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.tool', + ...base, + toolName: 'Write', + startTimestamp: '2024-01-01T00:00:00Z', + durationMs: 50, + success: true, + }); + expect(result.success).toBe(true); + }); + + it('accepts agent.llm', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.llm', + ...base, + model: 'claude', + inputTokens: 100, + outputTokens: 50, + }); + expect(result.success).toBe(true); + }); + + it('accepts command', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'command', + ...base, + attributes: { 'command.name': 'org.list' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts crash', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'crash', + ...base, + attributes: { 'crash.error_type': 'Error' }, + }); + expect(result.success).toBe(true); + }); + }); + + describe('preserves top-level fields via .passthrough()', () => { + it('preserves name and durationMs on step events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('name', 'detect'); + expect(result.data).toHaveProperty('durationMs', 100); + } + }); + + it('preserves toolName on agent.tool events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.tool', + ...base, + toolName: 'Write', + durationMs: 50, + success: true, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('toolName', 'Write'); + } + }); + + it('preserves model, inputTokens, outputTokens on agent.llm events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.llm', + ...base, + model: 'claude', + inputTokens: 100, + outputTokens: 50, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('model', 'claude'); + expect(result.data).toHaveProperty('inputTokens', 100); + expect(result.data).toHaveProperty('outputTokens', 50); + } + }); + + it('preserves startTimestamp on step events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + startTimestamp: '2024-01-01T00:00:00Z', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('startTimestamp', '2024-01-01T00:00:00Z'); + } + }); + }); + + describe('backward compatibility', () => { + it('accepts step event without startTimestamp', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + }); + + it('accepts agent.tool event without startTimestamp', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.tool', + ...base, + toolName: 'Write', + durationMs: 50, + success: true, + }); + expect(result.success).toBe(true); + }); + }); + + describe('rejects invalid events', () => { + it('rejects events with unknown type', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'unknown', + ...base, + }); + expect(result.success).toBe(false); + }); + + it('rejects events without type', () => { + const result = TelemetryEventSchema.safeParse({ + ...base, + }); + expect(result.success).toBe(false); + }); + }); +}); diff --git a/src/utils/telemetry-store-forward.spec.ts b/src/utils/telemetry-store-forward.spec.ts new file mode 100644 index 0000000..28367b4 --- /dev/null +++ b/src/utils/telemetry-store-forward.spec.ts @@ -0,0 +1,159 @@ +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +const mockPersistToFile = vi.fn(); +const mockQueueEvents = vi.fn(); +const mockFlush = vi.fn().mockResolvedValue(undefined); + +vi.mock('./telemetry-client.js', () => ({ + telemetryClient: { + persistToFile: (...args: unknown[]) => mockPersistToFile(...args), + queueEvents: (...args: unknown[]) => mockQueueEvents(...args), + flush: () => mockFlush(), + }, +})); + +vi.mock('./debug.js', () => ({ + debug: vi.fn(), +})); + +const mockExistsSync = vi.fn(); +const mockReaddirSync = vi.fn(); +const mockReadFileSync = vi.fn(); +const mockUnlinkSync = vi.fn(); + +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + existsSync: (...args: unknown[]) => mockExistsSync(...args), + readdirSync: (...args: unknown[]) => mockReaddirSync(...args), + readFileSync: (...args: unknown[]) => mockReadFileSync(...args), + unlinkSync: (...args: unknown[]) => mockUnlinkSync(...args), + }; +}); + +describe('telemetry-store-forward', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('installStoreForward', () => { + it('registers a process exit handler', async () => { + const onSpy = vi.spyOn(process, 'on'); + vi.resetModules(); + + const { installStoreForward } = await import('./telemetry-store-forward.js'); + installStoreForward(); + + const exitHandlers = onSpy.mock.calls.filter((c) => c[0] === 'exit'); + expect(exitHandlers.length).toBeGreaterThanOrEqual(1); + + onSpy.mockRestore(); + }); + + it('exit handler calls persistToFile with PID-based path', async () => { + const onSpy = vi.spyOn(process, 'on'); + vi.resetModules(); + + const { installStoreForward } = await import('./telemetry-store-forward.js'); + installStoreForward(); + + const exitHandler = onSpy.mock.calls.find((c) => c[0] === 'exit')?.[1] as () => void; + exitHandler(); + + expect(mockPersistToFile).toHaveBeenCalledTimes(1); + const filePath = mockPersistToFile.mock.calls[0][0] as string; + expect(filePath).toContain('workos-cli-telemetry'); + expect(filePath).toContain(`pending-${process.pid}`); + + onSpy.mockRestore(); + }); + }); + + describe('recoverPendingEvents', () => { + it('does nothing if pending dir does not exist', async () => { + mockExistsSync.mockReturnValue(false); + vi.resetModules(); + + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockReaddirSync).not.toHaveBeenCalled(); + expect(mockQueueEvents).not.toHaveBeenCalled(); + }); + + it('reads and queues events from pending files', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json', 'pending-5678.json']); + const events1 = [{ type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }]; + const events2 = [{ type: 'crash', sessionId: '2', timestamp: '2024-01-01T00:00:01Z' }]; + mockReadFileSync + .mockReturnValueOnce(JSON.stringify(events1)) + .mockReturnValueOnce(JSON.stringify(events2)); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockQueueEvents).toHaveBeenCalledTimes(2); + expect(mockQueueEvents).toHaveBeenCalledWith(events1); + expect(mockQueueEvents).toHaveBeenCalledWith(events2); + expect(mockFlush).toHaveBeenCalledTimes(1); + }); + + it('deletes files immediately after reading (before send)', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json']); + mockReadFileSync.mockReturnValue(JSON.stringify([{ type: 'command', sessionId: '1', timestamp: 'x' }])); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + // unlinkSync should be called before flush + expect(mockUnlinkSync).toHaveBeenCalledTimes(1); + const unlinkPath = mockUnlinkSync.mock.calls[0][0] as string; + expect(unlinkPath).toContain('pending-1234.json'); + }); + + it('handles corrupted files gracefully', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-bad.json']); + mockReadFileSync.mockReturnValue('not valid json{{{'); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + + // Should not throw + await expect(recoverPendingEvents()).resolves.toBeUndefined(); + // Should try to delete the corrupted file + expect(mockUnlinkSync).toHaveBeenCalled(); + }); + + it('skips non-pending files', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json', 'other-file.txt', 'readme.md']); + + const events = [{ type: 'command', sessionId: '1', timestamp: 'x' }]; + mockReadFileSync.mockReturnValue(JSON.stringify(events)); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockReadFileSync).toHaveBeenCalledTimes(1); + }); + + it('skips empty event arrays', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json']); + mockReadFileSync.mockReturnValue('[]'); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockQueueEvents).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/src/utils/telemetry-store-forward.ts b/src/utils/telemetry-store-forward.ts new file mode 100644 index 0000000..bda57b4 --- /dev/null +++ b/src/utils/telemetry-store-forward.ts @@ -0,0 +1,63 @@ +import { readFileSync, readdirSync, unlinkSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { telemetryClient } from './telemetry-client.js'; +import { debug } from './debug.js'; + +const PENDING_DIR = join(tmpdir(), 'workos-cli-telemetry'); +const PENDING_FILE = join(PENDING_DIR, `pending-${process.pid}.json`); + +/** + * Register a sync exit handler that persists unsent events to disk. + * Called once at startup. Uses PID in filename to prevent concurrent + * CLI invocations from colliding. + */ +export function installStoreForward(): void { + process.on('exit', () => { + telemetryClient.persistToFile(PENDING_FILE); + }); +} + +/** + * On startup, check for ANY pending files from previous invocations + * (could be from different PIDs) and send them. Non-blocking, fire-and-forget. + */ +export async function recoverPendingEvents(): Promise { + try { + if (!existsSync(PENDING_DIR)) return; + const files = readdirSync(PENDING_DIR).filter( + (f) => f.startsWith('pending-') && f.endsWith('.json'), + ); + + const recoveredFiles: string[] = []; + for (const file of files) { + const filePath = join(PENDING_DIR, file); + try { + const raw = readFileSync(filePath, 'utf-8'); + const events = JSON.parse(raw); + if (Array.isArray(events) && events.length > 0) { + telemetryClient.queueEvents(events); + recoveredFiles.push(filePath); + } else { + // Empty file — delete immediately + try { unlinkSync(filePath); } catch { /* ignore */ } + } + } catch { + // Corrupted file — delete and move on + try { unlinkSync(filePath); } catch { /* ignore */ } + } + } + + // Delete source files — events are now in memory regardless of flush outcome. + // If flush succeeds: events sent, done. + // If flush fails: events stay in memory, exit handler re-persists to new PID file. + for (const filePath of recoveredFiles) { + try { unlinkSync(filePath); } catch { /* ignore */ } + } + + // Flush all recovered events in one batch + await telemetryClient.flush(); + } catch { + debug('[Telemetry] Store-forward recovery failed silently'); + } +} diff --git a/src/utils/telemetry-types.ts b/src/utils/telemetry-types.ts index b9a6679..7d84eb9 100644 --- a/src/utils/telemetry-types.ts +++ b/src/utils/telemetry-types.ts @@ -4,7 +4,7 @@ */ export interface TelemetryEvent { - type: 'session.start' | 'session.end' | 'step' | 'agent.tool' | 'agent.llm'; + type: 'session.start' | 'session.end' | 'step' | 'agent.tool' | 'agent.llm' | 'command' | 'crash'; sessionId: string; timestamp: string; attributes?: Record; @@ -17,6 +17,12 @@ export interface SessionStartEvent extends TelemetryEvent { 'installer.mode': 'cli' | 'tui' | 'headless'; 'workos.user_id'?: string; 'workos.org_id'?: string; + 'env.os': string; + 'env.os_version': string; + 'env.node_version': string; + 'env.shell': string; + 'env.ci': boolean; + 'env.ci_provider'?: string; }; } @@ -31,6 +37,7 @@ export interface SessionEndEvent extends TelemetryEvent { export interface StepEvent extends TelemetryEvent { type: 'step'; name: string; + startTimestamp: string; durationMs: number; success: boolean; error?: { @@ -42,6 +49,7 @@ export interface StepEvent extends TelemetryEvent { export interface AgentToolEvent extends TelemetryEvent { type: 'agent.tool'; toolName: string; + startTimestamp: string; durationMs: number; success: boolean; } @@ -53,6 +61,44 @@ export interface AgentLLMEvent extends TelemetryEvent { outputTokens: number; } +export interface CommandEvent extends TelemetryEvent { + type: 'command'; + attributes: { + 'command.name': string; + 'command.duration_ms': number; + 'command.success': boolean; + 'command.error_type'?: string; + 'command.error_message'?: string; + 'command.flags'?: string; + 'cli.version': string; + 'workos.user_id'?: string; + 'env.os': string; + 'env.os_version': string; + 'env.node_version': string; + 'env.shell': string; + 'env.ci': boolean; + 'env.ci_provider'?: string; + }; +} + +export interface CrashEvent extends TelemetryEvent { + type: 'crash'; + attributes: { + 'crash.error_type': string; + 'crash.error_message': string; + 'crash.stack': string; + 'crash.command'?: string; + 'cli.version': string; + 'workos.user_id'?: string; + 'env.os': string; + 'env.os_version': string; + 'env.node_version': string; + 'env.shell': string; + 'env.ci': boolean; + 'env.ci_provider'?: string; + }; +} + export interface TelemetryRequest { events: TelemetryEvent[]; }