tangle-network · drewstone · Jun 22, 2026 · Jun 22, 2026
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "agent-eval-rpc"
-version = "0.95.1"
+version = "0.96.0"
 description = "Python RPC client for @tangle-network/agent-eval — judge content against rubrics over HTTP or stdio RPC. Eval logic runs in the Node runtime; this package is a thin wire client."
 readme = "README.md"
 requires-python = ">=3.10"

diff --git a/clients/python/src/agent_eval_rpc/__init__.py b/clients/python/src/agent_eval_rpc/__init__.py
@@ -58,7 +58,7 @@
 try:
     __version__ = version("agent-eval-rpc")
 except PackageNotFoundError:
-    __version__ = "0.95.1"
+    __version__ = "0.96.0"
 
 __all__ = [
     "Client",

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-eval",
-  "version": "0.95.1",
+  "version": "0.96.0",
   "description": "Evaluate and improve AI agents from runs, traces, judges, and feedback. Compare candidates, cluster failures, measure lift, and gate releases.",
   "homepage": "https://github.com/tangle-network/agent-eval#readme",
   "repository": {

diff --git a/src/contract/eval-reporting-suite.ts b/src/contract/eval-reporting-suite.ts
@@ -0,0 +1,161 @@
+/**
+ * # `evalReportingSuite` — one call from runs (or a run dir) to `analysis.json`.
+ *
+ * A thin wrapper over the analysis primitive (`analyzeRuns`) and the on-disk
+ * intake adapter (`fromRunRecordDir`). It does NOT reimplement any statistics,
+ * distributions, or clustering — it resolves the input into validated
+ * `RunRecord[]`, calls `analyzeRuns` with the options you'd pass it directly,
+ * wraps the result in a small provenance envelope, and (optionally) writes a
+ * single `analysis.json` artifact.
+ *
+ * ```ts
+ * // From a directory of run files, write ./runs/analysis.json:
+ * const suite = await evalReportingSuite('./runs', { write: true })
+ * // From records already in memory, no write:
+ * const inMemory = await evalReportingSuite(records, { analyze: { decisionThreshold: 0.03 } })
+ * suite.report // the InsightReport — distributions, paired lift, findings rollup
+ * ```
+ */
+
+import { mkdir, writeFile } from 'node:fs/promises'
+import { dirname, join } from 'node:path'
+import type { RunRecord } from '../run-record'
+import { type AnalyzeRunsOptions, analyzeRuns } from './analyze-runs'
+import type { InsightReport } from './insight-report'
+import {
+  type FromRunRecordDirOptions,
+  type FromRunRecordDirResult,
+  fromRunRecordDir,
+} from './intake/run-record-dir'
+
+/** Either records in hand or a path to a `.json` / `.jsonl` file or a
+ *  directory of them. */
+export type EvalReportingSuiteInput = RunRecord[] | string
+
+export interface EvalReportingSuiteOptions {
+  /** Forwarded verbatim to `analyzeRuns` (everything except `runs`, which the
+   *  suite supplies from the resolved input). Use this for split selection,
+   *  baseline/candidate ids, canaries, prior-period runs, the analyst registry,
+   *  etc. */
+  analyze?: Omit<AnalyzeRunsOptions, 'runs'>
+  /** Loader options used only when the input is a path. */
+  load?: FromRunRecordDirOptions
+  /**
+   * Write the suite result as a single `analysis.json`.
+   *   - `true` — write to `<dir>/analysis.json` when the input is a directory,
+   *     or alongside the input file; throws if the input is in-memory records
+   *     (no directory to anchor to — pass an explicit path instead).
+   *   - a string — a path ending in `.json` / `.jsonl` is used verbatim; any
+   *     other path is treated as a directory and gets `analysis.json` appended.
+   *   - omitted / false — do not write.
+   */
+  write?: boolean | string
+}
+
+/** The suite artifact — the `analyzeRuns` report plus provenance. This is the
+ *  exact shape serialized to `analysis.json`. */
+export interface EvalReportingSuiteResult {
+  /** The analysis itself — distributions, paired stats/lift, failure rollup,
+   *  recommendations. Produced by `analyzeRuns`. */
+  report: InsightReport
+  /** How the suite was run, so a reader can verify provenance. */
+  provenance: {
+    /** ISO timestamp the suite ran. */
+    generatedAt: string
+    /** Number of records analyzed (mirrors `report.n`). */
+    runCount: number
+    /** The source path when the input was a directory/file; null for
+     *  in-memory records. */
+    sourcePath: string | null
+    /** Files read when loading from disk; empty for in-memory input. */
+    files: string[]
+    /** Records dropped at the validation boundary. Always empty unless
+     *  `load.onInvalid` was set to `'collect'`. */
+    rejected: FromRunRecordDirResult['rejected']
+  }
+  /** The path `analysis.json` was written to, or null when nothing was written. */
+  writtenTo: string | null
+}
+
+const ANALYSIS_ARTIFACT = 'analysis.json'
+
+/**
+ * Resolve runs (or a run dir/file), run `analyzeRuns`, and optionally persist a
+ * single `analysis.json`. The only analysis logic lives in `analyzeRuns`; this
+ * function is composition + I/O.
+ *
+ * @param input - Records in memory, or a path handed to `fromRunRecordDir`.
+ * @param options - `analyze` / `load` / `write`; see `EvalReportingSuiteOptions`.
+ * @returns The report, its provenance, and the artifact path (null if unwritten).
+ * @throws If no records resolve, or (before any loading or analysis) when
+ *   `write: true` is combined with in-memory input.
+ */
+export async function evalReportingSuite(
+  input: EvalReportingSuiteInput,
+  options: EvalReportingSuiteOptions = {},
+): Promise<EvalReportingSuiteResult> {
+  const fromPath = typeof input === 'string'
+  const sourcePath = fromPath ? input : null
+  // Resolved up front so an unusable `write` option fails before the analysis runs.
+  const target = resolveWriteTarget(options.write, sourcePath)
+
+  const { runs, files, rejected } = fromPath
+    ? await fromRunRecordDir(input, options.load)
+    : { runs: input, files: [], rejected: [] }
+
+  if (runs.length === 0) {
+    throw new Error(
+      fromPath
+        ? `evalReportingSuite: no RunRecords found at '${input}'`
+        : 'evalReportingSuite: no RunRecords to analyze',
+    )
+  }
+
+  const report = await analyzeRuns({ ...options.analyze, runs })
+
+  const result: EvalReportingSuiteResult = {
+    report,
+    provenance: {
+      generatedAt: new Date().toISOString(),
+      runCount: runs.length,
+      sourcePath,
+      files,
+      rejected,
+    },
+    // Known before serializing, so the artifact on disk records its own path.
+    writtenTo: target,
+  }
+
+  if (target) {
+    await mkdir(dirname(target), { recursive: true })
+    await writeFile(target, `${JSON.stringify(result, null, 2)}\n`, 'utf8')
+  }
+
+  return result
+}
+
+/** Pick the path `analysis.json` should be written to, or null when `write` is
+ *  falsy. A string not ending in `.json` / `.jsonl` is treated as a directory;
+ *  `true` anchors to the source directory (or the source file's parent) and
+ *  throws for in-memory input, which has no directory to anchor to. */
+function resolveWriteTarget(
+  write: EvalReportingSuiteOptions['write'],
+  sourcePath: string | null,
+): string | null {
+  const namesFile = (p: string): boolean => p.endsWith('.json') || p.endsWith('.jsonl')
+
+  if (!write) return null
+
+  // Explicit path: `.json` / `.jsonl` targets are used verbatim.
+  if (typeof write === 'string') {
+    return namesFile(write) ? write : join(write, ANALYSIS_ARTIFACT)
+  }
+  // `write === true` from here on.
+  if (sourcePath === null) {
+    throw new Error(
+      'evalReportingSuite: write:true needs a source path to anchor analysis.json — pass an explicit output path when analyzing in-memory records',
+    )
+  }
+  const anchorDir = namesFile(sourcePath) ? dirname(sourcePath) : sourcePath
+  return join(anchorDir, ANALYSIS_ARTIFACT)
+}
diff --git a/src/contract/index.ts b/src/contract/index.ts
@@ -211,6 +211,15 @@ export {
 export type { AnalystFinding } from '../analyst/types'
 export type { AnalyzeRunsOptions } from './analyze-runs'
 export { analyzeRuns } from './analyze-runs'
+// One-call reporting suite: runs (or a run dir/file) → `analyzeRuns` →
+// optional `analysis.json`. Thin composition over `analyzeRuns` +
+// `fromRunRecordDir`; adds no analysis logic of its own.
+export {
+  type EvalReportingSuiteInput,
+  type EvalReportingSuiteOptions,
+  type EvalReportingSuiteResult,
+  evalReportingSuite,
+} from './eval-reporting-suite'
 export type {
   FailureClusterInsight,
   InsightReport,
@@ -258,6 +267,8 @@ export {
   type FromFeedbackTableOptions,
   type FromFeedbackTableResult,
   type FromOtelSpansOptions,
+  type FromRunRecordDirOptions,
+  type FromRunRecordDirResult,
   fromClaudeCodeSession,
   fromCodexSession,
   fromFeedbackTable,
@@ -266,9 +277,11 @@ export {
   fromOtelSpans,
   fromPigraphSession,
   fromPiSession,
+  fromRunRecordDir,
   type ParsedCodeAgentJsonl,
   type PartitionByAuthoringModelResult,
   parseAgentTrace,
   parseCodeAgentJsonl,
   partitionRunsByAuthoringModel,
+  type RunRecordRejection,
 } from './intake'
diff --git a/src/contract/intake/index.ts b/src/contract/intake/index.ts
@@ -14,6 +14,8 @@
  *   - `fromCodexSession` / `fromClaudeCodeSession` / `fromOpenCodeSession` /
  *     `fromKimiCodeSession` / `fromPiSession` — local coding-agent and
  *     graph-shaped sessions projected into process-scored `RunRecord`s.
+ *   - `fromRunRecordDir` — a `.json` / `.jsonl` file or a directory of them,
+ *     parsed and validated at the boundary.
  */
 
 export {
@@ -52,3 +54,9 @@ export {
   fromFeedbackTable,
 } from './feedback-table'
 export { type FromOtelSpansOptions, fromOtelSpans } from './otel-spans'
+export {
+  type FromRunRecordDirOptions,
+  type FromRunRecordDirResult,
+  fromRunRecordDir,
+  type RunRecordRejection,
+} from './run-record-dir'