From dbe07e76991e68c46985e63c8414e9822c52cf8c Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 13:27:50 +0200
Subject: [PATCH 1/4] Add prompt instance expansion

---
 .../commands/eval/artifact-writer.test.ts     |  32 +-
 .../docs/docs/evaluation/eval-files.mdx       |  33 +-
 .../docs/docs/reference/result-artifacts.mdx  |  11 +-
 .../src/evaluation/loaders/config-loader.ts   |  64 +-
 packages/core/src/evaluation/orchestrator.ts  |  42 +-
 packages/core/src/evaluation/run-artifacts.ts |  27 +-
 packages/core/src/evaluation/types.ts         |  35 +-
 packages/core/src/evaluation/yaml-parser.ts   | 272 ++++++-
 .../evaluation/eval-inline-experiment.test.ts | 107 +++
 .../agentv-bench/references/eval-yaml-spec.md |  10 +-
 .../references/eval.schema.json               | 702 ++++++++++++++++++
 11 files changed, 1308 insertions(+), 27 deletions(-)

diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts
index 8ab705f0e..15c342d1f 100644
--- a/apps/cli/test/commands/eval/artifact-writer.test.ts
+++ b/apps/cli/test/commands/eval/artifact-writer.test.ts
@@ -1103,17 +1103,24 @@ describe('writeArtifactsFromResults', () => {
   });
 
   it('writes repeat runs in AgentV case and run folders', async () => {
+    const prompt = { id: 'direct', label: 'Direct prompt', kind: 'string' as const };
     const results = [
       makeResult({
         testId: 'repeat-case',
+        prompt,
         score: 1,
         trials: [
           {
             attempt: 0,
+            sampleIndex: 0,
+            retryIndex: 0,
             score: 0.25,
             verdict: 'fail',
             result: makeResult({
               testId: 'repeat-case',
+              prompt,
+              sampleIndex: 0,
+              retryIndex: 0,
               score: 0.25,
               output: 'first attempt',
               durationMs: 2000,
@@ -1122,10 +1129,15 @@ describe('writeArtifactsFromResults', () => {
           },
           {
             attempt: 1,
+            sampleIndex: 1,
+            retryIndex: 0,
             score: 1,
             verdict: 'pass',
             result: makeResult({
               testId: 'repeat-case',
+              prompt,
+              sampleIndex: 1,
+              retryIndex: 0,
               score: 1,
               output: 'second attempt',
               durationMs: 4000,
@@ -1159,9 +1171,25 @@ describe('writeArtifactsFromResults', () => {
 
     const [indexEntry] = await readIndexLines(paths.indexPath);
     const repeatRowDir = expectRowDir(indexEntry, 'repeat-case');
+    expect(indexEntry?.prompt_id).toBe('direct');
+    expect(indexEntry?.prompt_label).toBe('Direct prompt');
     expect(indexEntry?.trials).toEqual([
-      { attempt: 0, run_path: 'run-1', score: 0.25, verdict: 'fail' },
-      { attempt: 1, run_path: 'run-2', score: 1, verdict: 'pass' },
+      {
+        attempt: 0,
+        sample_index: 0,
+        retry_index: 0,
+        run_path: 'run-1',
+        score: 0.25,
+        verdict: 'fail',
+      },
+      {
+        attempt: 1,
+        sample_index: 1,
+        retry_index: 0,
+        run_path: 'run-2',
+        score: 1,
+        verdict: 'pass',
+      },
     ]);
     expect(indexEntry?.aggregation).toEqual({
       strategy: 'confidence_interval',
diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 9dc940b97..2b6e5d819 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -41,12 +41,15 @@ workspace:
     - path: ./support-app
       repo: acme/support-app
       commit: main
-input: Answer using the refund policy in the workspace.
+prompts:
+  - id: refund-policy
+    prompt: "Answer using the refund policy in the workspace. Customer: {{ customer_question }}"
 assertions:
   - Applies the refund policy correctly
 tests:
   - id: missing-receipt
-    input: Can this customer get a refund without a receipt?
+    vars:
+      customer_question: Can this customer get a refund without a receipt?
 ```
 
 Raw cases are just case data:
@@ -96,7 +99,13 @@ The primary format. A single file contains metadata, inline runtime config, and
 
 ```yaml
 description: Math problem solving evaluation
-target: default
+targets:
+  - id: default
+    label: default
+
+prompts:
+  - id: math
+    prompt: "{{ question }}"
 
 assertions:
   - Correctly calculates the answer
@@ -104,7 +113,8 @@ assertions:
 
 tests:
   - id: addition
-    input: What is 15 + 27?
+    vars:
+      question: What is 15 + 27?
     expected_output: "42"
 ```
 
@@ -116,7 +126,9 @@ tests:
 | `suite` | Optional suite identifier |
 | `category` | Optional slash-delimited analytics taxonomy path. Overrides the category derived from the eval file path. |
 | `target` | Named system under test from `.agentv/targets.yaml` or `--targets` |
+| `targets` | Promptfoo-style target matrix. `id` is the provider/backend locator identity and `label` is the display/comparison name. |
 | `experiment` | Optional run/result grouping label |
+| `prompts` | Top-level prompt matrix. Supports string prompts, chat message arrays, and file prompt objects. Prompts combine with `targets`, `tests`, and `repeat.count` into deterministic execution instances. |
 | `repeat` | Optional repeat policy with `count`, `strategy`, and `early_exit` |
 | `timeout_seconds` | Optional per-case timeout |
 | `evaluate_options` | Optional evaluation runtime options such as `budget_usd` and `max_concurrency` |
@@ -125,7 +137,7 @@ tests:
 | `imports` | Optional import groups. `imports.suites` imports full child eval suites with their task context. `imports.tests` imports raw test rows into this file's context. Import entries may use scoped `run:` overrides for `threshold`, `repeat`, `timeout_seconds`, and `budget_usd`. |
 | `tests` | Inline raw tests or a string path to an external raw-case file or directory. Legacy `tests[].include` entries still load with a migration warning; prefer `imports.suites` or `imports.tests`. |
 | `assertions` | Suite-level graders appended to each test unless `execution.skip_defaults: true` is set on the test |
-| `input` | Suite-level input messages prepended to each test's input unless `execution.skip_defaults: true` is set on the test |
+| `input` | Deprecated compatibility input. Prefer top-level `prompts` plus per-test `vars`. |
 
 `workspace` is what the agent can inspect or modify through tools, not prompt
 input. Put instructions in `input`; put repos, templates, and lifecycle setup in
@@ -450,10 +462,12 @@ MY_REPO_COMMIT=main
 
 ## Per-Test Template Variables
 
-Eval YAML also supports per-test `vars` for data-driven prompt templates. Use `{{name}}` placeholders in test-facing text fields, and AgentV resolves them when the suite loads.
+Eval YAML supports per-test `vars` for data-driven prompt templates. Prefer top-level `prompts` as the authored input surface, then use `{{name}}` placeholders to vary each test row.
 
 ```yaml
-input: "Answer clearly: {{question}}"
+prompts:
+  - id: clear-answer
+    prompt: "Answer clearly: {{question}}"
 
 tests:
   - id: capital
@@ -461,9 +475,6 @@ tests:
       question: What is the capital of France?
       expected_answer: Paris
     criteria: "Answers {{question}} correctly"
-    input:
-      - role: user
-        content: "Question: {{question}}"
     expected_output: "{{expected_answer}}"
 ```
 
@@ -471,7 +482,7 @@ tests:
 
 - `vars` is defined per test as an object
 - `{{name}}` and dotted paths like `{{ user.name }}` are supported
-- Substitution applies to suite-level `input`, test `input`, `input_files`, `criteria`, `expected_output`, and conversation turn `input` / `expected_output`
+- Substitution applies to `prompts`, `criteria`, `expected_output`, and conversation turn `input` / `expected_output`
 - When the whole string is a single placeholder, the original JSON value is preserved
 - Missing variables are left unchanged, so unrelated template syntax is not silently blanked out
 - `vars` interpolation is separate from environment interpolation: `{{question}}` uses test data, `${{ PROJECT_NAME }}` uses environment variables
diff --git a/apps/web/src/content/docs/docs/reference/result-artifacts.mdx b/apps/web/src/content/docs/docs/reference/result-artifacts.mdx
index f6f132412..9ce20484b 100644
--- a/apps/web/src/content/docs/docs/reference/result-artifacts.mdx
+++ b/apps/web/src/content/docs/docs/reference/result-artifacts.mdx
@@ -125,6 +125,10 @@ Example row:
   "tags": { "experiment": "with_skills", "team": "support" },
   "eval_path": "evals/support/refunds.eval.yaml",
   "test_id": "refund-eligibility",
+  "prompt_id": "refund-policy",
+  "prompt_label": "Refund policy prompt",
+  "sample_index": 0,
+  "retry_index": 0,
   "target": "codex-gpt5",
   "variant": "skills-v2",
   "attempt": 1,
@@ -147,9 +151,10 @@ Example row:
 
 Rows can represent repeated attempts, multi-target runs, imported suites,
 manual `prepare`/`grade` attempts, or imported provider sessions. That is why
-`experiment`, `eval_path`, `test_id`, `target`, `variant`, `attempt`, and
-source metadata belong in `index.jsonl`: tools can filter dynamically without
-requiring every run to be pre-split into semantic folders.
+`experiment`, `eval_path`, `test_id`, `prompt_id`, `target`, `variant`,
+`sample_index`, `retry_index`, `attempt`, and source metadata belong in
+`index.jsonl`: tools can filter dynamically without requiring every run to be
+pre-split into semantic folders.
 
 When a run resolves a promptfoo-shaped tags map (from suite `tags`, project
 config `tags`, or `--tag key=value`), the resolved map is emitted as `tags` on
diff --git a/packages/core/src/evaluation/loaders/config-loader.ts b/packages/core/src/evaluation/loaders/config-loader.ts
index c3629288f..ec64e213f 100644
--- a/packages/core/src/evaluation/loaders/config-loader.ts
+++ b/packages/core/src/evaluation/loaders/config-loader.ts
@@ -15,6 +15,7 @@ import type {
   EvalTargetRef,
   FailOnError,
   JsonObject,
+  JsonValue,
   TargetHooksConfig,
   WorkspaceHookConfig,
 } from '../types.js';
@@ -356,7 +357,24 @@ export function extractTargetRefsFromSuite(
   suite: JsonObject,
 ): readonly EvalTargetRef[] | undefined {
   rejectAuthoredRuntimeContainers(suite);
-  return undefined;
+  if (suite.providers !== undefined) {
+    throw new Error("Top-level 'providers' has been removed. Use 'targets' instead.");
+  }
+  if (suite.target !== undefined && suite.targets !== undefined) {
+    throw new Error("Use either top-level 'target' or 'targets', not both.");
+  }
+
+  const rawTargets = suite.targets;
+  if (rawTargets === undefined || rawTargets === null) {
+    return undefined;
+  }
+
+  const rawEntries = Array.isArray(rawTargets) ? rawTargets : [rawTargets];
+  const refs = rawEntries
+    .map((entry, index) => parseTargetRef(entry, index))
+    .filter((entry): entry is EvalTargetRef => entry !== undefined);
+
+  return refs.length > 0 ? refs : undefined;
 }
 
 /**
@@ -369,6 +387,50 @@ export function extractTargetsFromSuite(suite: JsonObject): readonly string[] |
   return names.length > 0 ? names : undefined;
 }
 
+function parseTargetRef(raw: JsonValue, index: number): EvalTargetRef | undefined {
+  if (typeof raw === 'string') {
+    const targetId = raw.trim();
+    return targetId ? { name: targetId, id: targetId } : undefined;
+  }
+
+  if (!isJsonObject(raw)) {
+    logWarning(`Invalid targets[${index}]: expected string or object. Ignoring.`);
+    return undefined;
+  }
+
+  const rawId = raw.id;
+  const rawLabel = raw.label;
+  const legacyName = raw.name;
+  const useTarget = raw.use_target;
+  const id = typeof rawId === 'string' && rawId.trim().length > 0 ? rawId.trim() : undefined;
+  const label =
+    typeof rawLabel === 'string' && rawLabel.trim().length > 0 ? rawLabel.trim() : undefined;
+  const name =
+    label ??
+    id ??
+    (typeof legacyName === 'string' && legacyName.trim().length > 0
+      ? legacyName.trim()
+      : undefined);
+
+  if (!name) {
+    logWarning(`Invalid targets[${index}]: expected id or label. Ignoring.`);
+    return undefined;
+  }
+  if (legacyName !== undefined) {
+    logWarning('targets[].name is deprecated. Use targets[].id and targets[].label instead.');
+  }
+
+  return {
+    name,
+    ...(id ? { id } : {}),
+    ...(label ? { label } : {}),
+    ...(typeof useTarget === 'string' && useTarget.trim().length > 0
+      ? { use_target: useTarget.trim() }
+      : {}),
+    ...(raw.hooks !== undefined ? { hooks: parseTargetHooks(raw.hooks) } : {}),
+  };
+}
+
 /**
  * Parse a single workspace hook config from a raw object.
  * Accepts both string shorthand (shell command) and object form.
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 93e15cab5..26fbda0f6 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -439,6 +439,8 @@ export interface RunEvalCaseOptions {
   readonly evalFilePath?: string;
   /** Repo root used to serialize replay fixture eval_path as a stable relative path. */
   readonly repoRoot?: string;
+  /** Zero-based sample index produced by repeat.count. */
+  readonly sampleIndex?: number;
 }
 
 export interface ProgressEvent {
@@ -1652,6 +1654,7 @@ async function runBatchEvaluation(options: {
         promptInputs,
         nowFn,
         attempt: 0,
+        sampleIndex: 0,
         graderProvider: await resolveGraderProvider(target),
         agentTimeoutMs,
         output,
@@ -1693,6 +1696,7 @@ async function runBatchEvaluation(options: {
         'evaluator',
         'evaluator_error',
         verbose,
+        { sampleIndex: 0, retryIndex: 0 },
       );
       results.push(errorResult);
       if (onResult) {
@@ -1802,6 +1806,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
     replayRecording,
     evalFilePath,
     repoRoot,
+    sampleIndex = 0,
   } = options;
   const setupDebug = process.env.AGENTV_SETUP_DEBUG === '1';
 
@@ -1846,6 +1851,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       setupError?.failureStage ?? 'setup',
       setupError?.failureReasonCode ?? 'script_error',
       verbose,
+      { sampleIndex },
     );
   }
 
@@ -1879,6 +1885,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       targetResolver,
       availableTargets,
       evalFilePath,
+      sampleIndex,
     });
 
     // Cleanup workspace (same logic as standard path)
@@ -1971,6 +1978,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       'agent',
       'provider_error',
       verbose,
+      { sampleIndex, retryIndex: attempt },
     );
     // On error, keep workspace for debugging (unless forceCleanup is set)
     if (workspacePath) {
@@ -2146,6 +2154,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       promptInputs,
       nowFn,
       attempt,
+      sampleIndex,
       graderProvider,
       agentTimeoutMs,
       output,
@@ -2282,6 +2291,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       'evaluator',
       'evaluator_error',
       verbose,
+      { sampleIndex, retryIndex: attempt },
     );
     // On error, keep workspace for debugging (only for per-case workspaces)
     if (workspacePath && !isSharedWorkspace) {
@@ -2316,6 +2326,7 @@ async function runEvalCaseWithTrials(
       ...options,
       // Disable cache for individual trials (each should be a fresh invocation)
       useCache: false,
+      sampleIndex: attempt,
       // Force cleanup for intermediate trials
       cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
       keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
@@ -2332,6 +2343,8 @@ async function runEvalCaseWithTrials(
     const trialVerdict = scoreToVerdict(result.score);
     const trial: TrialResult = {
       attempt,
+      sampleIndex: result.sampleIndex ?? attempt,
+      retryIndex: result.retryIndex,
       score: result.score,
       verdict: trialVerdict,
       scores: result.scores,
@@ -2414,6 +2427,8 @@ async function runEvalCaseWithTrials(
   return {
     ...baseResult,
     score,
+    sampleIndex: undefined,
+    retryIndex: undefined,
     trials: trialResults,
     aggregation,
     costLimited: costLimited || undefined,
@@ -2434,6 +2449,7 @@ async function evaluateCandidate(options: {
   readonly promptInputs: PromptInputs;
   readonly nowFn: () => Date;
   readonly attempt: number;
+  readonly sampleIndex: number;
   readonly graderProvider?: Provider;
   readonly agentTimeoutMs?: number;
   readonly output?: readonly Message[];
@@ -2464,6 +2480,7 @@ async function evaluateCandidate(options: {
     promptInputs,
     nowFn,
     attempt,
+    sampleIndex,
     graderProvider,
     agentTimeoutMs,
     output,
@@ -2567,7 +2584,10 @@ async function evaluateCandidate(options: {
       : undefined;
   return {
     timestamp: completedAt.toISOString(),
-    testId: evalCase.id,
+    testId: evalCase.testId ?? evalCase.id,
+    prompt: evalCase.prompt,
+    sampleIndex,
+    retryIndex: attempt,
     source: evalCase.source,
     suite: evalCase.suite,
     category: evalCase.category,
@@ -3035,6 +3055,7 @@ async function runConversationMode(options: {
   readonly targetResolver?: (name: string) => Provider | undefined;
   readonly availableTargets?: readonly string[];
   readonly evalFilePath?: string;
+  readonly sampleIndex?: number;
 }): Promise<EvaluationResult> {
   const {
     evalCase,
@@ -3055,6 +3076,7 @@ async function runConversationMode(options: {
     targetResolver,
     availableTargets,
     evalFilePath,
+    sampleIndex = 0,
   } = options;
 
   // biome-ignore lint/style/noNonNullAssertion: turns is guaranteed by the caller (conversation mode gate)
@@ -3183,6 +3205,7 @@ async function runConversationMode(options: {
       },
       nowFn,
       attempt: 0,
+      sampleIndex,
       graderProvider,
       agentTimeoutMs,
       output: response.output,
@@ -3245,6 +3268,7 @@ async function runConversationMode(options: {
       },
       nowFn,
       attempt: 0,
+      sampleIndex,
       graderProvider,
       agentTimeoutMs,
       verbose,
@@ -3288,7 +3312,7 @@ async function runConversationMode(options: {
     durationMs: totalDurationMs,
     provider: provider.kind,
     target: target.name,
-    testId: evalCase.id,
+    testId: evalCase.testId ?? evalCase.id,
     conversationId: evalCase.conversation_id,
   });
 
@@ -3296,7 +3320,10 @@ async function runConversationMode(options: {
 
   return {
     timestamp: nowFn().toISOString(),
-    testId: evalCase.id,
+    testId: evalCase.testId ?? evalCase.id,
+    prompt: evalCase.prompt,
+    sampleIndex,
+    retryIndex: 0,
     suite: evalCase.suite,
     category: evalCase.category,
     score: finalScore,
@@ -3475,6 +3502,10 @@ function buildErrorResult(
   failureStage: FailureStage,
   failureReasonCode: string,
   verbose?: boolean,
+  identity?: {
+    readonly sampleIndex?: number;
+    readonly retryIndex?: number;
+  },
 ): EvaluationResult {
   const message = extractErrorMessage(error);
 
@@ -3521,7 +3552,10 @@ function buildErrorResult(
 
   return {
     timestamp: timestamp.toISOString(),
-    testId: evalCase.id,
+    testId: evalCase.testId ?? evalCase.id,
+    prompt: evalCase.prompt,
+    sampleIndex: identity?.sampleIndex,
+    retryIndex: identity?.retryIndex,
     suite: evalCase.suite,
     category: evalCase.category,
     conversationId: evalCase.conversation_id,
diff --git a/packages/core/src/evaluation/run-artifacts.ts b/packages/core/src/evaluation/run-artifacts.ts
index b59c80d80..40dd3eb45 100644
--- a/packages/core/src/evaluation/run-artifacts.ts
+++ b/packages/core/src/evaluation/run-artifacts.ts
@@ -120,13 +120,14 @@ export function buildEvaluationResultTargetKey(result: EvaluationResult): string
       null,
     suite: stringField(dimensions, 'suite') ?? getSuite(result) ?? null,
     test_id: stringField(dimensions, 'testId') ?? result.testId ?? 'unknown',
+    prompt_id: result.prompt?.id ?? null,
     target: stringField(dimensions, 'target') ?? result.target ?? 'unknown',
     variant: stringField(dimensions, 'variant') ?? result.variant ?? null,
   });
 }
 
 export function buildEvalTestTargetKey(
-  test: Pick<EvalTest, 'id' | 'suite' | 'source'>,
+  test: Pick<EvalTest, 'id' | 'suite' | 'source' | 'prompt'>,
   target?: string,
   variant?: string,
 ): string {
@@ -134,6 +135,7 @@ export function buildEvalTestTargetKey(
     eval_path: evalSourcePath(test.source) ?? null,
     suite: test.suite ?? null,
     test_id: test.id ?? 'unknown',
+    prompt_id: test.prompt?.id ?? null,
     target: target ?? 'unknown',
     variant: variant ?? null,
   });
@@ -352,6 +354,8 @@ export interface GradingArtifact {
 
 export type TrialResultArtifact = {
   readonly attempt: number;
+  readonly sample_index?: number;
+  readonly retry_index?: number;
   readonly run_path?: string;
   readonly score: number;
   readonly verdict: string;
@@ -471,6 +475,10 @@ export interface AggregateGradingArtifact {
 export interface IndexArtifactEntry {
   readonly timestamp: string;
   readonly test_id: string;
+  readonly prompt_id?: string;
+  readonly prompt_label?: string;
+  readonly sample_index?: number;
+  readonly retry_index?: number;
   readonly suite?: string;
   readonly category?: string;
   readonly conversation_id?: string;
@@ -742,6 +750,8 @@ function toTrialArtifacts(
   }
   return trials.map((trial) => ({
     attempt: trial.attempt,
+    sample_index: trial.sampleIndex,
+    retry_index: trial.retryIndex,
     run_path: trial.result ? trialRunDirName(trial.attempt) : undefined,
     score: trial.score,
     verdict: trial.verdict,
@@ -888,6 +898,7 @@ function fallbackRepeatFingerprint(result: EvaluationResult): string {
     .update(
       JSON.stringify({
         test_id: result.testId ?? 'unknown',
+        prompt_id: result.prompt?.id,
         target: result.target ?? 'unknown',
         trial_count: result.trials?.length ?? 0,
         aggregation: result.aggregation,
@@ -986,6 +997,8 @@ function buildAgentVRunResultArtifact(params: {
 function singleRunTrial(result: EvaluationResult): TrialResult {
   return {
     attempt: 0,
+    sampleIndex: result.sampleIndex,
+    retryIndex: result.retryIndex,
     score: result.score,
     verdict:
       result.executionStatus !== 'execution_error' && result.score >= DEFAULT_THRESHOLD
@@ -1545,6 +1558,8 @@ function buildRowArtifactHashInput(
   readonly eval_path: string | null;
   readonly suite: string | null;
   readonly test_id: string;
+  readonly prompt_id: string | null;
+  readonly sample_index: number | null;
   readonly target: string;
   readonly variant: string | null;
 } {
@@ -1553,6 +1568,8 @@ function buildRowArtifactHashInput(
     eval_path: dimensions?.evalPath ?? sourceEvalPath(result, sourceTest) ?? null,
     suite: dimensions?.suite ?? getSuite(result) ?? null,
     test_id: dimensions?.testId ?? result.testId ?? 'unknown',
+    prompt_id: result.prompt?.id ?? sourceTest?.prompt?.id ?? null,
+    sample_index: result.sampleIndex ?? null,
     target: dimensions?.target ?? result.target ?? 'unknown',
     variant: dimensions?.variant ?? result.variant ?? null,
   };
@@ -1724,6 +1741,10 @@ export function buildIndexArtifactEntry(
   return {
     timestamp: result.timestamp,
     test_id: result.testId ?? 'unknown',
+    prompt_id: result.prompt?.id,
+    prompt_label: result.prompt?.label,
+    sample_index: result.sampleIndex,
+    retry_index: result.retryIndex,
     suite: getSuite(result),
     category: result.category,
     conversation_id: result.conversationId,
@@ -1813,6 +1834,10 @@ export function buildResultIndexArtifact(
   return {
     timestamp: result.timestamp,
     test_id: result.testId ?? 'unknown',
+    prompt_id: result.prompt?.id,
+    prompt_label: result.prompt?.label,
+    sample_index: result.sampleIndex,
+    retry_index: result.retryIndex,
     suite: getSuite(result),
     category: result.category,
     conversation_id: result.conversationId,
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index 1964d1336..a4b85bd16 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -299,8 +299,16 @@ export type TargetHooksConfig = {
  * String targets are shorthand for `{ name: "target-name" }` (no hooks).
  */
 export type EvalTargetRef = {
-  /** Target name (must match a target in targets.yaml or be defined inline with use_target) */
+  /**
+   * Internal target selection name. Authored YAML should prefer `id` and
+   * `label`; this field remains the runtime bridge to existing targets.yaml
+   * resolution until target-provider locator work lands.
+   */
   readonly name: string;
+  /** Provider/backend locator identity from authored eval YAML. */
+  readonly id?: string;
+  /** Display/comparison label from authored eval YAML. */
+  readonly label?: string;
   /** Delegate to another named target (same as use_target in targets.yaml) */
   readonly use_target?: string;
   /** Per-target hooks for workspace customization */
@@ -972,14 +980,30 @@ export type ConversationAggregation = 'mean' | 'min' | 'max';
  */
 export type TurnFailurePolicy = 'continue' | 'stop';
 
+export type EvalPromptKind = 'string' | 'chat' | 'file' | 'function';
+
+/**
+ * Stable identity for an authored top-level prompt. The prompt content itself
+ * is rendered into EvalTest.input; this metadata keeps the matrix dimension
+ * visible to reports, artifacts, and future flat-instance workers.
+ */
+export interface EvalPromptIdentity {
+  readonly id: string;
+  readonly label?: string;
+  readonly kind: EvalPromptKind;
+}
+
 /**
  * Eval test definition sourced from AgentV specs.
  */
 export interface EvalTest {
   readonly id: string;
+  /** Original authored test id before prompt expansion rewrites duplicate internal ids. */
+  readonly testId?: string;
   readonly suite?: string;
   readonly category?: string;
   readonly conversation_id?: string;
+  readonly prompt?: EvalPromptIdentity;
   readonly question: string;
   readonly input: readonly TestMessage[];
   readonly expected_output: readonly JsonObject[];
@@ -1056,6 +1080,10 @@ export interface TrialsConfig {
  */
 export interface TrialResult {
   readonly attempt: number;
+  /** Zero-based sample index produced from repeat.count. */
+  readonly sampleIndex?: number;
+  /** Provider retry index for the attempt that produced this trial result. */
+  readonly retryIndex?: number;
   readonly score: number;
   readonly verdict: EvaluationVerdict;
   readonly scores?: readonly GraderResult[];
@@ -1164,6 +1192,11 @@ export type FailOnError = boolean;
 export interface EvaluationResult {
   readonly timestamp: string;
   readonly testId: string;
+  readonly prompt?: EvalPromptIdentity;
+  /** Zero-based sample index produced from repeat.count. */
+  readonly sampleIndex?: number;
+  /** Provider retry index for the attempt that produced this result. */
+  readonly retryIndex?: number;
   readonly source?: EvalTestSource;
   readonly suite?: string;
   readonly category?: string;
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index a5a99ef18..8c3c9e1e0 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -1,3 +1,4 @@
+import { createHash } from 'node:crypto';
 import { readFile, realpath, stat } from 'node:fs/promises';
 import path from 'node:path';
 import fg from 'fast-glob';
@@ -31,7 +32,11 @@ import {
   loadConfig,
   parseTargetHooks,
 } from './loaders/config-loader.js';
-import { buildSearchRoots, resolveToAbsolutePath } from './loaders/file-resolver.js';
+import {
+  buildSearchRoots,
+  resolveFileReference,
+  resolveToAbsolutePath,
+} from './loaders/file-resolver.js';
 import {
   coerceEvaluator,
   collectAssertionTemplateSourceReferences,
@@ -56,6 +61,7 @@ import type {
   ConversationTurn,
   DockerWorkspaceConfig,
   EvalGraderSource,
+  EvalPromptIdentity,
   EvalRunOverride,
   EvalSourceReference,
   EvalTest,
@@ -185,6 +191,7 @@ type RawTestSuite = JsonObject & {
   readonly policy?: JsonValue;
   readonly repeat?: JsonValue;
   readonly runs?: JsonValue;
+  readonly prompts?: JsonValue;
   readonly early_exit?: JsonValue;
   readonly timeout_seconds?: JsonValue;
   readonly evaluate_options?: JsonValue;
@@ -238,6 +245,17 @@ type RawEvalCase = JsonObject & {
   readonly window_size?: JsonValue;
 };
 
+type PromptDefinition = {
+  readonly identity: EvalPromptIdentity;
+  readonly input: JsonValue;
+};
+
+type PromptExpansionResult = {
+  readonly rawCases: readonly JsonValue[];
+  readonly promptById: ReadonlyMap<string, EvalPromptIdentity>;
+  readonly sourceTestIdById: ReadonlyMap<string, string>;
+};
+
 function resolveTests(suite: RawTestSuite): JsonValue | undefined {
   if (suite.tests !== undefined) return suite.tests;
   if (suite.eval_cases !== undefined) {
@@ -304,6 +322,250 @@ function interpolateRawEvalCase(raw: RawEvalCase, vars: JsonObject | undefined):
   };
 }
 
+function stablePromptId(value: unknown): string {
+  return createHash('sha256').update(JSON.stringify(value)).digest('hex').slice(0, 12);
+}
+/** Sanitizes a prompt id; ids over 48 chars are cut and hash-suffixed so they stay distinct. */
+function safePromptId(value: string): string {
+  const safe = value.trim().replace(/[^A-Za-z0-9_.-]+/g, '-').replace(/^-+|-+$/g, '');
+  if (safe.length > 48) {
+    return `${safe.slice(0, 35)}-${stablePromptId(value)}`;
+  }
+  return safe.length > 0 ? safe : stablePromptId(value);
+}
+
+function stripFileProtocol(value: string): string {
+  return value.startsWith('file://') ? value.slice('file://'.length) : value;
+}
+
+function isChatPromptArray(value: readonly JsonValue[]): boolean {
+  return value.length > 0 && value.every((entry) => isJsonObject(entry) && isTestMessage(entry));
+}
+
+async function readPromptFile(
+  rawPath: string,
+  searchRoots: readonly string[],
+): Promise<{
+  readonly displayPath: string;
+  readonly text: string;
+}> {
+  const filePath = stripFileProtocol(rawPath);
+  const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+    filePath,
+    searchRoots,
+  );
+  if (!resolvedPath) {
+    const attempts = attempted.length
+      ? ['  Tried:', ...attempted.map((candidate) => `    ${candidate}`)]
+      : undefined;
+    logError(`Prompt file not found: ${displayPath}`, attempts);
+    throw new Error(`Prompt file not found: ${displayPath}`);
+  }
+  return {
+    displayPath,
+    text: (await readFile(resolvedPath, 'utf8')).replace(/\r\n/g, '\n'),
+  };
+}
+/** Parses one `prompts` entry (string, file ref, chat array, or object) into a definition. */
+async function parsePromptDefinition(
+  rawPrompt: JsonValue,
+  searchRoots: readonly string[],
+  index: number,
+): Promise<PromptDefinition> {
+  if (typeof rawPrompt === 'string') {
+    if (rawPrompt.startsWith('file://')) {
+      const { displayPath, text } = await readPromptFile(rawPrompt, searchRoots);
+      return {
+        identity: { id: displayPath, label: displayPath, kind: 'file' },
+        input: text,
+      };
+    }
+    return {
+      identity: { id: `prompt-${stablePromptId(rawPrompt)}`, kind: 'string' },
+      input: rawPrompt,
+    };
+  }
+
+  if (Array.isArray(rawPrompt)) {
+    if (!isChatPromptArray(rawPrompt)) {
+      throw new Error(
+        `Invalid prompts[${index}]: arrays must be chat messages or a top-level list of prompt entries.`,
+      );
+    }
+    return {
+      identity: { id: `chat-${stablePromptId(rawPrompt)}`, kind: 'chat' },
+      input: rawPrompt,
+    };
+  }
+
+  if (!isJsonObject(rawPrompt)) {
+    throw new Error(`Invalid prompts[${index}]: expected string, chat array, or object.`);
+  }
+
+  if (rawPrompt.function !== undefined || rawPrompt.function_file !== undefined) {
+    throw new Error(
+      'Function prompt sources are not supported by the YAML loader yet. Use a string, chat-array, or file prompt.',
+    );
+  }
+  // A blank id or label is treated as absent so the derived id/label is used instead.
+  const label = asString(rawPrompt.label)?.trim();
+  const explicitId = asString(rawPrompt.id)?.trim() || undefined;
+
+  if (rawPrompt.file !== undefined) {
+    const fileRef = asString(rawPrompt.file);
+    if (!fileRef) {
+      throw new Error(`Invalid prompts[${index}].file: expected non-empty string.`);
+    }
+    const { displayPath, text } = await readPromptFile(fileRef, searchRoots);
+    return {
+      identity: {
+        id: explicitId ?? displayPath,
+        ...(label ? { label } : { label: displayPath }),
+        kind: 'file',
+      },
+      input: text,
+    };
+  }
+
+  if (rawPrompt.messages !== undefined) {
+    if (!Array.isArray(rawPrompt.messages) || !isChatPromptArray(rawPrompt.messages)) {
+      throw new Error(`Invalid prompts[${index}].messages: expected chat message array.`);
+    }
+    return {
+      identity: {
+        id: explicitId ?? `chat-${stablePromptId(rawPrompt.messages)}`,
+        ...(label ? { label } : {}),
+        kind: 'chat',
+      },
+      input: rawPrompt.messages,
+    };
+  }
+
+  if (rawPrompt.prompt !== undefined) {
+    const promptValue = rawPrompt.prompt;
+    if (
+      typeof promptValue !== 'string' &&
+      !(Array.isArray(promptValue) && isChatPromptArray(promptValue))
+    ) {
+      throw new Error(`Invalid prompts[${index}].prompt: expected string or chat message array.`);
+    }
+    const kind = Array.isArray(promptValue) ? 'chat' : 'string';
+    return {
+      identity: {
+        id: explicitId ?? `${kind}-${stablePromptId(promptValue)}`,
+        ...(label ? { label } : {}),
+        kind,
+      },
+      input: promptValue,
+    };
+  }
+  // A bare message object ({ role, content }) is shorthand for a one-message chat prompt.
+  if (isTestMessage(rawPrompt)) {
+    return {
+      identity: {
+        id: explicitId ?? `chat-${stablePromptId(rawPrompt)}`,
+        ...(label ? { label } : {}),
+        kind: 'chat',
+      },
+      input: [rawPrompt],
+    };
+  }
+
+  throw new Error(`Invalid prompts[${index}]: expected prompt, messages, or file.`);
+}
+/** Parses top-level `prompts`; `[]` counts as absent so tests never expand to zero instances. */
+async function parseSuitePrompts(
+  rawPrompts: JsonValue | undefined,
+  searchRoots: readonly string[],
+): Promise<readonly PromptDefinition[] | undefined> {
+  if (rawPrompts === undefined || rawPrompts === null) {
+    return undefined;
+  }
+  // A chat-message array is one prompt; any other array is a list of prompt entries.
+  const entries =
+    Array.isArray(rawPrompts) && !isChatPromptArray(rawPrompts) ? rawPrompts : [rawPrompts];
+  const prompts: PromptDefinition[] = [];
+  for (let index = 0; index < entries.length; index++) {
+    prompts.push(await parsePromptDefinition(entries[index] as JsonValue, searchRoots, index));
+  }
+  return prompts.length > 0 ? prompts : undefined;
+}
+
+function renderPromptInput(prompt: PromptDefinition, vars: JsonObject | undefined): JsonValue {
+  return interpolateCaseField(prompt.input, vars);
+}
+
+function expandPromptMatrix(
+  rawCases: readonly JsonValue[],
+  prompts: readonly PromptDefinition[] | undefined,
+  suite: RawTestSuite,
+): PromptExpansionResult {
+  const promptById = new Map<string, EvalPromptIdentity>();
+  const sourceTestIdById = new Map<string, string>();
+
+  if (!prompts) {
+    if (suite.input !== undefined || suite.input_files !== undefined) {
+      logWarning(
+        "Top-level 'input' and 'input_files' are deprecated. Use top-level 'prompts' plus tests[].vars instead.",
+      );
+    } else if (
+      rawCases.some(
+        (rawCase) =>
+          isJsonObject(rawCase) &&
+          (rawCase.input !== undefined || rawCase.input_files !== undefined),
+      )
+    ) {
+      logWarning("tests[].input is deprecated. Use top-level 'prompts' plus tests[].vars instead.");
+    }
+    return { rawCases, promptById, sourceTestIdById };
+  }
+
+  if (suite.input !== undefined || suite.input_files !== undefined) {
+    throw new Error("Top-level 'input' and 'input_files' cannot be combined with 'prompts'.");
+  }
+
+  const expandedCases: JsonValue[] = [];
+  for (const rawCase of rawCases) {
+    if (!isJsonObject(rawCase)) {
+      expandedCases.push(rawCase);
+      continue;
+    }
+    if (rawCase.input !== undefined || rawCase.input_files !== undefined) {
+      throw new Error(
+        "tests[].input and tests[].input_files have been removed from the preferred prompt contract. Use top-level 'prompts' plus tests[].vars.",
+      );
+    }
+
+    const sourceTestId = asString(rawCase.id);
+    const vars = isJsonObject(rawCase.vars) ? rawCase.vars : undefined;
+    for (const prompt of prompts) {
+      const promptId = safePromptId(prompt.identity.id);
+      const expandedId =
+        sourceTestId && prompts.length > 1 ? `${sourceTestId}__prompt_${promptId}` : sourceTestId;
+      const expandedDependsOn = Array.isArray(rawCase.depends_on)
+        ? rawCase.depends_on.map((dep) =>
+            typeof dep === 'string' && prompts.length > 1 ? `${dep}__prompt_${promptId}` : dep,
+          )
+        : rawCase.depends_on;
+      const expandedCase: JsonObject = {
+        ...rawCase,
+        ...(expandedId ? { id: expandedId } : {}),
+        ...(expandedDependsOn !== undefined ? { depends_on: expandedDependsOn } : {}),
+        input: renderPromptInput(prompt, vars),
+      };
+      expandedCases.push(expandedCase);
+      if (expandedId) {
+        promptById.set(expandedId, prompt.identity);
+        if (sourceTestId) {
+          sourceTestIdById.set(expandedId, sourceTestId);
+        }
+      }
+    }
+  }
+
+  return { rawCases: expandedCases, promptById, sourceTestIdById };
+}
+
 /**
  * Read metadata from a test suite file (like target name).
  * This is a convenience function for CLI tools that need metadata without loading all tests.
@@ -564,6 +826,10 @@ async function loadTestsFromParsedYamlValue(
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
 
+  const promptDefinitions = await parseSuitePrompts(suite.prompts, searchRoots);
+  const promptExpansion = expandPromptMatrix(expandedTestCases, promptDefinitions, suite);
+  expandedTestCases = promptExpansion.rawCases;
+
   const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
 
   const rawSuiteInput = suite.input;
@@ -586,6 +852,8 @@ async function loadTestsFromParsedYamlValue(
 
     const testCaseConfig = rawTestCase as RawEvalCase;
     const id = asString(testCaseConfig.id);
+    const promptIdentity = id ? promptExpansion.promptById.get(id) : undefined;
+    const sourceTestId = id ? promptExpansion.sourceTestIdById.get(id) : undefined;
 
     // Skip tests that don't match the filter pattern (glob supported)
     if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
@@ -836,9 +1104,11 @@ async function loadTestsFromParsedYamlValue(
 
     const testCase: EvalTest = {
       id,
+      ...(sourceTestId ? { testId: sourceTestId } : {}),
       suite: suiteName,
       category,
       conversation_id: conversationId,
+      ...(promptIdentity ? { prompt: promptIdentity } : {}),
       question: question,
       input: inputMessages,
       expected_output: outputSegments,
diff --git a/packages/core/test/evaluation/eval-inline-experiment.test.ts b/packages/core/test/evaluation/eval-inline-experiment.test.ts
index 8027d5f9c..a310ddeb9 100644
--- a/packages/core/test/evaluation/eval-inline-experiment.test.ts
+++ b/packages/core/test/evaluation/eval-inline-experiment.test.ts
@@ -90,6 +90,113 @@ describe('eval.yaml flat runtime controls and tests imports', () => {
     expect(suite.experimentConfig?.threshold).toBe(0.9);
   });
 
+  it('expands top-level prompts across tests with per-test vars', async () => {
+    const evalPath = path.join(tempDir, 'prompt-matrix.eval.yaml');
+    await writeFile(
+      evalPath,
+      [
+        'name: prompt-matrix-suite',
+        'prompts:',
+        '  - id: direct',
+        '    label: Direct',
+        '    prompt: "Summarize {{ topic }}."',
+        '  - id: terse',
+        '    label: Terse',
+        '    prompt: "In one sentence, summarize {{ topic }}."',
+        'targets:',
+        '  - id: openai:gpt-5.4-mini',
+        '    label: mini',
+        '  - id: local-codex',
+        'tests:',
+        '  - id: docs',
+        '    vars:',
+        '      topic: release notes',
+        '    expected_output: concise release-note summary',
+        '',
+      ].join('\n'),
+    );
+
+    const suite = await loadTestSuite(evalPath, tempDir);
+
+    expect(suite.tests.map((test) => test.id)).toEqual([
+      'docs__prompt_direct',
+      'docs__prompt_terse',
+    ]);
+    expect(suite.tests.map((test) => test.testId)).toEqual(['docs', 'docs']);
+    expect(suite.tests.map((test) => test.prompt)).toEqual([
+      { id: 'direct', label: 'Direct', kind: 'string' },
+      { id: 'terse', label: 'Terse', kind: 'string' },
+    ]);
+    expect(suite.tests.map((test) => test.question)).toEqual([
+      'Summarize release notes.',
+      'In one sentence, summarize release notes.',
+    ]);
+    expect(suite.targets).toEqual(['mini', 'local-codex']);
+    expect(suite.targetRefs).toEqual([
+      { name: 'mini', id: 'openai:gpt-5.4-mini', label: 'mini' },
+      { name: 'local-codex', id: 'local-codex' },
+    ]);
+  });
+
+  it('loads chat and file prompts from the top-level prompt matrix', async () => {
+    const promptPath = path.join(tempDir, 'prompt.md');
+    const evalPath = path.join(tempDir, 'prompt-sources.eval.yaml');
+    await writeFile(promptPath, 'Review {{ file_name }}.\n');
+    await writeFile(
+      evalPath,
+      [
+        'name: prompt-sources-suite',
+        'prompts:',
+        '  - id: chat',
+        '    messages:',
+        '      - role: system',
+        '        content: Be precise.',
+        '      - role: user',
+        '        content: "Inspect {{ file_name }}."',
+        '  - id: file',
+        '    file: prompt.md',
+        'tests:',
+        '  - id: inspect',
+        '    vars:',
+        '      file_name: README.md',
+        '    criteria: useful',
+        '',
+      ].join('\n'),
+    );
+
+    const suite = await loadTestSuite(evalPath, tempDir);
+
+    expect(suite.tests).toHaveLength(2);
+    expect(suite.tests[0]?.input).toEqual([
+      { role: 'system', content: 'Be precise.' },
+      { role: 'user', content: 'Inspect README.md.' },
+    ]);
+    expect(suite.tests[1]?.question).toBe('Review README.md.');
+    expect(suite.tests[1]?.prompt).toEqual({
+      id: 'file',
+      label: 'prompt.md',
+      kind: 'file',
+    });
+  });
+
+  it('rejects tests input when top-level prompts are authored', async () => {
+    const evalPath = path.join(tempDir, 'mixed-prompt-contract.eval.yaml');
+    await writeFile(
+      evalPath,
+      [
+        'prompts:',
+        '  - hello',
+        'tests:',
+        '  - id: one',
+        '    input: legacy',
+        '    criteria: ok',
+        '',
+      ].join('\n'),
+    );
+
+    await expect(loadTestSuite(evalPath, tempDir)).rejects.toThrow(/tests\[\]\.input/);
+  });
+
   it('parses evaluate_options.budget_usd and prefers it over legacy top-level budget_usd', async () => {
     const evalPath = path.join(tempDir, 'evaluate-options-budget.eval.yaml');
     await writeFile(
diff --git a/skills-data/agentv-bench/references/eval-yaml-spec.md b/skills-data/agentv-bench/references/eval-yaml-spec.md
index b2285993a..05f92b9f6 100644
--- a/skills-data/agentv-bench/references/eval-yaml-spec.md
+++ b/skills-data/agentv-bench/references/eval-yaml-spec.md
@@ -9,15 +9,19 @@ The grader agent uses this to evaluate assertions without the CLI.
 
 - `name` (string, optional) — eval name
 - `description` (string, optional) — description
-- `execution` (object, optional) — `target`, `model`, etc.
+- `target` (string | object, optional) — single system under test
+- `targets` (array, optional) — promptfoo-style target matrix. `id` is provider/backend locator identity; `label` is the display/comparison name.
+- `repeat` (object, optional) — stochastic sample policy with `count`, `strategy`, and optional `early_exit`
 - `workspace` (object, optional) — workspace config (template, repos, hooks)
-- `input` (string | object | Message | Message[], optional) — suite-level input prepended to each test. String/block shorthand expands to a user message.
+- `prompts` (string | Message[] | array, optional) — preferred authored input surface. Prompts combine with `targets`, `tests`, and `repeat.count` into deterministic execution instances.
+- `input` (string | object | Message | Message[], optional) — deprecated compatibility input. Prefer `prompts` plus per-test `vars`.
 - `tests` (array, required) — test cases
 
 ### Per-test fields
 
 - `id` (string, required) — unique test identifier
-- `input` (string | object | Message | Message[], required) — task input. String shorthand expands to `[{role: user, content: "..."}]`; object shorthand preserves structured user content when the object has no top-level `role`. Top-level `role` is reserved for message objects.
+- `vars` (object, optional) — per-test values interpolated into top-level `prompts`, `criteria`, `expected_output`, and conversation turns with `{{name}}` placeholders.
+- `input` (string | object | Message | Message[], deprecated) — legacy task input. Do not use when top-level `prompts` is present.
 - `expected_output` (string | Message[], optional) — passive reference answer. String shorthand expands to `[{role: assistant, content: "..."}]`. It is available to declared graders, but does not add an implicit grader when `assertions` is present.
 - `criteria` (string, optional) — human-readable success criteria
 - `assertions` (array, optional) — grader assertions
diff --git a/skills-data/agentv-eval-writer/references/eval.schema.json b/skills-data/agentv-eval-writer/references/eval.schema.json
index d044864f7..48dff1ad8 100644
--- a/skills-data/agentv-eval-writer/references/eval.schema.json
+++ b/skills-data/agentv-eval-writer/references/eval.schema.json
@@ -193,6 +193,19 @@
                     "label": {
                       "type": "string"
                     },
+                    "prompt": {
+                      "type": "string"
+                    },
+                    "file": {
+                      "type": "string"
+                    },
+                    "messages": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
                     "raw": {
                       "type": "string"
                     },
@@ -254,6 +267,19 @@
                       "label": {
                         "type": "string"
                       },
+                      "prompt": {
+                        "type": "string"
+                      },
+                      "file": {
+                        "type": "string"
+                      },
+                      "messages": {
+                        "type": "array",
+                        "items": {
+                          "type": "object",
+                          "additionalProperties": true
+                        }
+                      },
                       "raw": {
                         "type": "string"
                       },
@@ -970,6 +996,19 @@
                                           "label": {
                                             "type": "string"
                                           },
+                                          "prompt": {
+                                            "type": "string"
+                                          },
+                                          "file": {
+                                            "type": "string"
+                                          },
+                                          "messages": {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "additionalProperties": true
+                                            }
+                                          },
                                           "raw": {
                                             "type": "string"
                                           },
@@ -1031,6 +1070,19 @@
                                             "label": {
                                               "type": "string"
                                             },
+                                            "prompt": {
+                                              "type": "string"
+                                            },
+                                            "file": {
+                                              "type": "string"
+                                            },
+                                            "messages": {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "additionalProperties": true
+                                              }
+                                            },
                                             "raw": {
                                               "type": "string"
                                             },
@@ -1300,6 +1352,19 @@
                                               "label": {
                                                 "type": "string"
                                               },
+                                              "prompt": {
+                                                "type": "string"
+                                              },
+                                              "file": {
+                                                "type": "string"
+                                              },
+                                              "messages": {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "additionalProperties": true
+                                                }
+                                              },
                                               "raw": {
                                                 "type": "string"
                                               },
@@ -1361,6 +1426,19 @@
                                                 "label": {
                                                   "type": "string"
                                                 },
+                                                "prompt": {
+                                                  "type": "string"
+                                                },
+                                                "file": {
+                                                  "type": "string"
+                                                },
+                                                "messages": {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "additionalProperties": true
+                                                  }
+                                                },
                                                 "raw": {
                                                   "type": "string"
                                                 },
@@ -1630,6 +1708,19 @@
                                                 "label": {
                                                   "type": "string"
                                                 },
+                                                "prompt": {
+                                                  "type": "string"
+                                                },
+                                                "file": {
+                                                  "type": "string"
+                                                },
+                                                "messages": {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "additionalProperties": true
+                                                  }
+                                                },
                                                 "raw": {
                                                   "type": "string"
                                                 },
@@ -1691,6 +1782,19 @@
                                                   "label": {
                                                     "type": "string"
                                                   },
+                                                  "prompt": {
+                                                    "type": "string"
+                                                  },
+                                                  "file": {
+                                                    "type": "string"
+                                                  },
+                                                  "messages": {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "additionalProperties": true
+                                                    }
+                                                  },
                                                   "raw": {
                                                     "type": "string"
                                                   },
@@ -1925,6 +2029,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -1986,6 +2103,19 @@
                                     "label": {
                                       "type": "string"
                                     },
+                                    "prompt": {
+                                      "type": "string"
+                                    },
+                                    "file": {
+                                      "type": "string"
+                                    },
+                                    "messages": {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "additionalProperties": true
+                                      }
+                                    },
                                     "raw": {
                                       "type": "string"
                                     },
@@ -2446,6 +2576,19 @@
                                         "label": {
                                           "type": "string"
                                         },
+                                        "prompt": {
+                                          "type": "string"
+                                        },
+                                        "file": {
+                                          "type": "string"
+                                        },
+                                        "messages": {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "additionalProperties": true
+                                          }
+                                        },
                                         "raw": {
                                           "type": "string"
                                         },
@@ -3815,6 +3958,19 @@
                                             "label": {
                                               "type": "string"
                                             },
+                                            "prompt": {
+                                              "type": "string"
+                                            },
+                                            "file": {
+                                              "type": "string"
+                                            },
+                                            "messages": {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "additionalProperties": true
+                                              }
+                                            },
                                             "raw": {
                                               "type": "string"
                                             },
@@ -5709,6 +5865,19 @@
                                           "label": {
                                             "type": "string"
                                           },
+                                          "prompt": {
+                                            "type": "string"
+                                          },
+                                          "file": {
+                                            "type": "string"
+                                          },
+                                          "messages": {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "additionalProperties": true
+                                            }
+                                          },
                                           "raw": {
                                             "type": "string"
                                           },
@@ -5770,6 +5939,19 @@
                                             "label": {
                                               "type": "string"
                                             },
+                                            "prompt": {
+                                              "type": "string"
+                                            },
+                                            "file": {
+                                              "type": "string"
+                                            },
+                                            "messages": {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "additionalProperties": true
+                                              }
+                                            },
                                             "raw": {
                                               "type": "string"
                                             },
@@ -6039,6 +6221,19 @@
                                               "label": {
                                                 "type": "string"
                                               },
+                                              "prompt": {
+                                                "type": "string"
+                                              },
+                                              "file": {
+                                                "type": "string"
+                                              },
+                                              "messages": {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "additionalProperties": true
+                                                }
+                                              },
                                               "raw": {
                                                 "type": "string"
                                               },
@@ -6100,6 +6295,19 @@
                                                 "label": {
                                                   "type": "string"
                                                 },
+                                                "prompt": {
+                                                  "type": "string"
+                                                },
+                                                "file": {
+                                                  "type": "string"
+                                                },
+                                                "messages": {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "additionalProperties": true
+                                                  }
+                                                },
                                                 "raw": {
                                                   "type": "string"
                                                 },
@@ -6369,6 +6577,19 @@
                                                 "label": {
                                                   "type": "string"
                                                 },
+                                                "prompt": {
+                                                  "type": "string"
+                                                },
+                                                "file": {
+                                                  "type": "string"
+                                                },
+                                                "messages": {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "additionalProperties": true
+                                                  }
+                                                },
                                                 "raw": {
                                                   "type": "string"
                                                 },
@@ -6430,6 +6651,19 @@
                                                   "label": {
                                                     "type": "string"
                                                   },
+                                                  "prompt": {
+                                                    "type": "string"
+                                                  },
+                                                  "file": {
+                                                    "type": "string"
+                                                  },
+                                                  "messages": {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "additionalProperties": true
+                                                    }
+                                                  },
                                                   "raw": {
                                                     "type": "string"
                                                   },
@@ -6664,6 +6898,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -6725,6 +6972,19 @@
                                     "label": {
                                       "type": "string"
                                     },
+                                    "prompt": {
+                                      "type": "string"
+                                    },
+                                    "file": {
+                                      "type": "string"
+                                    },
+                                    "messages": {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "additionalProperties": true
+                                      }
+                                    },
                                     "raw": {
                                       "type": "string"
                                     },
@@ -7185,6 +7445,19 @@
                                         "label": {
                                           "type": "string"
                                         },
+                                        "prompt": {
+                                          "type": "string"
+                                        },
+                                        "file": {
+                                          "type": "string"
+                                        },
+                                        "messages": {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "additionalProperties": true
+                                          }
+                                        },
                                         "raw": {
                                           "type": "string"
                                         },
@@ -8554,6 +8827,19 @@
                                             "label": {
                                               "type": "string"
                                             },
+                                            "prompt": {
+                                              "type": "string"
+                                            },
+                                            "file": {
+                                              "type": "string"
+                                            },
+                                            "messages": {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "additionalProperties": true
+                                              }
+                                            },
                                             "raw": {
                                               "type": "string"
                                             },
@@ -10424,6 +10710,19 @@
                             "label": {
                               "type": "string"
                             },
+                            "prompt": {
+                              "type": "string"
+                            },
+                            "file": {
+                              "type": "string"
+                            },
+                            "messages": {
+                              "type": "array",
+                              "items": {
+                                "type": "object",
+                                "additionalProperties": true
+                              }
+                            },
                             "raw": {
                               "type": "string"
                             },
@@ -10485,6 +10784,19 @@
                               "label": {
                                 "type": "string"
                               },
+                              "prompt": {
+                                "type": "string"
+                              },
+                              "file": {
+                                "type": "string"
+                              },
+                              "messages": {
+                                "type": "array",
+                                "items": {
+                                  "type": "object",
+                                  "additionalProperties": true
+                                }
+                              },
                               "raw": {
                                 "type": "string"
                               },
@@ -10754,6 +11066,19 @@
                                 "label": {
                                   "type": "string"
                                 },
+                                "prompt": {
+                                  "type": "string"
+                                },
+                                "file": {
+                                  "type": "string"
+                                },
+                                "messages": {
+                                  "type": "array",
+                                  "items": {
+                                    "type": "object",
+                                    "additionalProperties": true
+                                  }
+                                },
                                 "raw": {
                                   "type": "string"
                                 },
@@ -10815,6 +11140,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -11084,6 +11422,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -11145,6 +11496,19 @@
                                     "label": {
                                       "type": "string"
                                     },
+                                    "prompt": {
+                                      "type": "string"
+                                    },
+                                    "file": {
+                                      "type": "string"
+                                    },
+                                    "messages": {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "additionalProperties": true
+                                      }
+                                    },
                                     "raw": {
                                       "type": "string"
                                     },
@@ -11569,6 +11933,19 @@
                                 "label": {
                                   "type": "string"
                                 },
+                                "prompt": {
+                                  "type": "string"
+                                },
+                                "file": {
+                                  "type": "string"
+                                },
+                                "messages": {
+                                  "type": "array",
+                                  "items": {
+                                    "type": "object",
+                                    "additionalProperties": true
+                                  }
+                                },
                                 "raw": {
                                   "type": "string"
                                 },
@@ -11630,6 +12007,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -11899,6 +12289,19 @@
                                     "label": {
                                       "type": "string"
                                     },
+                                    "prompt": {
+                                      "type": "string"
+                                    },
+                                    "file": {
+                                      "type": "string"
+                                    },
+                                    "messages": {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "additionalProperties": true
+                                      }
+                                    },
                                     "raw": {
                                       "type": "string"
                                     },
@@ -11960,6 +12363,19 @@
                                       "label": {
                                         "type": "string"
                                       },
+                                      "prompt": {
+                                        "type": "string"
+                                      },
+                                      "file": {
+                                        "type": "string"
+                                      },
+                                      "messages": {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "additionalProperties": true
+                                        }
+                                      },
                                       "raw": {
                                         "type": "string"
                                       },
@@ -12229,6 +12645,19 @@
                                       "label": {
                                         "type": "string"
                                       },
+                                      "prompt": {
+                                        "type": "string"
+                                      },
+                                      "file": {
+                                        "type": "string"
+                                      },
+                                      "messages": {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "additionalProperties": true
+                                        }
+                                      },
                                       "raw": {
                                         "type": "string"
                                       },
@@ -12290,6 +12719,19 @@
                                         "label": {
                                           "type": "string"
                                         },
+                                        "prompt": {
+                                          "type": "string"
+                                        },
+                                        "file": {
+                                          "type": "string"
+                                        },
+                                        "messages": {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "additionalProperties": true
+                                          }
+                                        },
                                         "raw": {
                                           "type": "string"
                                         },
@@ -12524,6 +12966,19 @@
                         "label": {
                           "type": "string"
                         },
+                        "prompt": {
+                          "type": "string"
+                        },
+                        "file": {
+                          "type": "string"
+                        },
+                        "messages": {
+                          "type": "array",
+                          "items": {
+                            "type": "object",
+                            "additionalProperties": true
+                          }
+                        },
                         "raw": {
                           "type": "string"
                         },
@@ -12585,6 +13040,19 @@
                           "label": {
                             "type": "string"
                           },
+                          "prompt": {
+                            "type": "string"
+                          },
+                          "file": {
+                            "type": "string"
+                          },
+                          "messages": {
+                            "type": "array",
+                            "items": {
+                              "type": "object",
+                              "additionalProperties": true
+                            }
+                          },
                           "raw": {
                             "type": "string"
                           },
@@ -12875,6 +13343,19 @@
                                         "label": {
                                           "type": "string"
                                         },
+                                        "prompt": {
+                                          "type": "string"
+                                        },
+                                        "file": {
+                                          "type": "string"
+                                        },
+                                        "messages": {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "additionalProperties": true
+                                          }
+                                        },
                                         "raw": {
                                           "type": "string"
                                         },
@@ -12936,6 +13417,19 @@
                                           "label": {
                                             "type": "string"
                                           },
+                                          "prompt": {
+                                            "type": "string"
+                                          },
+                                          "file": {
+                                            "type": "string"
+                                          },
+                                          "messages": {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "additionalProperties": true
+                                            }
+                                          },
                                           "raw": {
                                             "type": "string"
                                           },
@@ -13205,6 +13699,19 @@
                                             "label": {
                                               "type": "string"
                                             },
+                                            "prompt": {
+                                              "type": "string"
+                                            },
+                                            "file": {
+                                              "type": "string"
+                                            },
+                                            "messages": {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "additionalProperties": true
+                                              }
+                                            },
                                             "raw": {
                                               "type": "string"
                                             },
@@ -13266,6 +13773,19 @@
                                               "label": {
                                                 "type": "string"
                                               },
+                                              "prompt": {
+                                                "type": "string"
+                                              },
+                                              "file": {
+                                                "type": "string"
+                                              },
+                                              "messages": {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "additionalProperties": true
+                                                }
+                                              },
                                               "raw": {
                                                 "type": "string"
                                               },
@@ -13535,6 +14055,19 @@
                                               "label": {
                                                 "type": "string"
                                               },
+                                              "prompt": {
+                                                "type": "string"
+                                              },
+                                              "file": {
+                                                "type": "string"
+                                              },
+                                              "messages": {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "additionalProperties": true
+                                                }
+                                              },
                                               "raw": {
                                                 "type": "string"
                                               },
@@ -13596,6 +14129,19 @@
                                                 "label": {
                                                   "type": "string"
                                                 },
+                                                "prompt": {
+                                                  "type": "string"
+                                                },
+                                                "file": {
+                                                  "type": "string"
+                                                },
+                                                "messages": {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "additionalProperties": true
+                                                  }
+                                                },
                                                 "raw": {
                                                   "type": "string"
                                                 },
@@ -13830,6 +14376,19 @@
                                 "label": {
                                   "type": "string"
                                 },
+                                "prompt": {
+                                  "type": "string"
+                                },
+                                "file": {
+                                  "type": "string"
+                                },
+                                "messages": {
+                                  "type": "array",
+                                  "items": {
+                                    "type": "object",
+                                    "additionalProperties": true
+                                  }
+                                },
                                 "raw": {
                                   "type": "string"
                                 },
@@ -13891,6 +14450,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -14116,6 +14688,19 @@
                                         "label": {
                                           "type": "string"
                                         },
+                                        "prompt": {
+                                          "type": "string"
+                                        },
+                                        "file": {
+                                          "type": "string"
+                                        },
+                                        "messages": {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "additionalProperties": true
+                                          }
+                                        },
                                         "raw": {
                                           "type": "string"
                                         },
@@ -14177,6 +14762,19 @@
                                           "label": {
                                             "type": "string"
                                           },
+                                          "prompt": {
+                                            "type": "string"
+                                          },
+                                          "file": {
+                                            "type": "string"
+                                          },
+                                          "messages": {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "additionalProperties": true
+                                            }
+                                          },
                                           "raw": {
                                             "type": "string"
                                           },
@@ -14446,6 +15044,19 @@
                                             "label": {
                                               "type": "string"
                                             },
+                                            "prompt": {
+                                              "type": "string"
+                                            },
+                                            "file": {
+                                              "type": "string"
+                                            },
+                                            "messages": {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "additionalProperties": true
+                                              }
+                                            },
                                             "raw": {
                                               "type": "string"
                                             },
@@ -14507,6 +15118,19 @@
                                               "label": {
                                                 "type": "string"
                                               },
+                                              "prompt": {
+                                                "type": "string"
+                                              },
+                                              "file": {
+                                                "type": "string"
+                                              },
+                                              "messages": {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "additionalProperties": true
+                                                }
+                                              },
                                               "raw": {
                                                 "type": "string"
                                               },
@@ -14776,6 +15400,19 @@
                                               "label": {
                                                 "type": "string"
                                               },
+                                              "prompt": {
+                                                "type": "string"
+                                              },
+                                              "file": {
+                                                "type": "string"
+                                              },
+                                              "messages": {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "additionalProperties": true
+                                                }
+                                              },
                                               "raw": {
                                                 "type": "string"
                                               },
@@ -14837,6 +15474,19 @@
                                                 "label": {
                                                   "type": "string"
                                                 },
+                                                "prompt": {
+                                                  "type": "string"
+                                                },
+                                                "file": {
+                                                  "type": "string"
+                                                },
+                                                "messages": {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "additionalProperties": true
+                                                  }
+                                                },
                                                 "raw": {
                                                   "type": "string"
                                                 },
@@ -15071,6 +15721,19 @@
                                 "label": {
                                   "type": "string"
                                 },
+                                "prompt": {
+                                  "type": "string"
+                                },
+                                "file": {
+                                  "type": "string"
+                                },
+                                "messages": {
+                                  "type": "array",
+                                  "items": {
+                                    "type": "object",
+                                    "additionalProperties": true
+                                  }
+                                },
                                 "raw": {
                                   "type": "string"
                                 },
@@ -15132,6 +15795,19 @@
                                   "label": {
                                     "type": "string"
                                   },
+                                  "prompt": {
+                                    "type": "string"
+                                  },
+                                  "file": {
+                                    "type": "string"
+                                  },
+                                  "messages": {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "additionalProperties": true
+                                    }
+                                  },
                                   "raw": {
                                     "type": "string"
                                   },
@@ -15592,6 +16268,19 @@
                                       "label": {
                                         "type": "string"
                                       },
+                                      "prompt": {
+                                        "type": "string"
+                                      },
+                                      "file": {
+                                        "type": "string"
+                                      },
+                                      "messages": {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "additionalProperties": true
+                                        }
+                                      },
                                       "raw": {
                                         "type": "string"
                                       },
@@ -16961,6 +17650,19 @@
                                           "label": {
                                             "type": "string"
                                           },
+                                          "prompt": {
+                                            "type": "string"
+                                          },
+                                          "file": {
+                                            "type": "string"
+                                          },
+                                          "messages": {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "additionalProperties": true
+                                            }
+                                          },
                                           "raw": {
                                             "type": "string"
                                           },

From 9cefd816a099955d3726522a995a68ff40beef67 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 13:33:44 +0200
Subject: [PATCH 2/4] Sync prompt object schema

---
 .../evaluation/validation/eval-file.schema.ts |   3 +
 .../references/eval.schema.json               | 930 +++++++++++++++---
 2 files changed, 819 insertions(+), 114 deletions(-)

diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts
index a289a3f2a..56673e3c6 100644
--- a/packages/core/src/evaluation/validation/eval-file.schema.ts
+++ b/packages/core/src/evaluation/validation/eval-file.schema.ts
@@ -67,6 +67,9 @@ const PromptSchema = z.union([
     .object({
       id: z.string().optional(),
       label: z.string().optional(),
+      prompt: z.union([z.string(), z.array(JsonObjectSchema)]).optional(),
+      file: z.string().optional(),
+      messages: z.array(JsonObjectSchema).optional(),
       raw: z.string().optional(),
       path: z.string().optional(),
       prefix: z.string().optional(),
diff --git a/skills-data/agentv-eval-writer/references/eval.schema.json b/skills-data/agentv-eval-writer/references/eval.schema.json
index 48dff1ad8..2f57a4271 100644
--- a/skills-data/agentv-eval-writer/references/eval.schema.json
+++ b/skills-data/agentv-eval-writer/references/eval.schema.json
@@ -194,7 +194,19 @@
                       "type": "string"
                     },
                     "prompt": {
-                      "type": "string"
+                      "anyOf": [
+                        {
+                          "type": "string"
+                        },
+                        {
+                          "type": "array",
+                          "items": {
+                            "type": "object",
+                            "properties": {},
+                            "additionalProperties": {}
+                          }
+                        }
+                      ]
                     },
                     "file": {
                       "type": "string"
@@ -203,7 +215,8 @@
                       "type": "array",
                       "items": {
                         "type": "object",
-                        "additionalProperties": true
+                        "properties": {},
+                        "additionalProperties": {}
                       }
                     },
                     "raw": {
@@ -268,7 +281,19 @@
                         "type": "string"
                       },
                       "prompt": {
-                        "type": "string"
+                        "anyOf": [
+                          {
+                            "type": "string"
+                          },
+                          {
+                            "type": "array",
+                            "items": {
+                              "type": "object",
+                              "properties": {},
+                              "additionalProperties": {}
+                            }
+                          }
+                        ]
                       },
                       "file": {
                         "type": "string"
@@ -277,7 +302,8 @@
                         "type": "array",
                         "items": {
                           "type": "object",
-                          "additionalProperties": true
+                          "properties": {},
+                          "additionalProperties": {}
                         }
                       },
                       "raw": {
@@ -997,7 +1023,19 @@
                                             "type": "string"
                                           },
                                           "prompt": {
-                                            "type": "string"
+                                            "anyOf": [
+                                              {
+                                                "type": "string"
+                                              },
+                                              {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "properties": {},
+                                                  "additionalProperties": {}
+                                                }
+                                              }
+                                            ]
                                           },
                                           "file": {
                                             "type": "string"
@@ -1006,7 +1044,8 @@
                                             "type": "array",
                                             "items": {
                                               "type": "object",
-                                              "additionalProperties": true
+                                              "properties": {},
+                                              "additionalProperties": {}
                                             }
                                           },
                                           "raw": {
@@ -1071,7 +1110,19 @@
                                               "type": "string"
                                             },
                                             "prompt": {
-                                              "type": "string"
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "properties": {},
+                                                    "additionalProperties": {}
+                                                  }
+                                                }
+                                              ]
                                             },
                                             "file": {
                                               "type": "string"
@@ -1080,7 +1131,8 @@
                                               "type": "array",
                                               "items": {
                                                 "type": "object",
-                                                "additionalProperties": true
+                                                "properties": {},
+                                                "additionalProperties": {}
                                               }
                                             },
                                             "raw": {
@@ -1353,7 +1405,19 @@
                                                 "type": "string"
                                               },
                                               "prompt": {
-                                                "type": "string"
+                                                "anyOf": [
+                                                  {
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "properties": {},
+                                                      "additionalProperties": {}
+                                                    }
+                                                  }
+                                                ]
                                               },
                                               "file": {
                                                 "type": "string"
@@ -1362,7 +1426,8 @@
                                                 "type": "array",
                                                 "items": {
                                                   "type": "object",
-                                                  "additionalProperties": true
+                                                  "properties": {},
+                                                  "additionalProperties": {}
                                                 }
                                               },
                                               "raw": {
@@ -1427,7 +1492,19 @@
                                                   "type": "string"
                                                 },
                                                 "prompt": {
-                                                  "type": "string"
+                                                  "anyOf": [
+                                                    {
+                                                      "type": "string"
+                                                    },
+                                                    {
+                                                      "type": "array",
+                                                      "items": {
+                                                        "type": "object",
+                                                        "properties": {},
+                                                        "additionalProperties": {}
+                                                      }
+                                                    }
+                                                  ]
                                                 },
                                                 "file": {
                                                   "type": "string"
@@ -1436,7 +1513,8 @@
                                                   "type": "array",
                                                   "items": {
                                                     "type": "object",
-                                                    "additionalProperties": true
+                                                    "properties": {},
+                                                    "additionalProperties": {}
                                                   }
                                                 },
                                                 "raw": {
@@ -1709,7 +1787,19 @@
                                                   "type": "string"
                                                 },
                                                 "prompt": {
-                                                  "type": "string"
+                                                  "anyOf": [
+                                                    {
+                                                      "type": "string"
+                                                    },
+                                                    {
+                                                      "type": "array",
+                                                      "items": {
+                                                        "type": "object",
+                                                        "properties": {},
+                                                        "additionalProperties": {}
+                                                      }
+                                                    }
+                                                  ]
                                                 },
                                                 "file": {
                                                   "type": "string"
@@ -1718,7 +1808,8 @@
                                                   "type": "array",
                                                   "items": {
                                                     "type": "object",
-                                                    "additionalProperties": true
+                                                    "properties": {},
+                                                    "additionalProperties": {}
                                                   }
                                                 },
                                                 "raw": {
@@ -1783,7 +1874,19 @@
                                                     "type": "string"
                                                   },
                                                   "prompt": {
-                                                    "type": "string"
+                                                    "anyOf": [
+                                                      {
+                                                        "type": "string"
+                                                      },
+                                                      {
+                                                        "type": "array",
+                                                        "items": {
+                                                          "type": "object",
+                                                          "properties": {},
+                                                          "additionalProperties": {}
+                                                        }
+                                                      }
+                                                    ]
                                                   },
                                                   "file": {
                                                     "type": "string"
@@ -1792,7 +1895,8 @@
                                                     "type": "array",
                                                     "items": {
                                                       "type": "object",
-                                                      "additionalProperties": true
+                                                      "properties": {},
+                                                      "additionalProperties": {}
                                                     }
                                                   },
                                                   "raw": {
@@ -2030,7 +2134,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -2039,7 +2155,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -2104,7 +2221,19 @@
                                       "type": "string"
                                     },
                                     "prompt": {
-                                      "type": "string"
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "properties": {},
+                                            "additionalProperties": {}
+                                          }
+                                        }
+                                      ]
                                     },
                                     "file": {
                                       "type": "string"
@@ -2113,7 +2242,8 @@
                                       "type": "array",
                                       "items": {
                                         "type": "object",
-                                        "additionalProperties": true
+                                        "properties": {},
+                                        "additionalProperties": {}
                                       }
                                     },
                                     "raw": {
@@ -2577,7 +2707,19 @@
                                           "type": "string"
                                         },
                                         "prompt": {
-                                          "type": "string"
+                                          "anyOf": [
+                                            {
+                                              "type": "string"
+                                            },
+                                            {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "properties": {},
+                                                "additionalProperties": {}
+                                              }
+                                            }
+                                          ]
                                         },
                                         "file": {
                                           "type": "string"
@@ -2586,7 +2728,8 @@
                                           "type": "array",
                                           "items": {
                                             "type": "object",
-                                            "additionalProperties": true
+                                            "properties": {},
+                                            "additionalProperties": {}
                                           }
                                         },
                                         "raw": {
@@ -3959,7 +4102,19 @@
                                               "type": "string"
                                             },
                                             "prompt": {
-                                              "type": "string"
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "properties": {},
+                                                    "additionalProperties": {}
+                                                  }
+                                                }
+                                              ]
                                             },
                                             "file": {
                                               "type": "string"
@@ -3968,7 +4123,8 @@
                                               "type": "array",
                                               "items": {
                                                 "type": "object",
-                                                "additionalProperties": true
+                                                "properties": {},
+                                                "additionalProperties": {}
                                               }
                                             },
                                             "raw": {
@@ -5866,7 +6022,19 @@
                                             "type": "string"
                                           },
                                           "prompt": {
-                                            "type": "string"
+                                            "anyOf": [
+                                              {
+                                                "type": "string"
+                                              },
+                                              {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "properties": {},
+                                                  "additionalProperties": {}
+                                                }
+                                              }
+                                            ]
                                           },
                                           "file": {
                                             "type": "string"
@@ -5875,7 +6043,8 @@
                                             "type": "array",
                                             "items": {
                                               "type": "object",
-                                              "additionalProperties": true
+                                              "properties": {},
+                                              "additionalProperties": {}
                                             }
                                           },
                                           "raw": {
@@ -5940,7 +6109,19 @@
                                               "type": "string"
                                             },
                                             "prompt": {
-                                              "type": "string"
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "properties": {},
+                                                    "additionalProperties": {}
+                                                  }
+                                                }
+                                              ]
                                             },
                                             "file": {
                                               "type": "string"
@@ -5949,7 +6130,8 @@
                                               "type": "array",
                                               "items": {
                                                 "type": "object",
-                                                "additionalProperties": true
+                                                "properties": {},
+                                                "additionalProperties": {}
                                               }
                                             },
                                             "raw": {
@@ -6222,7 +6404,19 @@
                                                 "type": "string"
                                               },
                                               "prompt": {
-                                                "type": "string"
+                                                "anyOf": [
+                                                  {
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "properties": {},
+                                                      "additionalProperties": {}
+                                                    }
+                                                  }
+                                                ]
                                               },
                                               "file": {
                                                 "type": "string"
@@ -6231,7 +6425,8 @@
                                                 "type": "array",
                                                 "items": {
                                                   "type": "object",
-                                                  "additionalProperties": true
+                                                  "properties": {},
+                                                  "additionalProperties": {}
                                                 }
                                               },
                                               "raw": {
@@ -6296,7 +6491,19 @@
                                                   "type": "string"
                                                 },
                                                 "prompt": {
-                                                  "type": "string"
+                                                  "anyOf": [
+                                                    {
+                                                      "type": "string"
+                                                    },
+                                                    {
+                                                      "type": "array",
+                                                      "items": {
+                                                        "type": "object",
+                                                        "properties": {},
+                                                        "additionalProperties": {}
+                                                      }
+                                                    }
+                                                  ]
                                                 },
                                                 "file": {
                                                   "type": "string"
@@ -6305,7 +6512,8 @@
                                                   "type": "array",
                                                   "items": {
                                                     "type": "object",
-                                                    "additionalProperties": true
+                                                    "properties": {},
+                                                    "additionalProperties": {}
                                                   }
                                                 },
                                                 "raw": {
@@ -6578,7 +6786,19 @@
                                                   "type": "string"
                                                 },
                                                 "prompt": {
-                                                  "type": "string"
+                                                  "anyOf": [
+                                                    {
+                                                      "type": "string"
+                                                    },
+                                                    {
+                                                      "type": "array",
+                                                      "items": {
+                                                        "type": "object",
+                                                        "properties": {},
+                                                        "additionalProperties": {}
+                                                      }
+                                                    }
+                                                  ]
                                                 },
                                                 "file": {
                                                   "type": "string"
@@ -6587,7 +6807,8 @@
                                                   "type": "array",
                                                   "items": {
                                                     "type": "object",
-                                                    "additionalProperties": true
+                                                    "properties": {},
+                                                    "additionalProperties": {}
                                                   }
                                                 },
                                                 "raw": {
@@ -6652,7 +6873,19 @@
                                                     "type": "string"
                                                   },
                                                   "prompt": {
-                                                    "type": "string"
+                                                    "anyOf": [
+                                                      {
+                                                        "type": "string"
+                                                      },
+                                                      {
+                                                        "type": "array",
+                                                        "items": {
+                                                          "type": "object",
+                                                          "properties": {},
+                                                          "additionalProperties": {}
+                                                        }
+                                                      }
+                                                    ]
                                                   },
                                                   "file": {
                                                     "type": "string"
@@ -6661,7 +6894,8 @@
                                                     "type": "array",
                                                     "items": {
                                                       "type": "object",
-                                                      "additionalProperties": true
+                                                      "properties": {},
+                                                      "additionalProperties": {}
                                                     }
                                                   },
                                                   "raw": {
@@ -6899,7 +7133,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -6908,7 +7154,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -6973,7 +7220,19 @@
                                       "type": "string"
                                     },
                                     "prompt": {
-                                      "type": "string"
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "properties": {},
+                                            "additionalProperties": {}
+                                          }
+                                        }
+                                      ]
                                     },
                                     "file": {
                                       "type": "string"
@@ -6982,7 +7241,8 @@
                                       "type": "array",
                                       "items": {
                                         "type": "object",
-                                        "additionalProperties": true
+                                        "properties": {},
+                                        "additionalProperties": {}
                                       }
                                     },
                                     "raw": {
@@ -7446,7 +7706,19 @@
                                           "type": "string"
                                         },
                                         "prompt": {
-                                          "type": "string"
+                                          "anyOf": [
+                                            {
+                                              "type": "string"
+                                            },
+                                            {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "properties": {},
+                                                "additionalProperties": {}
+                                              }
+                                            }
+                                          ]
                                         },
                                         "file": {
                                           "type": "string"
@@ -7455,7 +7727,8 @@
                                           "type": "array",
                                           "items": {
                                             "type": "object",
-                                            "additionalProperties": true
+                                            "properties": {},
+                                            "additionalProperties": {}
                                           }
                                         },
                                         "raw": {
@@ -8828,7 +9101,19 @@
                                               "type": "string"
                                             },
                                             "prompt": {
-                                              "type": "string"
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "properties": {},
+                                                    "additionalProperties": {}
+                                                  }
+                                                }
+                                              ]
                                             },
                                             "file": {
                                               "type": "string"
@@ -8837,7 +9122,8 @@
                                               "type": "array",
                                               "items": {
                                                 "type": "object",
-                                                "additionalProperties": true
+                                                "properties": {},
+                                                "additionalProperties": {}
                                               }
                                             },
                                             "raw": {
@@ -10711,7 +10997,19 @@
                               "type": "string"
                             },
                             "prompt": {
-                              "type": "string"
+                              "anyOf": [
+                                {
+                                  "type": "string"
+                                },
+                                {
+                                  "type": "array",
+                                  "items": {
+                                    "type": "object",
+                                    "properties": {},
+                                    "additionalProperties": {}
+                                  }
+                                }
+                              ]
                             },
                             "file": {
                               "type": "string"
@@ -10720,7 +11018,8 @@
                               "type": "array",
                               "items": {
                                 "type": "object",
-                                "additionalProperties": true
+                                "properties": {},
+                                "additionalProperties": {}
                               }
                             },
                             "raw": {
@@ -10785,7 +11084,19 @@
                                 "type": "string"
                               },
                               "prompt": {
-                                "type": "string"
+                                "anyOf": [
+                                  {
+                                    "type": "string"
+                                  },
+                                  {
+                                    "type": "array",
+                                    "items": {
+                                      "type": "object",
+                                      "properties": {},
+                                      "additionalProperties": {}
+                                    }
+                                  }
+                                ]
                               },
                               "file": {
                                 "type": "string"
@@ -10794,7 +11105,8 @@
                                 "type": "array",
                                 "items": {
                                   "type": "object",
-                                  "additionalProperties": true
+                                  "properties": {},
+                                  "additionalProperties": {}
                                 }
                               },
                               "raw": {
@@ -11067,7 +11379,19 @@
                                   "type": "string"
                                 },
                                 "prompt": {
-                                  "type": "string"
+                                  "anyOf": [
+                                    {
+                                      "type": "string"
+                                    },
+                                    {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "properties": {},
+                                        "additionalProperties": {}
+                                      }
+                                    }
+                                  ]
                                 },
                                 "file": {
                                   "type": "string"
@@ -11076,7 +11400,8 @@
                                   "type": "array",
                                   "items": {
                                     "type": "object",
-                                    "additionalProperties": true
+                                    "properties": {},
+                                    "additionalProperties": {}
                                   }
                                 },
                                 "raw": {
@@ -11141,7 +11466,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -11150,7 +11487,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -11423,7 +11761,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -11432,7 +11782,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -11497,7 +11848,19 @@
                                       "type": "string"
                                     },
                                     "prompt": {
-                                      "type": "string"
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "properties": {},
+                                            "additionalProperties": {}
+                                          }
+                                        }
+                                      ]
                                     },
                                     "file": {
                                       "type": "string"
@@ -11506,7 +11869,8 @@
                                       "type": "array",
                                       "items": {
                                         "type": "object",
-                                        "additionalProperties": true
+                                        "properties": {},
+                                        "additionalProperties": {}
                                       }
                                     },
                                     "raw": {
@@ -11934,7 +12298,19 @@
                                   "type": "string"
                                 },
                                 "prompt": {
-                                  "type": "string"
+                                  "anyOf": [
+                                    {
+                                      "type": "string"
+                                    },
+                                    {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "properties": {},
+                                        "additionalProperties": {}
+                                      }
+                                    }
+                                  ]
                                 },
                                 "file": {
                                   "type": "string"
@@ -11943,7 +12319,8 @@
                                   "type": "array",
                                   "items": {
                                     "type": "object",
-                                    "additionalProperties": true
+                                    "properties": {},
+                                    "additionalProperties": {}
                                   }
                                 },
                                 "raw": {
@@ -12008,7 +12385,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -12017,7 +12406,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -12290,7 +12680,19 @@
                                       "type": "string"
                                     },
                                     "prompt": {
-                                      "type": "string"
+                                      "anyOf": [
+                                        {
+                                          "type": "string"
+                                        },
+                                        {
+                                          "type": "array",
+                                          "items": {
+                                            "type": "object",
+                                            "properties": {},
+                                            "additionalProperties": {}
+                                          }
+                                        }
+                                      ]
                                     },
                                     "file": {
                                       "type": "string"
@@ -12299,7 +12701,8 @@
                                       "type": "array",
                                       "items": {
                                         "type": "object",
-                                        "additionalProperties": true
+                                        "properties": {},
+                                        "additionalProperties": {}
                                       }
                                     },
                                     "raw": {
@@ -12364,7 +12767,19 @@
                                         "type": "string"
                                       },
                                       "prompt": {
-                                        "type": "string"
+                                        "anyOf": [
+                                          {
+                                            "type": "string"
+                                          },
+                                          {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "properties": {},
+                                              "additionalProperties": {}
+                                            }
+                                          }
+                                        ]
                                       },
                                       "file": {
                                         "type": "string"
@@ -12373,7 +12788,8 @@
                                         "type": "array",
                                         "items": {
                                           "type": "object",
-                                          "additionalProperties": true
+                                          "properties": {},
+                                          "additionalProperties": {}
                                         }
                                       },
                                       "raw": {
@@ -12646,7 +13062,19 @@
                                         "type": "string"
                                       },
                                       "prompt": {
-                                        "type": "string"
+                                        "anyOf": [
+                                          {
+                                            "type": "string"
+                                          },
+                                          {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "properties": {},
+                                              "additionalProperties": {}
+                                            }
+                                          }
+                                        ]
                                       },
                                       "file": {
                                         "type": "string"
@@ -12655,7 +13083,8 @@
                                         "type": "array",
                                         "items": {
                                           "type": "object",
-                                          "additionalProperties": true
+                                          "properties": {},
+                                          "additionalProperties": {}
                                         }
                                       },
                                       "raw": {
@@ -12720,16 +13149,29 @@
                                           "type": "string"
                                         },
                                         "prompt": {
-                                          "type": "string"
-                                        },
-                                        "file": {
-                                          "type": "string"
-                                        },
-                                        "messages": {
-                                          "type": "array",
+                                          "anyOf": [
+                                            {
+                                              "type": "string"
+                                            },
+                                            {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "properties": {},
+                                                "additionalProperties": {}
+                                              }
+                                            }
+                                          ]
+                                        },
+                                        "file": {
+                                          "type": "string"
+                                        },
+                                        "messages": {
+                                          "type": "array",
                                           "items": {
                                             "type": "object",
-                                            "additionalProperties": true
+                                            "properties": {},
+                                            "additionalProperties": {}
                                           }
                                         },
                                         "raw": {
@@ -12967,7 +13409,19 @@
                           "type": "string"
                         },
                         "prompt": {
-                          "type": "string"
+                          "anyOf": [
+                            {
+                              "type": "string"
+                            },
+                            {
+                              "type": "array",
+                              "items": {
+                                "type": "object",
+                                "properties": {},
+                                "additionalProperties": {}
+                              }
+                            }
+                          ]
                         },
                         "file": {
                           "type": "string"
@@ -12976,7 +13430,8 @@
                           "type": "array",
                           "items": {
                             "type": "object",
-                            "additionalProperties": true
+                            "properties": {},
+                            "additionalProperties": {}
                           }
                         },
                         "raw": {
@@ -13041,7 +13496,19 @@
                             "type": "string"
                           },
                           "prompt": {
-                            "type": "string"
+                            "anyOf": [
+                              {
+                                "type": "string"
+                              },
+                              {
+                                "type": "array",
+                                "items": {
+                                  "type": "object",
+                                  "properties": {},
+                                  "additionalProperties": {}
+                                }
+                              }
+                            ]
                           },
                           "file": {
                             "type": "string"
@@ -13050,7 +13517,8 @@
                             "type": "array",
                             "items": {
                               "type": "object",
-                              "additionalProperties": true
+                              "properties": {},
+                              "additionalProperties": {}
                             }
                           },
                           "raw": {
@@ -13344,7 +13812,19 @@
                                           "type": "string"
                                         },
                                         "prompt": {
-                                          "type": "string"
+                                          "anyOf": [
+                                            {
+                                              "type": "string"
+                                            },
+                                            {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "properties": {},
+                                                "additionalProperties": {}
+                                              }
+                                            }
+                                          ]
                                         },
                                         "file": {
                                           "type": "string"
@@ -13353,7 +13833,8 @@
                                           "type": "array",
                                           "items": {
                                             "type": "object",
-                                            "additionalProperties": true
+                                            "properties": {},
+                                            "additionalProperties": {}
                                           }
                                         },
                                         "raw": {
@@ -13418,7 +13899,19 @@
                                             "type": "string"
                                           },
                                           "prompt": {
-                                            "type": "string"
+                                            "anyOf": [
+                                              {
+                                                "type": "string"
+                                              },
+                                              {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "properties": {},
+                                                  "additionalProperties": {}
+                                                }
+                                              }
+                                            ]
                                           },
                                           "file": {
                                             "type": "string"
@@ -13427,7 +13920,8 @@
                                             "type": "array",
                                             "items": {
                                               "type": "object",
-                                              "additionalProperties": true
+                                              "properties": {},
+                                              "additionalProperties": {}
                                             }
                                           },
                                           "raw": {
@@ -13700,7 +14194,19 @@
                                               "type": "string"
                                             },
                                             "prompt": {
-                                              "type": "string"
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "properties": {},
+                                                    "additionalProperties": {}
+                                                  }
+                                                }
+                                              ]
                                             },
                                             "file": {
                                               "type": "string"
@@ -13709,7 +14215,8 @@
                                               "type": "array",
                                               "items": {
                                                 "type": "object",
-                                                "additionalProperties": true
+                                                "properties": {},
+                                                "additionalProperties": {}
                                               }
                                             },
                                             "raw": {
@@ -13774,7 +14281,19 @@
                                                 "type": "string"
                                               },
                                               "prompt": {
-                                                "type": "string"
+                                                "anyOf": [
+                                                  {
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "properties": {},
+                                                      "additionalProperties": {}
+                                                    }
+                                                  }
+                                                ]
                                               },
                                               "file": {
                                                 "type": "string"
@@ -13783,7 +14302,8 @@
                                                 "type": "array",
                                                 "items": {
                                                   "type": "object",
-                                                  "additionalProperties": true
+                                                  "properties": {},
+                                                  "additionalProperties": {}
                                                 }
                                               },
                                               "raw": {
@@ -14056,7 +14576,19 @@
                                                 "type": "string"
                                               },
                                               "prompt": {
-                                                "type": "string"
+                                                "anyOf": [
+                                                  {
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "properties": {},
+                                                      "additionalProperties": {}
+                                                    }
+                                                  }
+                                                ]
                                               },
                                               "file": {
                                                 "type": "string"
@@ -14065,7 +14597,8 @@
                                                 "type": "array",
                                                 "items": {
                                                   "type": "object",
-                                                  "additionalProperties": true
+                                                  "properties": {},
+                                                  "additionalProperties": {}
                                                 }
                                               },
                                               "raw": {
@@ -14130,7 +14663,19 @@
                                                   "type": "string"
                                                 },
                                                 "prompt": {
-                                                  "type": "string"
+                                                  "anyOf": [
+                                                    {
+                                                      "type": "string"
+                                                    },
+                                                    {
+                                                      "type": "array",
+                                                      "items": {
+                                                        "type": "object",
+                                                        "properties": {},
+                                                        "additionalProperties": {}
+                                                      }
+                                                    }
+                                                  ]
                                                 },
                                                 "file": {
                                                   "type": "string"
@@ -14139,7 +14684,8 @@
                                                   "type": "array",
                                                   "items": {
                                                     "type": "object",
-                                                    "additionalProperties": true
+                                                    "properties": {},
+                                                    "additionalProperties": {}
                                                   }
                                                 },
                                                 "raw": {
@@ -14377,7 +14923,19 @@
                                   "type": "string"
                                 },
                                 "prompt": {
-                                  "type": "string"
+                                  "anyOf": [
+                                    {
+                                      "type": "string"
+                                    },
+                                    {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "properties": {},
+                                        "additionalProperties": {}
+                                      }
+                                    }
+                                  ]
                                 },
                                 "file": {
                                   "type": "string"
@@ -14386,7 +14944,8 @@
                                   "type": "array",
                                   "items": {
                                     "type": "object",
-                                    "additionalProperties": true
+                                    "properties": {},
+                                    "additionalProperties": {}
                                   }
                                 },
                                 "raw": {
@@ -14451,7 +15010,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -14460,7 +15031,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -14689,7 +15261,19 @@
                                           "type": "string"
                                         },
                                         "prompt": {
-                                          "type": "string"
+                                          "anyOf": [
+                                            {
+                                              "type": "string"
+                                            },
+                                            {
+                                              "type": "array",
+                                              "items": {
+                                                "type": "object",
+                                                "properties": {},
+                                                "additionalProperties": {}
+                                              }
+                                            }
+                                          ]
                                         },
                                         "file": {
                                           "type": "string"
@@ -14698,7 +15282,8 @@
                                           "type": "array",
                                           "items": {
                                             "type": "object",
-                                            "additionalProperties": true
+                                            "properties": {},
+                                            "additionalProperties": {}
                                           }
                                         },
                                         "raw": {
@@ -14763,7 +15348,19 @@
                                             "type": "string"
                                           },
                                           "prompt": {
-                                            "type": "string"
+                                            "anyOf": [
+                                              {
+                                                "type": "string"
+                                              },
+                                              {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "properties": {},
+                                                  "additionalProperties": {}
+                                                }
+                                              }
+                                            ]
                                           },
                                           "file": {
                                             "type": "string"
@@ -14772,7 +15369,8 @@
                                             "type": "array",
                                             "items": {
                                               "type": "object",
-                                              "additionalProperties": true
+                                              "properties": {},
+                                              "additionalProperties": {}
                                             }
                                           },
                                           "raw": {
@@ -15045,7 +15643,19 @@
                                               "type": "string"
                                             },
                                             "prompt": {
-                                              "type": "string"
+                                              "anyOf": [
+                                                {
+                                                  "type": "string"
+                                                },
+                                                {
+                                                  "type": "array",
+                                                  "items": {
+                                                    "type": "object",
+                                                    "properties": {},
+                                                    "additionalProperties": {}
+                                                  }
+                                                }
+                                              ]
                                             },
                                             "file": {
                                               "type": "string"
@@ -15054,7 +15664,8 @@
                                               "type": "array",
                                               "items": {
                                                 "type": "object",
-                                                "additionalProperties": true
+                                                "properties": {},
+                                                "additionalProperties": {}
                                               }
                                             },
                                             "raw": {
@@ -15119,7 +15730,19 @@
                                                 "type": "string"
                                               },
                                               "prompt": {
-                                                "type": "string"
+                                                "anyOf": [
+                                                  {
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "properties": {},
+                                                      "additionalProperties": {}
+                                                    }
+                                                  }
+                                                ]
                                               },
                                               "file": {
                                                 "type": "string"
@@ -15128,7 +15751,8 @@
                                                 "type": "array",
                                                 "items": {
                                                   "type": "object",
-                                                  "additionalProperties": true
+                                                  "properties": {},
+                                                  "additionalProperties": {}
                                                 }
                                               },
                                               "raw": {
@@ -15401,7 +16025,19 @@
                                                 "type": "string"
                                               },
                                               "prompt": {
-                                                "type": "string"
+                                                "anyOf": [
+                                                  {
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "type": "array",
+                                                    "items": {
+                                                      "type": "object",
+                                                      "properties": {},
+                                                      "additionalProperties": {}
+                                                    }
+                                                  }
+                                                ]
                                               },
                                               "file": {
                                                 "type": "string"
@@ -15410,7 +16046,8 @@
                                                 "type": "array",
                                                 "items": {
                                                   "type": "object",
-                                                  "additionalProperties": true
+                                                  "properties": {},
+                                                  "additionalProperties": {}
                                                 }
                                               },
                                               "raw": {
@@ -15475,7 +16112,19 @@
                                                   "type": "string"
                                                 },
                                                 "prompt": {
-                                                  "type": "string"
+                                                  "anyOf": [
+                                                    {
+                                                      "type": "string"
+                                                    },
+                                                    {
+                                                      "type": "array",
+                                                      "items": {
+                                                        "type": "object",
+                                                        "properties": {},
+                                                        "additionalProperties": {}
+                                                      }
+                                                    }
+                                                  ]
                                                 },
                                                 "file": {
                                                   "type": "string"
@@ -15484,7 +16133,8 @@
                                                   "type": "array",
                                                   "items": {
                                                     "type": "object",
-                                                    "additionalProperties": true
+                                                    "properties": {},
+                                                    "additionalProperties": {}
                                                   }
                                                 },
                                                 "raw": {
@@ -15722,7 +16372,19 @@
                                   "type": "string"
                                 },
                                 "prompt": {
-                                  "type": "string"
+                                  "anyOf": [
+                                    {
+                                      "type": "string"
+                                    },
+                                    {
+                                      "type": "array",
+                                      "items": {
+                                        "type": "object",
+                                        "properties": {},
+                                        "additionalProperties": {}
+                                      }
+                                    }
+                                  ]
                                 },
                                 "file": {
                                   "type": "string"
@@ -15731,7 +16393,8 @@
                                   "type": "array",
                                   "items": {
                                     "type": "object",
-                                    "additionalProperties": true
+                                    "properties": {},
+                                    "additionalProperties": {}
                                   }
                                 },
                                 "raw": {
@@ -15796,7 +16459,19 @@
                                     "type": "string"
                                   },
                                   "prompt": {
-                                    "type": "string"
+                                    "anyOf": [
+                                      {
+                                        "type": "string"
+                                      },
+                                      {
+                                        "type": "array",
+                                        "items": {
+                                          "type": "object",
+                                          "properties": {},
+                                          "additionalProperties": {}
+                                        }
+                                      }
+                                    ]
                                   },
                                   "file": {
                                     "type": "string"
@@ -15805,7 +16480,8 @@
                                     "type": "array",
                                     "items": {
                                       "type": "object",
-                                      "additionalProperties": true
+                                      "properties": {},
+                                      "additionalProperties": {}
                                     }
                                   },
                                   "raw": {
@@ -16269,7 +16945,19 @@
                                         "type": "string"
                                       },
                                       "prompt": {
-                                        "type": "string"
+                                        "anyOf": [
+                                          {
+                                            "type": "string"
+                                          },
+                                          {
+                                            "type": "array",
+                                            "items": {
+                                              "type": "object",
+                                              "properties": {},
+                                              "additionalProperties": {}
+                                            }
+                                          }
+                                        ]
                                       },
                                       "file": {
                                         "type": "string"
@@ -16278,7 +16966,8 @@
                                         "type": "array",
                                         "items": {
                                           "type": "object",
-                                          "additionalProperties": true
+                                          "properties": {},
+                                          "additionalProperties": {}
                                         }
                                       },
                                       "raw": {
@@ -17651,7 +18340,19 @@
                                             "type": "string"
                                           },
                                           "prompt": {
-                                            "type": "string"
+                                            "anyOf": [
+                                              {
+                                                "type": "string"
+                                              },
+                                              {
+                                                "type": "array",
+                                                "items": {
+                                                  "type": "object",
+                                                  "properties": {},
+                                                  "additionalProperties": {}
+                                                }
+                                              }
+                                            ]
                                           },
                                           "file": {
                                             "type": "string"
@@ -17660,7 +18361,8 @@
                                             "type": "array",
                                             "items": {
                                               "type": "object",
-                                              "additionalProperties": true
+                                              "properties": {},
+                                              "additionalProperties": {}
                                             }
                                           },
                                           "raw": {

From 0b707fdf04dec1c31e4d8e84a7412665f72bd745 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 14:21:45 +0200
Subject: [PATCH 3/4] feat(eval): add rerun-failed runner pooling

---
 apps/cli/src/commands/eval/commands/run.ts    |   5 +-
 apps/cli/src/commands/eval/run-cache.ts       |   8 +-
 apps/cli/src/commands/eval/run-eval.ts        | 311 +++++++++++-------
 apps/cli/src/commands/results/eval-runner.ts  |   5 +-
 apps/cli/test/commands/results/serve.test.ts  |   4 +-
 apps/cli/test/eval.integration.test.ts        |  95 ++++++
 apps/cli/test/fixtures/mock-run-evaluation.ts |  27 +-
 .../docs/docs/evaluation/running-evals.mdx    |  13 +-
 .../docs/docs/guides/workspace-pool.mdx       |   2 +
 packages/core/src/evaluation/orchestrator.ts  |  33 +-
 packages/core/src/evaluation/run-artifacts.ts |  22 +-
 .../core/test/evaluation/orchestrator.test.ts | 135 ++++++++
 12 files changed, 496 insertions(+), 164 deletions(-)

diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
index a078edc06..09b0f80ef 100644
--- a/apps/cli/src/commands/eval/commands/run.ts
+++ b/apps/cli/src/commands/eval/commands/run.ts
@@ -168,10 +168,11 @@ export const evalRunCommand = command({
       description:
         'Resume an interrupted run: skip already-completed tests and append new results to --output dir',
     }),
-    rerunFailed: flag({
+    rerunFailed: option({
+      type: optional(string),
       long: 'rerun-failed',
       description:
-        'Rerun failed/errored tests while keeping passing results. Implies --resume semantics',
+        'Run ID, run workspace, or index.jsonl to rerun failed/errored tests while keeping passing results',
     }),
     strict: flag({
       long: 'strict',
diff --git a/apps/cli/src/commands/eval/run-cache.ts b/apps/cli/src/commands/eval/run-cache.ts
index 342fa8429..d2e8c7b85 100644
--- a/apps/cli/src/commands/eval/run-cache.ts
+++ b/apps/cli/src/commands/eval/run-cache.ts
@@ -54,10 +54,10 @@ export async function loadRunCache(cwd: string): Promise<RunCache | undefined> {
 /**
  * Resolve the cached last-run directory for a cwd, if it still exists on disk.
  * Returns undefined when there is no cache, the cache lacks a `lastRunDir`,
- * or the directory has since been deleted. Used by `--resume` / `--rerun-failed`
- * to default `--output` to the most recent run when no explicit dir is given,
- * matching the convention used by promptfoo (`--resume [evalId]`) and
- * OpenCompass (`-r [timestamp]`).
+ * or the directory has since been deleted. Used by `--resume` to default
+ * `--output` to the most recent run when no explicit dir is given, matching
+ * the convention used by promptfoo (`--resume [evalId]`) and OpenCompass
+ * (`-r [timestamp]`).
  */
 export async function resolveCachedRunDir(cwd: string): Promise<string | undefined> {
   const cache = await loadRunCache(cwd);
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index a16655015..6dd0afc36 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -3,6 +3,7 @@ import { access, readFile } from 'node:fs/promises';
 import { createRequire as createNodeRequire } from 'node:module';
 import path from 'node:path';
 import { pathToFileURL } from 'node:url';
+import pLimit from 'p-limit';
 
 import {
   DEFAULT_THRESHOLD,
@@ -54,6 +55,7 @@ import {
   aggregateRunDir,
   buildEvalTestTargetKey,
   buildEvaluationResultTargetKey,
+  buildTestTargetKey,
   deduplicateByTestIdTarget,
   parseJsonlResults,
   writeArtifactsFromResults,
@@ -135,6 +137,7 @@ interface NormalizedOptions {
   readonly retryErrors?: string;
   readonly resume: boolean;
   readonly rerunFailed: boolean;
+  readonly rerunFailedSource?: string;
   readonly workspaceMode?: 'pooled' | 'temp' | 'static';
   readonly workspacePath?: string;
   readonly keepWorkspaces: boolean;
@@ -609,8 +612,10 @@ function normalizeOptions(
     otelGroupTurns:
       normalizeBoolean(rawOptions.otelGroupTurns) || yamlExecution?.otel_group_turns === true,
     retryErrors: normalizeString(rawOptions.retryErrors),
-    resume: normalizeBoolean(rawOptions.resume) || normalizeBoolean(rawOptions.rerunFailed),
-    rerunFailed: normalizeBoolean(rawOptions.rerunFailed),
+    resume:
+      normalizeBoolean(rawOptions.resume) || normalizeString(rawOptions.rerunFailed) !== undefined,
+    rerunFailed: normalizeString(rawOptions.rerunFailed) !== undefined,
+    rerunFailedSource: normalizeString(rawOptions.rerunFailed),
     workspaceMode,
     workspacePath,
     // Precedence: CLI > YAML config > TS config
@@ -1164,6 +1169,27 @@ async function readExistingResultsFromRunDir(runDir: string): Promise<Evaluation
   return results;
 }
 
+async function resolveRerunFailedRunDir(cwd: string, source: string): Promise<string> {
+  const trimmed = source.trim();
+  if (!trimmed) {
+    throw new Error('--rerun-failed requires a run ID, run workspace, or index.jsonl path.');
+  }
+
+  const candidate = path.isAbsolute(trimmed) ? trimmed : path.resolve(cwd, trimmed);
+  if (existsSync(candidate)) {
+    return path.basename(candidate) === RESULT_INDEX_FILENAME ? path.dirname(candidate) : candidate;
+  }
+
+  const runIdCandidate = path.join(cwd, '.agentv', 'results', trimmed);
+  if (existsSync(runIdCandidate)) {
+    return runIdCandidate;
+  }
+
+  throw new Error(
+    `Run not found for --rerun-failed: ${source}. Expected a run ID under .agentv/results, a run workspace, or an index.jsonl path.`,
+  );
+}
+
 async function prepareFileMetadata(params: {
   readonly testFilePath: string;
   readonly repoRoot: string;
@@ -1825,17 +1851,16 @@ export async function runEvalCommand(
     }
   }
 
-  // --resume / --rerun-failed without an explicit --output: default to the
+  // --resume without an explicit --output: default to the
   // last-known run dir for this cwd from .agentv/cache.json. Matches promptfoo's
   // `--resume [evalId]` and OpenCompass's `-r [timestamp]` "latest by default"
   // convention. The cache pointer is written by saveRunCache after every eval.
-  if (options.resume && !options.retryErrors && !options.outputDir) {
+  if (options.resume && !options.rerunFailedSource && !options.retryErrors && !options.outputDir) {
     const cachedDir = await resolveCachedRunDir(cwd);
     if (cachedDir) {
       options = { ...options, outputDir: cachedDir };
-      const flagLabel = options.rerunFailed ? 'rerun-failed' : 'resume';
       const displayDir = path.relative(cwd, cachedDir) || cachedDir;
-      console.log(`Auto-detected last run dir for --${flagLabel}: ${displayDir}`);
+      console.log(`Auto-detected last run dir for --resume: ${displayDir}`);
     }
   }
 
@@ -1844,22 +1869,35 @@ export async function runEvalCommand(
   let resumeSkipKeys: Set<string> | undefined;
   let isResumeAppend = false;
   if (options.resume && !options.retryErrors) {
-    const explicitResumeDir = options.outputDir;
-    if (explicitResumeDir) {
-      const resumeDir = path.resolve(explicitResumeDir);
-      const resumeIndexPaths = discoverRunManifestPaths(resumeDir);
+    const sourceRunDir = options.rerunFailedSource
+      ? await resolveRerunFailedRunDir(cwd, options.rerunFailedSource)
+      : options.outputDir
+        ? path.resolve(options.outputDir)
+        : undefined;
+
+    if (sourceRunDir) {
+      if (options.rerunFailedSource && !options.outputDir) {
+        options = { ...options, outputDir: sourceRunDir };
+      }
+
+      const resumeIndexPaths = discoverRunManifestPaths(sourceRunDir);
       if (resumeIndexPaths.length > 0) {
-        const existingResults = await readExistingResultsFromRunDir(resumeDir);
+        const existingResults = await readExistingResultsFromRunDir(sourceRunDir);
         resumeSkipKeys = new Set<string>();
+        let completedResultCount = 0;
         for (const r of existingResults) {
           if (shouldSkipExistingResultForResume(r, options.rerunFailed)) {
+            completedResultCount += 1;
             resumeSkipKeys.add(buildEvaluationResultTargetKey(r));
+            resumeSkipKeys.add(buildTestTargetKey(r.testId, r.target, r.variant));
           }
         }
-        isResumeAppend = true;
+        isResumeAppend =
+          options.outputDir !== undefined &&
+          path.resolve(options.outputDir) === path.resolve(sourceRunDir);
         const modeLabel = options.rerunFailed ? 'Rerun-failed' : 'Resume';
         console.log(
-          `${modeLabel}: found ${existingResults.length} existing result(s), skipping ${resumeSkipKeys.size} completed.`,
+          `${modeLabel}: found ${existingResults.length} existing result(s), skipping ${completedResultCount} completed.`,
         );
       } else {
         // No existing bundle manifest — behave like a normal run.
@@ -2116,7 +2154,8 @@ export async function runEvalCommand(
         const target = selection.targetName;
         const variant = targetVariantForSelection(selection);
         const key = buildEvalTestTargetKey(test, target, variant);
-        if (resumeSkipKeys?.has(key)) {
+        const fallbackKey = buildTestTargetKey(test.id, target, variant);
+        if (resumeSkipKeys?.has(key) || resumeSkipKeys?.has(fallbackKey)) {
           resumeSkippedCount++;
         } else {
           totalEvalCount++;
@@ -2339,126 +2378,142 @@ export async function runEvalCommand(
         continue;
       }
 
-      // Run all targets concurrently (each target has its own worker limit)
+      const fileWorkerLimit = Math.max(1, fileOptions.workers ?? DEFAULT_WORKERS);
+      const targetConcurrency =
+        targetPrep.selections.length > 1
+          ? Math.min(fileWorkerLimit, targetPrep.selections.length)
+          : 1;
+      const perTargetWorkers =
+        targetPrep.selections.length > 1
+          ? Math.max(1, Math.floor(fileWorkerLimit / targetConcurrency))
+          : fileWorkerLimit;
+      const limitTarget = pLimit(targetConcurrency);
+
+      // Run target matrix selections through a bounded pool. Each active target
+      // receives a slice of the worker budget so total in-process case execution
+      // never multiplies past max_concurrency.
       const targetResults = await Promise.all(
-        targetPrep.selections.map(async ({ selection, inlineTargetLabel }) => {
-          // Target selection is suite/experiment/CLI runtime policy; every selected
-          // target runs every filtered test case for this eval file.
-          const targetName = selection.targetName;
-          const applicableTestCases = targetPrep.testCases;
-
-          // --resume / --rerun-failed: skip tests that are already completed
-          const filteredTestCases = resumeSkipKeys
-            ? applicableTestCases.filter(
-                (test) =>
-                  !resumeSkipKeys.has(
-                    buildEvalTestTargetKey(test, targetName, targetVariantForSelection(selection)),
-                  ),
-              )
-            : applicableTestCases;
-
-          if (filteredTestCases.length === 0) {
-            return [];
-          }
+        targetPrep.selections.map(({ selection, inlineTargetLabel }) =>
+          limitTarget(async () => {
+            // Target selection is suite/experiment/CLI runtime policy; every selected
+            // target runs every filtered test case for this eval file.
+            const targetName = selection.targetName;
+            const applicableTestCases = targetPrep.testCases;
+
+            // --resume / --rerun-failed: skip tests that are already completed
+            const filteredTestCases = resumeSkipKeys
+              ? applicableTestCases.filter((test) => {
+                  const variant = targetVariantForSelection(selection);
+                  return (
+                    !resumeSkipKeys.has(buildEvalTestTargetKey(test, targetName, variant)) &&
+                    !resumeSkipKeys.has(buildTestTargetKey(test.id, targetName, variant))
+                  );
+                })
+              : applicableTestCases;
+
+            if (filteredTestCases.length === 0) {
+              return [];
+            }
 
-          try {
-            const runGroups = groupTestsByRunPolicy({
-              tests: filteredTestCases,
-              options: fileOptions,
-              defaultTrialsConfig: fileOptions.transcript ? undefined : targetPrep.trialsConfig,
-              defaultThreshold: targetPrep.threshold ?? fileOptions.threshold,
-              defaultTimeoutSeconds: fileOptions.agentTimeoutSeconds,
-              defaultBudgetUsd: targetPrep.budgetUsd,
-            });
-            const groupResults: EvaluationResult[] = [];
-            for (const group of runGroups) {
-              hasScopedRunPolicies ||= group.policy.hasScopedOverride;
-              const result = await runSingleEvalFile({
-                testFilePath,
-                cwd,
-                repoRoot,
+            try {
+              const runGroups = groupTestsByRunPolicy({
+                tests: filteredTestCases,
                 options: fileOptions,
-                outputWriter,
-                otelExporter,
-                cache,
-                evaluationRunner,
-                workersOverride: fileOptions.workers,
-                progressReporter,
-                seenTestCases,
-                displayIdTracker,
-                selection,
-                inlineTargetLabel,
-                testCases: group.tests,
-                trialsConfig: fileOptions.transcript ? undefined : group.policy.trialsConfig,
-                agentTimeoutSeconds: group.policy.timeoutSeconds,
-                matrixMode: targetPrep.selections.length > 1,
-                budgetUsd: group.policy.budgetUsd,
-                runBudgetTracker: fileBudgetTracker,
-                failOnError: targetPrep.failOnError,
-                threshold: group.policy.threshold,
-                providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory,
-              });
-              groupResults.push(...result.results);
-            }
-            const evalFile = path.relative(cwd, testFilePath);
-            const existingSummary = remoteEvalSummaries.find(
-              (summary) => summary.evalFile === evalFile,
-            );
-            if (existingSummary) {
-              existingSummary.results.push(...groupResults);
-            } else {
-              remoteEvalSummaries.push({
-                evalFile,
-                results: [...groupResults],
+                defaultTrialsConfig: fileOptions.transcript ? undefined : targetPrep.trialsConfig,
+                defaultThreshold: targetPrep.threshold ?? fileOptions.threshold,
+                defaultTimeoutSeconds: fileOptions.agentTimeoutSeconds,
+                defaultBudgetUsd: targetPrep.budgetUsd,
               });
-            }
-
-            return groupResults;
-          } catch (fileError) {
-            // before_all or other setup failures should not abort the entire run.
-            // Mark all tests in this file as errors and continue with other files.
-            const message = fileError instanceof Error ? fileError.message : String(fileError);
-            console.error(
-              `\n[ERROR] ⚠ Eval file failed: ${path.basename(testFilePath)} — ${message}\n`,
-            );
-            const explicitVariant = targetVariantForSelection(selection);
-            const errorResults: EvaluationResult[] = filteredTestCases.map((testCase) =>
-              withSourceMetadata(
-                {
-                  timestamp: new Date().toISOString(),
-                  testId: testCase.id,
-                  score: 0,
-                  assertions: [],
-                  output: message,
-                  trace: buildTraceFromMessages({
-                    input: testCase.input as EvaluationResult['input'],
-                    output: [{ role: 'assistant' as const, content: message }],
-                    finalOutput: message,
-                    target: selection.targetName,
+              const groupResults: EvaluationResult[] = [];
+              for (const group of runGroups) {
+                hasScopedRunPolicies ||= group.policy.hasScopedOverride;
+                const result = await runSingleEvalFile({
+                  testFilePath,
+                  cwd,
+                  repoRoot,
+                  options: fileOptions,
+                  outputWriter,
+                  otelExporter,
+                  cache,
+                  evaluationRunner,
+                  workersOverride: perTargetWorkers,
+                  progressReporter,
+                  seenTestCases,
+                  displayIdTracker,
+                  selection,
+                  inlineTargetLabel,
+                  testCases: group.tests,
+                  trialsConfig: fileOptions.transcript ? undefined : group.policy.trialsConfig,
+                  agentTimeoutSeconds: group.policy.timeoutSeconds,
+                  matrixMode: targetPrep.selections.length > 1,
+                  budgetUsd: group.policy.budgetUsd,
+                  runBudgetTracker: fileBudgetTracker,
+                  failOnError: targetPrep.failOnError,
+                  threshold: group.policy.threshold,
+                  providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory,
+                });
+                groupResults.push(...result.results);
+              }
+              const evalFile = path.relative(cwd, testFilePath);
+              const existingSummary = remoteEvalSummaries.find(
+                (summary) => summary.evalFile === evalFile,
+              );
+              if (existingSummary) {
+                existingSummary.results.push(...groupResults);
+              } else {
+                remoteEvalSummaries.push({
+                  evalFile,
+                  results: [...groupResults],
+                });
+              }
+
+              return groupResults;
+            } catch (fileError) {
+              // before_all or other setup failures should not abort the entire run.
+              // Mark all tests in this file as errors and continue with other files.
+              const message = fileError instanceof Error ? fileError.message : String(fileError);
+              console.error(
+                `\n[ERROR] ⚠ Eval file failed: ${path.basename(testFilePath)} — ${message}\n`,
+              );
+              const explicitVariant = targetVariantForSelection(selection);
+              const errorResults: EvaluationResult[] = filteredTestCases.map((testCase) =>
+                withSourceMetadata(
+                  {
+                    timestamp: new Date().toISOString(),
                     testId: testCase.id,
-                    conversationId: testCase.conversation_id,
+                    score: 0,
+                    assertions: [],
+                    output: message,
+                    trace: buildTraceFromMessages({
+                      input: testCase.input as EvaluationResult['input'],
+                      output: [{ role: 'assistant' as const, content: message }],
+                      finalOutput: message,
+                      target: selection.targetName,
+                      testId: testCase.id,
+                      conversationId: testCase.conversation_id,
+                      error: message,
+                    }),
+                    scores: [],
                     error: message,
-                  }),
-                  scores: [],
-                  error: message,
-                  executionStatus: 'execution_error' as const,
-                  failureStage: 'setup' as const,
-                  failureReasonCode: 'setup_error' as const,
-                  durationMs: 0,
-                  tokenUsage: { input: 0, output: 0 },
-                  target: selection.targetName,
-                  variant: explicitVariant,
-                },
-                testFilePath,
-                fileOptions,
-              ),
-            );
-            for (const errResult of errorResults) {
-              await outputWriter.append(errResult);
+                    executionStatus: 'execution_error' as const,
+                    failureStage: 'setup' as const,
+                    failureReasonCode: 'setup_error' as const,
+                    durationMs: 0,
+                    tokenUsage: { input: 0, output: 0 },
+                    target: selection.targetName,
+                    variant: explicitVariant,
+                  },
+                  testFilePath,
+                  fileOptions,
+                ),
+              );
+              for (const errResult of errorResults) {
+                await outputWriter.append(errResult);
+              }
+              return errorResults;
             }
-            return errorResults;
-          }
-        }),
+          }),
+        ),
       );
       for (const results of targetResults) {
         allResults.push(...results);
@@ -2646,7 +2701,7 @@ export async function runEvalCommand(
       const relativeRunDir = path.relative(cwd, runDir);
       console.log(
         `\nTip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed tests with:\n` +
-          `  agentv eval run ${evalFileArgs}${targetFlag} --output ${relativeRunDir} --rerun-failed`,
+          `  agentv eval run ${evalFileArgs}${targetFlag} --rerun-failed ${relativeRunDir}`,
       );
     }
 
diff --git a/apps/cli/src/commands/results/eval-runner.ts b/apps/cli/src/commands/results/eval-runner.ts
index 9a4617eb5..3fb397621 100644
--- a/apps/cli/src/commands/results/eval-runner.ts
+++ b/apps/cli/src/commands/results/eval-runner.ts
@@ -171,6 +171,9 @@ function validateResumeOptions(req: RunEvalRequest): string | undefined {
   if (modes.length > 1) {
     return `resume, rerun_failed, and retry_errors are mutually exclusive (got: ${modes.join(', ')})`;
   }
+  if (req.rerun_failed && !req.output?.trim()) {
+    return 'rerun_failed requires output to identify the prior run workspace';
+  }
   return undefined;
 }
 
@@ -230,7 +233,7 @@ function buildCliArgs(req: RunEvalRequest, experiment?: string): string[] {
     args.push('--resume');
   }
   if (req.rerun_failed) {
-    args.push('--rerun-failed');
+    args.push('--rerun-failed', req.output?.trim() ?? '');
   }
   if (req.retry_errors?.trim()) {
     args.push('--retry-errors', req.retry_errors.trim());
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index 98a08f30b..9d301496a 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -3956,7 +3956,7 @@ describe('serve app', () => {
       });
       expect(res.status).toBe(202);
       const data = (await res.json()) as { command: string };
-      expect(data.command).toContain('--rerun-failed');
+      expect(data.command).toContain('--rerun-failed .agentv/results/r1');
       expect(data.command).toContain('--output .agentv/results/r1');
     });
 
@@ -4140,7 +4140,7 @@ describe('serve app', () => {
       });
       expect(res.status).toBe(200);
       const data = (await res.json()) as { command: string };
-      expect(data.command).toContain('--rerun-failed');
+      expect(data.command).toContain('--rerun-failed .agentv/results/r1');
       expect(data.command).not.toContain('--resume');
     });
 
diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
index 00d49a159..5fc435e85 100644
--- a/apps/cli/test/eval.integration.test.ts
+++ b/apps/cli/test/eval.integration.test.ts
@@ -931,6 +931,101 @@ describe('agentv eval CLI', () => {
     }
   }, 30_000);
 
+  it('reruns failed rows from a canonical run id', async () => {
+    const fixture = await createFixture();
+    try {
+      const priorRunDir = path.join(fixture.suiteDir, '.agentv', 'results', 'prior-run');
+      const first = await runCli(fixture, [
+        'eval',
+        fixture.testFilePath,
+        '--output',
+        priorRunDir,
+        '--threshold',
+        '0.8',
+      ]);
+      expect(first.exitCode).toBe(1);
+      const priorIndexPath = path.join(priorRunDir, 'index.jsonl');
+      const priorRows = (await readJsonLines(priorIndexPath)) as Array<Record<string, unknown>>;
+      await writeFile(
+        priorIndexPath,
+        `${priorRows
+          .map((row) =>
+            JSON.stringify({
+              ...row,
+              execution_status: row.test_id === 'case-alpha' ? 'quality_failure' : 'ok',
+            }),
+          )
+          .join('\n')}\n`,
+        'utf8',
+      );
+
+      const second = await runCli(fixture, [
+        'eval',
+        fixture.testFilePath,
+        '--rerun-failed',
+        'prior-run',
+        '--threshold',
+        '0.8',
+      ]);
+      expect(second.exitCode).toBe(1);
+      expect(second.stdout).toContain('Rerun-failed: found 2 existing result(s), skipping 1');
+
+      const diagnostics = await readDiagnostics(fixture);
+      const calls = diagnostics.calls as Array<Record<string, unknown>>;
+      expect(calls.at(-1)).toMatchObject({
+        evalCaseIds: ['case-alpha'],
+      });
+
+      const rows = await readJsonLines(priorIndexPath);
+      expect(rows).toHaveLength(3);
+      expect((rows.at(-1) as Record<string, unknown>).test_id).toBe('case-alpha');
+    } finally {
+      await rm(fixture.baseDir, { recursive: true, force: true });
+    }
+  }, 30_000);
+
+  it('does not multiply max_concurrency across target matrix selections', async () => {
+    const fixture = await createFixture();
+    try {
+      const evalPath = path.join(fixture.suiteDir, 'target-matrix.eval.yaml');
+      await writeFile(
+        evalPath,
+        [
+          'name: target-matrix',
+          'target: file-target',
+          'tests:',
+          '  - id: first-case',
+          '    input: first',
+          '    criteria: ok',
+          '  - id: second-case',
+          '    input: second',
+          '    criteria: ok',
+          '',
+        ].join('\n'),
+        'utf8',
+      );
+
+      const { exitCode } = await runCli(fixture, [
+        'eval',
+        evalPath,
+        '--workers',
+        '2',
+        '--target',
+        'file-target',
+        '--target',
+        'cli-target',
+      ]);
+
+      expect(exitCode).toBe(0);
+      const diagnostics = await readDiagnostics(fixture);
+      const calls = diagnostics.calls as Array<Record<string, unknown>>;
+      expect(calls).toHaveLength(2);
+      expect(calls.map((call) => call.maxConcurrency)).toEqual([1, 1]);
+    } finally {
+      await rm(fixture.baseDir, { recursive: true, force: true });
+    }
+  }, 30_000);
+
   it('records CLI-named experiment namespace separately from default runtime config', async () => {
     const fixture = await createFixture();
     try {
diff --git a/apps/cli/test/fixtures/mock-run-evaluation.ts b/apps/cli/test/fixtures/mock-run-evaluation.ts
index 5c30221ff..4103e4eaf 100644
--- a/apps/cli/test/fixtures/mock-run-evaluation.ts
+++ b/apps/cli/test/fixtures/mock-run-evaluation.ts
@@ -64,6 +64,8 @@ interface EvaluationResultLike {
   readonly timestamp: string;
 }
 
+let diagnosticsWriteQueue: Promise<void> = Promise.resolve();
+
 function evalCaseIds(evalCases: ReadonlyArray<unknown> | undefined): readonly string[] {
   if (!Array.isArray(evalCases) || evalCases.length === 0) {
     return ['case-alpha', 'case-beta'];
@@ -210,17 +212,20 @@ async function maybeWriteDiagnostics(
     resultCount: results.length,
   } satisfies Record<string, unknown>;
 
-  const priorCalls = await readFile(diagnosticsPath, 'utf8')
-    .then((raw) => {
-      const parsed = JSON.parse(raw) as { readonly calls?: unknown };
-      return Array.isArray(parsed.calls) ? parsed.calls : [parsed];
-    })
-    .catch(() => []);
-  await writeFile(
-    diagnosticsPath,
-    JSON.stringify({ ...payload, calls: [...priorCalls, payload] }, null, 2),
-    'utf8',
-  );
+  diagnosticsWriteQueue = diagnosticsWriteQueue.then(async () => {
+    const priorCalls = await readFile(diagnosticsPath, 'utf8')
+      .then((raw) => {
+        const parsed = JSON.parse(raw) as { readonly calls?: unknown };
+        return Array.isArray(parsed.calls) ? parsed.calls : [parsed];
+      })
+      .catch(() => []);
+    await writeFile(
+      diagnosticsPath,
+      JSON.stringify({ ...payload, calls: [...priorCalls, payload] }, null, 2),
+      'utf8',
+    );
+  });
+  await diagnosticsWriteQueue;
 }
 
 async function maybeWritePromptDump(
diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
index dc881d48b..5d8a14633 100644
--- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -296,7 +296,7 @@ agentv eval evals/my-eval.yaml --export-otel
 
 ### Parallelism
 
-The `--workers N` flag controls how many **test cases run in parallel within each eval file** (default: 3). Eval files always run sequentially — one file completes before the next starts.
+The `--workers N` flag sets the size of the in-process worker pool for a single eval file (default: 3). Eval files always run sequentially — one file completes before the next starts. In target-matrix runs, the selected targets share that worker budget instead of each target creating its own full pool.
 
 ```bash
 agentv eval evals/my-eval.yaml --workers 4
@@ -304,6 +304,9 @@ agentv eval evals/my-eval.yaml --workers 4
 
 agentv eval evals/file1.yaml evals/file2.yaml evals/file3.yaml --workers 3
 # Files run one at a time; within each file, up to 3 test cases run in parallel
+
+agentv eval evals/my-eval.yaml --target gpt --target claude --workers 4
+# The target matrix shares the same 4-worker budget
 ```
 
 This matches the standard model used by eval frameworks (promptfoo, deepeval, OpenAI Evals) and avoids cross-file workspace races without any special configuration.
@@ -353,10 +356,10 @@ AgentV ships three flags for picking up a partial run. They differ only in **whi
 | Flag | What it skips | What it re-runs | Use when |
 |------|---------------|-----------------|----------|
 | `--resume` | Anything that finished without an `execution_error` (passes, fails, threshold misses) | Errors and missing cases | The run was interrupted (Ctrl-C, crash, OOM) and you just want it to finish |
-| `--rerun-failed` | Only cases with `executionStatus === 'ok'` | Errors **and** test failures (assertion misses, threshold misses) | A grader change or model swap means you want to re-grade everything that wasn't already passing |
+| `--rerun-failed <run_id>` | Only cases with `executionStatus === 'ok'` | Errors **and** test failures (assertion misses, threshold misses) | A grader change or model swap means you want to re-grade everything that wasn't already passing |
 | `--retry-errors <path>` | Anything that completed without an `execution_error` (same set as `--resume`) | Errors and missing cases | You want to point at an arbitrary prior run/manifest by path, instead of resuming the run dir you're currently writing to |
 
-`--resume` and `--rerun-failed` both append to the existing `index.jsonl`. When `--output <dir>` is given they target that directory; when omitted they default to the **last run dir for the current cwd**, recorded in `.agentv/cache.json` and updated after every eval. This matches promptfoo's `--resume [evalId]` and OpenCompass's `-r [timestamp]` "latest by default" convention. `--retry-errors` takes the prior run's path directly (a directory or an `index.jsonl`).
+`--resume` appends to the existing `index.jsonl` in `--output <dir>`; when omitted it defaults to the **last run dir for the current cwd**, recorded in `.agentv/cache.json` and updated after every eval. `--rerun-failed <run_id>` reads a specific canonical run bundle from `.agentv/results/<run_id>` and, when `--output` is omitted, appends replacement rows to that same bundle. You can also pass a run workspace path or `index.jsonl` path instead of a bare run ID. `--retry-errors` takes the prior run's path directly and re-runs only execution errors or missing cases.
 
 ```bash
 # Resume the last run — no args needed; AgentV finds it from .agentv/cache.json
@@ -365,8 +368,8 @@ agentv eval evals/my-eval.yaml --resume
 # Or target a specific run dir explicitly
 agentv eval evals/my-eval.yaml --output .agentv/results/<run_id> --resume
 
-# Re-run errors AND failed cases against the last run dir
-agentv eval evals/my-eval.yaml --rerun-failed
+# Re-run errors AND failed cases from a specific canonical run
+agentv eval evals/my-eval.yaml --rerun-failed <run_id>
 
 # Re-run only execution errors from any prior run by path
 agentv eval evals/my-eval.yaml --retry-errors .agentv/results/<run_id>/index.jsonl
diff --git a/apps/web/src/content/docs/docs/guides/workspace-pool.mdx b/apps/web/src/content/docs/docs/guides/workspace-pool.mdx
index 685a1f801..5eba56da6 100644
--- a/apps/web/src/content/docs/docs/guides/workspace-pool.mdx
+++ b/apps/web/src/content/docs/docs/guides/workspace-pool.mdx
@@ -132,6 +132,8 @@ This creates up to 4 slots (`slot-0` through `slot-3`). PID-based lock files pre
 
 The maximum number of pool slots defaults to 10 (capped at 50). Slots are created on demand — a run with 2 workers only creates 2 slots, even if the pool allows 10.
 
+Before a slot is reused for another case, AgentV resets it to the slot baseline. A pooled workspace is a performance cache, not shared mutable state between cases.
+
 **Multiple eval files:** When you pass multiple eval files to `agentv eval`, they run sequentially — one file completes before the next starts (see [Parallelism](/docs/evaluation/running-evals/#parallelism)). Within each file, pool slots support concurrent workers as described above.
 
 ## Drift detection
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 26fbda0f6..29cdd0382 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -1209,13 +1209,17 @@ export async function runEvaluation(
       // shared owner prepare without inheriting a child suite's workspace.
       const usesSharedWorkspace = caseUsesSharedWorkspaceSetup(evalCase, sharedSetup);
       const testPoolSlot =
-        usesSharedWorkspace && availablePoolSlots.length > 0 ? availablePoolSlots.pop() : undefined;
+        usesSharedWorkspace && availablePoolSlots.length > 0
+          ? availablePoolSlots.pop()
+          : usesSharedWorkspace
+            ? poolSlot
+            : undefined;
       const testWorkspacePath = usesSharedWorkspace
         ? (testPoolSlot?.path ?? sharedWorkspacePath)
         : undefined;
       const testBaselineCommit = usesSharedWorkspace
         ? testPoolSlot
-          ? poolSlotBaselines.get(testPoolSlot.path)
+          ? (poolSlotBaselines.get(testPoolSlot.path) ?? sharedBaselineCommit)
           : sharedBaselineCommit
         : undefined;
 
@@ -1323,9 +1327,30 @@ export async function runEvaluation(
         }
         throw error;
       } finally {
-        // Return pool slot for reuse by next test
+        // Return pool slot for reuse by next test only after resetting it to
+        // the per-slot baseline. Pooling is a local performance optimization,
+        // not shared state between eval cases.
         if (testPoolSlot) {
-          availablePoolSlots.push(testPoolSlot);
+          const shouldReturnPoolSlot = testPoolSlot !== poolSlot;
+          const resetMode = workspaceClean === 'full' ? 'strict' : 'fast';
+          let resetSucceeded = true;
+          try {
+            if (repoManager && suiteWorkspace?.repos?.length) {
+              await repoManager.reset(suiteWorkspace.repos, testPoolSlot.path, resetMode);
+            }
+            await resetWorkspaceRoot(testPoolSlot.path, resetMode, testBaselineCommit);
+          } catch (resetError) {
+            resetSucceeded = false;
+            if (verbose) {
+              const message = resetError instanceof Error ? resetError.message : String(resetError);
+              console.warn(
+                `Warning: failed to reset workspace pool slot ${testPoolSlot.index}; leaving it out of reuse: ${message}`,
+              );
+            }
+          }
+          if (resetSucceeded && shouldReturnPoolSlot) {
+            availablePoolSlots.push(testPoolSlot);
+          }
         }
       }
     }
diff --git a/packages/core/src/evaluation/run-artifacts.ts b/packages/core/src/evaluation/run-artifacts.ts
index 40dd3eb45..f12b5817a 100644
--- a/packages/core/src/evaluation/run-artifacts.ts
+++ b/packages/core/src/evaluation/run-artifacts.ts
@@ -1997,6 +1997,13 @@ function indexRecordReplacementKey(record: unknown): string | undefined {
   return projectionIdentityRecordKey(record) ?? indexRecordKey(record);
 }
 
+function indexRecordReplacementKeys(record: unknown): readonly string[] {
+  // Projection-identity key first, then the index key; blanks and duplicates are dropped.
+  const candidates = [projectionIdentityRecordKey(record), indexRecordKey(record)];
+  const usable = candidates.filter((key): key is string => typeof key === 'string' && key !== '');
+  return [...new Set(usable)];
+}
+
 function projectionIdentityRecordKey(record: unknown): string | undefined {
   if (!isRecord(record) || !isRecord(record.projection_identity)) {
     return undefined;
@@ -2086,10 +2093,9 @@ async function rewriteExistingIndexRecords(
   }
 
   const replacementsByKey = new Map(
-    replacements.flatMap((record) => {
-      const key = indexRecordReplacementKey(record);
-      return key ? [[key, record] as const] : [];
-    }),
+    replacements.flatMap((record) =>
+      indexRecordReplacementKeys(record).map((key) => [key, record] as const),
+    ),
   );
   const seen = new Set<string>();
   const records: unknown[] = [];
@@ -2103,7 +2109,9 @@ async function rewriteExistingIndexRecords(
       const replacement = key ? replacementsByKey.get(key) : undefined;
       if (key && replacement) {
         records.push(replacement);
-        seen.add(key);
+        for (const replacementKey of indexRecordReplacementKeys(replacement)) {
+          seen.add(replacementKey);
+        }
       } else {
         records.push(parsed);
       }
@@ -2111,8 +2119,8 @@ async function rewriteExistingIndexRecords(
   }
 
   for (const replacement of replacements) {
-    const key = indexRecordReplacementKey(replacement);
-    if (!key || !seen.has(key)) {
+    const keys = indexRecordReplacementKeys(replacement);
+    if (keys.length === 0 || keys.every((key) => !seen.has(key))) {
       records.push(replacement);
     }
   }
diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts
index 73c544135..a67f8553b 100644
--- a/packages/core/test/evaluation/orchestrator.test.ts
+++ b/packages/core/test/evaluation/orchestrator.test.ts
@@ -1,10 +1,12 @@
 import { afterEach, describe, expect, it, mock } from 'bun:test';
+import { execSync } from 'node:child_process';
 import {
   existsSync,
   mkdirSync,
   mkdtempSync,
   readFileSync,
   readdirSync,
+  rmSync,
   writeFileSync,
 } from 'node:fs';
 import { tmpdir } from 'node:os';
@@ -151,6 +153,31 @@ const baseTarget: ResolvedTarget = {
   config: { response: '{}' },
 };
 
+function cleanGitEnv(): Record<string, string> {
+  // Copy process.env, leaving out every GIT_* variable except GIT_SSH_COMMAND.
+  const isScrubbed = (name: string) => name.startsWith('GIT_') && name !== 'GIT_SSH_COMMAND';
+  const kept: Record<string, string> = {};
+  for (const [name, setting] of Object.entries(process.env)) {
+    if (setting !== undefined && !isScrubbed(name)) kept[name] = setting;
+  }
+  return kept;
+}
+
+function createTestRepo(dir: string, files: Record<string, string>): string {
+  const run = (cmd: string) => execSync(cmd, { cwd: dir, stdio: 'ignore', env: cleanGitEnv() });
+  mkdirSync(dir, { recursive: true });
+  run('git init');
+  run('git config user.email "test@test.com"');
+  run('git config user.name "Test"');
+  for (const [relativePath, body] of Object.entries(files)) {
+    const destination = path.join(dir, relativePath);
+    mkdirSync(path.dirname(destination), { recursive: true });
+    writeFileSync(destination, body);
+  }
+  run('git add -A && git commit -m "initial"'); // one commit holding every file
+  return execSync('git rev-parse HEAD', { cwd: dir, env: cleanGitEnv() }).toString().trim();
+}
+
 const evaluatorRegistry = {
   'llm-grader': {
     kind: 'llm-grader',
@@ -638,6 +665,114 @@ console.log('spreadsheet: revenue,total\\nQ1,42');`,
     expect(result.score).toBeGreaterThan(0);
   });
 
+  it('does not retry completed quality failures', async () => {
+    const provider = new SequenceProvider('mock', {
+      responses: [
+        {
+          output: [{ role: 'assistant', content: 'Incomplete response.' }],
+        },
+      ],
+    });
+    const failingEvaluators = { // grader stub that always returns a failing verdict
+      'llm-grader': {
+        kind: 'llm-grader',
+        async evaluate() {
+          return {
+            score: 0.1,
+            verdict: 'fail' as const,
+            assertions: [{ text: 'quality miss', passed: false }],
+            expectedAspectCount: 1,
+          };
+        },
+      },
+    };
+
+    const result = await runEvalCase({
+      evalCase: baseTestCase,
+      provider,
+      target: baseTarget,
+      evaluators: failingEvaluators,
+      maxRetries: 3, // retries are permitted, so an unwanted retry would show up below
+    });
+
+    expect(provider.callIndex).toBe(1); // presumably the provider call count — confirm in SequenceProvider
+    expect(result.executionStatus).toBe('quality_failure');
+    expect(result.retryIndex).toBe(0);
+  });
+
+  it('resets a pooled workspace slot before reusing it for the next case', async () => {
+    const tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-pooled-runner-'));
+    const previousAgentvHome = process.env.AGENTV_HOME; // saved so `finally` can restore it
+    const previousAgentvDataDir = process.env.AGENTV_DATA_DIR; // saved so `finally` can restore it
+    process.env.AGENTV_HOME = path.join(tempDir, 'agentv-home');
+    process.env.AGENTV_DATA_DIR = path.join(tempDir, 'agentv-data');
+
+    try {
+      const sourceRepo = path.join(tempDir, 'source-repo');
+      const cleanCommit = createTestRepo(sourceRepo, { 'tracked.txt': 'clean\n' });
+      const workspace = {
+        repos: [
+          {
+            path: './repo-a',
+            repo: `file://${sourceRepo}`,
+            commit: cleanCommit,
+          },
+        ],
+      };
+      const seenStaleBeforeSecond: boolean[] = [];
+      let callCount = 0;
+      const provider: Provider = {
+        id: 'mock:pooled-reset',
+        kind: 'mock' as const,
+        targetName: 'pooled-reset',
+        async invoke(request: ProviderRequest): Promise<ProviderResponse> {
+          callCount += 1;
+          if (!request.cwd) {
+            throw new Error('missing cwd');
+          }
+          const repoDir = path.join(request.cwd, 'repo-a');
+          if (callCount === 1) { // first call dirties the repo: one tracked edit, one untracked file
+            writeFileSync(path.join(repoDir, 'tracked.txt'), 'dirty\n');
+            writeFileSync(path.join(repoDir, 'stale.txt'), 'stale\n');
+          } else { // later calls record whether the first call's leftovers are still visible
+            seenStaleBeforeSecond.push(existsSync(path.join(repoDir, 'stale.txt')));
+            expect(readFileSync(path.join(repoDir, 'tracked.txt'), 'utf8')).toBe('clean\n');
+          }
+          return { output: [{ role: 'assistant', content: `response ${callCount}` }] };
+        },
+      };
+
+      const results = await runEvaluation({
+        testFilePath: path.join(tempDir, 'eval.yaml'),
+        repoRoot: tempDir,
+        target: { ...baseTarget, name: 'pooled-reset' },
+        providerFactory: () => provider,
+        evaluators: evaluatorRegistry,
+        workspaceMode: 'pooled',
+        maxConcurrency: 1,
+        evalCases: [
+          { ...baseTestCase, id: 'case-1', workspace },
+          { ...baseTestCase, id: 'case-2', workspace },
+        ],
+      });
+
+      expect(results).toHaveLength(2);
+      expect(seenStaleBeforeSecond).toEqual([false]);
+    } finally {
+      if (previousAgentvHome === undefined) {
+        Reflect.deleteProperty(process.env, 'AGENTV_HOME'); // assigning undefined would keep the key set
+      } else {
+        process.env.AGENTV_HOME = previousAgentvHome;
+      }
+      if (previousAgentvDataDir === undefined) {
+        Reflect.deleteProperty(process.env, 'AGENTV_DATA_DIR'); // same: remove, do not assign undefined
+      } else {
+        process.env.AGENTV_DATA_DIR = previousAgentvDataDir;
+      }
+      rmSync(tempDir, { recursive: true, force: true });
+    }
+  }, 30_000);
+
   it('applies exponential backoff between retries', async () => {
     const provider = new SequenceProvider('mock', {
       errors: [new Error('Transient failure')],

From 7242c84725f7f4d556f78ba29be2aa04e9a7e2e0 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 15:09:16 +0200
Subject: [PATCH 4/4] fix(eval): constrain rerun-failed identities

---
 apps/cli/src/commands/eval/run-eval.ts        | 191 ++++++++++++++++--
 apps/cli/test/eval.integration.test.ts        | 164 +++++++++++++--
 apps/cli/test/fixtures/mock-run-evaluation.ts |   1 +
 3 files changed, 324 insertions(+), 32 deletions(-)

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 6dd0afc36..e2f6cc765 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -105,6 +105,137 @@ function shouldSkipExistingResultForResume(
   return result.executionStatus !== 'execution_error';
 }
 
+interface ResumeIdentityEntry {
+  readonly kind: 'precise' | 'legacy'; // which of the two key formats `key` uses
+  readonly key: string; // identity key for `result` in that format
+  readonly result: EvaluationResult; // the prior result the key was built from
+}
+
+interface ResumeIdentityMatcher {
+  readonly preciseKeys: Set<string>; // keys of entries whose kind is 'precise'
+  readonly legacyKeys: Set<string>; // keys of entries whose kind is 'legacy'
+}
+
+function hasNonEmptyString(value: unknown): value is string {
+  return typeof value === 'string' ? value.trim() !== '' : false; // whitespace-only counts as empty
+}
+
+function objectRecord(value: unknown): Record<string, unknown> | undefined {
+  // Arrays and null are typeof 'object' too, so they are rejected explicitly.
+  const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
+  return isPlainObject ? (value as Record<string, unknown>) : undefined;
+}
+
+function resultProjectionDimensions(result: EvaluationResult): Record<string, unknown> | undefined {
+  // Read projectionIdentity.dimensions through an untyped view; undefined unless both are objects.
+  const untyped = result as unknown as Record<string, unknown>;
+  const identity = objectRecord(untyped.projectionIdentity);
+  return objectRecord(identity?.dimensions);
+}
+
+function hasCanonicalResultIdentity(result: EvaluationResult): boolean {
+  const dims = resultProjectionDimensions(result);
+  // One non-blank eval path, suite, or prompt id anywhere on the result is enough.
+  const identityHints: readonly unknown[] = [
+    dims?.evalPath,
+    dims?.suite,
+    dims?.promptId,
+    (result as unknown as Record<string, unknown>).evalPath,
+    result.source?.evalFileRepoPath,
+    result.source?.evalFilePath,
+    result.source?.evalFileAbsolutePath,
+    result.suite,
+    result.prompt?.id,
+  ];
+  return identityHints.some(hasNonEmptyString);
+}
+
+function resultResumeIdentityEntry(result: EvaluationResult): ResumeIdentityEntry {
+  // Results without canonical identity get the test/target/variant key ('legacy');
+  // every other result gets the full evaluation-result key ('precise').
+  if (!hasCanonicalResultIdentity(result)) {
+    const { testId, target, variant } = result;
+    const legacyKey = buildTestTargetKey(testId, target, variant);
+    return { kind: 'legacy', key: legacyKey, result };
+  }
+  return {
+    kind: 'precise',
+    key: buildEvaluationResultTargetKey(result),
+    result,
+  };
+}
+
+function latestResumeIdentityEntries(
+  results: readonly EvaluationResult[],
+): readonly ResumeIdentityEntry[] {
+  // A later row replaces an earlier one with the same kind-qualified key, so the last row wins.
+  const newest = new Map<string, ResumeIdentityEntry>();
+  for (const entry of results.map((row) => resultResumeIdentityEntry(row))) {
+    newest.set(`${entry.kind}:${entry.key}`, entry);
+  }
+  return [...newest.values()];
+}
+
+function createResumeIdentityMatcher(): ResumeIdentityMatcher {
+  return { preciseKeys: new Set(), legacyKeys: new Set() }; // both key sets start empty
+}
+
+function addResumeIdentityEntry(matcher: ResumeIdentityMatcher, entry: ResumeIdentityEntry): void {
+  // Legacy and precise keys live in separate sets; the entry's kind picks
+  // the set its key is recorded in.
+  const { kind, key } = entry;
+  const bucket = kind === 'legacy' ? matcher.legacyKeys : matcher.preciseKeys;
+  bucket.add(key);
+}
+
+function uniqueStrings(values: readonly (string | undefined)[]): string[] {
+  return [...new Set(values.filter(hasNonEmptyString))]; // blanks dropped, first-seen order kept
+}
+
+function buildPlannedResumeIdentityKeys(
+  test: EvalTest,
+  target: string,
+  variant: string | undefined,
+): readonly string[] {
+  // buildEvalTestTargetKey's key first, then one key per eval-path spelling, with and without suite.
+  const planned = new Set<string>([buildEvalTestTargetKey(test, target, variant)]);
+  const { source } = test;
+  const candidatePaths = uniqueStrings([
+    source?.evalFileRepoPath,
+    source?.evalFilePath,
+    source?.evalFileAbsolutePath,
+  ]);
+  // The Set collapses the duplicate null when the test has no suite.
+  const candidateSuites = [...new Set<string | null>([test.suite ?? null, null])];
+  const fixedFields = {
+    test_id: test.id ?? 'unknown',
+    prompt_id: test.prompt?.id ?? null,
+    target: target ?? 'unknown',
+    variant: variant ?? null,
+  };
+
+  for (const evalPath of candidatePaths) {
+    for (const suite of candidateSuites) {
+      planned.add(JSON.stringify({ eval_path: evalPath, suite, ...fixedFields }));
+    }
+  }
+
+  return [...planned];
+}
+
+function resumeIdentityMatches(
+  matcher: ResumeIdentityMatcher,
+  test: EvalTest,
+  target: string,
+  variant: string | undefined,
+): boolean {
+  // Planned keys are checked against the precise set first; the legacy key is the fallback.
+  for (const plannedKey of buildPlannedResumeIdentityKeys(test, target, variant)) {
+    if (matcher.preciseKeys.has(plannedKey)) return true;
+  }
+  return matcher.legacyKeys.has(buildTestTargetKey(test.id, target, variant));
+}
+
 interface RunEvalCommandInput {
   readonly testFiles: readonly string[];
   readonly rawOptions: Record<string, unknown>;
@@ -1864,9 +1995,10 @@ export async function runEvalCommand(
     }
   }
 
-  // --resume / --rerun-failed: skip already-completed tests and append to existing output.
+  // --resume skips completed rows; --rerun-failed includes only latest failed/error rows.
   // IMPORTANT: JSONL must be loaded before the output writer is created (same file).
-  let resumeSkipKeys: Set<string> | undefined;
+  let resumeSkipKeys: ResumeIdentityMatcher | undefined;
+  let rerunIncludeKeys: ResumeIdentityMatcher | undefined;
   let isResumeAppend = false;
   if (options.resume && !options.retryErrors) {
     const sourceRunDir = options.rerunFailedSource
@@ -1883,13 +2015,15 @@ export async function runEvalCommand(
       const resumeIndexPaths = discoverRunManifestPaths(sourceRunDir);
       if (resumeIndexPaths.length > 0) {
         const existingResults = await readExistingResultsFromRunDir(sourceRunDir);
-        resumeSkipKeys = new Set<string>();
+        resumeSkipKeys = createResumeIdentityMatcher();
+        rerunIncludeKeys = options.rerunFailed ? createResumeIdentityMatcher() : undefined;
         let completedResultCount = 0;
-        for (const r of existingResults) {
-          if (shouldSkipExistingResultForResume(r, options.rerunFailed)) {
+        for (const entry of latestResumeIdentityEntries(existingResults)) {
+          if (shouldSkipExistingResultForResume(entry.result, options.rerunFailed)) {
             completedResultCount += 1;
-            resumeSkipKeys.add(buildEvaluationResultTargetKey(r));
-            resumeSkipKeys.add(buildTestTargetKey(r.testId, r.target, r.variant));
+            addResumeIdentityEntry(resumeSkipKeys, entry);
+          } else if (rerunIncludeKeys) {
+            addResumeIdentityEntry(rerunIncludeKeys, entry);
           }
         }
         isResumeAppend =
@@ -1899,6 +2033,9 @@ export async function runEvalCommand(
         console.log(
           `${modeLabel}: found ${existingResults.length} existing result(s), skipping ${completedResultCount} completed.`,
         );
+      } else if (options.rerunFailed) {
+        rerunIncludeKeys = createResumeIdentityMatcher();
+        console.log('Rerun-failed: no existing bundle run manifest found. Nothing to rerun.');
       } else {
         // No existing bundle manifest — behave like a normal run.
         console.log('Resume: no existing bundle run manifest found, starting fresh run.');
@@ -2153,9 +2290,13 @@ export async function runEvalCommand(
       for (const { selection } of meta.selections) {
         const target = selection.targetName;
         const variant = targetVariantForSelection(selection);
-        const key = buildEvalTestTargetKey(test, target, variant);
-        const fallbackKey = buildTestTargetKey(test.id, target, variant);
-        if (resumeSkipKeys?.has(key) || resumeSkipKeys?.has(fallbackKey)) {
+        if (rerunIncludeKeys) {
+          if (resumeIdentityMatches(rerunIncludeKeys, test, target, variant)) {
+            totalEvalCount++;
+          } else {
+            resumeSkippedCount++;
+          }
+        } else if (resumeSkipKeys && resumeIdentityMatches(resumeSkipKeys, test, target, variant)) {
           resumeSkippedCount++;
         } else {
           totalEvalCount++;
@@ -2170,6 +2311,10 @@ export async function runEvalCommand(
       console.log('No execution errors or missing cases in the previous run. Nothing to retry.');
       return;
     }
+    if (rerunIncludeKeys) {
+      console.log('Nothing to rerun — no failed or errored test(s) matched the current suite.');
+      return;
+    }
     // When using --resume, all tests being completed means nothing to resume
     if (resumeSkipKeys && resumeSkippedCount > 0) {
       console.log(`Nothing to resume — all ${resumeSkippedCount} test(s) already completed.`);
@@ -2400,16 +2545,22 @@ export async function runEvalCommand(
             const targetName = selection.targetName;
             const applicableTestCases = targetPrep.testCases;
 
-            // --resume / --rerun-failed: skip tests that are already completed
-            const filteredTestCases = resumeSkipKeys
-              ? applicableTestCases.filter((test) => {
-                  const variant = targetVariantForSelection(selection);
-                  return (
-                    !resumeSkipKeys.has(buildEvalTestTargetKey(test, targetName, variant)) &&
-                    !resumeSkipKeys.has(buildTestTargetKey(test.id, targetName, variant))
-                  );
-                })
-              : applicableTestCases;
+            // --resume skips completed tests; --rerun-failed only includes prior failed/error tests.
+            const filteredTestCases = rerunIncludeKeys
+              ? applicableTestCases.filter((test) =>
+                  resumeIdentityMatches(
+                    rerunIncludeKeys,
+                    test,
+                    targetName,
+                    targetVariantForSelection(selection),
+                  ),
+                )
+              : resumeSkipKeys
+                ? applicableTestCases.filter((test) => {
+                    const variant = targetVariantForSelection(selection);
+                    return !resumeIdentityMatches(resumeSkipKeys, test, targetName, variant);
+                  })
+                : applicableTestCases;
 
             if (filteredTestCases.length === 0) {
               return [];
diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
index 5fc435e85..fa6320b3b 100644
--- a/apps/cli/test/eval.integration.test.ts
+++ b/apps/cli/test/eval.integration.test.ts
@@ -931,7 +931,7 @@ describe('agentv eval CLI', () => {
     }
   }, 30_000);
 
-  it('reruns failed rows from a canonical run id', async () => {
+  it('reruns only latest failed rows from a canonical run id', async () => {
     const fixture = await createFixture();
     try {
       const priorRunDir = path.join(fixture.suiteDir, '.agentv', 'results', 'prior-run');
@@ -946,18 +946,61 @@ describe('agentv eval CLI', () => {
       expect(first.exitCode).toBe(1);
       const priorIndexPath = path.join(priorRunDir, 'index.jsonl');
       const priorRows = (await readJsonLines(priorIndexPath)) as Array<Record<string, unknown>>;
+      const alphaRow = priorRows.find((row) => row.test_id === 'case-alpha');
+      const betaRow = priorRows.find((row) => row.test_id === 'case-beta');
+      if (!alphaRow || !betaRow) {
+        throw new Error('Expected prior rows for case-alpha and case-beta');
+      }
       await writeFile(
         priorIndexPath,
-        `${priorRows
-          .map((row) =>
-            JSON.stringify({
-              ...row,
-              execution_status: row.test_id === 'case-alpha' ? 'quality_failure' : 'ok',
-            }),
-          )
+        `${[
+          ...priorRows.map((row) => ({
+            ...row,
+            execution_status: row.test_id === 'case-alpha' ? 'ok' : 'quality_failure',
+          })),
+          { ...alphaRow, execution_status: 'quality_failure' },
+          { ...betaRow, execution_status: 'ok' },
+        ]
+          .map((row) => JSON.stringify(row))
           .join('\n')}\n`,
         'utf8',
       );
+      await writeFile(
+        fixture.testFilePath,
+        `description: CLI integration test
+target: file-target
+
+tests:
+  - id: case-alpha
+    criteria: System responds with alpha
+    input:
+      - role: user
+        content: |
+          Please respond with alpha
+    expected_output:
+      - role: assistant
+        content: "Alpha"
+  - id: case-beta
+    criteria: System responds with beta
+    input:
+      - role: user
+        content: |
+          Please respond with beta
+    expected_output:
+      - role: assistant
+        content: "Beta"
+  - id: case-gamma
+    criteria: System responds with gamma
+    input:
+      - role: user
+        content: |
+          Please respond with gamma
+    expected_output:
+      - role: assistant
+        content: "Gamma"
+`,
+        'utf8',
+      );
 
       const second = await runCli(fixture, [
         'eval',
@@ -965,10 +1008,10 @@ describe('agentv eval CLI', () => {
         '--rerun-failed',
         'prior-run',
         '--threshold',
-        '0.8',
+        '0.5',
       ]);
-      expect(second.exitCode).toBe(1);
-      expect(second.stdout).toContain('Rerun-failed: found 2 existing result(s), skipping 1');
+      expect(second.exitCode).toBe(0);
+      expect(second.stdout).toContain('Rerun-failed: found 4 existing result(s), skipping 1');
 
       const diagnostics = await readDiagnostics(fixture);
       const calls = diagnostics.calls as Array<Record<string, unknown>>;
@@ -977,13 +1020,110 @@ describe('agentv eval CLI', () => {
       });
 
       const rows = await readJsonLines(priorIndexPath);
-      expect(rows).toHaveLength(3);
+      expect(rows).toHaveLength(5);
       expect((rows.at(-1) as Record<string, unknown>).test_id).toBe('case-alpha');
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
   }, 30_000);
 
+  it('does not use coarse fallback keys for precise rerun-failed identities', async () => {
+    const fixture = await createFixture();
+    try {
+      const firstEvalPath = path.join(fixture.suiteDir, 'collision-a.eval.yaml');
+      const secondEvalPath = path.join(fixture.suiteDir, 'collision-b.eval.yaml');
+      const evalContent = (name: string) => `description: ${name}
+target: file-target
+
+tests:
+  - id: shared-case
+    criteria: System responds
+    input:
+      - role: user
+        content: |
+          Please respond for ${name}
+    expected_output:
+      - role: assistant
+        content: "Shared"
+`;
+      await writeFile(firstEvalPath, evalContent('collision a'), 'utf8'); // both files declare test id 'shared-case'
+      await writeFile(secondEvalPath, evalContent('collision b'), 'utf8');
+
+      const priorRunDir = path.join(fixture.suiteDir, '.agentv', 'results', 'prior-collision');
+      const first = await runCli(fixture, [
+        'eval',
+        firstEvalPath,
+        '--output',
+        priorRunDir,
+        '--threshold',
+        '0.8',
+      ]);
+      expect(first.exitCode).toBe(0);
+
+      const priorIndexPath = path.join(priorRunDir, 'index.jsonl');
+      const priorRows = (await readJsonLines(priorIndexPath)) as Array<Record<string, unknown>>;
+      expect(priorRows).toHaveLength(1);
+      const baseRow = priorRows[0];
+      if (!baseRow) {
+        throw new Error('Expected one prior collision row');
+      }
+      const baseProjection = baseRow.projection_identity as Record<string, unknown>;
+      const baseDimensions = baseProjection.dimensions as Record<string, unknown>;
+      const secondProjection = { // same projection identity, but with eval_path set to collision-b
+        ...baseProjection,
+        dimensions: {
+          ...baseDimensions,
+          eval_path: secondEvalPath,
+        },
+      };
+      await writeFile(
+        priorIndexPath,
+        `${[
+          { ...baseRow, execution_status: 'ok' }, // collision-a row, marked as passed
+          {
+            ...baseRow,
+            projection_identity: secondProjection,
+            execution_status: 'quality_failure', // collision-b row, marked as failed
+          },
+        ]
+          .map((row) => JSON.stringify(row))
+          .join('\n')}\n`,
+        'utf8',
+      );
+
+      const second = await runCli(fixture, [
+        'eval',
+        firstEvalPath,
+        secondEvalPath,
+        '--rerun-failed',
+        'prior-collision',
+        '--threshold',
+        '0.8',
+      ]);
+      expect(second.exitCode).toBe(0);
+      expect(second.stdout).toContain('Rerun-failed: found 2 existing result(s), skipping 1');
+
+      const diagnostics = await readDiagnostics(fixture);
+      const calls = diagnostics.calls as Array<Record<string, unknown>>;
+      const rerunCalls = calls.slice(1); // skip the call recorded by the first CLI run
+      expect(rerunCalls).toHaveLength(1);
+      const rerunCall = rerunCalls[0];
+      if (!rerunCall) {
+        throw new Error('Expected one rerun diagnostics call');
+      }
+      expect(path.basename(rerunCall.testFilePath as string)).toBe('collision-b.eval.yaml'); // only the failed row's file reruns
+      expect(rerunCall).toMatchObject({
+        evalCaseIds: ['shared-case'],
+      });
+
+      const rows = await readJsonLines(priorIndexPath);
+      expect(rows).toHaveLength(3); // the two rows written above plus one appended rerun row
+      expect((rows.at(-1) as Record<string, unknown>).test_id).toBe('shared-case');
+    } finally {
+      await rm(fixture.baseDir, { recursive: true, force: true });
+    }
+  }, 30_000);
+
   it('does not multiply max_concurrency across target matrix selections', async () => {
     const fixture = await createFixture();
     try {
diff --git a/apps/cli/test/fixtures/mock-run-evaluation.ts b/apps/cli/test/fixtures/mock-run-evaluation.ts
index 4103e4eaf..c97edf54b 100644
--- a/apps/cli/test/fixtures/mock-run-evaluation.ts
+++ b/apps/cli/test/fixtures/mock-run-evaluation.ts
@@ -173,6 +173,7 @@ async function maybeWriteDiagnostics(
   }
 
   const payload = {
+    testFilePath: options.testFilePath,
     target: options.target?.name,
     targetKind: options.target?.kind,
     targetModel: