diff --git a/apps/cli/src/commands/prepare/index.ts b/apps/cli/src/commands/prepare/index.ts
index a615e823a..5a6ad9aeb 100644
--- a/apps/cli/src/commands/prepare/index.ts
+++ b/apps/cli/src/commands/prepare/index.ts
@@ -8,6 +8,7 @@ import path from 'node:path';
 
 import {
   type EvalTargetRef,
+  type JsonObject,
   type PreparedEvalWorkspace,
   type PreparedWorkspaceRepoPin,
   type ResolvedTarget,
@@ -51,6 +52,8 @@ interface PrepareResult {
   readonly manifestPath: string;
   readonly setupStatus: 'ok';
   readonly setupSteps: readonly SetupStep[];
+  readonly providerContext?: JsonObject;
+  readonly metadata?: Record<string, unknown>;
   readonly repoPins: readonly RepoPin[];
   readonly baseline: PreparedEvalWorkspace['baseline'];
   readonly createdAt: string;
@@ -65,6 +68,8 @@ interface PrepareManifestWire {
   readonly prompt_path: string;
   readonly setup_status: 'ok';
   readonly setup_steps: readonly SetupStepWire[];
+  readonly provider_context?: JsonObject;
+  readonly metadata?: Record<string, unknown>;
   readonly repo_pins: readonly RepoPinWire[];
   readonly baseline: BaselineWire;
   readonly created_at: string;
@@ -130,6 +135,37 @@ function toRepoPins(pins: readonly PreparedWorkspaceRepoPin[]): readonly RepoPin
   }));
 }
 
+/** Copies `value`, rebasing absolute paths inside the source workspace onto the target one. */
+function remapWorkspacePaths<T>(
+  value: T,
+  sourceWorkspacePath: string,
+  targetWorkspacePath: string,
+): T {
+  if (typeof value === 'string') {
+    const relativePath = path.relative(sourceWorkspacePath, value);
+    // Only a whole '..' segment leaves the workspace; an entry named '..name' is still inside it.
+    const isOutside = relativePath === '..' || relativePath.startsWith(`..${path.sep}`);
+    // Keep relative strings (path.relative resolves them via cwd) and out-of-workspace paths.
+    if (!path.isAbsolute(value) || isOutside || path.isAbsolute(relativePath)) {
+      return value;
+    }
+    return path.join(targetWorkspacePath, relativePath) as T;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) =>
+      remapWorkspacePaths(item, sourceWorkspacePath, targetWorkspacePath),
+    ) as T;
+  }
+  if (value && typeof value === 'object') {
+    return Object.fromEntries(
+      Object.entries(value).map(([key, item]) => [
+        key,
+        remapWorkspacePaths(item, sourceWorkspacePath, targetWorkspacePath),
+      ]),
+    ) as T;
+  }
+  return value;
+}
+
 async function moveDirectory(sourcePath: string, destinationPath: string): Promise<void> {
   try {
     await rename(sourcePath, destinationPath);
@@ -200,6 +236,8 @@ function toManifestWire(result: PrepareResult): PrepareManifestWire {
       status: step.status,
       ...(step.message !== undefined && { message: step.message }),
     })),
+    ...(result.providerContext !== undefined && { provider_context: result.providerContext }),
+    ...(result.metadata !== undefined && { metadata: result.metadata }),
     repo_pins: result.repoPins.map((pin) => ({
       ...(pin.path !== undefined && { path: pin.path }),
       ...(pin.repo !== undefined && { repo: pin.repo }),
@@ -322,6 +360,16 @@ async function prepareAttempt(options: {
     manifestPath,
     setupStatus: 'ok',
     setupSteps: setupStepsFromPrepared(prepared),
+    ...(prepared.providerContext !== undefined && {
+      providerContext: remapWorkspacePaths(
+        prepared.providerContext,
+        prepared.workspacePath,
+        workspacePath,
+      ),
+    }),
+    ...(prepared.metadata !== undefined && {
+      metadata: remapWorkspacePaths(prepared.metadata, prepared.workspacePath, workspacePath),
+    }),
     repoPins: toRepoPins(prepared.repoPins),
     baseline: prepared.baseline,
     createdAt: prepared.createdAt,
diff --git a/apps/cli/test/commands/prepare/prepare.test.ts b/apps/cli/test/commands/prepare/prepare.test.ts
index 4e8a9fe65..1bb265d33 100644
--- a/apps/cli/test/commands/prepare/prepare.test.ts
+++ b/apps/cli/test/commands/prepare/prepare.test.ts
@@ -238,4 +238,75 @@ describe('agentv prepare', () => {
     expect(typeof output.baseline.commit).toBe('string');
     expect(Object.keys(output)).not.toContain('workspacePath');
   });
+
+  it('remaps prepared extension context paths into the output workspace', async () => {
+    const evalPath = path.join(tempDir, 'evals', 'suite.eval.yaml');
+    const outDir = path.join(tempDir, 'prepared-extension-context');
+
+    await mkdir(path.join(tempDir, 'evals'), { recursive: true });
+    await mkdir(path.join(tempDir, 'template'), { recursive: true });
+    await mkdir(path.join(tempDir, 'rules'), { recursive: true });
+    await mkdir(path.join(tempDir, 'scripts'), { recursive: true });
+    await mkdir(path.join(tempDir, '.agentv'), { recursive: true });
+    await writeFile(path.join(tempDir, 'template', 'app.txt'), 'initial\n', 'utf8');
+    await writeFile(path.join(tempDir, 'rules', 'AGENTS.md'), '# Rules\n', 'utf8');
+    await writeFile(path.join(tempDir, 'scripts', 'target.ts'), '', 'utf8');
+    await writeFile(
+      path.join(tempDir, '.agentv', 'targets.yaml'),
+      `
+targets:
+  - name: codex
+    provider: cli
+    command: bun ./scripts/target.ts
+`,
+      'utf8',
+    );
+    await writeFile(
+      evalPath,
+      `
+extensions:
+  - id: agentv:agent-rules
+    hook: beforeAll
+    rules: ../rules/AGENTS.md
+workspace:
+  template: ../template
+tests:
+  - id: case-1
+    input: "Fix the workspace file."
+    criteria: "Works"
+`,
+      'utf8',
+    );
+
+    await execa(
+      'bun',
+      [
+        '--no-env-file',
+        CLI_ENTRY,
+        'prepare',
+        evalPath,
+        '--test-id',
+        'case-1',
+        '--target',
+        'codex',
+        '--out',
+        outDir,
+      ],
+      {
+        cwd: tempDir,
+        env: {
+          AGENTV_HOME: path.join(tempDir, '.agentv-home'),
+          AGENTV_NO_UPDATE_CHECK: '1',
+        },
+      },
+    );
+
+    const workspacePath = path.join(outDir, 'workspace');
+    const manifest = JSON.parse(await readFile(path.join(outDir, 'agentv_prepare.json'), 'utf8'));
+    const rulesPath = manifest.provider_context.agent_rules_paths.rules[0];
+
+    expect(rulesPath).toStartWith(workspacePath);
+    expect(await exists(rulesPath)).toBe(true);
+    expect(manifest.metadata.agent_rules_paths.rules[0]).toBe(rulesPath);
+  });
 });
diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 63d6655ab..a49a13229 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -5,7 +5,7 @@ sidebar:
   order: 1
 ---
 
-Evaluation files define the test cases, graders, workspace lifecycle, and run controls for an evaluation run. The reserved `tags.experiment` key is the run/result grouping label, top-level `target` identifies the system under test, and fields such as `repeat`, `threshold`, `timeout_seconds`, `evaluate_options.budget_usd`, and `evaluate_options.max_concurrency` control repeated attempts and gates. Workspace reuse belongs under `workspace.isolation`; Docker/container binding belongs under `workspace.docker`. Install, build, and reset commands belong under `workspace.hooks`; runner-specific setup belongs in the `target` object or `targets.yaml`. AgentV supports two eval data formats: YAML and JSONL.
+Evaluation files define the test cases, graders, workspace lifecycle, and run controls for an evaluation run. The reserved `tags.experiment` key is the run/result grouping label, top-level `target` identifies the system under test, and fields such as `repeat`, `threshold`, `timeout_seconds`, `evaluate_options.budget_usd`, and `evaluate_options.max_concurrency` control repeated attempts and gates. Workspace reuse belongs under `workspace.isolation`; repository provenance belongs under `workspace.repos`; Docker/container binding belongs under `workspace.docker`. Non-provisioning setup commands belong in top-level `extensions`; reset policy stays under `workspace.hooks.after_each.reset`; runner-specific setup belongs in the `target` object or `targets.yaml`. AgentV supports two eval data formats: YAML and JSONL.
 
 YAML is the canonical portable model. TypeScript helpers, generated fixtures, and Python scripts should lower to the same YAML/JSONL shapes rather than inventing a separate eval contract.
 Eval files describe the task, target binding, and run controls. Use `evaluate_options.max_concurrency` for authored suite concurrency. Operators can still override concurrency with `--workers` or set defaults with `execution.workers` in `agentv.config.*` / `.agentv/config.yaml`; do not author legacy `workers` fields in eval YAML.
@@ -122,20 +122,58 @@ tests:
 | `evaluate_options` | Optional evaluation runtime options such as `budget_usd` and `max_concurrency` |
 | `threshold` | Optional suite quality threshold |
 | `workspace` | Suite-level task environment — inline object or string path to an [external workspace file](/docs/guides/workspace-pool/#external-workspace-config). Repo entries declare identity and checkout pins; acquisition is covered in [Workspace Architecture](/docs/guides/workspace-architecture/#repo-provenance-vs-acquisition). |
+| `extensions` | Promptfoo-style lifecycle hooks: `file://path/to/hooks.mjs:beforeAll`, `beforeEach`, `afterEach`, `afterAll`, plus the built-in `agentv:agent-rules`. Hooks run after `workspace.template` and `workspace.repos` materialize. |
 | `imports` | Optional import groups. `imports.suites` imports full child eval suites with their task context. `imports.tests` imports raw test rows into this file's context. Import entries may use scoped `run:` overrides for `threshold`, `repeat`, `timeout_seconds`, and `budget_usd`. |
 | `tests` | Inline raw tests or a string path to an external raw-case file or directory. Legacy `tests[].include` entries still load with a migration warning; prefer `imports.suites` or `imports.tests`. |
 | `assertions` | Suite-level graders appended to each test unless `execution.skip_defaults: true` is set on the test |
 | `input` | Suite-level input messages prepended to each test's input unless `execution.skip_defaults: true` is set on the test |
 
 `workspace` is what the agent can inspect or modify through tools, not prompt
-input. Put instructions in `input`; put repos, templates, and lifecycle setup in
-`workspace`.
+input. Put instructions in `input`; put repos, templates, Docker config, env
+checks, isolation, and repo provenance in `workspace`. Put lifecycle setup that
+does not acquire repos in `extensions`.
 
 For historical or repo-state evals, put the checkout under
 `workspace.repos[].commit` or `workspace.repos[].base_commit`. A commit SHA in
 the prompt or metadata is useful context, but it does not materialize a repo for
 the agent to inspect.
 
+### Lifecycle Extensions
+
+`extensions` uses Promptfoo-compatible lifecycle names. File hooks are local
+JavaScript or TypeScript modules resolved relative to the eval file:
+
+```yaml
+extensions:
+  - file://scripts/setup.mjs:beforeAll
+  - file://scripts/setup.mjs:beforeEach
+  - file://scripts/setup.mjs:afterEach
+  - file://scripts/setup.mjs:afterAll
+```
+
+Each exported function receives a context object with snake_case keys such as
+`workspace_path`, `test_id`, `eval_run_id`, `case_input`, and `case_metadata`.
+Setup hook failures (`beforeAll`, `beforeEach`) fail the affected run; teardown
+hook failures (`afterEach`, `afterAll`) are non-fatal.
+
+`agentv:agent-rules` is currently the only built-in extension. It runs after
+workspace materialization and exposes staged rule paths to providers and result
+metadata as `agent_rules_paths`:
+
+```yaml
+extensions:
+  - id: agentv:agent-rules
+    hook: beforeAll
+    skills: agent-rules/skills
+    hooks: agent-rules/hooks
+    agents: agent-rules/agents
+    rules: agent-rules/AGENTS.md
+```
+
+If `agentv:agent-rules` is authored as a string, it defaults to `beforeAll` and
+discovers conventional rule locations already present in the materialized
+workspace. It does not clone repositories or replace `workspace.repos`.
+
 ### Metadata Fields
 
 You can add structured metadata to your eval file using these optional top-level fields. Metadata is parsed when the `name` field is present:
diff --git a/apps/web/src/content/docs/docs/evaluation/experiments.mdx b/apps/web/src/content/docs/docs/evaluation/experiments.mdx
index aab8f78f6..a02e1a331 100644
--- a/apps/web/src/content/docs/docs/evaluation/experiments.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/experiments.mdx
@@ -189,7 +189,7 @@ Scoped `run:` supports `threshold`, `repeat`, `timeout_seconds`, and legacy
 per-case `budget_usd` overrides. Parent suite budgets should use
 `evaluate_options.budget_usd` for public eval authoring. Use
 `evaluate_options.max_concurrency` for authored concurrency. Candidate-changing fields stay
-parent-level. Workspace mutation belongs in `workspace.hooks`, and
+parent-level. Executable workspace setup belongs in top-level lifecycle extensions, and
 provider-specific setup belongs in target configuration.
 
 ## Lifecycle Ownership
@@ -199,8 +199,9 @@ target-specific runner state.
 
 | Need | Put it in |
 | --- | --- |
-| Install dependencies, build the repo, seed files | `workspace.hooks.before_all` |
-| Reset or apply per-case state | `workspace.hooks.before_each` / `workspace.hooks.after_each` |
+| Install dependencies, build the repo, seed files | `extensions: ["file://scripts/setup.mjs:beforeAll"]` |
+| Apply per-case state | `extensions: ["file://scripts/setup.mjs:beforeEach"]` |
+| Reset file state after each case | `workspace.hooks.after_each.reset` |
 | Configure an agent runner or provider variant | `target` object or `targets.yaml` |
 | Choose the target | top-level `target` |
 | Override the target's default model | `target.model` |
@@ -208,10 +209,8 @@ target-specific runner state.
 | Bind an existing local workspace directory | `--workspace-path` or `.agentv/config.local.yaml` |
 
 ```yaml
-workspace:
-  hooks:
-    before_all:
-      command: ["bash", "-lc", "bun install && bun run build"]
+extensions:
+  - file://scripts/build.mjs:beforeAll
 
 target:
   extends: codex-gpt5
diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
index dc881d48b..ae8711bc4 100644
--- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -326,14 +326,16 @@ agentv eval evals/my-eval.yaml --workspace-clean full
 agentv eval evals/my-eval.yaml --retain-on-success cleanup --retain-on-failure keep
 ```
 
-Portable eval YAML keeps workspace intent under templates, repos, hooks, env,
-Docker, and folder isolation:
+Portable eval YAML keeps workspace intent under templates, repos, env, Docker,
+and folder isolation. Use top-level extensions for executable setup:
 
 ```yaml
+extensions:
+  - file://scripts/setup.mjs:beforeAll
+
 workspace:
   isolation: shared      # shared | per_case
   hooks:
-    enabled: true        # set false to skip all hooks
     after_each:
       reset: fast        # none | fast | strict
 ```
@@ -343,7 +345,7 @@ Notes:
 - Pooled mode is an explicit machine-local optimization.
 - `--workspace-path` uses an existing machine-local directory as-is and implies static runtime mode.
 - Runtime static mode is incompatible with `isolation: per_case`.
-- `hooks.enabled: false` skips all lifecycle hooks (setup, teardown, reset).
+- `workspace.hooks.after_each.reset` resets file state after each case.
 - Pool slots are managed separately (`agentv workspace list|clean`).
 
 ### Resume an Interrupted Run
diff --git a/apps/web/src/content/docs/docs/guides/benchmark-provenance.mdx b/apps/web/src/content/docs/docs/guides/benchmark-provenance.mdx
index ae760593a..87241b4dd 100644
--- a/apps/web/src/content/docs/docs/guides/benchmark-provenance.mdx
+++ b/apps/web/src/content/docs/docs/guides/benchmark-provenance.mdx
@@ -27,24 +27,25 @@ Use this split when deciding where a benchmark key belongs:
 |------------|--------------|------------------|
 | `workspace.repos[]` | Yes | Declares repo identity and checkout refs; AgentV resolves acquisition and materializes the checkout. |
 | `workspace.template` | Yes | Copies a workspace template into the run workspace. |
-| `workspace.hooks` | Yes | Runs lifecycle commands with workspace and case context on stdin. |
+| `extensions` | Yes | Runs Promptfoo-style lifecycle setup after `workspace.template` and `workspace.repos` materialize. |
+| `workspace.hooks.after_each.reset` | Yes | Controls workspace reset policy after each case. |
 | `workspace.isolation` | Yes | Controls shared vs per-case folder isolation. Runtime workspace paths are machine-local config/CLI bindings, not benchmark provenance. |
 | `experiment` | Yes | Selects targets, thresholds, repeat policy, budgets, and default grader behavior. Concurrency is an operator/run setting from `--workers` or project config. |
 | `input`, `input_files`, `expected_output` | Yes | Builds the target prompt and passive reference answer. |
 | `assertions` | Yes | Runs deterministic, LLM, composite, or code graders. |
 | Top-level `name`, `version`, `tags`, `license`, `requires` | Informational | Identifies and categorizes the suite. |
-| `tests[].metadata` | Informational to AgentV | Passes arbitrary case data through to results and hook stdin; in-process custom assertions can also read it. |
+| `tests[].metadata` | Informational to AgentV | Passes arbitrary case data through to results and extension context; in-process custom assertions can also read it. |
 
-`metadata` can still become operational inside your own hook scripts. For
-example, a `before_each` hook can read `case_metadata.test_patch` and apply that
+`metadata` can still become operational inside your own lifecycle extensions. For
+example, a `beforeEach` extension can read `case_metadata.test_patch` and apply that
 patch before the agent starts. The distinction is that AgentV itself only passes
-the metadata along; the script owns the behavior.
+the metadata along; the extension owns the behavior.
 
-## Hook Payloads
+## Extension Context
 
-Lifecycle hooks receive JSON on stdin. Case-scoped hooks such as per-test
-`before_all`, `before_each`, and `after_each` receive the current test's
-metadata as `case_metadata`:
+File lifecycle extensions export functions named `beforeAll`, `beforeEach`,
+`afterEach`, or `afterAll`. AgentV calls each function with context including
+the current test's metadata as `case_metadata`:
 
 ```json
 {
@@ -59,9 +60,9 @@ metadata as `case_metadata`:
 }
 ```
 
-Suite-level `before_all` hooks run once for the workspace, before any one test is
-selected, so they should do suite setup only. Use `before_each` when setup depends
-on per-case metadata such as a patch path, source row, or selected test list.
+`beforeAll` runs once for the shared workspace after repo materialization, so it
+should do suite setup only. Use `beforeEach` when setup depends on per-case
+metadata such as a patch path, source row, or selected test list.
 
 ## Task Artifact Anatomy
 
@@ -71,7 +72,7 @@ Benchmark task packs map cleanly onto AgentV fields at authoring time:
 |---------------|----------------|
 | Prompt or instruction | `input`, usually with `type: file` blocks for long prompts |
 | Source checkout | `workspace.repos[].repo` and `workspace.repos[].commit` |
-| Per-case setup | `workspace.hooks.before_each` reading `case_metadata` |
+| Per-case setup | `extensions: ["file://scripts/setup.mjs:beforeEach"]` reading `case_metadata` |
 | Gold answer | `expected_output` when the answer is passive reference data |
 | Active verification | `assertions`, especially `code-grader` for commands or artifact checks |
 | Provenance | `tests[].metadata` with source pins, generator rows, and curation labels |
@@ -104,12 +105,12 @@ workspace:
       repo: https://github.com/example/widget.git
       commit: 4f3e2d19b6e4e8f1c2b7d9a0e5a6b7c8d9e0f123
   hooks:
-    before_each:
-      command: ["python", "./scripts/apply-test-patch.py"]
-      timeout_ms: 120000
     after_each:
       reset: strict
 
+extensions:
+  - file://scripts/apply-test-patch.mjs:beforeEach
+
 assertions:
   - name: focused-tests
     type: code-grader
@@ -133,7 +134,7 @@ tests:
 
 In this example, `workspace.repos[].commit` is the actual checkout. The
 matching `metadata.source_commit` is audit data that gets recorded with the case
-and is available to scripts. `apply-test-patch.py` can read
+and is available to extensions. `apply-test-patch.mjs` can read
 `case_metadata.test_patch` and `case_metadata.fail_to_pass_tests`, then apply
 the patch and write the selected test list into the workspace. The code grader
 can read that workspace file through its `workspace_path` payload. Repo
@@ -158,9 +159,9 @@ workspace:
     - path: ./repo
       repo: https://github.com/example/widget.git
       commit: 4f3e2d19b6e4e8f1c2b7d9a0e5a6b7c8d9e0f123
-  hooks:
-    before_each:
-      command: ["python", "./scripts/apply-case-fixtures.py"]
+
+extensions:
+  - file://scripts/apply-case-fixtures.mjs:beforeEach
 
 target: codex
 
diff --git a/apps/web/src/content/docs/docs/guides/eval-authoring.mdx b/apps/web/src/content/docs/docs/guides/eval-authoring.mdx
index 6dda5efbf..6d5c4e39b 100644
--- a/apps/web/src/content/docs/docs/guides/eval-authoring.mdx
+++ b/apps/web/src/content/docs/docs/guides/eval-authoring.mdx
@@ -5,60 +5,58 @@ sidebar:
   order: 3
 ---
 
-## Workspace Setup: Skill Discovery Paths
-
-The `before_all` setup hook must copy skills to **all** provider discovery paths. Each provider searches a different directory:
-
-| Provider | Discovery path |
-|----------|---------------|
-| claude-cli | `.claude/skills/` |
-| allagents | `.agents/skills/` |
-| pi-cli | `.pi/skills/` |
-
-If your setup hook only copies to one path, `skill-trigger` assertions will fail for other providers.
-
-### Example setup.mjs
-
-```javascript
-import { cp, mkdir } from 'node:fs/promises';
-import path from 'node:path';
-
-// Read AgentV payload from stdin
-const payload = JSON.parse(await new Promise((resolve) => {
-  let data = '';
-  process.stdin.on('data', (chunk) => (data += chunk));
-  process.stdin.on('end', () => resolve(data));
-}));
-
-const workspacePath = payload.workspace_path;
-const skillSource = path.resolve('skills');
-
-// Copy skills to all provider discovery paths
-const discoveryPaths = [
-  '.claude/skills',
-  '.agents/skills',
-  '.pi/skills',
-];
-
-for (const rel of discoveryPaths) {
-  const dest = path.join(workspacePath, rel);
-  await mkdir(path.dirname(dest), { recursive: true });
-  await cp(skillSource, dest, { recursive: true });
-}
-```
+## Agent Rules and Skill Paths
 
-### In your eval YAML
+Use the built-in `agentv:agent-rules` extension when an eval needs to stage or
+expose agent-facing rules, skills, hooks, or subagents. It runs after
+`workspace.template` and `workspace.repos` materialize, then writes
+`agent_rules_paths` into provider context and result metadata.
 
 ```yaml
+extensions:
+  - id: agentv:agent-rules
+    hook: beforeAll
+    skills: agent-rules/skills
+    hooks: agent-rules/hooks
+    agents: agent-rules/agents
+    rules: agent-rules/AGENTS.md
+
 workspace:
   template: ./workspace-template
-  hooks:
-    before_all:
-      command:
-        - node
-        - ../scripts/setup.mjs
+  repos:
+    - path: ./app
+      repo: acme/app
+      commit: main
 ```
 
+Configured paths are resolved relative to the eval file and staged under the
+materialized workspace. If you write the shorthand form, AgentV discovers
+conventional rule locations already present in the workspace:
+
+```yaml
+extensions:
+  - agentv:agent-rules
+```
+
+Do not move repo acquisition into `agentv:agent-rules`. Repositories remain
+first-class workspace provenance through `workspace.repos`.
+
+## Custom Lifecycle Setup
+
+Use `file://` lifecycle extensions for setup that is not repo provisioning:
+
+```yaml
+extensions:
+  - file://scripts/setup.mjs:beforeAll
+  - file://scripts/setup.mjs:beforeEach
+  - file://scripts/setup.mjs:afterEach
+  - file://scripts/setup.mjs:afterAll
+```
+
+Each file hook exports a function with the matching name. The function receives
+context such as `workspace_path`, `test_id`, `eval_run_id`, `case_input`, and
+`case_metadata`.
+
 ## Workspace Limitations: No GitHub Remote
 
 Workspace-based evals are sandboxed — there is no GitHub remote, no PRs, and no issue tracker. Tests that ask agents to interact with GitHub will fail.
diff --git a/apps/web/src/content/docs/docs/guides/workspace-architecture.mdx b/apps/web/src/content/docs/docs/guides/workspace-architecture.mdx
index 93a6e281a..1cc0141b6 100644
--- a/apps/web/src/content/docs/docs/guides/workspace-architecture.mdx
+++ b/apps/web/src/content/docs/docs/guides/workspace-architecture.mdx
@@ -41,14 +41,14 @@ eval start
   |
   v
 +---------------------------+
-| 4. before_all hooks       |  workspace hook, then target hook
+| 4. beforeAll lifecycle    |  extensions, then target hook
 +---------------------------+
   |
   v
 +---------------------------+
 | 5. Test loop              |  For each test case:
-|    before_each -> run ->  |    workspace hook, target hook, agent,
-|    after_each             |    target hook, workspace hook
+|    beforeEach -> run ->   |    extension, target hook, agent,
+|    afterEach              |    target hook, extension, reset
 +---------------------------+
   |
   v
diff --git a/apps/web/src/content/docs/docs/guides/workspace-pool.mdx b/apps/web/src/content/docs/docs/guides/workspace-pool.mdx
index 685a1f801..f907aeb81 100644
--- a/apps/web/src/content/docs/docs/guides/workspace-pool.mdx
+++ b/apps/web/src/content/docs/docs/guides/workspace-pool.mdx
@@ -31,9 +31,9 @@ On subsequent runs:
 1. AgentV computes the fingerprint from your repo configs
 2. If a matching pool entry exists, it acquires a slot and resets it (`git reset --hard` + `git clean -fd`)
 3. Template files are re-copied (repo directories are preserved)
-4. Lifecycle hooks (`before_all`, etc.) run as normal
+4. Lifecycle extensions (`beforeAll`, etc.) run as normal
 
-**Keep templates small.** Template files are re-copied into every slot on every run. Use them for lightweight setup — agent skills, configuration files, prompt templates — not large assets. Heavy dependencies belong in repos (pooled and reused) or should be installed by `before_all` hooks (cached across reuse cycles with `fast` reset).
+**Keep templates small.** Template files are re-copied into every slot on every run. Use them for lightweight setup — agent skills, configuration files, prompt templates — not large assets. Heavy dependencies belong in repos (pooled and reused) or should be installed by `beforeAll` extensions (cached across reuse cycles with `fast` reset).
 
 The first pooled run materializes from scratch. Subsequent pooled runs reuse the pool — skipping clone and checkout entirely.
 
@@ -57,7 +57,7 @@ execution:
 
 ## Pool reset mode
 
-By default, pool reset uses `git clean -fd` which **preserves `.gitignore`d files** like `node_modules/`, `build/`, and compiled binaries. This means `before_all` build steps survive across reuse cycles.
+By default, pool reset uses `git clean -fd` which **preserves `.gitignore`d files** like `node_modules/`, `build/`, and compiled binaries. This means `beforeAll` build steps survive across reuse cycles.
 
 For strict reset that also removes `.gitignore`d files, use the `--workspace-clean full` CLI flag:
 
diff --git a/apps/web/src/content/docs/docs/targets/configuration.mdx b/apps/web/src/content/docs/docs/targets/configuration.mdx
index 50de9994d..b3e7830e0 100644
--- a/apps/web/src/content/docs/docs/targets/configuration.mdx
+++ b/apps/web/src/content/docs/docs/targets/configuration.mdx
@@ -85,56 +85,49 @@ targets:
     grader_target: azure-base  # LLM used for grading
 ```
 
-### Workspace Lifecycle Hooks
+### Lifecycle Extensions
 
-Run commands and reset/cleanup policies at different lifecycle points using `workspace.hooks`. This can be defined at the suite level (applies to all tests) or per test (overrides suite-level).
-Use workspace hooks for repo preparation such as dependency installs, builds,
-fixture generation, and per-case resets. Use target hooks for runner-specific
-setup.
+Run non-provisioning setup at Promptfoo-compatible lifecycle points using
+top-level `extensions`. The harness materializes `workspace.template` and
+`workspace.repos` first, then runs `beforeAll` extensions. Use extensions for
+dependency installs, builds, fixture generation, and agent-rule staging. Use
+target hooks for runner-specific setup. Keep repo identity and checkout pins in
+`workspace.repos`; extensions must not become the default repo acquisition path.
 
 ```yaml
+extensions:
+  - file://scripts/workspace.mjs:beforeAll
+  - file://scripts/workspace.mjs:beforeEach
+  - file://scripts/workspace.mjs:afterEach
+  - file://scripts/workspace.mjs:afterAll
+  - id: agentv:agent-rules
+    hook: beforeAll
+    skills: agent-rules/skills
+    rules: agent-rules/AGENTS.md
+
 workspace:
   template: ./workspace-templates/my-project
   hooks:
-    before_all:
-      command: ["bun", "run", "setup.ts"]
-      timeout_ms: 120000
-      cwd: ./scripts
     after_each:
-      command: ["bun", "run", "reset.ts"]
-      timeout_ms: 5000
       reset: fast
-    after_all:
-      command: ["bun", "run", "cleanup.ts"]
-      timeout_ms: 30000
 ```
 
 | Field | Description |
 |-------|-------------|
 | `template` | Directory to copy as workspace |
-| `hooks.before_all` | Runs once after workspace creation, before the first test |
-| `hooks.after_all` | Runs once after the last test, before cleanup |
-| `hooks.before_each` | Runs before each test |
-| `hooks.after_each` | Runs after each test (supports both `command` and `reset`) |
-
-Each hook config accepts:
-
-| Field | Description |
-|-------|-------------|
-| `command` | Command array (e.g., `["bun", "run", "setup.ts"]`) |
-| `reset` | Reset mode: `none`, `fast`, `strict` |
-| `timeout_ms` | Timeout in milliseconds (default: 60000 for setup hooks, 30000 for teardown hooks) |
-| `cwd` | Working directory (relative paths resolved against eval file directory) |
+| `extensions[]` | `file://...:beforeAll`, `beforeEach`, `afterEach`, `afterAll`, or `agentv:agent-rules` |
+| `hooks.after_each.reset` | Reset mode: `none`, `fast`, `strict` |
 
-**Lifecycle order:** template copy → repo materialization → workspace `hooks.before_all` → target `hooks.before_all` → git baseline → (`hooks.before_each` → target `hooks.before_each` → agent runs → file changes captured → target `hooks.after_each` → `hooks.after_each`) × N tests → target `hooks.after_all` → `hooks.after_all` → cleanup
+**Lifecycle order:** template copy → repo materialization → `extensions.beforeAll` → target `hooks.before_all` → git baseline → (`extensions.beforeEach` → target `hooks.before_each` → agent runs → file changes captured → target `hooks.after_each` → `extensions.afterEach` → `workspace.hooks.after_each.reset`) × N tests → target `hooks.after_all` → `extensions.afterAll` → cleanup
 
 **Shared workspace:** The workspace is created once and shared across all tests in a suite. Use `hooks.after_each.reset` to reset state between tests (e.g., `fast`/`strict`).
 
 **Error handling:**
-- `hooks.before_all` / `hooks.before_each` command failure aborts the test with an error result
-- `hooks.after_all` / `hooks.after_each` command failure is non-fatal (warning only)
+- `beforeAll` / `beforeEach` extension failure aborts the affected run with an error result
+- `afterAll` / `afterEach` extension failure is non-fatal
 
-**Script context:** All scripts receive a JSON object on stdin with case context:
+**File hook context:** Exported functions receive a JSON-compatible object with
+case context:
 
 ```json
 {
@@ -146,7 +139,9 @@ Each hook config accepts:
 }
 ```
 
-**Suite vs per-test:** When both are defined, test-level fields replace suite-level fields. See [Per-Test Workspace Config](/docs/evaluation/eval-cases/#per-case-workspace-config) for examples.
+`workspace.hooks` is still where the `after_each.reset` policy is configured.
+Legacy command hooks are still parsed for existing local suites, but new
+portable evals should use `extensions` for executable setup.
 
 ### Repository Lifecycle
 
@@ -237,7 +232,7 @@ Use `cwd` on a target to run in an existing directory (shared across tests). If
 
 Eval files can define per-target hooks that run setup/teardown scripts to customize the workspace for each target variant. This enables comparing different harness configurations (e.g., baseline vs with-plugins) in a single eval file.
 
-Targets do not declare `repos`. Repositories belong to the shared eval workspace so every target runs in the same world; target hooks customize the harness under evaluation. Use hooks for per-target setup such as copying skills, enabling wrappers, or changing provider-local config. Keep installs, builds, fixture generation, and case resets in `workspace.hooks`.
+Targets do not declare `repos`. Repositories belong to the shared eval workspace so every target runs in the same world; target hooks customize the harness under evaluation. Use hooks for per-target setup such as enabling wrappers or changing provider-local config. Keep installs, builds, fixture generation, and case setup in top-level lifecycle `extensions`.
 
 Target hooks can be scoped to an eval-local target object:
 
@@ -253,7 +248,7 @@ target:
 
 Target hooks run after workspace hooks on setup, before workspace hooks on teardown:
 
-1. Workspace `before_all`
+1. Extension `beforeAll`
 2. **Target `before_all`**
 3. For each test:
    - Workspace `before_each`
diff --git a/apps/web/src/content/docs/docs/tools/prepare.mdx b/apps/web/src/content/docs/docs/tools/prepare.mdx
index c93470fd1..ac07cddcc 100644
--- a/apps/web/src/content/docs/docs/tools/prepare.mdx
+++ b/apps/web/src/content/docs/docs/tools/prepare.mdx
@@ -17,12 +17,12 @@ The prepared directory contains:
 
 ```text
 /tmp/agentv-case-1/
-  workspace/              # materialized template/repos/hooks state
+  workspace/              # materialized template/repos/extensions state
   prompt.md               # safe task prompt for the human or external agent
   agentv_prepare.json     # snake_case manifest for audit and later grading
 ```
 
-`prepare` runs setup only: workspace `before_all`, target `before_all`, workspace `before_each`, and target `before_each`. It does not launch the agent, run graders, mark an eval complete, or expose hidden expected outputs and grader internals in `prompt.md`.
+`prepare` runs setup only: workspace materialization, extension `beforeAll`, target `before_all`, extension `beforeEach`, and target `before_each`. It does not launch the agent, run graders, mark an eval complete, or expose hidden expected outputs and grader internals in `prompt.md`.
 
 ## Grade the Attempt
 
diff --git a/examples/README.md b/examples/README.md
index 93f662ee9..f64080578 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -48,7 +48,7 @@ Focused demonstrations of specific AgentV capabilities. Each example includes it
 - [compare](features/compare/) - Baseline comparison
 - [deterministic-graders](features/deterministic-graders/) - Deterministic assertions (contains, regex, JSON validation)
 - [vitest-workspace-grader](features/vitest-workspace-grader/) - Vitest-style deterministic workspace verifiers
-- [workspace-setup-script](features/workspace-setup-script/) - Multi-step workspace setup with `before_all` lifecycle hook
+- [workspace-setup-script](features/workspace-setup-script/) - Multi-step workspace setup with a `beforeAll` lifecycle extension
 
 ### SDK
 
diff --git a/examples/features/README.md b/examples/features/README.md
index 40153f696..632f10b4d 100644
--- a/examples/features/README.md
+++ b/examples/features/README.md
@@ -98,7 +98,7 @@ Focused examples for specific AgentV capabilities. Find your use case below, the
 ### Workspace and agent setup
 | Example | Description |
 |---------|-------------|
-| [workspace-setup-script](workspace-setup-script/) | Multi-step setup with the `before_all` lifecycle hook |
+| [workspace-setup-script](workspace-setup-script/) | Multi-step setup with a `beforeAll` lifecycle extension |
 | [workspace-multi-repo](workspace-multi-repo/) | Multi-repo workspace using a VS Code `.code-workspace` file |
 | [workspace-shared-config](workspace-shared-config/) | Define a `workspace.yaml` once and reference it across eval files |
 | [repo-lifecycle](repo-lifecycle/) | Clone a git repo into the workspace and target the agent at it |
diff --git a/examples/features/copilot-log-eval/README.md b/examples/features/copilot-log-eval/README.md
index 59aadfdb7..d0ac4b635 100644
--- a/examples/features/copilot-log-eval/README.md
+++ b/examples/features/copilot-log-eval/README.md
@@ -38,7 +38,7 @@ the latest session from `~/.copilot/session-state/` and runs all graders.
 ## How it works
 
 ```
-allagents workspace init (before_all hook)
+allagents workspace init (setup hook)
   ↓ syncs agentv-dev plugin skills from marketplace
 ~/.copilot/session-state/{uuid}/events.jsonl
   ↓ copilot-log provider (reads from disk)
diff --git a/examples/features/file-changes-with-repos/evals/eval.yaml b/examples/features/file-changes-with-repos/evals/eval.yaml
index ebbdc1bde..b029608a8 100644
--- a/examples/features/file-changes-with-repos/evals/eval.yaml
+++ b/examples/features/file-changes-with-repos/evals/eval.yaml
@@ -6,8 +6,8 @@
 #
 # Setup:
 #   - workspace.template copies workspace-template/ into the temp workspace
-#   - before_all hook initialises my-lib/ as a git repo inside the workspace
-#   - initializeBaseline (runs after before_all) sees my-lib/.git as a gitlink
+#   - setup hook initializes my-lib/ as a git repo inside the workspace
+#   - initializeBaseline sees my-lib/.git as a gitlink after setup
 #
 # Agent behaviour:
 #   - Writes report.txt to workspace root (not inside any repo)
diff --git a/examples/features/tool-calls-template/evals/eval.yaml b/examples/features/tool-calls-template/evals/eval.yaml
index 18976db70..a8f9a96d3 100644
--- a/examples/features/tool-calls-template/evals/eval.yaml
+++ b/examples/features/tool-calls-template/evals/eval.yaml
@@ -4,7 +4,7 @@
 # whether an agent invoked the right skills — without needing the
 # skill-trigger evaluator.
 #
-# Skills live in workspace/.agents/skills/. The before_all hook copies
+# Skills live in workspace/.agents/skills/. The setup hook copies
 # them to .claude/skills/ so copilot and other providers can discover them.
 #
 # Run:
diff --git a/examples/features/workspace-setup-script/README.md b/examples/features/workspace-setup-script/README.md
index d2fdccfd4..d557ebafd 100644
--- a/examples/features/workspace-setup-script/README.md
+++ b/examples/features/workspace-setup-script/README.md
@@ -1,78 +1,54 @@
-# Workspace Setup Script
+# Workspace Setup Extension
 
-Demonstrates using a `before_all` lifecycle hook to clean and re-initialize an allagents workspace before evaluation runs, then register a project-scoped marketplace and sync plugin content (including prompt files).
+Demonstrates using a `beforeAll` lifecycle extension to clean and re-initialize an allagents workspace before evaluation runs, then register a project-scoped marketplace and sync plugin content.
 
 ## Problem
 
-`allagents workspace init` fails if `.allagents/workspace.yaml` already exists. In CI and repeated eval runs, stale artifacts need to be cleaned first. Without a wrapper, you'd need shell operators like `&&` (not cross-platform) or framework-level multi-command support.
+`allagents workspace init` fails if `.allagents/workspace.yaml` already exists. In CI and repeated eval runs, stale artifacts need to be cleaned before project-scoped plugin content is synced.
 
 ## Solution
 
-A generic Node.js script that any eval can reuse. It reads `workspace_path` from AgentV's stdin JSON, removes stale `.allagents/` state, runs `allagents workspace init --from`, registers a project-scoped marketplace, then runs `allagents workspace sync`.
+A Node.js lifecycle extension exports `beforeAll(context)`. AgentV runs it after `workspace.template` and `workspace.repos` materialize, so the extension can safely prepare local configuration without owning repo provisioning.
 
 ```
 workspace-setup-script/
 ├── evals/
-│   └── dataset.eval.yaml        # Eval with before_all hook
+│   └── dataset.eval.yaml        # Eval with beforeAll extension
 ├── plugins/
 │   └── my-plugin/               # Plugin content (AGENTS + prompt)
-│       ├── AGENTS.md             # Agent guidelines
+│       ├── AGENTS.md
 │       └── .github/
 │           └── prompts/
 │               └── summarize-repo.prompt.md
 ├── marketplace/
 │   └── .claude-plugin/
-│       └── marketplace.json     # Local marketplace manifest
+│       └── marketplace.json
 ├── scripts/
-│   └── workspace-setup.mjs      # Generic setup script (reusable across evals)
+│   └── workspace-setup.mjs      # Lifecycle extension module
 └── workspace-template/
     └── .allagents/
-        └── workspace.yaml       # Template for allagents init
+        └── workspace.yaml
 ```
 
-## Plugin Installation via Project Marketplace
-
-The `.allagents/workspace.yaml` installs a plugin from a named marketplace:
-
-```yaml
-# .allagents/workspace.yaml
-plugins:
-  - my-plugin@workspace-setup-script-marketplace
-```
-
-The setup script registers that marketplace using project scope:
-
-```bash
-npx --yes allagents plugin marketplace add ../marketplace --scope project
-```
-
-This matches the project-scoped marketplace flow introduced in `allagents` (PR #224).
-
 ## Eval YAML
 
-The template path and local marketplace path are passed as arguments. Use `--require` to validate expected artifacts after sync:
+Use top-level `extensions` for executable setup and keep repos under `workspace.repos`:
 
 ```yaml
+extensions:
+  - file://../scripts/workspace-setup.mjs:beforeAll
+
 workspace:
-  template: ./workspace-template
-  hooks:
-    before_all:
-      command:
-        - node
-        - ../scripts/workspace-setup.mjs
-        - --from
-        - ../workspace-template/.allagents/workspace.yaml
-        - --marketplace-source
-        - ../marketplace
-        - --require
-        - AGENTS.md
-        - --require
-        - .github/prompts/summarize-repo.prompt.md
+  template: ../workspace-template
+  repos:
+    - path: ./my-repo
+      repo: https://github.com/EntityProcess/agentv.git
+      commit: main
 ```
 
-The `--require` flag accepts one or more file paths (relative to the workspace root). If any required file is missing after `allagents workspace init`, the script exits with an error listing the missing files.
+The extension reads `context.workspace_path` and `context.eval_dir`, refreshes `.allagents/`, runs `allagents workspace init`, registers the local marketplace with `--scope project`, syncs plugins, and validates that expected artifacts exist.
 
-## Referencing plugin files in test inputs
+## Referencing Plugin Files In Test Inputs
 
 Reference plugin files via `type: file` in test inputs to inject them into the agent's prompt:
 
@@ -90,22 +66,18 @@ tests:
 
 The `type: file` path is resolved from the eval file's directory up to the repo root. This injects the file contents into the agent's prompt alongside any text instructions.
 
-## How it works
-
-1. AgentV copies `workspace-template/` to a pooled workspace
-2. The setup script removes stale `.allagents/` config and runs `npx allagents workspace init`
-3. The setup script registers the local marketplace with `--scope project`
-4. `allagents workspace sync` installs `my-plugin@workspace-setup-script-marketplace`
-5. `--require` checks verify `AGENTS.md` and `.github/prompts/summarize-repo.prompt.md` exist
-6. AgentV clones repos and runs tests against the initialized workspace
+## How It Works
 
-## Cross-platform
+1. AgentV copies `workspace-template/` to a pooled workspace.
+2. AgentV clones `workspace.repos`.
+3. The `beforeAll` extension removes stale `.allagents/` config and runs `npx allagents workspace init`.
+4. The extension registers the local marketplace with `--scope project`.
+5. `allagents workspace sync` installs `my-plugin@workspace-setup-script-marketplace`.
+6. Required-file checks verify `AGENTS.md` and `.github/prompts/summarize-repo.prompt.md` exist.
 
-The script handles Windows by using `npx.cmd` instead of `npx`.
+## Cross-Platform Notes
 
-Because the script first reads AgentV payload from stdin, it then launches `npx` with:
+The extension handles Windows by using `npx.cmd` instead of `npx` and launches subprocesses with:
 
 - `stdio: ['ignore', 'inherit', 'inherit']`
 - `shell: process.platform === 'win32'`
-
-This avoids a Windows-specific `spawnSync npx.cmd EINVAL` failure seen when stdin is inherited after being consumed in `before_all` hooks.
diff --git a/examples/features/workspace-setup-script/evals/dataset-vscode.eval.yaml b/examples/features/workspace-setup-script/evals/dataset-vscode.eval.yaml
index 8e0a2ef6e..ee2e1a43b 100644
--- a/examples/features/workspace-setup-script/evals/dataset-vscode.eval.yaml
+++ b/examples/features/workspace-setup-script/evals/dataset-vscode.eval.yaml
@@ -1,22 +1,13 @@
 description: >-
-  Demonstrates using a before_all workspace setup script with the VSCode target.
+  Demonstrates using a beforeAll lifecycle extension with the VSCode target.
   Same as dataset.eval.yaml but uses vscode instead of copilot.
 
+extensions:
+  - file://../scripts/workspace-setup.mjs:beforeAll
+
 workspace:
   template: ../workspace-template
   hooks:
-    before_all:
-      command:
-        - node
-        - ../scripts/workspace-setup.mjs
-        - --from
-        - ../workspace-template/.allagents/workspace.yaml
-        - --marketplace-source
-        - ../marketplace
-        - --require
-        - AGENTS.md
-        - --require
-        - .github/prompts/summarize-repo.prompt.md
     after_each:
       reset: fast
   repos:
diff --git a/examples/features/workspace-setup-script/evals/dataset.eval.yaml b/examples/features/workspace-setup-script/evals/dataset.eval.yaml
index 27fdb1bb5..ac6f62ada 100644
--- a/examples/features/workspace-setup-script/evals/dataset.eval.yaml
+++ b/examples/features/workspace-setup-script/evals/dataset.eval.yaml
@@ -1,22 +1,12 @@
 description: >-
-  Demonstrates using a before_all workspace setup script to clean and
+  Demonstrates using a beforeAll lifecycle extension to clean and
   re-initialize an allagents workspace before evaluation runs.
 
+extensions:
+  - file://../scripts/workspace-setup.mjs:beforeAll
+
 workspace:
   template: ../workspace-template
-  hooks:
-    before_all:
-      command:
-        - node
-        - ../scripts/workspace-setup.mjs
-        - --from
-        - ../workspace-template/.allagents/workspace.yaml
-        - --marketplace-source
-        - ../marketplace
-        - --require
-        - AGENTS.md
-        - --require
-        - .github/prompts/summarize-repo.prompt.md
   repos:
     - path: ./my-repo
       repo: https://github.com/EntityProcess/agentv.git
diff --git a/examples/features/workspace-setup-script/scripts/workspace-setup.mjs b/examples/features/workspace-setup-script/scripts/workspace-setup.mjs
index 7c45ee286..0e351e908 100644
--- a/examples/features/workspace-setup-script/scripts/workspace-setup.mjs
+++ b/examples/features/workspace-setup-script/scripts/workspace-setup.mjs
@@ -1,149 +1,158 @@
-#!/usr/bin/env node
 // @ts-check
 //
-// Generic workspace setup script for AgentV before_all lifecycle hook.
+// AgentV beforeAll lifecycle extension for this example.
 //
-// Reads workspace_path from AgentV stdin JSON, removes stale .allagents/
-// config, copies source directories, and runs `npx allagents workspace init`.
-//
-// Usage in eval YAML:
-//   workspace:
-//     hooks:
-//       before_all:
-//         command:
-//           - node
-//           - ../scripts/workspace-setup.mjs
-//           - --from
-//           - ../workspace-template/.allagents/workspace.yaml
-//           - --source
-//           - ../guidelines
-//           - --require
-//           - AGENTS.md
+// It runs after workspace.template and workspace.repos materialize, then
+// refreshes allagents project state inside the prepared workspace.
 
 import { spawnSync } from 'node:child_process';
-import { cpSync, existsSync, readFileSync, rmSync } from 'node:fs';
-import { basename, isAbsolute, join, resolve } from 'node:path';
-
-// --- parse arguments ---
-const fromIndex = process.argv.indexOf('--from');
-if (fromIndex === -1 || !process.argv[fromIndex + 1]) {
-  console.error(
-    'Usage: workspace-setup.mjs --from <template-path> [--source <dir> ...] [--marketplace-source <dir>] [--marketplace-name <name>] [--require <file> ...]',
-  );
-  process.exit(1);
-}
-const templatePath = process.argv[fromIndex + 1];
-
-// Collect --source arguments: directories to copy into the workspace before init
-const sourceDirs = [];
-for (let i = 0; i < process.argv.length; i++) {
-  if (process.argv[i] === '--source' && process.argv[i + 1]) {
-    sourceDirs.push(process.argv[i + 1]);
-    i++;
-  }
-}
+import { existsSync, readFileSync, rmSync } from 'node:fs';
+import { join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const REQUIRED_FILES = ['AGENTS.md', '.github/prompts/summarize-repo.prompt.md'];
 
-// Collect --require arguments: files that must exist in the workspace after init
-const requiredFiles = [];
-for (let i = 0; i < process.argv.length; i++) {
-  if (process.argv[i] === '--require' && process.argv[i + 1]) {
-    requiredFiles.push(process.argv[i + 1]);
-    i++;
+/**
+ * @param {{
+ *   workspace_path?: string;
+ *   eval_dir: string;
+ * }} context
+ */
+export function beforeAll(context) {
+  const workspacePath = context.workspace_path;
+  if (!workspacePath) {
+    throw new Error('workspace_path not provided to workspace setup extension');
   }
-}
 
-// Optional project-scoped marketplace source to register after init.
-const marketplaceSourceIndex = process.argv.indexOf('--marketplace-source');
-const marketplaceSource =
-  marketplaceSourceIndex !== -1 ? process.argv[marketplaceSourceIndex + 1] : undefined;
-const marketplaceNameIndex = process.argv.indexOf('--marketplace-name');
-const marketplaceName =
-  marketplaceNameIndex !== -1 ? process.argv[marketplaceNameIndex + 1] : undefined;
-
-// --- stdin context from AgentV ---
-const { workspace_path } = JSON.parse(readFileSync(0, 'utf8'));
-if (!workspace_path) {
-  console.error('workspace_path not provided on stdin');
-  process.exit(1);
-}
+  const templatePath = resolve(context.eval_dir, '../workspace-template/.allagents/workspace.yaml');
+  const marketplaceSource = resolve(context.eval_dir, '../marketplace');
 
-// --- copy source directories into workspace ---
-for (const src of sourceDirs) {
-  if (!existsSync(src)) {
-    console.error(`Source directory not found: ${src}`);
-    process.exit(1);
-  }
-  const dest = join(workspace_path, basename(src));
-  cpSync(src, dest, { recursive: true });
-}
+  runAllagentsSetup({
+    workspacePath,
+    templatePath,
+    marketplaceSource,
+    requiredFiles: REQUIRED_FILES,
+  });
 
-// --- clean previous workspace config ---
-rmSync(join(workspace_path, '.allagents'), { recursive: true, force: true });
-
-// --- run allagents workspace init ---
-const npx = process.platform === 'win32' ? 'npx.cmd' : 'npx';
-const result = spawnSync(
-  npx,
-  ['--yes', 'allagents', 'workspace', 'init', workspace_path, '--from', templatePath],
-  {
-    // This script reads AgentV stdin first, so don't pass fd 0 through.
-    // On Windows, inheriting stdin into `npx.cmd` can raise EINVAL.
-    // shell=true ensures `.cmd` is launched reliably.
-    stdio: ['ignore', 'inherit', 'inherit'],
-    shell: process.platform === 'win32',
-  },
-);
-if (result.status !== 0) {
-  process.exit(result.status ?? 1);
+  return {
+    metadata: {
+      workspace_setup: {
+        marketplace_source: marketplaceSource,
+        required_files: REQUIRED_FILES,
+      },
+    },
+  };
 }
 
-// --- optionally register project-scoped marketplace and resync ---
-if (marketplaceSource) {
-  const resolvedMarketplaceSource = isAbsolute(marketplaceSource)
-    ? marketplaceSource
-    : resolve(process.cwd(), marketplaceSource);
+/**
+ * @param {{
+ *   workspacePath: string;
+ *   templatePath: string;
+ *   marketplaceSource?: string;
+ *   marketplaceName?: string;
+ *   requiredFiles: readonly string[];
+ * }} options
+ */
+function runAllagentsSetup(options) {
+  rmSync(join(options.workspacePath, '.allagents'), { recursive: true, force: true });
 
-  const addMarketplaceArgs = [
+  const npx = process.platform === 'win32' ? 'npx.cmd' : 'npx';
+  run(npx, [
     '--yes',
     'allagents',
-    'plugin',
-    'marketplace',
-    'add',
-    resolvedMarketplaceSource,
-    '--scope',
-    'project',
-  ];
-  if (marketplaceName) {
-    addMarketplaceArgs.push('--name', marketplaceName);
+    'workspace',
+    'init',
+    options.workspacePath,
+    '--from',
+    options.templatePath,
+  ]);
+
+  if (options.marketplaceSource) {
+    const addMarketplaceArgs = [
+      '--yes',
+      'allagents',
+      'plugin',
+      'marketplace',
+      'add',
+      options.marketplaceSource,
+      '--scope',
+      'project',
+    ];
+    if (options.marketplaceName) {
+      addMarketplaceArgs.push('--name', options.marketplaceName);
+    }
+    run(npx, addMarketplaceArgs, options.workspacePath);
+    run(npx, ['--yes', 'allagents', 'workspace', 'sync'], options.workspacePath);
   }
 
-  const addMarketplaceResult = spawnSync(npx, addMarketplaceArgs, {
-    stdio: ['ignore', 'inherit', 'inherit'],
-    shell: process.platform === 'win32',
-    cwd: workspace_path,
-  });
-  if (addMarketplaceResult.status !== 0) {
-    process.exit(addMarketplaceResult.status ?? 1);
+  const missing = options.requiredFiles.filter(
+    (file) => !existsSync(join(options.workspacePath, file)),
+  );
+  if (missing.length > 0) {
+    throw new Error(`Required artifacts not found in workspace: ${missing.join(', ')}`);
   }
+}
 
-  const syncResult = spawnSync(npx, ['--yes', 'allagents', 'workspace', 'sync'], {
+/**
+ * @param {string} command
+ * @param {readonly string[]} args
+ * @param {string | undefined} cwd
+ */
+function run(command, args, cwd = undefined) {
+  const result = spawnSync(command, args, {
     stdio: ['ignore', 'inherit', 'inherit'],
     shell: process.platform === 'win32',
-    cwd: workspace_path,
+    ...(cwd ? { cwd } : {}),
   });
-  if (syncResult.status !== 0) {
-    process.exit(syncResult.status ?? 1);
+  if (result.status !== 0) {
+    throw new Error(`${command} ${args.join(' ')} failed with exit ${result.status ?? 1}`);
   }
 }
 
-// --- validate required artifacts exist in workspace ---
-const missing = requiredFiles.filter((file) => !existsSync(join(workspace_path, file)));
-if (missing.length > 0) {
-  console.error('Required artifacts not found in workspace:');
-  for (const file of missing) {
-    console.error(`  - ${file}`);
+function runCli() {
+  const fromIndex = process.argv.indexOf('--from');
+  if (fromIndex === -1 || !process.argv[fromIndex + 1]) {
+    throw new Error(
+      'Usage: workspace-setup.mjs --from <template-path> [--marketplace-source <dir>] [--marketplace-name <name>] [--require <file> ...]',
+    );
+  }
+
+  const { workspace_path } = JSON.parse(readFileSync(0, 'utf8'));
+  if (!workspace_path) {
+    throw new Error('workspace_path not provided on stdin');
   }
-  process.exit(1);
+
+  const requiredFiles = [];
+  for (let i = 0; i < process.argv.length; i++) {
+    if (process.argv[i] === '--require' && process.argv[i + 1]) {
+      requiredFiles.push(process.argv[i + 1]);
+      i++;
+    }
+  }
+
+  const marketplaceSourceIndex = process.argv.indexOf('--marketplace-source');
+  const marketplaceSource =
+    marketplaceSourceIndex !== -1
+      ? resolve(process.cwd(), process.argv[marketplaceSourceIndex + 1])
+      : undefined;
+  const marketplaceNameIndex = process.argv.indexOf('--marketplace-name');
+  const marketplaceName =
+    marketplaceNameIndex !== -1 ? process.argv[marketplaceNameIndex + 1] : undefined;
+
+  runAllagentsSetup({
+    workspacePath: workspace_path,
+    templatePath: resolve(process.cwd(), process.argv[fromIndex + 1]),
+    ...(marketplaceSource ? { marketplaceSource } : {}),
+    ...(marketplaceName ? { marketplaceName } : {}),
+    requiredFiles,
+  });
 }
 
-process.exit(0);
+if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) {
+  try {
+    runCli();
+  } catch (error) {
+    console.error(error instanceof Error ? error.message : String(error));
+    process.exit(1);
+  }
+}
diff --git a/packages/core/src/evaluation/extensions/runner.ts b/packages/core/src/evaluation/extensions/runner.ts
new file mode 100644
index 000000000..e7ba6aed0
--- /dev/null
+++ b/packages/core/src/evaluation/extensions/runner.ts
@@ -0,0 +1,290 @@
+import { type Stats, existsSync } from 'node:fs';
+import { cp, mkdir, stat } from 'node:fs/promises';
+import path from 'node:path';
+import { pathToFileURL } from 'node:url';
+
+import type {
+  AgentRulesExtensionConfig,
+  AgentRulesPaths,
+  AgentVExtensionConfig,
+  EvalTest,
+  ExtensionLifecycleHook,
+  JsonObject,
+} from '../types.js';
+
+export interface ExtensionHookContext {
+  readonly hook_name: ExtensionLifecycleHook;
+  readonly workspace_path?: string;
+  readonly test_id: string;
+  readonly eval_run_id?: string;
+  readonly eval_dir: string;
+  readonly case_input?: string;
+  readonly case_metadata?: Record<string, unknown>;
+  readonly workspace_file_dir?: string;
+  readonly provider_context?: JsonObject;
+  readonly agent_rules_paths?: AgentRulesPaths;
+}
+
+export interface ExtensionRuntimeState {
+  readonly providerContext?: JsonObject;
+  readonly metadata?: Record<string, unknown>;
+  readonly output?: string;
+  readonly agentRulesPaths?: AgentRulesPaths;
+}
+
+type ExtensionReturn = {
+  readonly provider_context?: JsonObject;
+  readonly metadata?: Record<string, unknown>;
+  readonly output?: string;
+  readonly agent_rules_paths?: AgentRulesPaths;
+};
+
+export function mergeExtensionState(
+  left: ExtensionRuntimeState | undefined,
+  right: ExtensionRuntimeState | undefined,
+): ExtensionRuntimeState | undefined {
+  if (!left) return right;
+  if (!right) return left;
+
+  const agentRulesPaths = mergeAgentRulesPaths(left.agentRulesPaths, right.agentRulesPaths);
+  const providerContext = {
+    ...(left.providerContext ?? {}),
+    ...(right.providerContext ?? {}),
+    ...(agentRulesPaths ? { agent_rules_paths: agentRulesPaths } : {}),
+  };
+  const metadata = {
+    ...(left.metadata ?? {}),
+    ...(right.metadata ?? {}),
+    ...(agentRulesPaths ? { agent_rules_paths: agentRulesPaths } : {}),
+  };
+  const output = [left.output, right.output].filter(Boolean).join('\n') || undefined;
+
+  return {
+    ...(Object.keys(providerContext).length > 0 ? { providerContext } : {}),
+    ...(Object.keys(metadata).length > 0 ? { metadata } : {}),
+    ...(output !== undefined ? { output } : {}),
+    ...(agentRulesPaths !== undefined ? { agentRulesPaths } : {}),
+  };
+}
+
+export async function runExtensionsForHook(options: {
+  readonly extensions: readonly AgentVExtensionConfig[] | undefined;
+  readonly hook: ExtensionLifecycleHook;
+  readonly context: ExtensionHookContext;
+  readonly state?: ExtensionRuntimeState;
+}): Promise<ExtensionRuntimeState | undefined> {
+  const matching = (options.extensions ?? []).filter(
+    (extension) => extension.hook === options.hook,
+  );
+  if (matching.length === 0) {
+    return options.state;
+  }
+
+  let state = options.state;
+  for (const extension of matching) {
+    const context = buildContextWithState(options.context, state);
+    const next = isAgentRulesExtension(extension)
+      ? await runAgentRulesExtension(extension, context)
+      : await runFileExtension(extension, context);
+    state = mergeExtensionState(state, next);
+  }
+  return state;
+}
+
+function buildContextWithState(
+  context: ExtensionHookContext,
+  state: ExtensionRuntimeState | undefined,
+): ExtensionHookContext {
+  return {
+    ...context,
+    ...(state?.providerContext !== undefined ? { provider_context: state.providerContext } : {}),
+    ...(state?.agentRulesPaths !== undefined ? { agent_rules_paths: state.agentRulesPaths } : {}),
+  };
+}
+
+function isAgentRulesExtension(
+  extension: AgentVExtensionConfig,
+): extension is AgentRulesExtensionConfig {
+  return extension.id === 'agentv:agent-rules';
+}
+
+async function runFileExtension(
+  extension: Exclude<AgentVExtensionConfig, AgentRulesExtensionConfig>,
+  context: ExtensionHookContext,
+): Promise<ExtensionRuntimeState | undefined> {
+  const moduleUrl = pathToFileURL(extension.path);
+  moduleUrl.search = `t=${Date.now()}-${Math.random().toString(36).slice(2)}`;
+  const imported = (await import(moduleUrl.href)) as Record<string, unknown>;
+  const defaultExport = imported.default;
+  const maybeCommonJs =
+    defaultExport && typeof defaultExport === 'object'
+      ? (defaultExport as Record<string, unknown>)[extension.functionName]
+      : undefined;
+  const hookFn = imported[extension.functionName] ?? maybeCommonJs;
+  if (typeof hookFn !== 'function') {
+    throw new Error(`Extension ${extension.id} does not export function ${extension.functionName}`);
+  }
+
+  const result = (await hookFn(context, { hookName: extension.hook })) as unknown;
+  return normalizeExtensionReturn(result);
+}
+
+async function runAgentRulesExtension(
+  extension: AgentRulesExtensionConfig,
+  context: ExtensionHookContext,
+): Promise<ExtensionRuntimeState | undefined> {
+  if (!context.workspace_path) {
+    throw new Error('agentv:agent-rules requires a materialized workspace');
+  }
+
+  const paths: AgentRulesPaths = {
+    skills: await stageConfiguredOrDiscover({
+      kind: 'skills',
+      configured: extension.skills,
+      evalDir: context.eval_dir,
+      workspacePath: context.workspace_path,
+      discover: ['.claude/skills', '.agents/skills', '.codex/skills', '.pi/skills', 'skills'],
+    }),
+    hooks: await stageConfiguredOrDiscover({
+      kind: 'hooks',
+      configured: extension.hooks,
+      evalDir: context.eval_dir,
+      workspacePath: context.workspace_path,
+      discover: ['.claude/hooks', '.agents/hooks', '.codex/hooks', '.pi/hooks', 'hooks'],
+    }),
+    agents: await stageConfiguredOrDiscover({
+      kind: 'agents',
+      configured: extension.agents,
+      evalDir: context.eval_dir,
+      workspacePath: context.workspace_path,
+      discover: ['.agents/agents', '.codex/agents', 'agents'],
+    }),
+    rules: await stageConfiguredOrDiscover({
+      kind: 'rules',
+      configured: extension.rules,
+      evalDir: context.eval_dir,
+      workspacePath: context.workspace_path,
+      discover: ['AGENTS.md', 'CLAUDE.md', 'rules'],
+    }),
+  };
+  const compactPaths = compactAgentRulesPaths(paths);
+  if (!compactPaths) {
+    return undefined;
+  }
+
+  return normalizeExtensionReturn({
+    provider_context: { agent_rules_paths: compactPaths },
+    metadata: { agent_rules_paths: compactPaths },
+    agent_rules_paths: compactPaths,
+  });
+}
+
+async function stageConfiguredOrDiscover(options: {
+  readonly kind: keyof AgentRulesPaths;
+  readonly configured: readonly string[] | undefined;
+  readonly evalDir: string;
+  readonly workspacePath: string;
+  readonly discover: readonly string[];
+}): Promise<readonly string[] | undefined> {
+  if (!options.configured || options.configured.length === 0) {
+    const discovered = options.discover
+      .map((candidate) => path.resolve(options.workspacePath, candidate))
+      .filter((candidate) => existsSync(candidate));
+    return discovered.length > 0 ? discovered : undefined;
+  }
+
+  const staged: string[] = [];
+  const stageRoot = path.join(options.workspacePath, '.agentv', 'agent-rules', options.kind);
+  await mkdir(stageRoot, { recursive: true });
+
+  for (const entry of options.configured) {
+    const sourcePath = path.isAbsolute(entry) ? entry : path.resolve(options.evalDir, entry);
+    let sourceStat: Stats;
+    try {
+      sourceStat = await stat(sourcePath);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      throw new Error(`agentv:agent-rules ${options.kind} path not found: ${entry}: ${message}`);
+    }
+
+    if (isInside(options.workspacePath, sourcePath)) {
+      staged.push(sourcePath);
+      continue;
+    }
+
+    const destPath = path.join(stageRoot, path.basename(sourcePath));
+    await cp(sourcePath, destPath, {
+      recursive: sourceStat.isDirectory(),
+      force: true,
+    });
+    staged.push(destPath);
+  }
+
+  return staged.length > 0 ? staged : undefined;
+}
+
+function normalizeExtensionReturn(value: unknown): ExtensionRuntimeState | undefined {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) {
+    return undefined;
+  }
+  const result = value as ExtensionReturn;
+  const agentRulesPaths = compactAgentRulesPaths(result.agent_rules_paths);
+  const providerContext = {
+    ...(result.provider_context ?? {}),
+    ...(agentRulesPaths ? { agent_rules_paths: agentRulesPaths } : {}),
+  };
+  const metadata = {
+    ...(result.metadata ?? {}),
+    ...(agentRulesPaths ? { agent_rules_paths: agentRulesPaths } : {}),
+  };
+
+  return {
+    ...(Object.keys(providerContext).length > 0 ? { providerContext } : {}),
+    ...(Object.keys(metadata).length > 0 ? { metadata } : {}),
+    ...(typeof result.output === 'string' ? { output: result.output } : {}),
+    ...(agentRulesPaths ? { agentRulesPaths } : {}),
+  };
+}
+
+function compactAgentRulesPaths(paths: AgentRulesPaths | undefined): AgentRulesPaths | undefined {
+  if (!paths) {
+    return undefined;
+  }
+  const compacted: AgentRulesPaths = {
+    ...(paths.skills && paths.skills.length > 0 ? { skills: [...paths.skills] } : {}),
+    ...(paths.hooks && paths.hooks.length > 0 ? { hooks: [...paths.hooks] } : {}),
+    ...(paths.agents && paths.agents.length > 0 ? { agents: [...paths.agents] } : {}),
+    ...(paths.rules && paths.rules.length > 0 ? { rules: [...paths.rules] } : {}),
+  };
+  return Object.keys(compacted).length > 0 ? compacted : undefined;
+}
+
+function mergeAgentRulesPaths(
+  left: AgentRulesPaths | undefined,
+  right: AgentRulesPaths | undefined,
+): AgentRulesPaths | undefined {
+  if (!left) return compactAgentRulesPaths(right);
+  if (!right) return compactAgentRulesPaths(left);
+
+  return compactAgentRulesPaths({
+    skills: mergePathLists(left.skills, right.skills),
+    hooks: mergePathLists(left.hooks, right.hooks),
+    agents: mergePathLists(left.agents, right.agents),
+    rules: mergePathLists(left.rules, right.rules),
+  });
+}
+
+function mergePathLists(
+  left: readonly string[] | undefined,
+  right: readonly string[] | undefined,
+): readonly string[] | undefined {
+  const merged = [...(left ?? []), ...(right ?? [])];
+  return merged.length > 0 ? [...new Set(merged)] : undefined;
+}
+
+function isInside(root: string, candidate: string): boolean {
+  const relative = path.relative(root, candidate);
+  return (
+    relative === '' || (!!relative && !relative.startsWith('..') && !path.isAbsolute(relative))
+  );
+}
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 93e15cab5..29518ee3a 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -4,6 +4,7 @@ import { fileURLToPath } from 'node:url';
 import micromatch from 'micromatch';
 import pLimit from 'p-limit';
 
+import { runExtensionsForHook } from './extensions/runner.js';
 import { readJsonFile } from './file-utils.js';
 import {
   type ChildGraderResult,
@@ -145,6 +146,21 @@ function extractProviderRawLogPath(response: ProviderResponse): string | undefin
   return trimmed.length > 0 ? trimmed : undefined;
 }
 
+/** Shallow-merge two metadata bags (overlay keys win); undefined when the result has no keys. */
+function mergeMetadata(
+  base: Record<string, unknown> | undefined,
+  overlay: JsonObject | Record<string, unknown> | undefined,
+): JsonObject | undefined {
+  // Later spread wins, so overlay entries replace same-named base entries.
+  const combined = { ...base, ...overlay } as JsonObject;
+  if (Object.keys(combined).length === 0) return undefined;
+  return combined;
+}
+
+function mergeTextOutput(left: string | undefined, right: string | undefined): string | undefined {
+  return left && right ? `${left}\n${right}` : left || right || undefined; // skip empty parts
+}
+
 interface EvaluationRuntimeOptions {
   readonly target: ResolvedTarget;
   readonly targets?: readonly TargetDefinition[];
@@ -415,6 +431,8 @@ export interface RunEvalCaseOptions {
   readonly sharedWorkspacePath?: string;
   /** Pre-initialized baseline commit for shared workspace */
   readonly sharedBaselineCommit?: string;
+  /** Provider/runtime context produced by shared beforeAll extensions. */
+  readonly sharedExtensionState?: import('./extensions/runner.js').ExtensionRuntimeState;
   /** Suite-level .code-workspace file (resolved from workspace.template) */
   readonly suiteWorkspaceFile?: string;
   /** Real-time observability callbacks passed to the provider */
@@ -959,10 +977,14 @@ export async function runEvaluation(
     poolSlots,
     availablePoolSlots,
     poolSlotBaselines,
+    poolSlotExtensionStates,
     useStaticWorkspace,
+    extensionState: sharedExtensionState,
   } = sharedSetup;
   const targetHooks = options.targetHooks;
   const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
+  const suiteExtensions =
+    filteredEvalCases.find((evalCase) => evalCase.extensions?.length)?.extensions ?? [];
 
   try {
     // Track worker assignments for progress reporting
@@ -1216,6 +1238,11 @@ export async function runEvaluation(
           ? poolSlotBaselines.get(testPoolSlot.path)
           : sharedBaselineCommit
         : undefined;
+      const testExtensionState = usesSharedWorkspace
+        ? testPoolSlot
+          ? poolSlotExtensionStates.get(testPoolSlot.path)
+          : sharedExtensionState
+        : undefined;
 
       try {
         const graderProvider = await resolveGraderProvider(target);
@@ -1247,6 +1274,7 @@ export async function runEvaluation(
           verbose,
           threshold: scoreThreshold,
           targetHooks: options.targetHooks,
+          sharedExtensionState: testExtensionState,
           replayRecording,
           evalFilePath,
           repoRoot: repoRootPath,
@@ -1457,6 +1485,35 @@ export async function runEvaluation(
     }
 
     const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
+    if (afterAllWorkspaces.length > 0 && suiteExtensions.length > 0) {
+      for (const wsPath of afterAllWorkspaces) {
+        try {
+          const afterAllState = await runExtensionsForHook({
+            extensions: suiteExtensions,
+            hook: 'afterAll',
+            context: {
+              hook_name: 'afterAll',
+              workspace_path: wsPath,
+              test_id: '__after_all__',
+              eval_run_id: evalRunId,
+              eval_dir: evalDir,
+            },
+            state: poolSlotExtensionStates.get(wsPath) ?? sharedExtensionState,
+          });
+          if (afterAllState?.output && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
+            results[results.length - 1] = {
+              ...results[results.length - 1],
+              afterAllOutput: mergeTextOutput(
+                results[results.length - 1].afterAllOutput,
+                afterAllState.output,
+              ),
+            };
+          }
+        } catch {
+          // afterAll extension failures are non-fatal, matching teardown hooks.
+        }
+      }
+    }
     if (afterAllWorkspaces.length > 0 && suiteHooksEnabled && hasHookCommand(suiteAfterAllHook)) {
       const afterAllHook = suiteAfterAllHook;
       for (const wsPath of afterAllWorkspaces) {
@@ -1792,6 +1849,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
     retainOnFailure,
     sharedWorkspacePath,
     sharedBaselineCommit,
+    sharedExtensionState,
     suiteWorkspaceFile,
     typeRegistry: providedTypeRegistry,
     repoManager,
@@ -1833,6 +1891,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       cleanupWorkspaces: forceCleanup,
       targetHooks: options.targetHooks,
       setupDebug,
+      sharedExtensionState,
     });
   } catch (error) {
     const setupError = error instanceof WorkspaceSetupError ? error : undefined;
@@ -1856,11 +1915,114 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
     baselineCommit,
     isSharedWorkspace,
     caseWorkspaceFile,
+    extensionState,
   } = workspaceSetup;
+  const extensionMetadata = extensionState?.metadata;
+  const providerMetadata = mergeMetadata(evalCase.metadata, extensionState?.providerContext);
+  const resultMetadata = mergeMetadata(evalCase.metadata, extensionMetadata);
+
+  const runAfterEachHooks = async () => {
+    // Teardown order: target after_each, afterEach extensions, optional reset, case after_each.
+    const targetAfterEachHook = options.targetHooks?.after_each;
+    if (workspacePath && hasHookCommand(targetAfterEachHook)) {
+      const scriptContext: ScriptExecutionContext = {
+        workspacePath,
+        testId: evalCase.id,
+        evalRunId: evalRunId ?? '',
+        caseInput: evalCase.question,
+        caseMetadata: evalCase.metadata,
+        evalDir,
+        workspaceFileDir: evalCase.workspace?.workspaceFileDir,
+      };
+      try {
+        await executeWorkspaceScript(
+          toScriptConfig(targetAfterEachHook, 'after_each', `target hook for '${evalCase.id}'`),
+          scriptContext,
+          'warn',
+        );
+      } catch {
+        // target after_each failures are non-fatal
+      }
+    }
+
+    if (workspacePath && evalCase.extensions && evalCase.extensions.length > 0) {
+      try {
+        const afterEachState = await runExtensionsForHook({
+          extensions: evalCase.extensions,
+          hook: 'afterEach',
+          context: {
+            hook_name: 'afterEach',
+            workspace_path: workspacePath,
+            test_id: evalCase.id,
+            eval_run_id: evalRunId ?? '',
+            case_input: evalCase.question,
+            case_metadata: evalCase.metadata,
+            eval_dir: evalDir ?? process.cwd(),
+            workspace_file_dir: evalCase.workspace?.workspaceFileDir,
+          },
+          state: extensionState,
+        });
+        afterEachOutput = mergeTextOutput(afterEachOutput, afterEachState?.output);
+      } catch {
+        // afterEach extension failures are non-fatal, matching teardown hooks.
+      }
+    }
+
+    // Reset workspace state before after_each hook (if configured), but only
+    // after graders have inspected the agent-modified workspace.
+    if (
+      caseHooksEnabled &&
+      workspacePath &&
+      evalCase.workspace?.hooks?.after_each?.reset &&
+      evalCase.workspace.hooks.after_each.reset !== 'none'
+    ) {
+      try {
+        if (repoManager && evalCase.workspace.repos?.length) {
+          await repoManager.reset(
+            evalCase.workspace.repos,
+            workspacePath,
+            evalCase.workspace.hooks.after_each.reset,
+          );
+        } else {
+          await resetWorkspaceRoot(
+            workspacePath,
+            evalCase.workspace.hooks.after_each.reset,
+            baselineCommit,
+          );
+        }
+      } catch {
+        // Reset failures are non-fatal (like after_each)
+      }
+    }
+
+    // Run the case after_each hook (after grading, before cleanup), keeping extension output.
+    const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
+    if (workspacePath && caseHooksEnabled && hasHookCommand(caseAfterEachHook)) {
+      const scriptContext: ScriptExecutionContext = {
+        workspacePath,
+        testId: evalCase.id,
+        evalRunId: evalRunId ?? '',
+        caseInput: evalCase.question,
+        caseMetadata: evalCase.metadata,
+        evalDir,
+        workspaceFileDir: evalCase.workspace?.workspaceFileDir,
+      };
+      try {
+        const output = await executeWorkspaceScript(
+          toScriptConfig(caseAfterEachHook, 'after_each', `test '${evalCase.id}'`),
+          scriptContext,
+          'warn',
+        );
+        afterEachOutput = afterEachOutput ? mergeTextOutput(afterEachOutput, output) : output;
+      } catch {
+        // after_each failures are non-fatal
+      }
+    }
+  };
 
   // Conversation mode: turn-by-turn evaluation
   if (evalCase.mode === 'conversation' && evalCase.turns?.length) {
-    const conversationResult = await runConversationMode({
+    let conversationResult = await runConversationMode({
       evalCase,
       provider,
       target,
@@ -1879,7 +2041,16 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       targetResolver,
       availableTargets,
       evalFilePath,
+      metadata: providerMetadata,
     });
+    await runAfterEachHooks();
+    conversationResult = {
+      ...conversationResult,
+      ...(resultMetadata !== undefined ? { metadata: resultMetadata } : {}),
+      beforeAllOutput,
+      beforeEachOutput,
+      afterEachOutput,
+    };
 
     // Cleanup workspace (same logic as standard path)
     if (workspacePath && !isSharedWorkspace) {
@@ -1917,6 +2088,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
         workspaceFile: caseWorkspaceFile,
         captureFileChanges: !!baselineCommit,
         streamCallbacks: options.streamCallbacks,
+        metadata: providerMetadata,
       });
     } catch (error) {
       lastError = error;
@@ -1950,6 +2122,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
           workspaceFile: caseWorkspaceFile,
           captureFileChanges: !!baselineCommit,
           streamCallbacks: options.streamCallbacks,
+          metadata: providerMetadata,
         });
         targetUsed = fallbackName;
         break; // Fallback succeeded
@@ -2059,82 +2232,6 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
 
   const providerError = extractProviderError(providerResponse);
 
-  const runAfterEachHooks = async () => {
-    // Execute target after_each hook before workspace after_each/reset.
-    const targetAfterEachHook = options.targetHooks?.after_each;
-    if (workspacePath && hasHookCommand(targetAfterEachHook)) {
-      const scriptContext: ScriptExecutionContext = {
-        workspacePath,
-        testId: evalCase.id,
-        evalRunId: evalRunId ?? '',
-        caseInput: evalCase.question,
-        caseMetadata: evalCase.metadata,
-        evalDir,
-        workspaceFileDir: evalCase.workspace?.workspaceFileDir,
-      };
-      try {
-        await executeWorkspaceScript(
-          toScriptConfig(targetAfterEachHook, 'after_each', `target hook for '${evalCase.id}'`),
-          scriptContext,
-          'warn',
-        );
-      } catch {
-        // target after_each failures are non-fatal
-      }
-    }
-
-    // Reset workspace state before after_each hook (if configured), but only
-    // after graders have inspected the agent-modified workspace.
-    if (
-      caseHooksEnabled &&
-      workspacePath &&
-      evalCase.workspace?.hooks?.after_each?.reset &&
-      evalCase.workspace.hooks.after_each.reset !== 'none'
-    ) {
-      try {
-        if (repoManager && evalCase.workspace.repos?.length) {
-          await repoManager.reset(
-            evalCase.workspace.repos,
-            workspacePath,
-            evalCase.workspace.hooks.after_each.reset,
-          );
-        } else {
-          await resetWorkspaceRoot(
-            workspacePath,
-            evalCase.workspace.hooks.after_each.reset,
-            baselineCommit,
-          );
-        }
-      } catch {
-        // Reset failures are non-fatal (like after_each)
-      }
-    }
-
-    // Execute after_each hook (runs after grading, before cleanup)
-    const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
-    if (workspacePath && caseHooksEnabled && hasHookCommand(caseAfterEachHook)) {
-      const afterEachHook = caseAfterEachHook;
-      const scriptContext: ScriptExecutionContext = {
-        workspacePath,
-        testId: evalCase.id,
-        evalRunId: evalRunId ?? '',
-        caseInput: evalCase.question,
-        caseMetadata: evalCase.metadata,
-        evalDir,
-        workspaceFileDir: evalCase.workspace?.workspaceFileDir,
-      };
-      try {
-        afterEachOutput = await executeWorkspaceScript(
-          toScriptConfig(afterEachHook, 'after_each', `test '${evalCase.id}'`),
-          scriptContext,
-          'warn',
-        );
-      } catch {
-        // after_each failures are non-fatal
-      }
-    }
-  };
-
   try {
     const result = await evaluateCandidate({
       evalCase,
@@ -2215,6 +2312,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
           failureStage: 'agent' as const,
           failureReasonCode: 'provider_error',
           executionError: { message: providerError, stage: 'agent' as const },
+          ...(resultMetadata !== undefined ? { metadata: resultMetadata } : {}),
           beforeAllOutput,
           beforeEachOutput,
           afterEachOutput,
@@ -2234,6 +2332,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
             failureStage: 'evaluator' as const,
             failureReasonCode: 'evaluator_error',
             executionError: { message: skippedEvaluatorError, stage: 'evaluator' as const },
+            ...(resultMetadata !== undefined ? { metadata: resultMetadata } : {}),
             beforeAllOutput,
             beforeEachOutput,
             afterEachOutput,
@@ -2243,6 +2342,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
             ...targetUsedField,
             evalRun,
             executionStatus,
+            ...(resultMetadata !== undefined ? { metadata: resultMetadata } : {}),
             beforeAllOutput,
             beforeEachOutput,
             afterEachOutput,
@@ -3035,6 +3135,7 @@ async function runConversationMode(options: {
   readonly targetResolver?: (name: string) => Provider | undefined;
   readonly availableTargets?: readonly string[];
   readonly evalFilePath?: string;
+  readonly metadata?: JsonObject;
 }): Promise<EvaluationResult> {
   const {
     evalCase,
@@ -3055,6 +3156,7 @@ async function runConversationMode(options: {
     targetResolver,
     availableTargets,
     evalFilePath,
+    metadata,
   } = options;
 
   // biome-ignore lint/style/noNonNullAssertion: turns is guaranteed by the caller (conversation mode gate)
@@ -3114,6 +3216,7 @@ async function runConversationMode(options: {
         cwd: workspacePath,
         workspaceFile: caseWorkspaceFile,
         streamCallbacks,
+        metadata,
       });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
@@ -3414,6 +3517,7 @@ async function invokeProvider(
     readonly workspaceFile?: string;
     /** When true, AgentV captures file changes — provider should skip forced diff prompt */
     readonly captureFileChanges?: boolean;
+    readonly metadata?: JsonObject;
     /** Real-time observability callbacks */
     readonly streamCallbacks?: ProviderStreamCallbacks;
   },
@@ -3428,6 +3532,7 @@ async function invokeProvider(
     cwd,
     workspaceFile,
     captureFileChanges,
+    metadata,
     streamCallbacks,
   } = options;
 
@@ -3455,6 +3560,7 @@ async function invokeProvider(
       cwd,
       workspaceFile,
       captureFileChanges,
+      metadata,
       streamCallbacks,
       braintrustSpanIds: braintrustSpanIds ?? undefined,
     });
diff --git a/packages/core/src/evaluation/prepared-workspace.ts b/packages/core/src/evaluation/prepared-workspace.ts
index 15994a146..df4e67569 100644
--- a/packages/core/src/evaluation/prepared-workspace.ts
+++ b/packages/core/src/evaluation/prepared-workspace.ts
@@ -18,7 +18,7 @@ import micromatch from 'micromatch';
 import type { ResolvedTarget } from './providers/targets.js';
 import type { ChatPrompt } from './providers/types.js';
 import { AGENT_PROVIDER_KINDS } from './providers/types.js';
-import type { EvalTest, RepoConfig, TargetHooksConfig } from './types.js';
+import type { EvalTest, JsonObject, RepoConfig, TargetHooksConfig } from './types.js';
 import {
   type SharedWorkspaceSetup,
   type WorkspaceSetupCleanPolicy,
@@ -100,6 +100,8 @@ export interface PreparedEvalWorkspace {
   readonly workspaceFile?: string;
   readonly createdAt: string;
   readonly hookExecutions: readonly WorkspaceSetupHookExecution[];
+  readonly providerContext?: JsonObject;
+  readonly metadata?: Record<string, unknown>;
   readonly repoPins: readonly PreparedWorkspaceRepoPin[];
   readonly baseline: PreparedWorkspaceBaseline;
   readonly promptSource: PreparedWorkspacePromptSource;
@@ -244,6 +246,7 @@ export async function prepareEvalWorkspace(
       evalDir,
       cleanupWorkspaces: options.cleanupWorkspaces,
       targetHooks: options.targetHooks,
+      sharedExtensionState: sharedSetup.extensionState,
       setupDebug: options.verbose,
     });
 
@@ -267,6 +270,12 @@ export async function prepareEvalWorkspace(
       }),
       createdAt: (options.now ?? (() => new Date()))().toISOString(),
       hookExecutions: [...sharedSetup.hookExecutions, ...caseSetup.hookExecutions],
+      ...(caseSetup.extensionState?.providerContext !== undefined && {
+        providerContext: caseSetup.extensionState.providerContext,
+      }),
+      ...(caseSetup.extensionState?.metadata !== undefined && {
+        metadata: caseSetup.extensionState.metadata,
+      }),
       repoPins: toRepoPins(evalCase.workspace?.repos),
       baseline: caseSetup.baselineCommit
         ? { status: 'initialized', commit: caseSetup.baselineCommit }
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index bdb4b8aac..8d4ae944f 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -292,6 +292,29 @@ export type TargetHooksConfig = {
   readonly after_all?: WorkspaceHookConfig;
 };
 
+export type ExtensionLifecycleHook = 'beforeAll' | 'beforeEach' | 'afterEach' | 'afterAll';
+/** Optional lists of path strings, grouped under skills, hooks, agents and rules. */
+export type AgentRulesPaths = {
+  readonly skills?: readonly string[];
+  readonly hooks?: readonly string[];
+  readonly agents?: readonly string[];
+  readonly rules?: readonly string[];
+};
+/** Agent-rules extension entry: the fixed `agentv:agent-rules` id, a hook, plus path lists. */
+export type AgentRulesExtensionConfig = AgentRulesPaths & {
+  readonly id: 'agentv:agent-rules';
+  readonly hook: ExtensionLifecycleHook;
+};
+/** Extension entry carrying a `path` and a `functionName` typed as a lifecycle hook name. */
+export type FileExtensionConfig = {
+  readonly id: string;
+  readonly hook: ExtensionLifecycleHook;
+  readonly path: string;
+  readonly functionName: ExtensionLifecycleHook;
+};
+/** Any extension entry: the agent-rules form or the file form. */
+export type AgentVExtensionConfig = AgentRulesExtensionConfig | FileExtensionConfig;
+
 /**
  * Extended target reference from eval file.
  * Allows eval files to define per-target hooks and delegation alongside target names.
@@ -325,7 +348,7 @@ export type DockerWorkspaceConfig = {
 
 /**
  * Preflight environment requirements for the workspace.
- * Checked once before before_all hooks run. Fails fast if anything is missing.
+ * Checked once before workspace setup hooks run. Fails fast if anything is missing.
  *
  * @example
  * ```yaml
@@ -358,7 +381,7 @@ export type WorkspaceConfig = {
    *  Used as default cwd for hook commands so that file-referenced templates resolve
    *  relative paths from their own directory, not the eval file's directory. */
   readonly workspaceFileDir?: string;
-  /** Preflight environment requirements. Checked before before_all hooks run. */
+  /** Preflight environment requirements. Checked before workspace setup hooks run. */
   readonly env?: WorkspaceEnvConfig;
 };
 
@@ -994,6 +1017,8 @@ export interface EvalTest {
   readonly assertions?: readonly GraderConfig[];
   /** Suite-level preprocessors used by the implicit default llm-grader. */
   readonly preprocessors?: readonly ContentPreprocessorConfig[];
+  /** Promptfoo-style lifecycle extensions inherited from the suite. */
+  readonly extensions?: readonly AgentVExtensionConfig[];
   /** Workspace configuration (merged from suite-level and case-level) */
   readonly workspace?: WorkspaceConfig;
   /** Arbitrary metadata passed to workspace scripts via stdin */
diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts
index a289a3f2a..135fc8912 100644
--- a/packages/core/src/evaluation/validation/eval-file.schema.ts
+++ b/packages/core/src/evaluation/validation/eval-file.schema.ts
@@ -325,6 +325,68 @@ const WorkspaceScriptSchema = z
   })
   .strict();
 
+const ExtensionHookSchema = z.enum(['beforeAll', 'beforeEach', 'afterEach', 'afterAll']);
+// A non-empty `file://...` string whose text after the last colon is a lifecycle hook name.
+const FileExtensionSchema = z
+  .string()
+  .min(1)
+  .refine((value) => value.startsWith('file://'), {
+    message: 'file extensions must start with file://',
+  })
+  .refine(
+    (value) => {
+      const lastColon = value.lastIndexOf(':');
+      return (
+        lastColon > 'file://'.length &&
+        ExtensionHookSchema.safeParse(value.slice(lastColon + 1)).success
+      );
+    },
+    {
+      message: 'file extensions must be of the form file://path/to/hook.ts:beforeAll',
+    },
+  );
+// Either the bare `agentv:agent-rules` id or that id followed by `:` and a lifecycle hook name.
+const AgentRulesStringExtensionSchema = z.union([
+  z.literal('agentv:agent-rules'),
+  z
+    .string()
+    .startsWith('agentv:agent-rules:')
+    .refine(
+      (value) => ExtensionHookSchema.safeParse(value.slice('agentv:agent-rules:'.length)).success,
+      {
+        message: 'agentv:agent-rules hook must be beforeAll, beforeEach, afterEach, or afterAll',
+      },
+    ),
+]);
+// A path list may be written as one non-empty string or as an array of non-empty strings.
+const AgentRulesPathListSchema = z.union([z.string().min(1), z.array(z.string().min(1))]);
+// Object form of agent-rules; path lists are accepted at the top level and/or under `config`.
+const AgentRulesObjectExtensionSchema = z
+  .object({
+    id: z.literal('agentv:agent-rules'),
+    hook: ExtensionHookSchema.optional(),
+    skills: AgentRulesPathListSchema.optional(),
+    hooks: AgentRulesPathListSchema.optional(),
+    agents: AgentRulesPathListSchema.optional(),
+    rules: AgentRulesPathListSchema.optional(),
+    config: z
+      .object({
+        skills: AgentRulesPathListSchema.optional(),
+        hooks: AgentRulesPathListSchema.optional(),
+        agents: AgentRulesPathListSchema.optional(),
+        rules: AgentRulesPathListSchema.optional(),
+      })
+      .strict()
+      .optional(),
+  })
+  .strict();
+// An `extensions` entry is any one of the three forms above.
+const ExtensionSchema = z.union([
+  FileExtensionSchema,
+  AgentRulesStringExtensionSchema,
+  AgentRulesObjectExtensionSchema,
+]);
+
 // ---------------------------------------------------------------------------
 // Repo lifecycle
 // ---------------------------------------------------------------------------
@@ -686,8 +748,8 @@ export const EvalFileSchema: z.ZodType = z
     output_path: z.union([z.string().min(1), z.array(z.string().min(1))]).optional(),
     env: z.record(z.string()).optional(),
     nunjucks_filters: z.union([JsonObjectSchema, z.array(z.string().min(1))]).optional(),
-    extensions: z.array(z.union([z.string().min(1), JsonObjectSchema])).optional(),
-    on_run_complete: z.union([z.string().min(1), z.array(z.string().min(1))]).optional(),
+    extensions: z.array(ExtensionSchema).optional(),
+    on_run_complete: z.never().optional(),
     policy: z.never().optional(),
     execution: z.never().optional(),
     // Suite-level assertions
diff --git a/packages/core/src/evaluation/workspace/setup.ts b/packages/core/src/evaluation/workspace/setup.ts
index df0004df2..b099f987e 100644
--- a/packages/core/src/evaluation/workspace/setup.ts
+++ b/packages/core/src/evaluation/workspace/setup.ts
@@ -18,8 +18,11 @@ import path from 'node:path';
 import { promisify } from 'node:util';
 
 import { getWorkspacePoolRoot } from '../../paths.js';
+import { type ExtensionRuntimeState, runExtensionsForHook } from '../extensions/runner.js';
 import type {
+  AgentVExtensionConfig,
   EvalTest,
+  ExtensionLifecycleHook,
   FailureStage,
   TargetHooksConfig,
   WorkspaceConfig,
@@ -117,9 +120,11 @@ export interface SharedWorkspaceSetup {
   readonly poolSlots: readonly PoolSlot[];
   readonly availablePoolSlots: PoolSlot[];
   readonly poolSlotBaselines: ReadonlyMap<string, string>;
+  readonly poolSlotExtensionStates: ReadonlyMap<string, ExtensionRuntimeState>;
   readonly useStaticWorkspace: boolean;
   readonly configuredMode: WorkspaceSetupMode;
   readonly hookExecutions: readonly WorkspaceSetupHookExecution[];
+  readonly extensionState?: ExtensionRuntimeState;
 }
 
 export interface EvalCaseWorkspaceSetupOptions {
@@ -134,6 +139,7 @@ export interface EvalCaseWorkspaceSetupOptions {
   readonly cleanupWorkspaces?: boolean;
   readonly targetHooks?: TargetHooksConfig;
   readonly setupDebug?: boolean;
+  readonly sharedExtensionState?: ExtensionRuntimeState;
 }
 
 export interface EvalCaseWorkspaceSetup {
@@ -144,6 +150,7 @@ export interface EvalCaseWorkspaceSetup {
   readonly baselineCommit?: string;
   readonly isSharedWorkspace: boolean;
   readonly hookExecutions: readonly WorkspaceSetupHookExecution[];
+  readonly extensionState?: ExtensionRuntimeState;
 }
 
 export function toScriptConfig(
@@ -299,6 +306,28 @@ function selectSuiteWorkspace(evalCases: readonly EvalTest[]): SelectedSharedWor
   );
 }
 
+/** The one extension list used by tests without per-case isolation; throws if lists differ. */
+function selectSuiteExtensions(evalCases: readonly EvalTest[]): readonly AgentVExtensionConfig[] {
+  const distinct = new Map<string, readonly AgentVExtensionConfig[]>();
+  for (const evalCase of evalCases) {
+    const extensions = evalCase.extensions ?? [];
+    // Tests with no extensions or with per-case isolation do not take part in the choice.
+    if (extensions.length > 0 && !isPerCaseIsolation(evalCase.workspace)) {
+      distinct.set(stableWorkspaceValue(extensions), extensions);
+    }
+  }
+
+  if (distinct.size <= 1) {
+    // Zero candidates yields an empty list; one candidate is returned as-is.
+    const [only] = distinct.values();
+    return only ?? [];
+  }
+  throw new WorkspaceSetupError(
+    'Wrapper eval contains multiple shared extension sets. Split the suites or use isolation: per_case when lifecycle extensions differ.',
+    { failureStage: 'setup', failureReasonCode: 'ambiguous_shared_extensions' },
+  );
+}
+
 function workspaceGitEnv(): Record<string, string | undefined> {
   const env = { ...process.env };
   for (const key of Object.keys(env)) {
@@ -340,6 +369,17 @@ function commandForHook(hook: WorkspaceHookConfig | undefined): readonly string[
   return hook?.command;
 }
 
+function mergeHookOutput(left: string | undefined, right: string | undefined): string | undefined {
+  return left && right ? `${left}\n${right}` : left || right || undefined; // skip empty parts
+}
+
+function hasExtensionHook(
+  extensions: readonly AgentVExtensionConfig[] | undefined,
+  hook: ExtensionLifecycleHook,
+): boolean {
+  return extensions?.some((candidate) => candidate.hook === hook) ?? false; // no list => false
+}
+
 function hookExecution(options: {
   readonly scope: WorkspaceSetupHookScope;
   readonly name: WorkspaceSetupHookName;
@@ -404,6 +444,7 @@ export async function prepareSharedWorkspaceSetup(
   } = options;
   const selectedSuiteWorkspace = selectSuiteWorkspace(evalCases);
   const suiteWorkspace = selectedSuiteWorkspace?.workspace;
+  const suiteExtensions = selectSuiteExtensions(evalCases);
   const rawTemplate = suiteWorkspace?.template;
   const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
   const workspaceTemplate = resolvedTemplate?.dir;
@@ -447,7 +488,8 @@ export async function prepareSharedWorkspaceSetup(
   const hasSharedWorkspace = !!(
     useStaticWorkspace ||
     (!isPerCaseWorkspace &&
-      (workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length))
+      (workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) ||
+    suiteExtensions.length > 0
   );
 
   const poolEnabled = configuredMode === 'pooled';
@@ -479,7 +521,9 @@ export async function prepareSharedWorkspaceSetup(
   const poolSlots: PoolSlot[] = [];
   const availablePoolSlots: PoolSlot[] = [];
   const poolSlotBaselines = new Map<string, string>();
+  const poolSlotExtensionStates = new Map<string, ExtensionRuntimeState>();
   const hookExecutions: WorkspaceSetupHookExecution[] = [];
+  let extensionState: ExtensionRuntimeState | undefined;
 
   const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
   let repoManager: RepoManager | undefined;
@@ -532,7 +576,10 @@ export async function prepareSharedWorkspaceSetup(
           cause: error,
         });
       }
-    } else if (!isPerCaseWorkspace && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
+    } else if (
+      !isPerCaseWorkspace &&
+      (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length || suiteExtensions.length > 0)
+    ) {
       sharedWorkspacePath = getWorkspacePath(evalRunId, 'shared');
       await mkdir(sharedWorkspacePath, { recursive: true });
       setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
@@ -603,6 +650,67 @@ export async function prepareSharedWorkspaceSetup(
 
     const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
     const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
+    if (sharedWorkspacePath && suiteExtensions.length > 0) {
+      try {
+        extensionState = await runExtensionsForHook({
+          extensions: suiteExtensions,
+          hook: 'beforeAll',
+          context: {
+            hook_name: 'beforeAll',
+            workspace_path: sharedWorkspacePath,
+            test_id: '__before_all__',
+            eval_run_id: evalRunId,
+            eval_dir: evalDir,
+          },
+          state: extensionState,
+        });
+        beforeAllOutput = mergeHookOutput(beforeAllOutput, extensionState?.output);
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {});
+        }
+        throw new WorkspaceSetupError(`beforeAll extension failed: ${message}`, {
+          failureStage: 'setup',
+          failureReasonCode: 'extension_error',
+          hookExecutions,
+          cause: error,
+        });
+      }
+    }
+    if (availablePoolSlots.length > 0 && suiteExtensions.length > 0) {
+      for (const slot of availablePoolSlots) {
+        setupLog(`running beforeAll extensions on pool slot ${slot.index}`);
+        try {
+          const slotExtensionState = await runExtensionsForHook({
+            extensions: suiteExtensions,
+            hook: 'beforeAll',
+            context: {
+              hook_name: 'beforeAll',
+              workspace_path: slot.path,
+              test_id: '__before_all__',
+              eval_run_id: evalRunId,
+              eval_dir: evalDir,
+            },
+          });
+          if (slotExtensionState) {
+            poolSlotExtensionStates.set(slot.path, slotExtensionState);
+          }
+          beforeAllOutput = mergeHookOutput(beforeAllOutput, slotExtensionState?.output);
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          throw new WorkspaceSetupError(
+            `beforeAll extension failed on pool slot ${slot.index}: ${message}`,
+            {
+              failureStage: 'setup',
+              failureReasonCode: 'extension_error',
+              hookExecutions,
+              cause: error,
+            },
+          );
+        }
+      }
+    }
     if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
       const beforeAllHook = suiteBeforeAllHook;
       const beforeAllCommand = (beforeAllHook.command ?? []).join(' ');
@@ -857,9 +965,11 @@ export async function prepareSharedWorkspaceSetup(
       poolSlots,
       availablePoolSlots,
       poolSlotBaselines,
+      poolSlotExtensionStates,
       useStaticWorkspace,
       configuredMode,
       hookExecutions,
+      ...(extensionState !== undefined && { extensionState }),
     };
   } catch (error) {
     await releasePoolSlots({ poolManager, poolSlot, poolSlots }).catch(() => {});
@@ -881,6 +991,7 @@ export async function prepareEvalCaseWorkspace(
     cleanupWorkspaces: forceCleanup,
     targetHooks,
     setupDebug,
+    sharedExtensionState,
   } = options;
 
   let workspacePath: string | undefined = isPerCaseIsolation(evalCase.workspace)
@@ -893,6 +1004,7 @@ export async function prepareEvalCaseWorkspace(
   let caseWorkspaceFile: string | undefined;
   const caseHooksEnabled = hooksEnabled(evalCase.workspace);
   const hookExecutions: WorkspaceSetupHookExecution[] = [];
+  let extensionState = sharedExtensionState;
 
   if (!workspacePath) {
     const rawCaseTemplate = evalCase.workspace?.template;
@@ -925,7 +1037,9 @@ export async function prepareEvalCaseWorkspace(
 
     if (
       !workspacePath &&
-      (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) &&
+      (evalCase.workspace?.hooks ||
+        evalCase.workspace?.repos?.length ||
+        evalCase.extensions?.length) &&
       evalRunId
     ) {
       workspacePath = getWorkspacePath(evalRunId, evalCase.id);
@@ -1025,6 +1139,38 @@ export async function prepareEvalCaseWorkspace(
       }
     }
 
+    if (workspacePath && evalCase.extensions && evalCase.extensions.length > 0) {
+      try {
+        extensionState = await runExtensionsForHook({
+          extensions: evalCase.extensions,
+          hook: 'beforeAll',
+          context: {
+            hook_name: 'beforeAll',
+            workspace_path: workspacePath,
+            test_id: evalCase.id,
+            eval_run_id: evalRunId ?? '',
+            case_input: evalCase.question,
+            case_metadata: evalCase.metadata,
+            eval_dir: evalDir ?? process.cwd(),
+            workspace_file_dir: evalCase.workspace?.workspaceFileDir,
+          },
+          state: extensionState,
+        });
+        beforeAllOutput = mergeHookOutput(beforeAllOutput, extensionState?.output);
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (forceCleanup && workspacePath) {
+          await cleanupWorkspace(workspacePath).catch(() => {});
+        }
+        throw new WorkspaceSetupError(`beforeAll extension failed: ${message}`, {
+          failureStage: 'setup',
+          failureReasonCode: 'extension_error',
+          hookExecutions,
+          cause: error,
+        });
+      }
+    }
+
     const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
     if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeAllHook)) {
       const beforeAllHook = caseBeforeAllHook;
@@ -1122,6 +1268,40 @@ export async function prepareEvalCaseWorkspace(
   }
 
   const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
+  if (workspacePath && evalCase.extensions && evalCase.extensions.length > 0) {
+    try {
+      beforeEachNeedsFreshBaseline = hasExtensionHook(evalCase.extensions, 'beforeEach');
+      const nextState = await runExtensionsForHook({
+        extensions: evalCase.extensions,
+        hook: 'beforeEach',
+        context: {
+          hook_name: 'beforeEach',
+          workspace_path: workspacePath,
+          test_id: evalCase.id,
+          eval_run_id: evalRunId ?? '',
+          case_input: evalCase.question,
+          case_metadata: evalCase.metadata,
+          eval_dir: evalDir ?? process.cwd(),
+          workspace_file_dir: evalCase.workspace?.workspaceFileDir,
+        },
+        state: extensionState,
+      });
+      if (nextState !== extensionState) {
+        beforeEachNeedsFreshBaseline = true;
+      }
+      extensionState = nextState;
+      beforeEachOutput = mergeHookOutput(beforeEachOutput, extensionState?.output);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      throw new WorkspaceSetupError(`beforeEach extension failed: ${message}`, {
+        failureStage: 'setup',
+        failureReasonCode: 'extension_error',
+        hookExecutions,
+        cause: error,
+      });
+    }
+  }
+
   if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeEachHook)) {
     const beforeEachHook = caseBeforeEachHook;
     const scriptContext: ScriptExecutionContext = {
@@ -1243,6 +1423,7 @@ export async function prepareEvalCaseWorkspace(
     ...(baselineCommit !== undefined && { baselineCommit }),
     isSharedWorkspace,
     hookExecutions,
+    ...(extensionState !== undefined && { extensionState }),
   };
 }
 
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index 890b28c69..97036af04 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -56,6 +56,9 @@ import {
 import { parseMetadata } from './metadata.js';
 import type { TargetDefinition } from './providers/types.js';
 import type {
+  AgentRulesExtensionConfig,
+  AgentRulesPaths,
+  AgentVExtensionConfig,
   ConversationAggregation,
   ConversationMode,
   ConversationTurn,
@@ -65,6 +68,7 @@ import type {
   EvalSourceReference,
   EvalTest,
   EvalTestSource,
+  ExtensionLifecycleHook,
   GraderConfig,
   JsonObject,
   JsonValue,
@@ -199,6 +203,8 @@ type RawTestSuite = JsonObject & {
   readonly workspace?: JsonValue;
   readonly assertions?: JsonValue;
   readonly preprocessors?: JsonValue;
+  readonly extensions?: JsonValue;
+  readonly on_run_complete?: JsonValue;
   readonly nunjucks_filters?: JsonValue;
   readonly input?: JsonValue;
   readonly metadata?: JsonValue;
@@ -614,6 +620,7 @@ async function loadTestsFromParsedYamlValue(
   // Top-level `metadata:` is inherited by cases. Suite identity tags are parsed
   // separately by parseMetadata() and are not case tags.
   const suiteMetadataPayload = extractSuiteMetadataPayload(suite);
+  const evalFileDir = path.dirname(absoluteTestPath);
 
   const globalEvaluator = coerceEvaluator(suite.evaluator, 'global') ?? 'llm-grader';
   const suitePreprocessors = await parsePreprocessors(
@@ -622,9 +629,9 @@ async function loadTestsFromParsedYamlValue(
     '<suite>',
     absoluteTestPath,
   );
+  const suiteExtensions = parseExtensions(suite.extensions, evalFileDir);
 
   const importedSuiteTests: EvalTest[] = [];
-  const evalFileDir = path.dirname(absoluteTestPath);
   const nunjucksFilters = await loadNunjucksFilters(suite.nunjucks_filters, evalFileDir);
   const parentWorkspace = parentWorkspaceLocation(suite);
   const importEntries = readImports(suite.imports);
@@ -956,6 +963,7 @@ async function loadTestsFromParsedYamlValue(
         evaluator: testCaseEvaluatorKind,
         assertions: evaluators,
         ...(suitePreprocessors ? { preprocessors: suitePreprocessors } : {}),
+        ...(suiteExtensions.length > 0 ? { extensions: suiteExtensions } : {}),
         workspace: mergedWorkspace,
         metadata,
         ...(caseRun?.threshold !== undefined ? { threshold: caseRun.threshold } : {}),
@@ -1626,6 +1634,11 @@ function readSuiteRuntimeBlock(suite: RawTestSuite, evalFilePath: string): JsonO
       `Invalid eval runtime config in ${evalFilePath}: top-level 'early_exit' has been removed. Use repeat.early_exit instead.`,
     );
   }
+  if (suite.on_run_complete !== undefined) {
+    throw new Error(
+      `Invalid eval runtime config in ${evalFilePath}: top-level 'on_run_complete' has been removed. Use extensions with afterAll instead.`,
+    );
+  }
   return undefined;
 }
 
@@ -2105,6 +2118,106 @@ function parseWorkspaceHooksConfig(
   return Object.keys(hooks).length > 0 ? hooks : undefined;
 }
 
+const EXTENSION_HOOKS = new Set(['beforeAll', 'beforeEach', 'afterEach', 'afterAll']);
+
+/** Parses a raw `extensions` value: absent yields `[]`, any other non-array value throws. */
+function parseExtensions(raw: unknown, evalFileDir: string): AgentVExtensionConfig[] {
+  if (raw === undefined) return [];
+  if (Array.isArray(raw)) {
+    return raw.map((entry, position) => parseExtension(entry, position, evalFileDir));
+  }
+  throw new Error('extensions must be an array');
+}
+
+/** Parses one `extensions` entry (string or `agentv:agent-rules` object); throws if invalid. */
+function parseExtension(entry: unknown, index: number, evalFileDir: string): AgentVExtensionConfig {
+  const label = `extensions[${index}]`;
+  if (typeof entry === 'string') return parseExtensionString(entry, label, evalFileDir);
+  if (!isJsonObject(entry)) throw new Error(`${label} must be a string or object`);
+  const obj = entry as Record<string, unknown>;
+  const id = typeof obj.id === 'string' ? obj.id : undefined;
+  if (id !== 'agentv:agent-rules') throw new Error(`${label}.id must be agentv:agent-rules`);
+  // A present but non-string hook used to fall back to beforeAll silently; reject it instead.
+  if (obj.hook !== undefined && obj.hook !== null && typeof obj.hook !== 'string') {
+    throw new Error(`${label}.hook must be one of beforeAll, beforeEach, afterEach, afterAll`);
+  }
+  const hook = parseExtensionHook(obj.hook, `${label}.hook`) ?? 'beforeAll';
+  // Path lists come from `config` when it is an object, otherwise from the entry itself.
+  const source = isJsonObject(obj.config) ? (obj.config as Record<string, unknown>) : obj;
+  return {
+    id,
+    hook,
+    ...(readPathList(source.skills, `${label}.skills`) ?? {}),
+    ...(readPathList(source.hooks, `${label}.hooks`) ?? {}),
+    ...(readPathList(source.agents, `${label}.agents`) ?? {}),
+    ...(readPathList(source.rules, `${label}.rules`) ?? {}),
+  };
+}
+
+/**
+ * Parses `agentv:agent-rules[:hook]` or `file://path:hook`; relative paths use `evalFileDir`.
+ */
+function parseExtensionString(
+  raw: string,
+  label: string,
+  evalFileDir: string,
+): AgentVExtensionConfig {
+  const rulesId = 'agentv:agent-rules';
+  const rulesPrefix = `${rulesId}:`;
+  const fileScheme = 'file://';
+  if (raw === rulesId) {
+    return { id: rulesId, hook: 'beforeAll' };
+  }
+  if (raw.startsWith(rulesPrefix)) {
+    const rulesHook = parseExtensionHook(raw.slice(rulesPrefix.length), label);
+    if (!rulesHook) {
+      throw new Error(`${label} must use one of beforeAll, beforeEach, afterEach, afterAll`);
+    }
+    return { id: rulesId, hook: rulesHook };
+  }
+  if (!raw.startsWith(fileScheme)) {
+    throw new Error(`${label} must start with file:// or agentv:agent-rules`);
+  }
+  // The hook name follows the last colon, which must sit beyond the scheme prefix.
+  const separator = raw.lastIndexOf(':');
+  if (separator <= fileScheme.length) {
+    throw new Error(`${label} must be of the form file://path/to/hook.ts:beforeAll`);
+  }
+  const hook = parseExtensionHook(raw.slice(separator + 1), label);
+  if (!hook) {
+    throw new Error(`${label} must target one of beforeAll, beforeEach, afterEach, afterAll`);
+  }
+  const filePart = raw.slice(fileScheme.length, separator);
+  if (!filePart) {
+    throw new Error(`${label} must include a file path`);
+  }
+  const resolvedPath = path.isAbsolute(filePart) ? filePart : path.resolve(evalFileDir, filePart);
+  return { id: raw, hook, path: resolvedPath, functionName: hook };
+}
+
+/** Validates a hook name; non-string input yields `undefined`, an unknown name throws. */
+function parseExtensionHook(raw: unknown, label: string): ExtensionLifecycleHook | undefined {
+  if (typeof raw !== 'string') return undefined;
+  const known = EXTENSION_HOOKS.has(raw);
+  if (known) return raw as ExtensionLifecycleHook;
+  throw new Error(`${label} must be one of beforeAll, beforeEach, afterEach, afterAll`);
+}
+
+/**
+ * Normalizes a path field to `{ [field]: string[] }`; the field name is the last `.` segment
+ * of `label`. Absent input yields `undefined`; anything but a string or string array throws.
+ */
+function readPathList(raw: unknown, label: string): Partial<AgentRulesPaths> | undefined {
+  if (raw === undefined) return undefined;
+  const values = typeof raw === 'string' ? [raw] : Array.isArray(raw) ? raw : undefined;
+  // Reject mixed arrays: silently dropping non-string entries would hide config mistakes.
+  if (!values?.every((entry) => typeof entry === 'string')) {
+    throw new Error(`${label} must be a string or string array`);
+  }
+  const key = label.split('.').at(-1) as keyof AgentRulesExtensionConfig | undefined;
+  return key ? ({ [key]: values } as Partial<AgentRulesPaths>) : undefined;
+}
+
 /**
  * Resolve a workspace config value: either an inline object or a string path
  * to an external workspace YAML file.
diff --git a/packages/core/test/evaluation/extensions.test.ts b/packages/core/test/evaluation/extensions.test.ts
new file mode 100644
index 000000000..4ff995b8b
--- /dev/null
+++ b/packages/core/test/evaluation/extensions.test.ts
@@ -0,0 +1,413 @@
+import { afterEach, describe, expect, it } from 'bun:test';
+import { execSync } from 'node:child_process';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+
+import { runEvaluation } from '../../src/evaluation/orchestrator.js';
+import type { ResolvedTarget } from '../../src/evaluation/providers/targets.js';
+import type {
+  Provider,
+  ProviderRequest,
+  ProviderResponse,
+} from '../../src/evaluation/providers/types.js';
+import { loadTestSuite, loadTests } from '../../src/evaluation/yaml-parser.js';
+
+const target: ResolvedTarget = {
+  name: 'mock',
+  kind: 'mock',
+  config: {},
+};
+// Evaluator stub keyed by 'llm-grader': every evaluation reports a passing score of 1.
+const passEvaluators = {
+  'llm-grader': {
+    kind: 'llm-grader' as const,
+    async evaluate() {
+      return {
+        score: 1,
+        verdict: 'pass' as const,
+        assertions: [{ text: 'passed', passed: true }],
+        expectedAspectCount: 1,
+      };
+    },
+  },
+};
+
+/** Provider stub that records the latest request and replies with a fixed assistant message. */
+class CapturingProvider implements Provider {
+  readonly id = 'mock:capturing';
+  readonly kind = 'mock' as const;
+  readonly targetName = 'mock';
+  lastRequest?: ProviderRequest;
+
+  constructor(private readonly onInvoke?: (request: ProviderRequest) => void | Promise<void>) {}
+
+  /** Stores `request`, awaits the optional `onInvoke` callback, then returns the canned reply. */
+  async invoke(request: ProviderRequest): Promise<ProviderResponse> {
+    this.lastRequest = request;
+    await this.onInvoke?.(request);
+    return { output: [{ role: 'assistant', content: 'answer' }] };
+  }
+}
+
+/** Copies process.env, dropping unset values and GIT_* variables other than GIT_SSH_COMMAND. */
+function cleanGitEnv(): Record<string, string> {
+  const sanitized: Record<string, string> = {};
+  for (const [name, setting] of Object.entries(process.env)) {
+    const gitScoped = name.startsWith('GIT_') && name !== 'GIT_SSH_COMMAND';
+    if (setting !== undefined && !gitScoped) sanitized[name] = setting;
+  }
+  return sanitized;
+}
+
+/** Initializes a git repo in `dir`, commits `files` (relative path to content), returns HEAD. */
+function createTestRepo(dir: string, files: Record<string, string>): string {
+  const quiet = { cwd: dir, stdio: 'ignore' as const, env: cleanGitEnv() };
+  mkdirSync(dir, { recursive: true });
+  execSync('git init', quiet);
+  execSync('git config user.email "test@test.com"', quiet);
+  execSync('git config user.name "Test"', quiet);
+  for (const [relativePath, content] of Object.entries(files)) {
+    mkdirSync(path.dirname(path.join(dir, relativePath)), { recursive: true });
+    writeFileSync(path.join(dir, relativePath), content, 'utf8');
+  }
+  execSync('git add -A && git commit -m "initial"', quiet);
+  return execSync('git rev-parse HEAD', { cwd: dir, env: cleanGitEnv() }).toString().trim();
+}
+
+describe('promptfoo-compatible lifecycle extensions', () => {
+  const tempDirs: string[] = [];
+  // Delete every temp directory a test registered, then empty the list for the next test.
+  afterEach(async () => {
+    await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
+    tempDirs.length = 0;
+  });
+
+  it('parses file hooks and agent-rules extensions from eval YAML', async () => {
+    const dir = await mkdtemp(path.join(tmpdir(), 'agentv-extensions-parse-'));
+    tempDirs.push(dir);
+    await writeFile(path.join(dir, 'hooks.mjs'), 'export function beforeAll() {}', 'utf8');
+    await writeFile(
+      path.join(dir, 'suite.eval.yaml'),
+      `extensions:
+  - file://hooks.mjs:beforeAll
+  - id: agentv:agent-rules
+    hook: beforeEach
+    skills: rules/skills
+tests:
+  - id: one
+    input: hello
+    criteria: works
+`,
+      'utf8',
+    );
+    // Load the suite just written and inspect the extensions attached to its first test.
+    const tests = await loadTests(path.join(dir, 'suite.eval.yaml'), dir);
+    // The file:// entry gains an absolute `path` under `dir`; the skills path stays as written.
+    expect(tests[0].extensions).toEqual([
+      {
+        id: 'file://hooks.mjs:beforeAll',
+        hook: 'beforeAll',
+        path: path.join(dir, 'hooks.mjs'),
+        functionName: 'beforeAll',
+      },
+      {
+        id: 'agentv:agent-rules',
+        hook: 'beforeEach',
+        skills: ['rules/skills'],
+      },
+    ]);
+  });
+
+  it('runs lifecycle file hooks and exposes staged agent-rules paths to providers and results', async () => {
+    const dir = await mkdtemp(path.join(tmpdir(), 'agentv-extensions-run-'));
+    tempDirs.push(dir);
+    await mkdir(path.join(dir, 'template'), { recursive: true });
+    await mkdir(path.join(dir, 'rules', 'skills', 'csv'), { recursive: true });
+    await writeFile(path.join(dir, 'rules', 'skills', 'csv', 'SKILL.md'), '# CSV\n', 'utf8');
+    await writeFile(path.join(dir, 'rules', 'AGENTS.md'), '# Rules\n', 'utf8');
+    await writeFile(
+      path.join(dir, 'hooks.mjs'),
+      `import { appendFileSync } from 'node:fs';
+import path from 'node:path';
+
+function log(context, name) {
+  appendFileSync(path.join(context.eval_dir, 'lifecycle.log'), name + ':' + Boolean(context.workspace_path) + '\\n');
+}
+
+export function beforeAll(context) {
+  log(context, 'beforeAll');
+  return { provider_context: { custom_flag: 'beforeAll' }, output: 'beforeAll output' };
+}
+
+export function beforeEach(context) {
+  log(context, 'beforeEach');
+  return { provider_context: { case_id: context.test_id }, output: 'beforeEach output' };
+}
+
+export function afterEach(context) {
+  log(context, 'afterEach');
+  return { output: 'afterEach output' };
+}
+
+export function afterAll(context) {
+  log(context, 'afterAll');
+  return { output: 'afterAll output' };
+}
+`,
+      'utf8',
+    );
+    await writeFile(
+      path.join(dir, 'suite.eval.yaml'),
+      `extensions:
+  - file://hooks.mjs:beforeAll
+  - file://hooks.mjs:beforeEach
+  - file://hooks.mjs:afterEach
+  - file://hooks.mjs:afterAll
+  - id: agentv:agent-rules
+    hook: beforeAll
+    skills: rules/skills
+    rules: rules/AGENTS.md
+workspace:
+  template: template
+tests:
+  - id: one
+    input: hello
+    criteria: works
+`,
+      'utf8',
+    );
+    const suite = await loadTestSuite(path.join(dir, 'suite.eval.yaml'), dir);
+    const provider = new CapturingProvider();
+    // Run the suite against the capturing provider with the always-pass evaluator.
+    const results = await runEvaluation({
+      testFilePath: path.join(dir, 'suite.eval.yaml'),
+      repoRoot: dir,
+      target,
+      providerFactory: () => provider,
+      evaluators: passEvaluators,
+      evalCases: suite.tests,
+      maxConcurrency: 1,
+    });
+    // Each hook must have logged once, in lifecycle order, with a workspace path present.
+    const log = await readFile(path.join(dir, 'lifecycle.log'), 'utf8');
+    expect(log.trim().split('\n')).toEqual([
+      'beforeAll:true',
+      'beforeEach:true',
+      'afterEach:true',
+      'afterAll:true',
+    ]);
+    expect(provider.lastRequest?.metadata?.custom_flag).toBe('beforeAll');
+    expect(provider.lastRequest?.metadata?.case_id).toBe('one');
+    // Cast keeps `undefined` so missing metadata fails the assertions below, not a TypeError.
+    const providerRules = provider.lastRequest?.metadata?.agent_rules_paths as
+      | { skills?: string[]; rules?: string[] }
+      | undefined;
+    expect(providerRules?.skills?.[0]).toContain(path.join('.agentv', 'agent-rules', 'skills'));
+    expect(providerRules?.rules?.[0]).toContain(path.join('.agentv', 'agent-rules', 'rules'));
+    expect(results[0].metadata?.agent_rules_paths).toEqual(
+      provider.lastRequest?.metadata?.agent_rules_paths,
+    );
+    expect(results[0].beforeAllOutput).toContain('beforeAll output');
+    expect(results[0].beforeEachOutput).toContain('beforeEach output');
+    expect(results[0].afterEachOutput).toContain('afterEach output');
+    expect(results[0].afterAllOutput).toContain('afterAll output');
+  });
+
+  it('runs afterEach extensions and preserves extension metadata for conversation cases', async () => {
+    const dir = await mkdtemp(path.join(tmpdir(), 'agentv-extensions-conversation-'));
+    tempDirs.push(dir);
+    await mkdir(path.join(dir, 'template'), { recursive: true });
+    await mkdir(path.join(dir, 'rules', 'skills', 'chat'), { recursive: true });
+    await writeFile(path.join(dir, 'rules', 'skills', 'chat', 'SKILL.md'), '# Chat\n', 'utf8');
+    await writeFile(
+      path.join(dir, 'hooks.mjs'),
+      `import { appendFileSync } from 'node:fs';
+import path from 'node:path';
+
+export function afterEach(context) {
+  appendFileSync(path.join(context.eval_dir, 'conversation.log'), context.test_id + ':' + Boolean(context.agent_rules_paths?.skills?.length) + '\\n');
+  return { output: 'conversation afterEach output' };
+}
+`,
+      'utf8',
+    );
+    await writeFile(
+      path.join(dir, 'suite.eval.yaml'),
+      `extensions:
+  - id: agentv:agent-rules
+    hook: beforeAll
+    skills: rules/skills
+  - file://hooks.mjs:afterEach
+workspace:
+  template: template
+tests:
+  - id: conversation
+    mode: conversation
+    input: "You are concise"
+    turns:
+      - input: hello
+`,
+      'utf8',
+    );
+    const suite = await loadTestSuite(path.join(dir, 'suite.eval.yaml'), dir);
+    const provider = new CapturingProvider();
+    // Run the single conversation-mode case through the capturing provider.
+    const results = await runEvaluation({
+      testFilePath: path.join(dir, 'suite.eval.yaml'),
+      repoRoot: dir,
+      target,
+      providerFactory: () => provider,
+      evaluators: passEvaluators,
+      evalCases: suite.tests,
+      maxConcurrency: 1,
+    });
+    // The afterEach hook logs `<test_id>:<has staged skills>`; expect one line for this case.
+    expect((await readFile(path.join(dir, 'conversation.log'), 'utf8')).trim()).toBe(
+      'conversation:true',
+    );
+    expect(results[0].metadata?.agent_rules_paths).toEqual(
+      provider.lastRequest?.metadata?.agent_rules_paths,
+    );
+    expect(results[0].afterEachOutput).toContain('conversation afterEach output');
+  });
+
+  it('scopes pooled beforeAll extension state to the selected workspace slot', async () => {
+    const dir = await mkdtemp(path.join(tmpdir(), 'agentv-extensions-pool-'));
+    tempDirs.push(dir);
+    const previousDataDir = process.env.AGENTV_DATA_DIR;
+    process.env.AGENTV_DATA_DIR = path.join(dir, 'agentv-data');
+    try {
+      const sourceRepo = path.join(dir, 'source-repo');
+      const commit = createTestRepo(sourceRepo, { 'README.md': 'base\n' });
+      await mkdir(path.join(dir, 'rules', 'skills', 'slot'), { recursive: true });
+      await writeFile(path.join(dir, 'rules', 'skills', 'slot', 'SKILL.md'), '# Slot\n', 'utf8');
+      await writeFile(
+        path.join(dir, 'suite.eval.yaml'),
+        `extensions:
+  - id: agentv:agent-rules
+    hook: beforeAll
+    skills: rules/skills
+workspace:
+  repos:
+    - path: ./repo-a
+      repo: file://${sourceRepo}
+      commit: ${commit}
+tests:
+  - id: one
+    input: one
+    criteria: works
+  - id: two
+    input: two
+    criteria: works
+`,
+        'utf8',
+      );
+      const suite = await loadTestSuite(path.join(dir, 'suite.eval.yaml'), dir);
+      const requests: ProviderRequest[] = [];
+      const provider = new CapturingProvider((request) => {
+        requests.push(request);
+      });
+      await runEvaluation({
+        testFilePath: path.join(dir, 'suite.eval.yaml'),
+        repoRoot: dir,
+        target,
+        providerFactory: () => provider,
+        evaluators: passEvaluators,
+        evalCases: suite.tests,
+        maxConcurrency: 2,
+        workspaceMode: 'pooled',
+        poolMaxSlots: 2,
+      });
+      // Both cases must use distinct cwds, each with one skills path staged under that cwd.
+      expect(requests).toHaveLength(2);
+      const workspacePaths = new Set(requests.map((request) => request.cwd));
+      expect(workspacePaths.size).toBe(2);
+      for (const request of requests) {
+        expect(request.cwd).toBeDefined();
+        const rules = request.metadata?.agent_rules_paths as { skills?: string[] } | undefined;
+        expect(rules?.skills?.length).toBe(1);
+        expect(rules?.skills?.[0]).toContain(
+          path.join(request.cwd ?? '', '.agentv', 'agent-rules', 'skills'),
+        );
+      }
+    } finally {
+      // Node stringifies env assignments (undefined becomes 'undefined'); remove the key instead.
+      if (previousDataDir === undefined) {
+        Reflect.deleteProperty(process.env, 'AGENTV_DATA_DIR');
+      } else {
+        process.env.AGENTV_DATA_DIR = previousDataDir;
+      }
+    }
+  }, 30_000);
+
+  it('refreshes the baseline after beforeEach extensions mutate files without state', async () => {
+    const dir = await mkdtemp(path.join(tmpdir(), 'agentv-extensions-baseline-'));
+    tempDirs.push(dir);
+    await mkdir(path.join(dir, 'template'), { recursive: true });
+    await writeFile(
+      path.join(dir, 'hooks.mjs'),
+      `import { writeFileSync } from 'node:fs';
+import path from 'node:path';
+
+export function beforeEach(context) {
+  writeFileSync(path.join(context.workspace_path, 'setup.txt'), 'setup from extension\\n');
+}
+`,
+      'utf8',
+    );
+    await writeFile(
+      path.join(dir, 'suite.eval.yaml'),
+      `extensions:
+  - file://hooks.mjs:beforeEach
+workspace:
+  template: template
+tests:
+  - id: one
+    input: hello
+    criteria: works
+`,
+      'utf8',
+    );
+    const suite = await loadTestSuite(path.join(dir, 'suite.eval.yaml'), dir);
+    const provider = new CapturingProvider((request) => {
+      if (!request.cwd) {
+        throw new Error('cwd was not provided');
+      }
+      writeFileSync(path.join(request.cwd, 'agent.txt'), 'agent output\n', 'utf8');
+    });
+    // setup.txt comes from the beforeEach hook; agent.txt from the provider callback above.
+    const results = await runEvaluation({
+      testFilePath: path.join(dir, 'suite.eval.yaml'),
+      repoRoot: dir,
+      target,
+      providerFactory: () => provider,
+      evaluators: passEvaluators,
+      evalCases: suite.tests,
+      maxConcurrency: 1,
+    });
+    // Only the provider's file may be reported as a change; the hook's file must not be.
+    expect(results[0].fileChanges).toContain('agent.txt');
+    expect(results[0].fileChanges).not.toContain('setup.txt');
+  });
+
+  it('rejects removed on_run_complete in favor of afterAll extensions', async () => {
+    const dir = await mkdtemp(path.join(tmpdir(), 'agentv-extensions-removed-'));
+    tempDirs.push(dir);
+    await writeFile(
+      path.join(dir, 'suite.eval.yaml'),
+      `on_run_complete: ./done.sh
+tests:
+  - id: one
+    input: hello
+    criteria: works
+`,
+      'utf8',
+    );
+    // Loading must reject with an error naming on_run_complete and the afterAll replacement.
+    await expect(loadTestSuite(path.join(dir, 'suite.eval.yaml'), dir)).rejects.toThrow(
+      /on_run_complete.*extensions with afterAll/,
+    );
+  });
+});
diff --git a/skills-data/agentv-eval-writer/references/eval.schema.json b/skills-data/agentv-eval-writer/references/eval.schema.json
index d044864f7..f769ebb89 100644
--- a/skills-data/agentv-eval-writer/references/eval.schema.json
+++ b/skills-data/agentv-eval-writer/references/eval.schema.json
@@ -18673,28 +18673,164 @@
                 "type": "string",
                 "minLength": 1
               },
+              {
+                "anyOf": [
+                  {
+                    "type": "string",
+                    "const": "agentv:agent-rules"
+                  },
+                  {
+                    "type": "string",
+                    "pattern": "^agentv\\:agent\\-rules\\:"
+                  }
+                ]
+              },
               {
                 "type": "object",
-                "properties": {},
-                "additionalProperties": {}
+                "properties": {
+                  "id": {
+                    "type": "string",
+                    "const": "agentv:agent-rules"
+                  },
+                  "hook": {
+                    "type": "string",
+                    "enum": ["beforeAll", "beforeEach", "afterEach", "afterAll"]
+                  },
+                  "skills": {
+                    "anyOf": [
+                      {
+                        "type": "string",
+                        "minLength": 1
+                      },
+                      {
+                        "type": "array",
+                        "items": {
+                          "type": "string",
+                          "minLength": 1
+                        }
+                      }
+                    ]
+                  },
+                  "hooks": {
+                    "anyOf": [
+                      {
+                        "type": "string",
+                        "minLength": 1
+                      },
+                      {
+                        "type": "array",
+                        "items": {
+                          "type": "string",
+                          "minLength": 1
+                        }
+                      }
+                    ]
+                  },
+                  "agents": {
+                    "anyOf": [
+                      {
+                        "type": "string",
+                        "minLength": 1
+                      },
+                      {
+                        "type": "array",
+                        "items": {
+                          "type": "string",
+                          "minLength": 1
+                        }
+                      }
+                    ]
+                  },
+                  "rules": {
+                    "anyOf": [
+                      {
+                        "type": "string",
+                        "minLength": 1
+                      },
+                      {
+                        "type": "array",
+                        "items": {
+                          "type": "string",
+                          "minLength": 1
+                        }
+                      }
+                    ]
+                  },
+                  "config": {
+                    "type": "object",
+                    "properties": {
+                      "skills": {
+                        "anyOf": [
+                          {
+                            "type": "string",
+                            "minLength": 1
+                          },
+                          {
+                            "type": "array",
+                            "items": {
+                              "type": "string",
+                              "minLength": 1
+                            }
+                          }
+                        ]
+                      },
+                      "hooks": {
+                        "anyOf": [
+                          {
+                            "type": "string",
+                            "minLength": 1
+                          },
+                          {
+                            "type": "array",
+                            "items": {
+                              "type": "string",
+                              "minLength": 1
+                            }
+                          }
+                        ]
+                      },
+                      "agents": {
+                        "anyOf": [
+                          {
+                            "type": "string",
+                            "minLength": 1
+                          },
+                          {
+                            "type": "array",
+                            "items": {
+                              "type": "string",
+                              "minLength": 1
+                            }
+                          }
+                        ]
+                      },
+                      "rules": {
+                        "anyOf": [
+                          {
+                            "type": "string",
+                            "minLength": 1
+                          },
+                          {
+                            "type": "array",
+                            "items": {
+                              "type": "string",
+                              "minLength": 1
+                            }
+                          }
+                        ]
+                      }
+                    },
+                    "additionalProperties": false
+                  }
+                },
+                "required": ["id"],
+                "additionalProperties": false
               }
             ]
           }
         },
         "on_run_complete": {
-          "anyOf": [
-            {
-              "type": "string",
-              "minLength": 1
-            },
-            {
-              "type": "array",
-              "items": {
-                "type": "string",
-                "minLength": 1
-              }
-            }
-          ]
+          "not": {}
         },
         "policy": {
           "not": {}