From 61bcec47aa4335aab577e9e21b2969b8c1b3ce64 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 11:51:22 +0200
Subject: [PATCH 1/3] feat(core): adopt nunjucks eval templating

---
 .../docs/docs/evaluation/eval-files.mdx       |  22 +-
 bun.lock                                      |  20 +-
 examples/features/README.md                   |   4 +-
 examples/features/env-interpolation/README.md |   6 +-
 .../env-interpolation/evals/dataset.eval.yaml |   8 +-
 .../features/test-vars-templating/README.md   |   6 +-
 .../evals/dataset.eval.yaml                   |  18 +-
 packages/core/package.json                    |   2 +
 packages/core/src/evaluation/interpolation.ts | 110 ++-
 .../src/evaluation/loaders/grader-parser.ts   |   2 +
 .../evaluation/loaders/shorthand-expansion.ts |  22 +
 packages/core/src/evaluation/types.ts         |   8 +-
 packages/core/src/evaluation/yaml-parser.ts   | 661 ++++++++++--------
 .../interpolation-integration.test.ts         |  62 +-
 .../test/evaluation/interpolation.test.ts     | 100 ++-
 .../test/evaluation/suite-level-input.test.ts | 157 ++++-
 .../evaluation/workspace/deps-scanner.test.ts |   2 +-
 packages/core/test/projects.test.ts           |   4 +-
 18 files changed, 787 insertions(+), 427 deletions(-)

diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 9dc940b97..63d6655ab 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -438,8 +438,8 @@ tests:
 # workspace.yaml — works on any machine
 repos:
   - path: ./my-repo
-    repo: "${{ MY_REPO_URL }}"
-    commit: "${{ MY_REPO_COMMIT }}"
+    repo: "{{ env.MY_REPO_URL }}"
+    commit: "{{ env.MY_REPO_COMMIT }}"
 ```
 
 ```bash
@@ -450,31 +450,31 @@ MY_REPO_COMMIT=main
 
 ## Per-Test Template Variables
 
-Eval YAML also supports per-test `vars` for data-driven prompt templates. Use `{{name}}` placeholders in test-facing text fields, and AgentV resolves them when the suite loads.
+Eval YAML also supports per-test `vars` for data-driven prompt templates. Use `{{ vars.name }}` placeholders in test-facing text fields, and AgentV resolves them when the suite loads.
 
 ```yaml
-input: "Answer clearly: {{question}}"
+input: "Answer clearly: {{ vars.question }}"
 
 tests:
   - id: capital
     vars:
       question: What is the capital of France?
       expected_answer: Paris
-    criteria: "Answers {{question}} correctly"
+    criteria: "Answers {{ vars.question }} correctly"
     input:
       - role: user
-        content: "Question: {{question}}"
-    expected_output: "{{expected_answer}}"
+        content: "Question: {{ vars.question }}"
+    expected_output: "{{ vars.expected_answer }}"
 ```
 
 ### Behavior
 
 - `vars` is defined per test as an object
-- `{{name}}` and dotted paths like `{{ user.name }}` are supported
-- Substitution applies to suite-level `input`, test `input`, `input_files`, `criteria`, `expected_output`, and conversation turn `input` / `expected_output`
+- `{{ vars.name }}` and dotted paths like `{{ vars.user.name }}` are supported; top-level names such as `{{ name }}` resolve to the same values
+- Substitution applies to suite-level `input`, test `input`, `input_files`, `criteria`, `expected_output`, assertion values/metrics, and conversation turn `input` / `expected_output` / assertions
 - When the whole string is a single placeholder, the original JSON value is preserved
-- Missing variables are left unchanged, so unrelated template syntax is not silently blanked out
-- `vars` interpolation is separate from environment interpolation: `{{question}}` uses test data, `${{ PROJECT_NAME }}` uses environment variables
+- Missing variables render as empty strings following Nunjucks semantics
+- `vars` interpolation is separate from environment interpolation: `{{ vars.question }}` uses test data, `{{ env.PROJECT_NAME }}` uses environment variables
 
 ## JSONL Format
 
diff --git a/bun.lock b/bun.lock
index bd6bd8963..d6fbdd77e 100644
--- a/bun.lock
+++ b/bun.lock
@@ -19,7 +19,7 @@
     },
     "apps/cli": {
       "name": "agentv",
-      "version": "4.42.4",
+      "version": "5.0.0-next.1",
       "bin": {
         "agentv": "./dist/cli.js",
       },
@@ -85,7 +85,7 @@
     },
     "packages/core": {
       "name": "@agentv/core",
-      "version": "4.42.4",
+      "version": "5.0.0-next.1",
       "dependencies": {
         "@agentclientprotocol/sdk": "^0.14.1",
         "@earendil-works/pi-ai": "^0.74.0",
@@ -94,11 +94,13 @@
         "fast-glob": "^3.3.3",
         "json5": "^2.2.3",
         "micromatch": "^4.0.8",
+        "nunjucks": "^3.2.4",
         "yaml": "^2.8.3",
         "zod": "^3.23.8",
       },
       "devDependencies": {
         "@types/micromatch": "^4.0.10",
+        "@types/nunjucks": "^3.2.6",
         "zod-to-json-schema": "^3.25.1",
       },
       "optionalDependencies": {
@@ -120,7 +122,7 @@
     },
     "packages/sdk": {
       "name": "@agentv/sdk",
-      "version": "4.42.4",
+      "version": "5.0.0-next.1",
       "dependencies": {
         "@agentv/core": "workspace:*",
         "yaml": "^2.8.3",
@@ -837,6 +839,8 @@
 
     "@types/node": ["@types/node@24.1.0", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-ut5FthK5moxFKH2T1CUOC6ctR67rQRvvHdFLCD2Ql6KXmMuCrjsSsRI9UsLCm9M18BMwClv4pn327UvB7eeO1w=="],
 
+    "@types/nunjucks": ["@types/nunjucks@3.2.6", "", {}, "sha512-pHiGtf83na1nCzliuAdq8GowYiXvH5l931xZ0YEHaLMNFgynpEqx+IPStlu7UaDkehfvl01e4x/9Tpwhy7Ue3w=="],
+
     "@types/react": ["@types/react@19.2.14", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w=="],
 
     "@types/react-dom": ["@types/react-dom@19.2.3", "", { "peerDependencies": { "@types/react": "^19.2.0" } }, "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ=="],
@@ -857,6 +861,8 @@
 
     "@vitejs/plugin-react": ["@vitejs/plugin-react@4.7.0", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-beta.27", "@types/babel__core": "^7.20.5", "react-refresh": "^0.17.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA=="],
 
+    "a-sync-waterfall": ["a-sync-waterfall@1.0.1", "", {}, "sha512-RYTOHHdWipFUliRFMCS4X2Yn2X8M87V/OpSqWzKKOGhzqyUxzyVmhHDH9sAvG+ZuQf/TAOFsLCpMw09I1ufUnA=="],
+
     "acorn": ["acorn@8.15.0", "", { "bin": { "acorn": "bin/acorn" } }, "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg=="],
 
     "acorn-jsx": ["acorn-jsx@5.3.2", "", { "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ=="],
@@ -885,6 +891,8 @@
 
     "array-iterate": ["array-iterate@2.0.1", "", {}, "sha512-I1jXZMjAgCMmxT4qxXfPXa6SthSoE8h6gkSI9BGGNv8mP8G/v0blc+qFnZu6K42vTOiuME596QaLO0TP3Lk0xg=="],
 
+    "asap": ["asap@2.0.6", "", {}, "sha512-BSHWgDSAiKs50o2Re8ppvp3seVHXSRM44cdSsT9FfNEUUZLOGWVCsiWaRPWM1Znn+mqZ1OfVZ3z3DWEzSp7hRA=="],
+
     "ast-types": ["ast-types@0.16.1", "", { "dependencies": { "tslib": "^2.0.1" } }, "sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg=="],
 
     "astring": ["astring@1.9.0", "", { "bin": { "astring": "bin/astring" } }, "sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg=="],
@@ -969,7 +977,7 @@
 
     "comma-separated-tokens": ["comma-separated-tokens@2.0.3", "", {}, "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg=="],
 
-    "commander": ["commander@4.1.1", "", {}, "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA=="],
+    "commander": ["commander@5.1.0", "", {}, "sha512-P0CysNDQ7rtVw4QIQtm+MRxV66vKFSvlsQvGYXZWR3qFU0jlMKHZZZgw8e+8DSah4UDKMqnknRDQz+xuQXQ/Zg=="],
 
     "common-ancestor-path": ["common-ancestor-path@1.0.1", "", {}, "sha512-L3sHRo1pXXEqX8VU28kfgUY+YGsk09hPqZiZmLacNib6XNTCM8ubYeT7ryXQw8asB1sKgcU5lkB7ONug08aB8w=="],
 
@@ -1505,6 +1513,8 @@
 
     "nth-check": ["nth-check@2.1.1", "", { "dependencies": { "boolbase": "^1.0.0" } }, "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w=="],
 
+    "nunjucks": ["nunjucks@3.2.4", "", { "dependencies": { "a-sync-waterfall": "^1.0.0", "asap": "^2.0.3", "commander": "^5.1.0" }, "peerDependencies": { "chokidar": "^3.3.0" }, "optionalPeers": ["chokidar"], "bin": { "nunjucks-precompile": "bin/precompile" } }, "sha512-26XRV6BhkgK0VOxfbU5cQI+ICFUtMLixv1noZn1tGU38kQH5A5nmmbk/O45xdyBhD1esk47nKrY0mvQpZIhRjQ=="],
+
     "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="],
 
     "ofetch": ["ofetch@1.5.1", "", { "dependencies": { "destr": "^2.0.5", "node-fetch-native": "^1.6.7", "ufo": "^1.6.1" } }, "sha512-2W4oUZlVaqAPAil6FUg/difl6YhqhUR7x2eZY4bQCko22UXg3hptq9KLQdqFClV+Wu85UX7hNtdGTngi/1BxcA=="],
@@ -1987,6 +1997,8 @@
 
     "sitemap/@types/node": ["@types/node@17.0.45", "", {}, "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw=="],
 
+    "sucrase/commander": ["commander@4.1.1", "", {}, "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA=="],
+
     "svgo/commander": ["commander@11.1.0", "", {}, "sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ=="],
 
     "tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
diff --git a/examples/features/README.md b/examples/features/README.md
index d8ad0867b..40153f696 100644
--- a/examples/features/README.md
+++ b/examples/features/README.md
@@ -72,8 +72,8 @@ Focused examples for specific AgentV capabilities. Find your use case below, the
 | [input-files-shorthand](input-files-shorthand/) | Attach files to every test using a compact shorthand |
 | [suite-level-input](suite-level-input/) | Prepend a shared system prompt to every test in the suite |
 | [suite-level-input-files](suite-level-input-files/) | Share file attachments across every test in the suite |
-| [env-interpolation](env-interpolation/) | Inject environment variables into eval config with `${{ VAR }}` |
-| [test-vars-templating](test-vars-templating/) | Inject per-test `vars` into `{{name}}` templates in eval fields |
+| [env-interpolation](env-interpolation/) | Inject environment variables into eval config with `{{ env.VAR }}` |
+| [test-vars-templating](test-vars-templating/) | Inject per-test `vars` into `{{ vars.name }}` templates in eval fields |
 
 ---
 
diff --git a/examples/features/env-interpolation/README.md b/examples/features/env-interpolation/README.md
index 920af7c8f..eb4c7725f 100644
--- a/examples/features/env-interpolation/README.md
+++ b/examples/features/env-interpolation/README.md
@@ -1,6 +1,6 @@
 # Environment Variable Interpolation
 
-Demonstrates `${{ VAR }}` syntax for portable eval configs.
+Demonstrates `{{ env.VAR }}` syntax for portable eval configs.
 
 ## Usage
 
@@ -14,7 +14,7 @@ Or create a `.env` file — AgentV loads `.env` files automatically from the dir
 
 ## Features
 
-- **Full-value**: `criteria: "${{ EVAL_CRITERIA }}"` — entire field from env var
-- **Partial/inline**: `"must be ${{ EXPECTED }} and clear"` — env var within a string
+- **Full-value**: `criteria: "{{ env.EVAL_CRITERIA }}"` — entire field from env var
+- **Partial/inline**: `"must be {{ env.EXPECTED }} and clear"` — env var within a string
 - **Missing vars**: resolve to empty string (downstream validation catches required blanks)
 - **All fields**: works in any string field — criteria, input, workspace paths, etc.
diff --git a/examples/features/env-interpolation/evals/dataset.eval.yaml b/examples/features/env-interpolation/evals/dataset.eval.yaml
index e40ff22fe..613f7bfd9 100644
--- a/examples/features/env-interpolation/evals/dataset.eval.yaml
+++ b/examples/features/env-interpolation/evals/dataset.eval.yaml
@@ -1,6 +1,6 @@
 # Environment Variable Interpolation Example
 #
-# All string fields support ${{ VAR }} syntax for env variable interpolation.
+# Config-load fields support {{ env.VAR }} syntax for env variable interpolation.
 # Missing variables resolve to empty string.
 #
 # Usage:
@@ -10,7 +10,7 @@
 # Or use a .env file in the project root:
 #   CUSTOM_SYSTEM_PROMPT=You are a helpful assistant who always greets warmly.
 
-description: Demonstrates ${{ VAR }} interpolation in eval fields
+description: Demonstrates env variable interpolation in eval fields
 
 target: llm
 
@@ -19,13 +19,13 @@ tests:
   - id: full-value
     criteria: Responds with a friendly greeting
     input: "Hello!"
-    expected_output: "${{ EXPECTED_GREETING }}"
+    expected_output: "{{ env.EXPECTED_GREETING }}"
 
   # Partial/inline interpolation: env var embedded in a larger string
   - id: partial-value
     criteria: Response uses the system prompt persona
     input:
       - role: system
-        content: "${{ CUSTOM_SYSTEM_PROMPT }}"
+        content: "{{ env.CUSTOM_SYSTEM_PROMPT }}"
       - role: user
         content: "Hi there!"
diff --git a/examples/features/test-vars-templating/README.md b/examples/features/test-vars-templating/README.md
index 44a29636f..c41ddb618 100644
--- a/examples/features/test-vars-templating/README.md
+++ b/examples/features/test-vars-templating/README.md
@@ -1,6 +1,6 @@
 # Per-Test Vars Templating
 
-Demonstrates `tests[].vars` with `{{name}}` placeholders in eval files.
+Demonstrates `tests[].vars` with `{{ vars.name }}` placeholders in eval files.
 
 ## Usage
 
@@ -11,6 +11,6 @@ agentv eval examples/features/test-vars-templating/evals/dataset.eval.yaml
 ## Features
 
 - **Per-test data**: each test defines its own `vars` object
-- **Template substitution**: `{{question}}` and dotted paths like `{{expected.answer}}`
+- **Template substitution**: `{{ vars.question }}` and dotted paths like `{{ vars.expected.answer }}`
 - **Suite-level templates**: shared `input` can reference per-test vars too
-- **Separate from env interpolation**: `{{question}}` uses test data, `${{ VAR }}` uses environment variables
+- **Separate from env interpolation**: `{{ vars.question }}` uses test data, `{{ env.VAR }}` uses environment variables
diff --git a/examples/features/test-vars-templating/evals/dataset.eval.yaml b/examples/features/test-vars-templating/evals/dataset.eval.yaml
index da567890f..ae21ad1f5 100644
--- a/examples/features/test-vars-templating/evals/dataset.eval.yaml
+++ b/examples/features/test-vars-templating/evals/dataset.eval.yaml
@@ -1,7 +1,7 @@
 # Per-test vars templating example
 #
-# tests[].vars provides per-test data for {{name}} placeholders in eval fields.
-# Placeholders support dotted paths like {{expected.answer}}.
+# tests[].vars provides per-test data for {{ vars.name }} placeholders in eval fields.
+# Placeholders support dotted paths like {{ vars.expected.answer }}.
 #
 # Usage:
 #   agentv eval examples/features/test-vars-templating/evals/dataset.eval.yaml
@@ -12,7 +12,7 @@ target: llm
 
 input:
   - role: system
-    content: "You are a concise assistant answering {{category}} questions."
+    content: "You are a concise assistant answering {{ vars.category }} questions."
 
 tests:
   - id: capital-france
@@ -21,9 +21,9 @@ tests:
       question: What is the capital of France?
       expected:
         answer: Paris
-    criteria: "Answers {{question}} correctly"
-    input: "Question: {{question}}"
-    expected_output: "{{expected.answer}}"
+    criteria: "Answers {{ vars.question }} correctly"
+    input: "Question: {{ vars.question }}"
+    expected_output: "{{ vars.expected.answer }}"
 
   - id: greet-ada
     vars:
@@ -32,8 +32,8 @@ tests:
         name: Ada
       expected:
         answer: Hello, Ada!
-    criteria: "Greets {{person.name}} warmly"
+    criteria: "Greets {{ vars.person.name }} warmly"
     input:
       - role: user
-        content: "Say hello to {{person.name}}."
-    expected_output: "{{expected.answer}}"
+        content: "Say hello to {{ vars.person.name }}."
+    expected_output: "{{ vars.expected.answer }}"
diff --git a/packages/core/package.json b/packages/core/package.json
index b7804a364..6e733ff2b 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -47,6 +47,7 @@
     "fast-glob": "^3.3.3",
     "json5": "^2.2.3",
     "micromatch": "^4.0.8",
+    "nunjucks": "^3.2.4",
     "yaml": "^2.8.3",
     "zod": "^3.23.8"
   },
@@ -72,6 +73,7 @@
   },
   "devDependencies": {
     "@types/micromatch": "^4.0.10",
+    "@types/nunjucks": "^3.2.6",
     "zod-to-json-schema": "^3.25.1"
   }
 }
diff --git a/packages/core/src/evaluation/interpolation.ts b/packages/core/src/evaluation/interpolation.ts
index e8b396224..1ab64a496 100644
--- a/packages/core/src/evaluation/interpolation.ts
+++ b/packages/core/src/evaluation/interpolation.ts
@@ -1,14 +1,12 @@
+import nunjucks from 'nunjucks';
 import type { EnvLookup } from './providers/types.js';
 
-const ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
-const TEMPLATE_VAR_PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}/g;
-const WHOLE_TEMPLATE_VAR_PATTERN = /^\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}$/;
+export type NunjucksFilterMap = Readonly<Record<string, (...args: unknown[]) => unknown>>;
 
-/**
- * Regex that matches a string consisting of exactly one `${{ VAR }}` reference
- * and nothing else. Used to detect whole-value substitutions eligible for type coercion.
- */
-const WHOLE_VAR_PATTERN = /^\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}$/;
+const WHOLE_SIMPLE_TEMPLATE_VAR_PATTERN =
+  /^\s*\{\{\s*([A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)*)\s*\}\}\s*$/;
+const ENV_OUTPUT_PATTERN = /\{\{\s*env\.[\s\S]*?\}\}/g;
+const WHOLE_ENV_OUTPUT_PATTERN = /^\s*\{\{\s*env\.(?:(?!\}\})[\s\S])*\}\}\s*$/; // tag body may not contain `}}`, so only a single tag matches
 
 /**
  * Pattern matching plain integers (e.g. "42", "-7") and decimal fractions
@@ -20,9 +18,6 @@ const PLAIN_NUMBER_PATTERN = /^-?(?:0|[1-9]\d*)(?:\.\d+)?$/;
 /**
  * Coerce a resolved string to its native primitive type when appropriate.
  * "true"/"false" become booleans; plain integer/decimal strings become numbers.
- * Strings that happen to be valid JS numbers but are not plain decimal notation
- * (hex, scientific notation, "Infinity") are left as strings.
- * All other strings (including empty string) are returned as-is.
  */
 function coercePrimitive(value: string): unknown {
   if (value === 'true') return true;
@@ -49,53 +44,52 @@ function cloneTemplateValue(value: unknown): unknown {
   return value;
 }
 
-function stringifyTemplateValue(value: unknown): string {
-  if (typeof value === 'string') return value;
-  return JSON.stringify(value);
+function createNunjucksEnvironment(filters?: NunjucksFilterMap): nunjucks.Environment {
+  const environment = new nunjucks.Environment(undefined, {
+    autoescape: false, // no HTML escaping of rendered values
+    throwOnUndefined: false, // undefined lookups render as empty output instead of throwing
+  });
+  environment.addFilter('load', (value: string) => JSON.parse(value) as unknown);
+  for (const [name, filter] of Object.entries(filters ?? {})) {
+    environment.addFilter(name, filter); // added after `load`, so a caller filter named `load` replaces it
+  }
+  return environment;
 }
 
-function lookupTemplateVar(
-  vars: Readonly<Record<string, unknown>>,
-  expression: string,
-): unknown | undefined {
-  if (!expression) return undefined;
+function lookupPath(context: Readonly<Record<string, unknown>>, expression: string): unknown {
   return expression.split('.').reduce<unknown>((current, segment) => {
     if (!isPlainObject(current)) {
       return undefined;
     }
     return current[segment];
-  }, vars);
+  }, context);
+}
+
+function renderString(
+  template: string,
+  context: Readonly<Record<string, unknown>>,
+  filters?: NunjucksFilterMap,
+): string {
+  return createNunjucksEnvironment(filters).renderString(template, context); // NOTE(review): builds a new Environment on every call
+}
+
+function renderEnvString(template: string, env: EnvLookup): string {
+  if (template.includes('${{')) {
+    return template; // a `${{` anywhere skips rendering of the whole string — NOTE(review): this also leaves any `{{ env.* }}` tag in it unrendered
+  }
+  return template.replace(ENV_OUTPUT_PATTERN, (match) => renderString(match, { env }));
 }
 
 /**
- * Recursively interpolate `${{ VAR }}` references in all string values.
- * Missing variables resolve to empty string.
- * Non-string values pass through unchanged. Returns a new object (no mutation).
- *
- * Type coercion: when the **entire** string value is a single `${{ VAR }}` reference
- * (no surrounding text), the resolved value is coerced to its native type —
- * `"true"`/`"false"` become booleans, numeric strings become numbers. This allows
- * boolean and numeric config fields to be driven by environment variables:
+ * Recursively render config-load `{{ env.VAR }}` templates in string values.
  *
- * ```yaml
- * # .agentv/config.yaml
- * results:
- *   export:
- *     auto_push: ${{ AGENTV_AUTO_PUSH }}   # AGENTV_AUTO_PUSH=true → boolean true
- * ```
- *
- * Inline/partial substitutions (e.g. `"prefix-${{ VAR }}"`) are always strings.
+ * A string that is exactly one env tag is coerced to boolean/number when it looks like one.
+ * Shell-style `$VAR`/`${VAR}` is intentionally not handled; it passes through for CLI targets.
  */
 export function interpolateEnv(value: unknown, env: EnvLookup): unknown {
   if (typeof value === 'string') {
-    // Whole-value substitution: coerce the resolved value to its native type.
-    const wholeMatch = WHOLE_VAR_PATTERN.exec(value);
-    if (wholeMatch) {
-      const resolved = env[wholeMatch[1] as string] ?? '';
-      return coercePrimitive(resolved);
-    }
-    // Partial/inline substitution: always produces a string.
-    return value.replace(ENV_VAR_PATTERN, (_, varName: string) => env[varName] ?? '');
+    const rendered = renderEnvString(value, env);
+    return WHOLE_ENV_OUTPUT_PATTERN.test(value) ? coercePrimitive(rendered) : rendered;
   }
   if (Array.isArray(value)) {
     return value.map((item) => interpolateEnv(item, env));
@@ -111,35 +105,37 @@ export function interpolateEnv(value: unknown, env: EnvLookup): unknown {
 }
 
 /**
- * Recursively interpolate `{{ var }}` references in string values using per-test vars.
- * Missing variables are left unchanged so unrelated template syntaxes remain intact.
- * When the whole string is a single variable reference, the original JSON value is preserved.
+ * Recursively render eval-time Nunjucks templates using per-test vars.
+ *
+ * The context exposes both promptfoo-style top-level vars (`{{ name }}`) and the
+ * explicit namespace (`{{ vars.name }}`). When the whole field is exactly a
+ * simple variable reference, the original JSON value is preserved.
  */
 export function interpolateTemplateVars(
   value: unknown,
   vars: Readonly<Record<string, unknown>>,
+  filters?: NunjucksFilterMap,
 ): unknown {
   if (typeof value === 'string') {
-    const wholeMatch = WHOLE_TEMPLATE_VAR_PATTERN.exec(value);
+    const context = { ...vars, vars };
+    const wholeMatch = WHOLE_SIMPLE_TEMPLATE_VAR_PATTERN.exec(value);
     if (wholeMatch) {
-      const resolved = lookupTemplateVar(vars, wholeMatch[1] as string);
-      return resolved === undefined ? value : cloneTemplateValue(resolved);
+      const resolved = lookupPath(context, wholeMatch[1] as string);
+      if (resolved !== undefined) {
+        return cloneTemplateValue(resolved);
+      }
     }
-
-    return value.replace(TEMPLATE_VAR_PATTERN, (match, expression: string) => {
-      const resolved = lookupTemplateVar(vars, expression);
-      return resolved === undefined ? match : stringifyTemplateValue(resolved);
-    });
+    return renderString(value, context, filters);
   }
 
   if (Array.isArray(value)) {
-    return value.map((item) => interpolateTemplateVars(item, vars));
+    return value.map((item) => interpolateTemplateVars(item, vars, filters));
   }
 
   if (isPlainObject(value)) {
     const result: Record<string, unknown> = {};
     for (const [key, nested] of Object.entries(value)) {
-      result[key] = interpolateTemplateVars(nested, vars);
+      result[key] = interpolateTemplateVars(nested, vars, filters);
     }
     return result;
   }
diff --git a/packages/core/src/evaluation/loaders/grader-parser.ts b/packages/core/src/evaluation/loaders/grader-parser.ts
index a2d74a4ea..49283f09f 100644
--- a/packages/core/src/evaluation/loaders/grader-parser.ts
+++ b/packages/core/src/evaluation/loaders/grader-parser.ts
@@ -470,6 +470,7 @@ async function parseGraderList(
     const name =
       rawName ??
       (isCustomType ? typeValue : generateAssertionName(typeValue as GraderKind, rawEvaluator));
+    const metric = asString(rawEvaluator.metric);
 
     if (!name) {
       logWarning(`Skipping evaluator with missing name in '${evalId}'`);
@@ -1327,6 +1328,7 @@ async function parseGraderList(
         name,
         type: 'contains',
         value,
+        ...(metric !== undefined ? { metric } : {}),
         ...(weight !== undefined ? { weight } : {}),
         ...(required !== undefined ? { required } : {}),
         ...(min_score !== undefined ? { min_score } : {}),
diff --git a/packages/core/src/evaluation/loaders/shorthand-expansion.ts b/packages/core/src/evaluation/loaders/shorthand-expansion.ts
index b6a784189..9ed2aee24 100644
--- a/packages/core/src/evaluation/loaders/shorthand-expansion.ts
+++ b/packages/core/src/evaluation/loaders/shorthand-expansion.ts
@@ -29,6 +29,10 @@ export function expandInputShorthand(value: JsonValue | undefined): TestMessage[
 
   // String shorthand: single user message
   if (typeof value === 'string') {
+    const parsedMessages = parseChatArrayPrompt(value);
+    if (parsedMessages) {
+      return parsedMessages;
+    }
     return [{ role: 'user', content: value }];
   }
 
@@ -50,6 +54,24 @@ export function expandInputShorthand(value: JsonValue | undefined): TestMessage[
   return undefined;
 }
 
+function parseChatArrayPrompt(value: string): TestMessage[] | undefined {
+  const trimmed = value.trim(); // ignore whitespace around the candidate JSON text
+  if (!trimmed.startsWith('[')) {
+    return undefined; // cannot be a JSON array, so skip parsing
+  }
+
+  try {
+    const parsed = JSON.parse(trimmed) as unknown; // used only if it is a non-empty array whose every element passes isTestMessage
+    if (!Array.isArray(parsed)) {
+      return undefined; // defensive: parsed JSON was not an array
+    }
+    const messages = parsed.filter((message): message is TestMessage => isTestMessage(message));
+    return messages.length === parsed.length && messages.length > 0 ? messages : undefined;
+  } catch {
+    return undefined; // JSON.parse threw: not valid JSON
+  }
+}
+
 /**
  * Expand the `expected_output` shorthand into a message array.
  *
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index 1964d1336..bdb4b8aac 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -869,7 +869,7 @@ export type InlineAssertEvaluatorConfig = {
   readonly negate?: boolean;
 };
 
-export type GraderConfig =
+export type GraderConfig = (
   | CodeGraderConfig
   | LlmGraderConfig
   | CompositeGraderConfig
@@ -892,7 +892,11 @@ export type GraderConfig =
   | IsJsonGraderConfig
   | EqualsGraderConfig
   | RubricsEvaluatorConfig
-  | InlineAssertEvaluatorConfig;
+  | InlineAssertEvaluatorConfig
+) & {
+  /** Optional promptfoo-style named score key. Scoring aggregation support is layered separately. */
+  readonly metric?: string;
+};
 
 /**
  * Source reference resolved while loading an eval definition.
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index a5a99ef18..890b28c69 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -1,5 +1,6 @@
 import { readFile, realpath, stat } from 'node:fs/promises';
 import path from 'node:path';
+import { pathToFileURL } from 'node:url';
 import fg from 'fast-glob';
 import micromatch from 'micromatch';
 import { stringify as stringifyYaml } from 'yaml';
@@ -11,7 +12,11 @@ import {
   normalizeExperimentRunOverride,
 } from './experiment.js';
 import { collectResolvedInputFilePaths } from './input-message-utils.js';
-import { interpolateEnv, interpolateTemplateVars } from './interpolation.js';
+import {
+  type NunjucksFilterMap,
+  interpolateEnv,
+  interpolateTemplateVars,
+} from './interpolation.js';
 import { loadTestsFromAgentSkills } from './loaders/agent-skills-parser.js';
 import {
   expandFileReferences,
@@ -194,6 +199,7 @@ type RawTestSuite = JsonObject & {
   readonly workspace?: JsonValue;
   readonly assertions?: JsonValue;
   readonly preprocessors?: JsonValue;
+  readonly nunjucks_filters?: JsonValue;
   readonly input?: JsonValue;
   readonly metadata?: JsonValue;
   readonly governance?: JsonValue;
@@ -254,16 +260,18 @@ function resolveTests(suite: RawTestSuite): JsonValue | undefined {
 function interpolateCaseField<T extends JsonValue | undefined>(
   value: T,
   vars: JsonObject | undefined,
+  filters?: NunjucksFilterMap,
 ): T {
   if (!vars || value === undefined) {
     return value;
   }
-  return interpolateTemplateVars(value, vars as Record<string, unknown>) as T;
+  return interpolateTemplateVars(value, vars as Record<string, unknown>, filters) as T;
 }
 
 function interpolateCaseTurns(
   turns: JsonValue | undefined,
   vars: JsonObject | undefined,
+  filters?: NunjucksFilterMap,
 ): JsonValue | undefined {
   if (!vars || !Array.isArray(turns)) {
     return turns;
@@ -276,34 +284,123 @@ function interpolateCaseTurns(
 
     return {
       ...rawTurn,
-      input: interpolateCaseField(rawTurn.input, vars),
-      expected_output: interpolateCaseField(rawTurn.expected_output, vars),
+      input: interpolateCaseField(rawTurn.input, vars, filters),
+      expected_output: interpolateCaseField(rawTurn.expected_output, vars, filters),
+      assertions: interpolateCaseField(rawTurn.assertions, vars, filters),
     } satisfies JsonObject;
   });
 }
 
-function interpolateRawEvalCase(raw: RawEvalCase, vars: JsonObject | undefined): RawEvalCase {
+function interpolateRawEvalCase(
+  raw: RawEvalCase,
+  vars: JsonObject | undefined,
+  filters?: NunjucksFilterMap,
+): RawEvalCase {
   if (!vars) {
     return raw;
   }
 
   return {
     ...raw,
-    ...(raw.criteria !== undefined ? { criteria: interpolateCaseField(raw.criteria, vars) } : {}),
+    ...(raw.id !== undefined ? { id: interpolateCaseField(raw.id, vars, filters) } : {}),
+    ...(raw.criteria !== undefined
+      ? { criteria: interpolateCaseField(raw.criteria, vars, filters) }
+      : {}),
     ...(raw.expected_outcome !== undefined
-      ? { expected_outcome: interpolateCaseField(raw.expected_outcome, vars) }
+      ? { expected_outcome: interpolateCaseField(raw.expected_outcome, vars, filters) }
       : {}),
-    ...(raw.input !== undefined ? { input: interpolateCaseField(raw.input, vars) } : {}),
+    ...(raw.input !== undefined ? { input: interpolateCaseField(raw.input, vars, filters) } : {}),
     ...(raw.input_files !== undefined
-      ? { input_files: interpolateCaseField(raw.input_files, vars) }
+      ? { input_files: interpolateCaseField(raw.input_files, vars, filters) }
       : {}),
     ...(raw.expected_output !== undefined
-      ? { expected_output: interpolateCaseField(raw.expected_output, vars) }
+      ? { expected_output: interpolateCaseField(raw.expected_output, vars, filters) }
+      : {}),
+    ...(raw.assertions !== undefined
+      ? { assertions: interpolateCaseField(raw.assertions, vars, filters) }
       : {}),
-    ...(raw.turns !== undefined ? { turns: interpolateCaseTurns(raw.turns, vars) } : {}),
+    ...(raw.evaluators !== undefined
+      ? { evaluators: interpolateCaseField(raw.evaluators, vars, filters) }
+      : {}),
+    ...(raw.rubrics !== undefined
+      ? { rubrics: interpolateCaseField(raw.rubrics, vars, filters) }
+      : {}),
+    ...(raw.turns !== undefined ? { turns: interpolateCaseTurns(raw.turns, vars, filters) } : {}),
   };
 }
 
+/** True when `value` is an array that is empty or starts with a string (expanded per element). */
+function shouldExpandVarValue(value: JsonValue): value is readonly JsonValue[] {
+  return Array.isArray(value) ? value.length === 0 || typeof value[0] === 'string' : false;
+}
+/**
+ * Fans a test case out over its array-valued vars.
+ *
+ * Each var accepted by shouldExpandVarValue contributes one candidate per element; every other
+ * var contributes itself. The result is one case per combination of candidates (so an empty
+ * array yields no cases). When nothing was expanded the original case is returned as-is.
+ */
+function expandArrayVarCases(raw: RawEvalCase): readonly RawEvalCase[] {
+  if (!isJsonObject(raw.vars)) {
+    return [raw];
+  }
+
+  let didExpand = false;
+  let product: Record<string, JsonValue>[] = [{}];
+  for (const [name, given] of Object.entries(raw.vars)) {
+    const options: readonly JsonValue[] = shouldExpandVarValue(given) ? given : [given];
+    // A one-element array still counts as expansion: its element replaces the array.
+    if (options.length !== 1 || options[0] !== given) {
+      didExpand = true;
+    }
+    product = product.flatMap((partial) =>
+      options.map((option) => ({ ...partial, [name]: option })),
+    );
+  }
+
+  return didExpand ? product.map((vars) => ({ ...raw, vars })) : [raw];
+}
+
+/**
+ * Loads suite-level nunjucks_filters (filter name -> module path, resolved against evalFileDir).
+ * Malformed entries warn and are skipped; a module exporting no usable function throws.
+ */
+async function loadNunjucksFilters(
+  rawFilters: JsonValue | undefined,
+  evalFileDir: string,
+): Promise<NunjucksFilterMap | undefined> {
+  if (rawFilters === undefined) {
+    return undefined;
+  }
+  if (!isJsonObject(rawFilters)) {
+    logWarning('Invalid nunjucks_filters: expected object mapping filter names to file paths');
+    return undefined;
+  }
+  const filters: Record<string, (...args: unknown[]) => unknown> = {};
+  for (const [name, rawFilterPath] of Object.entries(rawFilters)) {
+    if (typeof rawFilterPath !== 'string' || rawFilterPath.trim().length === 0) {
+      logWarning(`Skipping nunjucks filter '${name}': expected file path string`);
+      continue;
+    }
+    const filterPath = rawFilterPath.replace(/^file:\/\//, '');
+    const matches = await fg(path.resolve(evalFileDir, filterPath).replaceAll('\\', '/'), {
+      onlyFiles: true,
+      absolute: true,
+    });
+    const resolvedPath = matches.sort().at(-1) ?? path.resolve(evalFileDir, filterPath);
+    const imported = (await import(pathToFileURL(resolvedPath).href)) as Record<string, unknown>;
+    // Prefer whichever export is callable: CJS interop can expose a non-function default object.
+    const filter = [imported.default, imported[name]].find((fn) => typeof fn === 'function');
+    if (typeof filter !== 'function') {
+      throw new Error(
+        `Invalid nunjucks filter '${name}' at ${resolvedPath}: expected default export or named export '${name}' to be a function`,
+      );
+    }
+    filters[name] = filter as (...args: unknown[]) => unknown;
+  }
+  return Object.keys(filters).length > 0 ? filters : undefined;
+}
+
 /**
  * Read metadata from a test suite file (like target name).
  * This is a convenience function for CLI tools that need metadata without loading all tests.
@@ -528,6 +625,7 @@ async function loadTestsFromParsedYamlValue(
 
   const importedSuiteTests: EvalTest[] = [];
   const evalFileDir = path.dirname(absoluteTestPath);
+  const nunjucksFilters = await loadNunjucksFilters(suite.nunjucks_filters, evalFileDir);
   const parentWorkspace = parentWorkspaceLocation(suite);
   const importEntries = readImports(suite.imports);
   const expandedImports = await expandImportEntries({
@@ -578,301 +676,312 @@ async function loadTestsFromParsedYamlValue(
 
   const results: EvalTest[] = [];
 
-  for (const rawTestCase of expandedTestCases) {
-    if (!isJsonObject(rawTestCase)) {
-      logWarning('Skipping invalid test entry (expected object)');
-      continue;
-    }
+  for (const rawExpandedTestCase of expandedTestCases) {
+    const expandedVarCases = isJsonObject(rawExpandedTestCase)
+      ? expandArrayVarCases(rawExpandedTestCase as RawEvalCase)
+      : [rawExpandedTestCase];
 
-    const testCaseConfig = rawTestCase as RawEvalCase;
-    const id = asString(testCaseConfig.id);
+    for (const rawTestCase of expandedVarCases) {
+      if (!isJsonObject(rawTestCase)) {
+        logWarning('Skipping invalid test entry (expected object)');
+        continue;
+      }
 
-    // Skip tests that don't match the filter pattern (glob supported)
-    if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
-      continue;
-    }
+      const testCaseConfig = rawTestCase as RawEvalCase;
+      const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : undefined;
+      const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars, nunjucksFilters);
+      const id = asString(renderedCase.id);
 
-    const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : undefined;
-    const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars);
+      // Skip tests that don't match the filter pattern (glob supported)
+      if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
+        continue;
+      }
 
-    const conversationId = asString(renderedCase.conversation_id);
-    let outcome = asString(renderedCase.criteria);
-    if (!outcome && renderedCase.expected_outcome !== undefined) {
-      outcome = asString(renderedCase.expected_outcome);
-      if (outcome) {
-        logWarning(
-          `Test '${asString(renderedCase.id) ?? 'unknown'}': 'expected_outcome' is deprecated. Use 'criteria' instead.`,
-        );
+      const conversationId = asString(renderedCase.conversation_id);
+      let outcome = asString(renderedCase.criteria);
+      if (!outcome && renderedCase.expected_outcome !== undefined) {
+        outcome = asString(renderedCase.expected_outcome);
+        if (outcome) {
+          logWarning(
+            `Test '${asString(renderedCase.id) ?? 'unknown'}': 'expected_outcome' is deprecated. Use 'criteria' instead.`,
+          );
+        }
       }
-    }
 
-    // Extract per-case execution config early (reused below for skip_defaults)
-    const caseExecution = isJsonObject(renderedCase.execution) ? renderedCase.execution : undefined;
-    rejectUnsupportedTestExecutionFields(caseExecution, id);
-    if (caseExecution?.workspace !== undefined) {
-      throw new Error(
-        `test '${id ?? 'unknown'}'.execution.workspace has been removed from eval YAML. Put machine-local workspace_path/workspace_mode in .agentv/config.local.yaml under execution, or pass --workspace-path/--workspace-mode. Keep portable task setup in test workspace or suite workspace.`,
-      );
-    }
-    const skipDefaults = caseExecution?.skip_defaults === true;
-    const caseThreshold =
-      typeof caseExecution?.threshold === 'number' &&
-      (caseExecution.threshold as number) >= 0 &&
-      (caseExecution.threshold as number) <= 1
-        ? (caseExecution.threshold as number)
+      // Extract per-case execution config early (reused below for skip_defaults)
+      const caseExecution = isJsonObject(renderedCase.execution)
+        ? renderedCase.execution
         : undefined;
-    const caseRun = mergeRunOverrides(
-      caseThreshold !== undefined ? { threshold: caseThreshold } : undefined,
-      normalizeRunOverride(renderedCase.run, `test '${id ?? 'unknown'}'.run`),
-    );
+      rejectUnsupportedTestExecutionFields(caseExecution, id);
+      if (caseExecution?.workspace !== undefined) {
+        throw new Error(
+          `test '${id ?? 'unknown'}'.execution.workspace has been removed from eval YAML. Put machine-local workspace_path/workspace_mode in .agentv/config.local.yaml under execution, or pass --workspace-path/--workspace-mode. Keep portable task setup in test workspace or suite workspace.`,
+        );
+      }
+      const skipDefaults = caseExecution?.skip_defaults === true;
+      const caseThreshold =
+        typeof caseExecution?.threshold === 'number' &&
+        (caseExecution.threshold as number) >= 0 &&
+        (caseExecution.threshold as number) <= 1
+          ? (caseExecution.threshold as number)
+          : undefined;
+      const caseRun = mergeRunOverrides(
+        caseThreshold !== undefined ? { threshold: caseThreshold } : undefined,
+        normalizeRunOverride(renderedCase.run, `test '${id ?? 'unknown'}'.run`),
+      );
 
-    // Resolve input with shorthand support (pass suite-level input_files for merge)
-    const effectiveSuiteInputFiles =
-      rawSuiteInputFiles && !skipDefaults
-        ? interpolateCaseField(rawSuiteInputFiles, caseVars)
-        : undefined;
-    let inputCase = renderedCase;
-    let inputSuiteFiles = effectiveSuiteInputFiles;
-    if (renderedCase.input === undefined) {
-      const promptFallback = await loadPromptMdFallback({
-        evalFilePath: absoluteTestPath,
-        searchRoots,
-        testInputFiles: renderedCase.input_files,
-        suiteInputFiles: effectiveSuiteInputFiles,
-      });
-      if (promptFallback) {
-        if (promptFallback.inputFilesSource === 'test') {
-          const { input_files: _inputFiles, ...caseWithoutInputFiles } = renderedCase;
-          inputCase = {
-            ...caseWithoutInputFiles,
-            input: promptFallback.promptText,
-            ...(promptFallback.remainingInputFiles
-              ? { input_files: [...promptFallback.remainingInputFiles] }
-              : {}),
-          };
-          inputSuiteFiles = undefined;
-        } else {
-          inputCase = {
-            ...renderedCase,
-            input: promptFallback.promptText,
-          };
-          if (promptFallback.inputFilesSource === 'suite') {
-            inputSuiteFiles = promptFallback.remainingInputFiles
-              ? [...promptFallback.remainingInputFiles]
-              : undefined;
+      // Resolve input with shorthand support (pass suite-level input_files for merge)
+      const effectiveSuiteInputFiles =
+        rawSuiteInputFiles && !skipDefaults
+          ? interpolateCaseField(rawSuiteInputFiles, caseVars, nunjucksFilters)
+          : undefined;
+      let inputCase = renderedCase;
+      let inputSuiteFiles = effectiveSuiteInputFiles;
+      if (renderedCase.input === undefined) {
+        const promptFallback = await loadPromptMdFallback({
+          evalFilePath: absoluteTestPath,
+          searchRoots,
+          testInputFiles: renderedCase.input_files,
+          suiteInputFiles: effectiveSuiteInputFiles,
+        });
+        if (promptFallback) {
+          if (promptFallback.inputFilesSource === 'test') {
+            const { input_files: _inputFiles, ...caseWithoutInputFiles } = renderedCase;
+            inputCase = {
+              ...caseWithoutInputFiles,
+              input: promptFallback.promptText,
+              ...(promptFallback.remainingInputFiles
+                ? { input_files: [...promptFallback.remainingInputFiles] }
+                : {}),
+            };
+            inputSuiteFiles = undefined;
+          } else {
+            inputCase = {
+              ...renderedCase,
+              input: promptFallback.promptText,
+            };
+            if (promptFallback.inputFilesSource === 'suite') {
+              inputSuiteFiles = promptFallback.remainingInputFiles
+                ? [...promptFallback.remainingInputFiles]
+                : undefined;
+            }
           }
         }
       }
-    }
-    const testInputMessages = resolveInputMessages(inputCase, inputSuiteFiles);
-    // Resolve expected_output with shorthand support
-    const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
-
-    // A test is complete when it has id, input, and at least one of: criteria, expected_output, assertions, or turns (conversation mode)
-    const hasEvaluationSpec =
-      !!outcome ||
-      expectedMessages.length > 0 ||
-      renderedCase.assertions !== undefined ||
-      (Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0);
-    if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
-      logError(
-        `Skipping incomplete test: ${id ?? 'unknown'}. Missing required fields: id, input or PROMPT.md, and at least one of criteria/expected_output/assertions/turns`,
-      );
-      continue;
-    }
-
-    // Prepend suite-level input to test input (respecting skip_defaults)
-    const effectiveSuiteInputValue =
-      rawSuiteInput && !skipDefaults ? interpolateCaseField(rawSuiteInput, caseVars) : undefined;
-    const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
-
-    // expected_output is optional - for outcome-only evaluation
-    const hasExpectedMessages = expectedMessages.length > 0;
+      const testInputMessages = resolveInputMessages(inputCase, inputSuiteFiles);
+      // Resolve expected_output with shorthand support
+      const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
+
+      // A test is complete when it has id, input, and at least one of: criteria, expected_output, assertions, or turns (conversation mode)
+      const hasEvaluationSpec =
+        !!outcome ||
+        expectedMessages.length > 0 ||
+        renderedCase.assertions !== undefined ||
+        (Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0);
+      if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
+        logError(
+          `Skipping incomplete test: ${id ?? 'unknown'}. Missing required fields: id, input or PROMPT.md, and at least one of criteria/expected_output/assertions/turns`,
+        );
+        continue;
+      }
 
-    const inputTextParts: string[] = [];
+      // Prepend suite-level input to test input (respecting skip_defaults)
+      const effectiveSuiteInputValue =
+        rawSuiteInput && !skipDefaults
+          ? interpolateCaseField(rawSuiteInput, caseVars, nunjucksFilters)
+          : undefined;
+      const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
+
+      // expected_output is optional - for outcome-only evaluation
+      const hasExpectedMessages = expectedMessages.length > 0;
+
+      const inputTextParts: string[] = [];
+
+      // Process suite-level input first
+      const suiteResolvedInputMessages = effectiveSuiteInputMessages
+        ? await processMessages({
+            messages: effectiveSuiteInputMessages,
+            searchRoots,
+            repoRootPath,
+            textParts: inputTextParts,
+            messageType: 'input',
+            verbose,
+          })
+        : [];
 
-    // Process suite-level input first
-    const suiteResolvedInputMessages = effectiveSuiteInputMessages
-      ? await processMessages({
-          messages: effectiveSuiteInputMessages,
-          searchRoots,
-          repoRootPath,
-          textParts: inputTextParts,
-          messageType: 'input',
-          verbose,
-        })
-      : [];
-
-    // Process test-level input
-    const testResolvedInputMessages = await processMessages({
-      messages: testInputMessages,
-      searchRoots,
-      repoRootPath,
-      textParts: inputTextParts,
-      messageType: 'input',
-      verbose,
-    });
-    const inputMessages = [...suiteResolvedInputMessages, ...testResolvedInputMessages];
+      // Process test-level input
+      const testResolvedInputMessages = await processMessages({
+        messages: testInputMessages,
+        searchRoots,
+        repoRootPath,
+        textParts: inputTextParts,
+        messageType: 'input',
+        verbose,
+      });
+      const inputMessages = [...suiteResolvedInputMessages, ...testResolvedInputMessages];
+
+      // Process expected_output into segments (only if provided)
+      // Preserve full message structure including role and tool_calls for evaluator
+      const outputSegments = hasExpectedMessages
+        ? await processExpectedMessages({
+            messages: expectedMessages,
+            searchRoots,
+            repoRootPath,
+            verbose,
+          })
+        : [];
 
-    // Process expected_output into segments (only if provided)
-    // Preserve full message structure including role and tool_calls for evaluator
-    const outputSegments = hasExpectedMessages
-      ? await processExpectedMessages({
-          messages: expectedMessages,
+      // Build reference_answer:
+      // Extract the content from the last message in expected_output (similar to answer)
+      let referenceAnswer = '';
+      if (outputSegments.length > 0) {
+        // Get the last message
+        const lastMessage = outputSegments[outputSegments.length - 1];
+        const content = lastMessage.content;
+        const toolCalls = lastMessage.tool_calls;
+
+        if (typeof content === 'string') {
+          referenceAnswer = content;
+        } else if (content !== undefined && content !== null) {
+          // Serialize just the content, not the entire message
+          referenceAnswer = JSON.stringify(content, null, 2);
+        } else if (toolCalls !== undefined && toolCalls !== null) {
+          // Message with only tool_calls - serialize just the tool_calls
+          referenceAnswer = JSON.stringify(toolCalls, null, 2);
+        }
+      }
+      const question = inputTextParts
+        .map((part) => part.trim())
+        .filter((part) => part.length > 0)
+        .join(' ');
+
+      const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator;
+      let evaluators: Awaited<ReturnType<typeof parseGraders>>;
+      try {
+        evaluators = await parseGraders(
+          renderedCase,
+          globalExecution,
           searchRoots,
-          repoRootPath,
-          verbose,
-        })
-      : [];
-
-    // Build reference_answer:
-    // Extract the content from the last message in expected_output (similar to answer)
-    let referenceAnswer = '';
-    if (outputSegments.length > 0) {
-      // Get the last message
-      const lastMessage = outputSegments[outputSegments.length - 1];
-      const content = lastMessage.content;
-      const toolCalls = lastMessage.tool_calls;
-
-      if (typeof content === 'string') {
-        referenceAnswer = content;
-      } else if (content !== undefined && content !== null) {
-        // Serialize just the content, not the entire message
-        referenceAnswer = JSON.stringify(content, null, 2);
-      } else if (toolCalls !== undefined && toolCalls !== null) {
-        // Message with only tool_calls - serialize just the tool_calls
-        referenceAnswer = JSON.stringify(toolCalls, null, 2);
+          id ?? 'unknown',
+          suitePreprocessors,
+        );
+      } catch (error) {
+        // Skip entire test if evaluator validation fails
+        const message = error instanceof Error ? error.message : String(error);
+        logError(`Skipping test '${id}': ${message}`);
+        continue;
       }
-    }
-    const question = inputTextParts
-      .map((part) => part.trim())
-      .filter((part) => part.length > 0)
-      .join(' ');
 
-    const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator;
-    let evaluators: Awaited<ReturnType<typeof parseGraders>>;
-    try {
-      evaluators = await parseGraders(
+      const assertionTemplateReferences = await collectAssertionTemplateSourceReferences(
         renderedCase,
         globalExecution,
         searchRoots,
         id ?? 'unknown',
-        suitePreprocessors,
       );
-    } catch (error) {
-      // Skip entire test if evaluator validation fails
-      const message = error instanceof Error ? error.message : String(error);
-      logError(`Skipping test '${id}': ${message}`);
-      continue;
-    }
 
-    const assertionTemplateReferences = await collectAssertionTemplateSourceReferences(
-      renderedCase,
-      globalExecution,
-      searchRoots,
-      id ?? 'unknown',
-    );
-
-    // Handle inline rubrics field (deprecated: use assertions: [{type: rubrics, criteria: [...]}] instead)
-    const inlineRubrics = renderedCase.rubrics;
-    if (inlineRubrics !== undefined && Array.isArray(inlineRubrics)) {
-      const rubricEvaluator = parseInlineRubrics(inlineRubrics);
-      if (rubricEvaluator) {
-        // Prepend rubric evaluator to existing evaluators
-        evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
+      // Handle inline rubrics field (deprecated: use assertions: [{type: rubrics, criteria: [...]}] instead)
+      const inlineRubrics = renderedCase.rubrics;
+      if (inlineRubrics !== undefined && Array.isArray(inlineRubrics)) {
+        const rubricEvaluator = parseInlineRubrics(inlineRubrics);
+        if (rubricEvaluator) {
+          // Prepend rubric evaluator to existing evaluators
+          evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
+        }
       }
-    }
 
-    warnUnconsumedCriteria(outcome, evaluators, id ?? 'unknown');
-
-    const userFilePaths = collectResolvedInputFilePaths(inputMessages);
-
-    // Parse per-case workspace config and merge with suite-level
-    const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
-    const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
-
-    // Parse per-case metadata, then merge suite-level metadata payload.
-    // Arrays concatenate (suite-first, deduplicated), scalars on the case win.
-    const rawCaseMetadata = isJsonObject(renderedCase.metadata)
-      ? (renderedCase.metadata as Record<string, unknown>)
-      : undefined;
-    const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suiteMetadataPayload);
-
-    // Extract dependency fields
-    const dependsOn = Array.isArray(renderedCase.depends_on)
-      ? (renderedCase.depends_on as readonly string[]).filter(
-          (v): v is string => typeof v === 'string',
-        )
-      : undefined;
-    const onDependencyFailureRaw = asString(renderedCase.on_dependency_failure);
-    const onDependencyFailure =
-      onDependencyFailureRaw === 'skip' ||
-      onDependencyFailureRaw === 'fail' ||
-      onDependencyFailureRaw === 'run'
-        ? (onDependencyFailureRaw as import('./types.js').DependencyFailurePolicy)
+      warnUnconsumedCriteria(outcome, evaluators, id ?? 'unknown');
+
+      const userFilePaths = collectResolvedInputFilePaths(inputMessages);
+
+      // Parse per-case workspace config and merge with suite-level
+      const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
+      const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
+
+      // Parse per-case metadata, then merge suite-level metadata payload.
+      // Arrays concatenate (suite-first, deduplicated), scalars on the case win.
+      const rawCaseMetadata = isJsonObject(renderedCase.metadata)
+        ? (renderedCase.metadata as Record<string, unknown>)
         : undefined;
+      const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suiteMetadataPayload);
 
-    // Extract conversation mode fields
-    const modeRaw = asString(renderedCase.mode);
-    const mode: ConversationMode | undefined =
-      modeRaw === 'conversation' ? 'conversation' : undefined;
-    const turns = Array.isArray(renderedCase.turns)
-      ? parseTurns(renderedCase.turns as readonly unknown[])
-      : undefined;
-    const aggregationRaw = asString(renderedCase.aggregation);
-    const aggregation: ConversationAggregation | undefined =
-      aggregationRaw === 'mean' || aggregationRaw === 'min' || aggregationRaw === 'max'
-        ? aggregationRaw
+      // Extract dependency fields
+      const dependsOn = Array.isArray(renderedCase.depends_on)
+        ? (renderedCase.depends_on as readonly string[]).filter(
+            (v): v is string => typeof v === 'string',
+          )
         : undefined;
-    const onTurnFailureRaw = asString(renderedCase.on_turn_failure);
-    const onTurnFailure: TurnFailurePolicy | undefined =
-      onTurnFailureRaw === 'continue' || onTurnFailureRaw === 'stop' ? onTurnFailureRaw : undefined;
-    const windowSize =
-      typeof renderedCase.window_size === 'number' && renderedCase.window_size >= 1
-        ? (renderedCase.window_size as number)
+      const onDependencyFailureRaw = asString(renderedCase.on_dependency_failure);
+      const onDependencyFailure =
+        onDependencyFailureRaw === 'skip' ||
+        onDependencyFailureRaw === 'fail' ||
+        onDependencyFailureRaw === 'run'
+          ? (onDependencyFailureRaw as import('./types.js').DependencyFailurePolicy)
+          : undefined;
+
+      // Extract conversation mode fields
+      const modeRaw = asString(renderedCase.mode);
+      const mode: ConversationMode | undefined =
+        modeRaw === 'conversation' ? 'conversation' : undefined;
+      const turns = Array.isArray(renderedCase.turns)
+        ? parseTurns(renderedCase.turns as readonly unknown[])
         : undefined;
-
-    const category = normalizeCategoryPath(suite.category ?? options?.category);
-
-    const testCase: EvalTest = {
-      id,
-      suite: suiteName,
-      category,
-      conversation_id: conversationId,
-      question: question,
-      input: inputMessages,
-      expected_output: outputSegments,
-      reference_answer: referenceAnswer,
-      file_paths: userFilePaths,
-      criteria: outcome ?? '',
-      evaluator: testCaseEvaluatorKind,
-      assertions: evaluators,
-      ...(suitePreprocessors ? { preprocessors: suitePreprocessors } : {}),
-      workspace: mergedWorkspace,
-      metadata,
-      ...(caseRun?.threshold !== undefined ? { threshold: caseRun.threshold } : {}),
-      ...(caseRun !== undefined ? { run: caseRun } : {}),
-      ...(mode ? { mode } : {}),
-      ...(turns && turns.length > 0 ? { turns } : {}),
-      ...(aggregation ? { aggregation } : {}),
-      ...(onTurnFailure ? { on_turn_failure: onTurnFailure } : {}),
-      ...(windowSize !== undefined ? { window_size: windowSize } : {}),
-      ...(dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {}),
-      ...(onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}),
-      source: buildEvalTestSource({
-        evalFilePath,
-        absoluteTestPath,
-        repoRootPath,
+      const aggregationRaw = asString(renderedCase.aggregation);
+      const aggregation: ConversationAggregation | undefined =
+        aggregationRaw === 'mean' || aggregationRaw === 'min' || aggregationRaw === 'max'
+          ? aggregationRaw
+          : undefined;
+      const onTurnFailureRaw = asString(renderedCase.on_turn_failure);
+      const onTurnFailure: TurnFailurePolicy | undefined =
+        onTurnFailureRaw === 'continue' || onTurnFailureRaw === 'stop'
+          ? onTurnFailureRaw
+          : undefined;
+      const windowSize =
+        typeof renderedCase.window_size === 'number' && renderedCase.window_size >= 1
+          ? (renderedCase.window_size as number)
+          : undefined;
+
+      const category = normalizeCategoryPath(suite.category ?? options?.category);
+
+      const testCase: EvalTest = {
         id,
-        renderedCase,
-        rawCaseSnapshots,
-        inputMessages,
-        evaluators,
-        assertionTemplateReferences,
-      }),
-    };
-
-    results.push(testCase);
+        suite: suiteName,
+        category,
+        conversation_id: conversationId,
+        question: question,
+        input: inputMessages,
+        expected_output: outputSegments,
+        reference_answer: referenceAnswer,
+        file_paths: userFilePaths,
+        criteria: outcome ?? '',
+        evaluator: testCaseEvaluatorKind,
+        assertions: evaluators,
+        ...(suitePreprocessors ? { preprocessors: suitePreprocessors } : {}),
+        workspace: mergedWorkspace,
+        metadata,
+        ...(caseRun?.threshold !== undefined ? { threshold: caseRun.threshold } : {}),
+        ...(caseRun !== undefined ? { run: caseRun } : {}),
+        ...(mode ? { mode } : {}),
+        ...(turns && turns.length > 0 ? { turns } : {}),
+        ...(aggregation ? { aggregation } : {}),
+        ...(onTurnFailure ? { on_turn_failure: onTurnFailure } : {}),
+        ...(windowSize !== undefined ? { window_size: windowSize } : {}),
+        ...(dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {}),
+        ...(onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}),
+        source: buildEvalTestSource({
+          evalFilePath,
+          absoluteTestPath,
+          repoRootPath,
+          id,
+          renderedCase,
+          rawCaseSnapshots,
+          inputMessages,
+          evaluators,
+          assertionTemplateReferences,
+        }),
+      };
+
+      results.push(testCase);
+    }
   }
 
   return {
diff --git a/packages/core/test/evaluation/interpolation-integration.test.ts b/packages/core/test/evaluation/interpolation-integration.test.ts
index 664b9dbe0..fef7fefbc 100644
--- a/packages/core/test/evaluation/interpolation-integration.test.ts
+++ b/packages/core/test/evaluation/interpolation-integration.test.ts
@@ -26,11 +26,11 @@ describe('env interpolation in YAML loading', () => {
     }
   });
 
-  it('interpolates ${{ VAR }} in test criteria field', async () => {
+  it('interpolates {{ env.VAR }} in test criteria field', async () => {
     const evalFile = path.join(testDir, 'interp-criteria.eval.yaml');
     await writeFile(
       evalFile,
-      'tests:\n  - id: test-1\n    input: "hello"\n    criteria: "${{ AGENTV_TEST_CRITERIA }}"\n',
+      'tests:\n  - id: test-1\n    input: "hello"\n    criteria: "{{ env.AGENTV_TEST_CRITERIA }}"\n',
     );
     const cases = await loadTests(evalFile, testDir);
     expect(cases[0].criteria).toBe('Must return correct answer');
@@ -44,11 +44,11 @@ describe('env interpolation in YAML loading', () => {
         'workspace:',
         '  repos:',
         '    - path: ./RepoA',
-        '      repo: "${{ AGENTV_TEST_PATH }}"',
+        '      repo: "{{ env.AGENTV_TEST_PATH }}"',
         'tests:',
         '  - id: test-1',
         '    input: "hello"',
-        '    criteria: "${{ AGENTV_TEST_CRITERIA }}"',
+        '    criteria: "{{ env.AGENTV_TEST_CRITERIA }}"',
         '',
       ].join('\n'),
     );
@@ -58,7 +58,7 @@ describe('env interpolation in YAML loading', () => {
     expect(cases[0].workspace?.repos?.[0]?.repo).toBe('https://github.com/org/from-env.git');
   });
 
-  it('interpolates ${{ VAR }} in workspace repo identity', async () => {
+  it('interpolates {{ env.VAR }} in workspace repo identity', async () => {
     const evalFile = path.join(testDir, 'interp-workspace.eval.yaml');
     await writeFile(
       evalFile,
@@ -66,7 +66,7 @@ describe('env interpolation in YAML loading', () => {
         'workspace:',
         '  repos:',
         '    - path: ./RepoA',
-        '      repo: "${{ AGENTV_TEST_PATH }}"',
+        '      repo: "{{ env.AGENTV_TEST_PATH }}"',
         'tests:',
         '  - id: test-1',
         '    input: "hello"',
@@ -78,11 +78,11 @@ describe('env interpolation in YAML loading', () => {
     expect(cases[0].workspace?.repos?.[0]?.repo).toBe('https://github.com/org/from-env.git');
   });
 
-  it('interpolates ${{ VAR }} in external workspace YAML file', async () => {
+  it('interpolates {{ env.VAR }} in external workspace YAML file', async () => {
     const workspaceFile = path.join(testDir, 'workspace.yaml');
     await writeFile(
       workspaceFile,
-      ['repos:', '  - path: ./RepoB', '    repo: "${{ AGENTV_TEST_PATH }}"', ''].join('\n'),
+      ['repos:', '  - path: ./RepoB', '    repo: "{{ env.AGENTV_TEST_PATH }}"', ''].join('\n'),
     );
     const evalFile = path.join(testDir, 'interp-ext-workspace.eval.yaml');
     await writeFile(
@@ -100,11 +100,11 @@ describe('env interpolation in YAML loading', () => {
     expect(cases[0].workspace?.repos?.[0]?.repo).toBe('https://github.com/org/from-env.git');
   });
 
-  it('interpolates ${{ VAR }} in external YAML case files', async () => {
+  it('interpolates {{ env.VAR }} in external YAML case files', async () => {
     const casesFile = path.join(testDir, 'cases.yaml');
     await writeFile(
       casesFile,
-      ['- id: ext-1', '  input: "hello"', '  criteria: "${{ AGENTV_TEST_CRITERIA }}"', ''].join(
+      ['- id: ext-1', '  input: "hello"', '  criteria: "{{ env.AGENTV_TEST_CRITERIA }}"', ''].join(
         '\n',
       ),
     );
@@ -114,11 +114,11 @@ describe('env interpolation in YAML loading', () => {
     expect(cases[0].criteria).toBe('Must return correct answer');
   });
 
-  it('interpolates ${{ VAR }} in external JSONL case files', async () => {
+  it('interpolates {{ env.VAR }} in external JSONL case files', async () => {
     const casesFile = path.join(testDir, 'cases.jsonl');
     await writeFile(
       casesFile,
-      '{"id": "ext-jsonl-1", "input": "hello", "criteria": "${{ AGENTV_TEST_CRITERIA }}"}\n',
+      '{"id": "ext-jsonl-1", "input": "hello", "criteria": "{{ env.AGENTV_TEST_CRITERIA }}"}\n',
     );
     const evalFile = path.join(testDir, 'interp-external-jsonl.eval.yaml');
     await writeFile(evalFile, 'tests: cases.jsonl\n');
@@ -142,9 +142,45 @@ describe('env interpolation in YAML loading', () => {
     // (empty criteria alone causes the test loader to skip it as incomplete)
     await writeFile(
       evalFile,
-      'tests:\n  - id: test-1\n    input: "hello"\n    criteria: "prefix ${{ AGENTV_NONEXISTENT_VAR }} suffix"\n    expected_output: "some output"\n',
+      'tests:\n  - id: test-1\n    input: "hello"\n    criteria: "prefix {{ env.AGENTV_NONEXISTENT_VAR }} suffix"\n    expected_output: "some output"\n',
     );
     const cases = await loadTests(evalFile, testDir);
     expect(cases[0].criteria).toBe('prefix  suffix');
   });
+
+  it('resolves default filter values through env rendering', async () => {
+    const evalFile = path.join(testDir, 'interp-default.eval.yaml');
+    await writeFile(
+      evalFile,
+      'tests:\n  - id: test-1\n    input: "hello"\n    criteria: "{{ env.AGENTV_NONEXISTENT_VAR | default(\\"fallback criteria\\") }}"\n',
+    );
+    const cases = await loadTests(evalFile, testDir);
+    expect(cases[0].criteria).toBe('fallback criteria');
+  });
+
+  it('leaves runtime shell variables in target commands untouched', async () => {
+    const evalFile = path.join(testDir, 'interp-shell-vars.eval.yaml');
+    await writeFile(
+      evalFile,
+      [
+        'target:',
+        '  name: local-shell',
+        '  provider: cli',
+        '  command: "echo $RUNTIME ${RUNTIME} {{ env.AGENTV_TEST_PATH }}"',
+        'tests:',
+        '  - id: test-1',
+        '    input: "hello"',
+        '    criteria: "do something"',
+        '',
+      ].join('\n'),
+    );
+    const { targetSpec } = await import('../../src/evaluation/yaml-parser.js').then((module) =>
+      module.readTestSuiteMetadata(evalFile),
+    );
+    expect(
+      targetSpec?.definition && 'command' in targetSpec.definition
+        ? targetSpec.definition.command
+        : '',
+    ).toBe('echo $RUNTIME ${RUNTIME} https://github.com/org/from-env.git');
+  });
 });
diff --git a/packages/core/test/evaluation/interpolation.test.ts b/packages/core/test/evaluation/interpolation.test.ts
index ecaccf299..b1b1b5884 100644
--- a/packages/core/test/evaluation/interpolation.test.ts
+++ b/packages/core/test/evaluation/interpolation.test.ts
@@ -4,24 +4,38 @@ import { interpolateEnv, interpolateTemplateVars } from '../../src/evaluation/in
 describe('interpolateEnv', () => {
   const env = { HOME: '/home/user', PROJECT: 'agentv', EMPTY: '' };
 
-  it('replaces ${{ VAR }} in a string', () => {
-    expect(interpolateEnv('${{ HOME }}', env)).toBe('/home/user');
+  it('replaces {{ env.VAR }} in a string', () => {
+    expect(interpolateEnv('{{ env.HOME }}', env)).toBe('/home/user');
   });
 
-  it('replaces ${{VAR}} without spaces', () => {
-    expect(interpolateEnv('${{HOME}}', env)).toBe('/home/user');
+  it('replaces {{env.VAR}} without spaces', () => {
+    expect(interpolateEnv('{{env.HOME}}', env)).toBe('/home/user');
   });
 
   it('handles partial/inline interpolation', () => {
-    expect(interpolateEnv('${{ HOME }}/repos/${{ PROJECT }}', env)).toBe('/home/user/repos/agentv');
+    expect(interpolateEnv('{{ env.HOME }}/repos/{{ env.PROJECT }}', env)).toBe(
+      '/home/user/repos/agentv',
+    );
   });
 
   it('resolves missing variables to empty string', () => {
-    expect(interpolateEnv('${{ MISSING }}', env)).toBe('');
+    expect(interpolateEnv('{{ env.MISSING }}', env)).toBe('');
+  });
+
+  it('supports the Nunjucks default filter for missing env vars', () => {
+    expect(interpolateEnv('{{ env.MISSING | default("fallback") }}', env)).toBe('fallback');
   });
 
   it('resolves missing variable inline to empty string', () => {
-    expect(interpolateEnv('prefix-${{ MISSING }}-suffix', env)).toBe('prefix--suffix');
+    expect(interpolateEnv('prefix-{{ env.MISSING }}-suffix', env)).toBe('prefix--suffix');
+  });
+
+  it('preserves runtime shell variables', () => {
+    expect(interpolateEnv('echo $RUNTIME ${RUNTIME}', env)).toBe('echo $RUNTIME ${RUNTIME}');
+  });
+
+  it('does not resolve legacy ${{ VAR }} syntax', () => {
+    expect(interpolateEnv('${{ HOME }}', env)).toBe('${{ HOME }}');
   });
 
   it('passes through strings without interpolation syntax', () => {
@@ -37,8 +51,8 @@ describe('interpolateEnv', () => {
 
   it('recursively interpolates object values', () => {
     const input = {
-      path: '${{ HOME }}/repos',
-      nested: { url: '${{ PROJECT }}' },
+      path: '{{ env.HOME }}/repos',
+      nested: { url: '{{ env.PROJECT }}' },
       literal: 'no-vars',
     };
     expect(interpolateEnv(input, env)).toEqual({
@@ -49,90 +63,90 @@ describe('interpolateEnv', () => {
   });
 
   it('does not mutate the original object', () => {
-    const input = { path: '${{ HOME }}' };
+    const input = { path: '{{ env.HOME }}' };
     const result = interpolateEnv(input, env);
     expect(result).not.toBe(input);
-    expect(input.path).toBe('${{ HOME }}');
+    expect(input.path).toBe('{{ env.HOME }}');
   });
 
   it('recursively interpolates arrays', () => {
-    const input = ['${{ HOME }}', { key: '${{ PROJECT }}' }, 42];
+    const input = ['{{ env.HOME }}', { key: '{{ env.PROJECT }}' }, 42];
     expect(interpolateEnv(input, env)).toEqual(['/home/user', { key: 'agentv' }, 42]);
   });
 
   it('handles empty string env values', () => {
-    expect(interpolateEnv('${{ EMPTY }}', env)).toBe('');
+    expect(interpolateEnv('{{ env.EMPTY }}', env)).toBe('');
   });
 
   describe('whole-value type coercion', () => {
     it('coerces "true" to boolean true', () => {
-      expect(interpolateEnv('${{ FLAG }}', { FLAG: 'true' })).toBe(true);
+      expect(interpolateEnv('{{ env.FLAG }}', { FLAG: 'true' })).toBe(true);
     });
 
     it('coerces "false" to boolean false', () => {
-      expect(interpolateEnv('${{ FLAG }}', { FLAG: 'false' })).toBe(false);
+      expect(interpolateEnv('{{ env.FLAG }}', { FLAG: 'false' })).toBe(false);
     });
 
     it('coerces integer string to number', () => {
-      expect(interpolateEnv('${{ COUNT }}', { COUNT: '10' })).toBe(10);
+      expect(interpolateEnv('{{ env.COUNT }}', { COUNT: '10' })).toBe(10);
     });
 
     it('coerces float string to number', () => {
-      expect(interpolateEnv('${{ RATIO }}', { RATIO: '0.75' })).toBe(0.75);
+      expect(interpolateEnv('{{ env.RATIO }}', { RATIO: '0.75' })).toBe(0.75);
     });
 
     it('leaves empty string as string (missing var)', () => {
-      expect(interpolateEnv('${{ MISSING }}', {})).toBe('');
+      expect(interpolateEnv('{{ env.MISSING }}', {})).toBe('');
     });
 
     it('leaves plain string values as strings', () => {
-      expect(interpolateEnv('${{ HOME }}', env)).toBe('/home/user');
+      expect(interpolateEnv('{{ env.HOME }}', env)).toBe('/home/user');
     });
 
     it('does not coerce partial/inline substitutions', () => {
       // "true" appears only after inline replacement — no coercion
-      expect(interpolateEnv('enabled=${{ FLAG }}', { FLAG: 'true' })).toBe('enabled=true');
+      expect(interpolateEnv('enabled={{ env.FLAG }}', { FLAG: 'true' })).toBe('enabled=true');
     });
 
     it('coerces inside nested objects', () => {
-      const input = { auto_push: '${{ PUSH }}', label: 'runs' };
+      const input = { auto_push: '{{ env.PUSH }}', label: 'runs' };
       expect(interpolateEnv(input, { PUSH: 'true' })).toEqual({ auto_push: true, label: 'runs' });
     });
 
     // Numeric edge-case regression tests — these must stay as strings
     it('does not coerce scientific notation (1e3)', () => {
-      expect(interpolateEnv('${{ VAL }}', { VAL: '1e3' })).toBe('1e3');
+      expect(interpolateEnv('{{ env.VAL }}', { VAL: '1e3' })).toBe('1e3');
     });
 
     it('does not coerce hex strings (0x10)', () => {
-      expect(interpolateEnv('${{ VAL }}', { VAL: '0x10' })).toBe('0x10');
+      expect(interpolateEnv('{{ env.VAL }}', { VAL: '0x10' })).toBe('0x10');
     });
 
     it('does not coerce "Infinity"', () => {
-      expect(interpolateEnv('${{ VAL }}', { VAL: 'Infinity' })).toBe('Infinity');
+      expect(interpolateEnv('{{ env.VAL }}', { VAL: 'Infinity' })).toBe('Infinity');
     });
 
     it('does not coerce whitespace-only string', () => {
-      expect(interpolateEnv('${{ VAL }}', { VAL: ' ' })).toBe(' ');
+      expect(interpolateEnv('{{ env.VAL }}', { VAL: ' ' })).toBe(' ');
     });
 
     it('does not coerce leading-zero string (00123)', () => {
-      expect(interpolateEnv('${{ VAL }}', { VAL: '00123' })).toBe('00123');
+      expect(interpolateEnv('{{ env.VAL }}', { VAL: '00123' })).toBe('00123');
     });
 
     it('coerces negative integer', () => {
-      expect(interpolateEnv('${{ VAL }}', { VAL: '-7' })).toBe(-7);
+      expect(interpolateEnv('{{ env.VAL }}', { VAL: '-7' })).toBe(-7);
     });
   });
 
   it('is case-sensitive for variable names', () => {
-    expect(interpolateEnv('${{ home }}', env)).toBe('');
-    expect(interpolateEnv('${{ HOME }}', env)).toBe('/home/user');
+    expect(interpolateEnv('{{ env.home }}', env)).toBe('');
+    expect(interpolateEnv('{{ env.HOME }}', env)).toBe('/home/user');
   });
 
   it('handles variables with underscores and digits', () => {
     const envWithSpecial = { MY_VAR_2: 'value' };
-    expect(interpolateEnv('${{ MY_VAR_2 }}', envWithSpecial)).toBe('value');
+    expect(interpolateEnv('{{ env.MY_VAR_2 }}', envWithSpecial)).toBe('value');
   });
 });
 
@@ -149,17 +163,33 @@ describe('interpolateTemplateVars', () => {
     );
   });
 
+  it('replaces namespaced {{ vars.foo }} references', () => {
+    expect(interpolateTemplateVars('Answer clearly: {{ vars.question }}', vars)).toBe(
+      'Answer clearly: What is 2 + 2?',
+    );
+  });
+
   it('supports dotted paths', () => {
-    expect(interpolateTemplateVars('Topic: {{ nested.topic }}', vars)).toBe('Topic: math');
+    expect(interpolateTemplateVars('Topic: {{ vars.nested.topic }}', vars)).toBe('Topic: math');
   });
 
-  it('preserves missing variables instead of blanking them out', () => {
-    expect(interpolateTemplateVars('Answer clearly: {{missing}}', vars)).toBe(
-      'Answer clearly: {{missing}}',
+  it('supports loops and built-in filters', () => {
+    const rendered = interpolateTemplateVars(
+      '{% for item in vars.items %}{{ item | upper }}{% if not loop.last %}, {% endif %}{% endfor %}',
+      { items: ['alpha', 'beta'] },
     );
+    expect(rendered).toBe('ALPHA, BETA');
+  });
+
+  it('renders missing variables as empty strings', () => {
+    expect(interpolateTemplateVars('Answer clearly: {{missing}}', vars)).toBe('Answer clearly: ');
   });
 
   it('returns the original JSON value for whole-value substitutions', () => {
-    expect(interpolateTemplateVars('{{expected}}', vars)).toEqual({ answer: '4' });
+    expect(interpolateTemplateVars('{{ vars.expected }}', vars)).toEqual({ answer: '4' });
+  });
+
+  it('returns the full vars object for {{ vars }}', () => {
+    expect(interpolateTemplateVars('{{ vars }}', vars)).toEqual(vars);
   });
 });
diff --git a/packages/core/test/evaluation/suite-level-input.test.ts b/packages/core/test/evaluation/suite-level-input.test.ts
index f5c1dae9c..0909fda86 100644
--- a/packages/core/test/evaluation/suite-level-input.test.ts
+++ b/packages/core/test/evaluation/suite-level-input.test.ts
@@ -256,6 +256,7 @@ tests:
   - id: templated
     vars:
       question: "What is the capital of France?"
+      expected_answer: "Paris"
     criteria: "Answers {{question}} correctly"
     input:
       - role: user
@@ -287,12 +288,160 @@ tests:
       role: 'assistant',
       content: 'Thinking about What is the capital of France?',
     });
-    expect(tests[0].expected_output).toEqual([
-      { role: 'assistant', content: '{{expected_answer}}' },
-    ]);
+    expect(tests[0].expected_output).toEqual([{ role: 'assistant', content: 'Paris' }]);
     expect(tests[0].metadata).toEqual({ untouched: '{{question}}' });
   });
 
+  it('applies namespaced vars with loops in suite and test input templates', async () => {
+    await writeFile(
+      path.join(tempDir, 'templated-namespaced-input.eval.yaml'),
+      `input: |
+  Items:
+  {% for item in vars.group.items %}- {{ item | upper }}
+  {% endfor %}
+tests:
+  - id: templated-namespaced
+    vars:
+      group:
+        items:
+          - alpha
+          - beta
+    criteria: "Mentions {{ vars.group.items | length }} items"
+    input: "Question: {{ vars.group.items[0] }}"
+`,
+    );
+
+    const tests = await loadTests(
+      path.join(tempDir, 'templated-namespaced-input.eval.yaml'),
+      tempDir,
+    );
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].criteria).toBe('Mentions 2 items');
+    expect(tests[0].input[0]).toEqual({
+      role: 'user',
+      content: 'Items:\n- ALPHA\n- BETA\n\n',
+    });
+    expect(tests[0].input[1]).toEqual({
+      role: 'user',
+      content: 'Question: alpha',
+    });
+  });
+
+  it('loads custom nunjucks_filters for eval-time rendering', async () => {
+    const filterPath = path.join(tempDir, 'slug-filter.ts');
+    await writeFile(
+      filterPath,
+      'export default function slug(value: unknown) { return String(value).toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, ""); }\n',
+    );
+    await writeFile(
+      path.join(tempDir, 'templated-custom-filter.eval.yaml'),
+      `nunjucks_filters:
+  slug: ./slug-filter.ts
+tests:
+  - id: filter-test
+    vars:
+      title: "Hello AgentV"
+    criteria: "Slug is {{ vars.title | slug }}"
+    input: "Write {{ vars.title | slug }}"
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'templated-custom-filter.eval.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].criteria).toBe('Slug is hello-agentv');
+    expect(tests[0].input[0]).toEqual({ role: 'user', content: 'Write hello-agentv' });
+  });
+
+  it('expands string array vars into multiple rendered rows', async () => {
+    await writeFile(
+      path.join(tempDir, 'templated-array-vars.eval.yaml'),
+      `tests:
+  - id: "fruit-{{ vars.fruit }}"
+    vars:
+      fruit:
+        - apple
+        - pear
+      color:
+        - red
+        - green
+      tags:
+        - stable
+    criteria: "{{ vars.color }} {{ vars.fruit }}"
+    input: "Describe {{ vars.color }} {{ vars.fruit }}"
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'templated-array-vars.eval.yaml'), tempDir);
+
+    expect(tests.map((test) => test.id)).toEqual([
+      'fruit-apple',
+      'fruit-apple',
+      'fruit-pear',
+      'fruit-pear',
+    ]);
+    expect(tests.map((test) => test.criteria)).toEqual([
+      'red apple',
+      'green apple',
+      'red pear',
+      'green pear',
+    ]);
+    expect(tests.map((test) => test.input[0]?.content)).toEqual([
+      'Describe red apple',
+      'Describe green apple',
+      'Describe red pear',
+      'Describe green pear',
+    ]);
+  });
+
+  it('renders then parses chat-array prompt strings', async () => {
+    await writeFile(
+      path.join(tempDir, 'templated-chat-array.eval.yaml'),
+      `tests:
+  - id: chat-array
+    vars:
+      topic: "templating"
+    criteria: "Uses chat array"
+    input: '[{"role":"system","content":"You review {{ vars.topic }}"},{"role":"user","content":"Explain {{ vars.topic }}"}]'
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'templated-chat-array.eval.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].input).toEqual([
+      { role: 'system', content: 'You review templating' },
+      { role: 'user', content: 'Explain templating' },
+    ]);
+  });
+
+  it('renders assertion values and metrics with per-test vars', async () => {
+    await writeFile(
+      path.join(tempDir, 'templated-assertions.eval.yaml'),
+      `tests:
+  - id: assertions
+    vars:
+      expected: "DENIED"
+      metric_name: "policy"
+    input: "Check access"
+    assertions:
+      - type: contains
+        metric: "{{ vars.metric_name }}_decision"
+        value: "{{ vars.expected }}"
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'templated-assertions.eval.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].assertions?.[0]).toMatchObject({
+      type: 'contains',
+      value: 'DENIED',
+      metric: 'policy_decision',
+    });
+  });
+
   it('applies per-test vars inside conversation turns', async () => {
     await writeFile(
       path.join(tempDir, 'templated-turns.eval.yaml'),
@@ -317,7 +466,7 @@ tests:
       {
         input: 'Fix parser null check',
         expected_output: 'Fixed parser null check',
-        assertions: ['Mentions {{bug}}'],
+        assertions: ['Mentions parser null check'],
       },
     ]);
   });
diff --git a/packages/core/test/evaluation/workspace/deps-scanner.test.ts b/packages/core/test/evaluation/workspace/deps-scanner.test.ts
index 0028130c6..163f8ffdf 100644
--- a/packages/core/test/evaluation/workspace/deps-scanner.test.ts
+++ b/packages/core/test/evaluation/workspace/deps-scanner.test.ts
@@ -273,7 +273,7 @@ tests:
 workspace:
   repos:
     - path: ./repo
-      repo: \${{ TEST_REPO_URL }}
+      repo: "{{ env.TEST_REPO_URL }}"
 tests:
   - id: test-1
     input: hello
diff --git a/packages/core/test/projects.test.ts b/packages/core/test/projects.test.ts
index 42e308e25..7c8d42f03 100644
--- a/packages/core/test/projects.test.ts
+++ b/packages/core/test/projects.test.ts
@@ -324,11 +324,9 @@ dashboard:
   it('interpolates env vars in repo', () => {
     const registryPath = getProjectsRegistryPath();
     mkdirSync(path.dirname(registryPath), { recursive: true });
-    // Use concatenation to avoid JS template literal evaluating ${{ ... }}
-    const d = '$';
     writeFileSync(
       registryPath,
-      `projects:\n  - id: env-bench\n    repo: "${d}{{ BENCH_REPO_URL }}"\n    path: /srv/agentv/repo\n    branch: main\n    added_at: "2026-01-01T00:00:00Z"\n    last_opened_at: "2026-01-01T00:00:00Z"\n`,
+      'projects:\n  - id: env-bench\n    repo: "{{ env.BENCH_REPO_URL }}"\n    path: /srv/agentv/repo\n    branch: main\n    added_at: "2026-01-01T00:00:00Z"\n    last_opened_at: "2026-01-01T00:00:00Z"\n',
       'utf-8',
     );
 

From c0b8f47caf6dd69b782e9375c4fc46e56b9020f5 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 15:30:26 +0200
Subject: [PATCH 2/3] fix(eval): resolve env templates in target secrets

---
 packages/core/src/evaluation/interpolation.ts |  4 ++
 .../core/src/evaluation/providers/targets.ts  | 19 +++++++
 .../test/evaluation/providers/targets.test.ts | 49 +++++++++++++++++++
 3 files changed, 72 insertions(+)

diff --git a/packages/core/src/evaluation/interpolation.ts b/packages/core/src/evaluation/interpolation.ts
index 1ab64a496..c14b89f08 100644
--- a/packages/core/src/evaluation/interpolation.ts
+++ b/packages/core/src/evaluation/interpolation.ts
@@ -80,6 +80,10 @@ function renderEnvString(template: string, env: EnvLookup): string {
   return template.replace(ENV_OUTPUT_PATTERN, (match) => renderString(match, { env }));
 }
 
+export function renderEnvTemplateString(template: string, env: EnvLookup): string {
+  return renderEnvString(template, env);
+}
+
 /**
  * Recursively render config-load `{{ env.VAR }}` templates in string values.
  *
diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts
index 5705ed8dd..437f4dbe1 100644
--- a/packages/core/src/evaluation/providers/targets.ts
+++ b/packages/core/src/evaluation/providers/targets.ts
@@ -1,6 +1,7 @@
 import path from 'node:path';
 import { z } from 'zod';
 
+import { renderEnvTemplateString } from '../interpolation.js';
 import type { EnvLookup, TargetDefinition } from './types.js';
 
 // ---------------------------------------------------------------------------
@@ -819,6 +820,7 @@ export const COMMON_TARGET_SETTINGS = [
 ] as const;
 
 const USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
+const WHOLE_ENV_TEMPLATE_PATTERN = /^\s*\{\{\s*env\.[\s\S]*?\}\}\s*$/;
 
 const BASE_TARGET_SCHEMA = z
   .object({
@@ -2338,6 +2340,23 @@ function resolveOptionalString(
     return envValue;
   }
 
+  if (trimmed.includes('{{') && trimmed.includes('env.')) {
+    const allowLiteral = options?.allowLiteral ?? false;
+    if (!allowLiteral && !WHOLE_ENV_TEMPLATE_PATTERN.test(trimmed)) {
+      throw new Error(
+        `${description} must use a whole \${{ VARIABLE_NAME }} or {{ env.VARIABLE_NAME }} reference`,
+      );
+    }
+    const rendered = renderEnvTemplateString(trimmed, env).trim();
+    if (rendered.length === 0) {
+      if (options?.optionalEnv ?? false) {
+        return undefined;
+      }
+      throw new Error(`${description} env template resolved to an empty value`);
+    }
+    return rendered;
+  }
+
   // Return as literal value
   const allowLiteral = options?.allowLiteral ?? false;
   if (!allowLiteral) {
diff --git a/packages/core/test/evaluation/providers/targets.test.ts b/packages/core/test/evaluation/providers/targets.test.ts
index 1ec0c2f5c..da80ec4bb 100644
--- a/packages/core/test/evaluation/providers/targets.test.ts
+++ b/packages/core/test/evaluation/providers/targets.test.ts
@@ -579,6 +579,55 @@ describe('resolveTargetDefinition', () => {
     });
   });
 
+  it('resolves openai settings from {{ env.* }} templates', () => {
+    const env = {
+      OPENAI_ENDPOINT: 'https://llm-gateway.example.com/v1',
+      OPENAI_API_KEY: 'openai-secret',
+      OPENAI_MODEL: 'gpt-5.4',
+    } satisfies Record<string, string>;
+
+    const target = resolveTargetDefinition(
+      {
+        name: 'openai-target',
+        provider: 'openai',
+        endpoint: '{{ env.OPENAI_ENDPOINT }}',
+        api_key: '{{ env.OPENAI_API_KEY }}',
+        model: '{{ env.OPENAI_MODEL | default("gpt-5.4-mini") }}',
+      },
+      env,
+    );
+
+    expect(target.kind).toBe('openai');
+    if (target.kind !== 'openai') {
+      throw new Error('expected openai target');
+    }
+
+    expect(target.config).toMatchObject({
+      baseURL: 'https://llm-gateway.example.com/v1',
+      apiKey: 'openai-secret',
+      model: 'gpt-5.4',
+    });
+  });
+
+  it('rejects inline {{ env.* }} templates in secret fields', () => {
+    expect(() =>
+      resolveTargetDefinition(
+        {
+          name: 'openai-target',
+          provider: 'openai',
+          endpoint: '{{ env.OPENAI_ENDPOINT }}',
+          api_key: 'Bearer {{ env.OPENAI_API_KEY }}',
+          model: '{{ env.OPENAI_MODEL }}',
+        },
+        {
+          OPENAI_ENDPOINT: 'https://llm-gateway.example.com/v1',
+          OPENAI_API_KEY: 'openai-secret',
+          OPENAI_MODEL: 'gpt-5.4',
+        },
+      ),
+    ).toThrow(/whole .+ env\.VARIABLE_NAME/i);
+  });
+
   it('resolves openrouter settings from environment', () => {
     const env = {
       OPENROUTER_API_KEY: 'openrouter-secret',

From c654e9713002a4e27a20b204d3939bc2929c143b Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Thu, 2 Jul 2026 15:48:22 +0200
Subject: [PATCH 3/3] fix(eval): tighten target secret env templates

---
 .../docs/docs/evaluation/eval-files.mdx       | 14 +++----
 .../core/src/evaluation/providers/targets.ts  |  5 ++-
 .../test/evaluation/providers/targets.test.ts | 38 +++++++++++++++++++
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 63d6655ab..d5abf93ab 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -407,26 +407,26 @@ dataset rows out of oversized inline YAML, see [Benchmark Provenance](/docs/guid
 
 ## Environment Variable Interpolation
 
-All string fields in eval files support `${{ VAR }}` syntax for environment variable interpolation. This enables portable eval configs that work across machines and CI environments without hardcoded paths.
+All string fields in eval files support `{{ env.VAR }}` syntax for environment variable interpolation. This enables portable eval configs that work across machines and CI environments without hardcoded paths.
 
 ```yaml
 workspace:
   repos:
     - path: ./RepoA
-      repo: "${{ REPO_A_URL }}"
-      commit: "${{ REPO_A_COMMIT }}"
+      repo: "{{ env.REPO_A_URL }}"
+      commit: "{{ env.REPO_A_COMMIT }}"
 
 tests:
   - id: test-1
-    input: "Evaluate the code in ${{ PROJECT_NAME }}"
-    criteria: "${{ EVAL_CRITERIA }}"
+    input: "Evaluate the code in {{ env.PROJECT_NAME }}"
+    criteria: "{{ env.EVAL_CRITERIA }}"
 ```
 
 ### Behavior
 
-- **Syntax:** `${{ VARIABLE_NAME }}` with optional whitespace around the name
+- **Syntax:** `{{ env.VARIABLE_NAME }}`, with optional whitespace inside the braces; Nunjucks filters are supported, e.g. `{{ env.VARIABLE_NAME | default("fallback") }}`
 - **Missing variables** resolve to an empty string
-- **Partial interpolation** is supported: `${{ HOME }}/repos/${{ PROJECT }}` becomes `/home/user/repos/myproject`
+- **Partial interpolation** is supported: `{{ env.HOME }}/repos/{{ env.PROJECT }}` becomes `/home/user/repos/myproject`
 - **Non-string values** (numbers, booleans) are not affected
 - Interpolation is applied recursively to all nested objects and arrays
 - Works in YAML eval files, external YAML/JSONL case files, and external workspace config files
diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts
index 437f4dbe1..6a6bfdd91 100644
--- a/packages/core/src/evaluation/providers/targets.ts
+++ b/packages/core/src/evaluation/providers/targets.ts
@@ -820,7 +820,7 @@ export const COMMON_TARGET_SETTINGS = [
 ] as const;
 
 const USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
-const WHOLE_ENV_TEMPLATE_PATTERN = /^\s*\{\{\s*env\.[\s\S]*?\}\}\s*$/;
+const SECRET_ENV_TEMPLATE_PATTERN = /^\s*\{\{\s*env\.([A-Za-z_][A-Za-z0-9_]*)\s*\}\}\s*$/;
 
 const BASE_TARGET_SCHEMA = z
   .object({
@@ -2342,7 +2342,8 @@ function resolveOptionalString(
 
   if (trimmed.includes('{{') && trimmed.includes('env.')) {
     const allowLiteral = options?.allowLiteral ?? false;
-    if (!allowLiteral && !WHOLE_ENV_TEMPLATE_PATTERN.test(trimmed)) {
+    const isSecretField = /\b(api key|bearer token|github token|token|secret)\b/i.test(description);
+    if (!allowLiteral && isSecretField && !SECRET_ENV_TEMPLATE_PATTERN.test(trimmed)) {
       throw new Error(
         `${description} must use a whole \${{ VARIABLE_NAME }} or {{ env.VARIABLE_NAME }} reference`,
       );
diff --git a/packages/core/test/evaluation/providers/targets.test.ts b/packages/core/test/evaluation/providers/targets.test.ts
index da80ec4bb..cdd5c4c20 100644
--- a/packages/core/test/evaluation/providers/targets.test.ts
+++ b/packages/core/test/evaluation/providers/targets.test.ts
@@ -628,6 +628,44 @@ describe('resolveTargetDefinition', () => {
     ).toThrow(/whole .+ env\.VARIABLE_NAME/i);
   });
 
+  it('rejects composed {{ env.* }} templates in secret fields', () => {
+    expect(() =>
+      resolveTargetDefinition(
+        {
+          name: 'openai-target',
+          provider: 'openai',
+          endpoint: '{{ env.OPENAI_ENDPOINT }}',
+          api_key: '{{ env.OPENAI_API_KEY }}{{ env.OPENAI_API_KEY_2 }}',
+          model: '{{ env.OPENAI_MODEL }}',
+        },
+        {
+          OPENAI_ENDPOINT: 'https://llm-gateway.example.com/v1',
+          OPENAI_API_KEY: 'openai-secret',
+          OPENAI_API_KEY_2: 'extra-secret',
+          OPENAI_MODEL: 'gpt-5.4',
+        },
+      ),
+    ).toThrow(/whole .+ env\.VARIABLE_NAME/i);
+  });
+
+  it('rejects literal defaults in secret field env templates', () => {
+    expect(() =>
+      resolveTargetDefinition(
+        {
+          name: 'openai-target',
+          provider: 'openai',
+          endpoint: '{{ env.OPENAI_ENDPOINT }}',
+          api_key: '{{ env.OPENAI_API_KEY | default("hardcoded-secret") }}',
+          model: '{{ env.OPENAI_MODEL }}',
+        },
+        {
+          OPENAI_ENDPOINT: 'https://llm-gateway.example.com/v1',
+          OPENAI_MODEL: 'gpt-5.4',
+        },
+      ),
+    ).toThrow(/whole .+ env\.VARIABLE_NAME/i);
+  });
+
   it('resolves openrouter settings from environment', () => {
     const env = {
       OPENROUTER_API_KEY: 'openrouter-secret',