diff --git a/apps/cli/test/commands/prepare/prepare.test.ts b/apps/cli/test/commands/prepare/prepare.test.ts
index 1c7e5e6a9..ed850db91 100644
--- a/apps/cli/test/commands/prepare/prepare.test.ts
+++ b/apps/cli/test/commands/prepare/prepare.test.ts
@@ -255,7 +255,7 @@ describe('agentv prepare', () => {
       path.join(tempDir, '.agentv', 'targets.yaml'),
       `
 targets:
-  - name: codex
+  - label: codex
     provider: cli
     command: bun ./scripts/target.ts
 `,
diff --git a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
index 7391e2dc9..c53d61e4f 100644
--- a/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/eval-files.mdx
@@ -20,8 +20,9 @@ experiment format.
 - A **task suite** is eval YAML that owns task context: `workspace`, shared
   `input`, shared `assertions`, fixtures, graders, and test cases. It can run
   directly or be imported through `imports.suites`.
-- A **raw case file** is a YAML/JSONL array, directory, or glob of cases. Import
-  it with `imports.tests`, `tests: ./cases.yaml`, or string shorthand; parent
+- A **raw case file** is a YAML, JSON, JSONL, CSV, script-backed dataset,
+  directory, or glob of cases. Import it with `imports.tests`,
+  `tests: ./cases.yaml`, `tests: file://cases.csv`, or string shorthand; parent
   suite context applies because raw cases do not carry their own suite context.
 - A **wrapper eval** is eval YAML that imports one or more suites with
   `imports.suites` and binds run controls with top-level `target`, `repeat`,
@@ -373,15 +374,33 @@ tests: ./cases.yaml
 ```
 
 The path is resolved relative to the eval file's directory. The external raw
-case file should contain a YAML array of test objects or a JSONL file with one
-test per line. String entries inside a `tests:` list work the same way and may
-use direct paths, directories, or globs:
+case file can be a YAML or JSON array of test objects, a JSONL file with one
+test per line, a promptfoo-compatible CSV file, or an explicit JavaScript or
+Python dataset function such as `file://generate-tests.mjs:createTests` or
+`file://generate_tests.py:create_tests`. String entries inside a `tests:` list
+work the same way and may use direct paths, `file://` paths, directories, or
+globs:
 
 ```yaml
 tests:
   - ./cases/*.cases.yaml
 ```
 
+CSV datasets support promptfoo-style magic columns. `__expected` and
+`__expectedN` create AgentV assertions using the supported expected-column
+mini-DSL (`contains:*`, `icontains:*`, `contains-any:*`, `contains-all:*`,
+`icontains-any:*`, `icontains-all:*`, `starts-with:*`, `ends-with:*`,
+`regex:*`, `equals:*`, `is-json`, `latency(<ms>)`, `cost(<usd>)`,
+`grade:*`, `llm-rubric:*`, `javascript:*`, `fn:*`, `eval:*`, `python:*`, and
+`file://*.py`; file paths inside CSV cells are resolved relative to the CSV
+file). Unsupported promptfoo assertion forms such as `similar:*` are rejected
+during validation instead of being skipped at runtime.
+`__provider_output` becomes first-class `expected_output`, `__metric` names the
+generated assertions, `__threshold` sets the test threshold,
+`__metadata:<key>` adds metadata, and `__config:__expectedN:threshold` sets an
+assertion `min_score`. Ordinary columns become `vars`, so CSV rows can rely on
+suite-level `input` that interpolates those variables.
+
 String shorthand is raw-case-only. Import reusable task suites through
 `imports.suites`; use `imports.tests` when you want to drop suite context and
 import only raw cases into the parent context:
diff --git a/examples/features/external-datasets/README.md b/examples/features/external-datasets/README.md
index e2a2babc3..07121d7fd 100644
--- a/examples/features/external-datasets/README.md
+++ b/examples/features/external-datasets/README.md
@@ -6,6 +6,7 @@ Demonstrates loading raw test cases from external files using `imports.tests`.
 
 - Loading tests from external YAML files (`imports.tests[].path: cases/accuracy.yaml`)
 - Loading tests from external JSONL files (`imports.tests[].path: cases/regression.jsonl`)
+- Loading tests from promptfoo-compatible CSV files (`imports.tests[].path: cases/magic.csv`)
 - Mixing inline `tests` with imported raw test rows
 - Glob patterns for loading multiple files (`imports.tests[].path: cases/**/*.yaml`)
 
@@ -21,6 +22,7 @@ bun agentv eval examples/features/external-datasets/evals/dataset.eval.yaml
 - `evals/dataset.eval.yaml` — Main eval with inline tests and `imports.tests` references
 - `evals/cases/accuracy.yaml` — YAML array of test cases
 - `evals/cases/regression.jsonl` — JSONL test data (one test per line)
+- `evals/cases/magic.csv` — CSV test data with promptfoo-style magic columns
 
 ## Supported Formats
 
@@ -42,6 +44,22 @@ One JSON test object per line:
 {"id": "test-2", "criteria": "Another outcome", "input": "Another input"}
 ```
 
+### CSV (.csv)
+CSV files read `id` and `input` from ordinary columns, turn every other ordinary column into an entry in `vars`, and use promptfoo-style magic columns for assertions and metadata:
+
+```csv
+id,input,__expected,__provider_output,__metric,__threshold,__metadata:source,locale
+csv-test,Reply with a greeting,icontains:hello,Hello there,greeting,0.8,csv,en-US
+```
+
+`__expected` and `__expectedN` become AgentV assertions for the supported CSV
+mini-DSL. `latency(<ms>)`, `cost(<usd>)`, and `file://*.py` map to runnable
+AgentV graders, with CSV file paths resolved relative to the CSV file;
+unsupported promptfoo forms such as `similar:*` are rejected during validation.
+`__provider_output` becomes AgentV `expected_output`; ordinary non-magic
+columns such as `locale` become `vars` and can be interpolated by suite-level
+`input`.
+
 ## Glob Patterns
 
 Use glob patterns to load from multiple files:
diff --git a/examples/features/external-datasets/evals/cases/magic.csv b/examples/features/external-datasets/evals/cases/magic.csv
new file mode 100644
index 000000000..0cb59809a
--- /dev/null
+++ b/examples/features/external-datasets/evals/cases/magic.csv
@@ -0,0 +1,2 @@
+id,input,__expected,__provider_output,__metric,__threshold,__metadata:source,locale
+csv-magic-greeting,Reply with a short greeting,icontains:hello,Hello there,greeting,0.8,csv,en-US
diff --git a/examples/features/external-datasets/evals/dataset.eval.yaml b/examples/features/external-datasets/evals/dataset.eval.yaml
index 22601c286..a07ce3ce0 100644
--- a/examples/features/external-datasets/evals/dataset.eval.yaml
+++ b/examples/features/external-datasets/evals/dataset.eval.yaml
@@ -7,6 +7,7 @@ imports:
   tests:
     - path: cases/accuracy.yaml
     - path: cases/regression.jsonl
+    - path: cases/magic.csv
 
 tests:
   - id: inline-test
diff --git a/packages/core/src/evaluation/loaders/case-file-loader.ts b/packages/core/src/evaluation/loaders/case-file-loader.ts
index a14265fa7..ada3be8cd 100644
--- a/packages/core/src/evaluation/loaders/case-file-loader.ts
+++ b/packages/core/src/evaluation/loaders/case-file-loader.ts
@@ -1,7 +1,9 @@
 import { readFile, readdir, stat } from 'node:fs/promises';
 import path from 'node:path';
+import { pathToFileURL } from 'node:url';
 import fg from 'fast-glob';
 
+import { execFileWithStdin } from '../../runtime/exec.js';
 import { interpolateEnv } from '../interpolation.js';
 import type { JsonObject, JsonValue } from '../types.js';
 import { isJsonObject } from '../types.js';
@@ -11,6 +13,24 @@ const ANSI_YELLOW = '\u001b[33m';
 const ANSI_RESET = '\u001b[0m';
 
 const FILE_PROTOCOL = 'file://';
+const DATASET_SCRIPT_TIMEOUT_MS = 30_000;
+const DEFAULT_THRESHOLD = 0.75;
+const THRESHOLD_ASSERTION_TYPES = new Set(['starts-with']);
+const SUPPORTED_ASSERTION_TYPES = new Set([
+  'contains',
+  'contains-any',
+  'contains-all',
+  'icontains',
+  'icontains-any',
+  'icontains-all',
+  'starts-with',
+  'ends-with',
+  'regex',
+  'is-json',
+  'equals',
+  'latency',
+  'cost',
+]);
 
 /**
  * Check if a value in the tests array is a file:// reference string.
@@ -26,6 +46,10 @@ function extractFilePath(ref: string): string {
   return ref.slice(FILE_PROTOCOL.length);
 }
 
+function stripFileProtocol(value: string): string {
+  return value.startsWith(FILE_PROTOCOL) ? extractFilePath(value) : value;
+}
+
 /**
  * Check if a path contains glob pattern characters.
  */
@@ -83,36 +107,466 @@ function parseJsonlCases(content: string, filePath: string): JsonObject[] {
   return results;
 }
 
+function assertJsonCases(value: unknown, filePath: string): JsonObject[] {
+  const parsed = interpolateEnv(value, process.env);
+  const rawCases = Array.isArray(parsed)
+    ? parsed
+    : isJsonObject(parsed) && Array.isArray(parsed.tests)
+      ? parsed.tests
+      : undefined;
+  if (!rawCases) {
+    throw new Error(`External test file must contain an array of test objects: ${filePath}`);
+  }
+  return rawCases.map((item, index) => {
+    if (!isJsonObject(item)) {
+      throw new Error(
+        `External test file contains non-object entry at index ${index}: ${filePath}`,
+      );
+    }
+    return item;
+  });
+}
+
+function parseJsonCases(content: string, filePath: string): JsonObject[] {
+  try {
+    return assertJsonCases(JSON.parse(content) as unknown, filePath);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    throw new Error(`Malformed JSON test file: ${message}\n  File: ${filePath}`);
+  }
+}
+
+function parseCsvRows(content: string, filePath: string): Record<string, string>[] {
+  const rows: string[][] = [];
+  let row: string[] = [];
+  let cell = '';
+  let inQuotes = false;
+  let rowStart = true;
+  const source = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
+
+  for (let index = 0; index < source.length; index++) {
+    const char = source[index];
+    const next = source[index + 1];
+    if (inQuotes) {
+      if (char === '"' && next === '"') {
+        cell += '"';
+        index++;
+      } else if (char === '"') {
+        inQuotes = false;
+      } else {
+        cell += char;
+      }
+      continue;
+    }
+    if (char === '"' && cell === '') {
+      inQuotes = true;
+      rowStart = false;
+      continue;
+    }
+    if (char === ',') {
+      row.push(cell);
+      cell = '';
+      rowStart = false;
+      continue;
+    }
+    if (char === '\n' || char === '\r') {
+      if (char === '\r' && next === '\n') {
+        index++;
+      }
+      row.push(cell);
+      if (!rowStart || row.some((value) => value.length > 0)) {
+        rows.push(row);
+      }
+      row = [];
+      cell = '';
+      rowStart = true;
+      continue;
+    }
+    cell += char;
+    rowStart = false;
+  }
+
+  if (inQuotes) {
+    throw new Error(`Malformed CSV test file: unterminated quoted cell\n  File: ${filePath}`);
+  }
+  if (!rowStart || cell.length > 0 || row.length > 0) {
+    row.push(cell);
+    rows.push(row);
+  }
+  if (rows.length === 0) {
+    return [];
+  }
+
+  const headers = rows[0].map((header) => header.trim());
+  return rows.slice(1).map((values) => {
+    const record: Record<string, string> = {};
+    headers.forEach((header, index) => {
+      if (header.length > 0) {
+        record[header] = values[index] ?? '';
+      }
+    });
+    return record;
+  });
+}
+
+function parseAssertionFromString(expected: string, sourceFilePath: string): JsonObject {
+  if (expected.startsWith('grade:') || expected.startsWith('llm-rubric:')) {
+    const value = expected.slice(expected.startsWith('grade:') ? 6 : 11).trim();
+    return {
+      type: 'llm-grader',
+      rubrics: [{ id: 'rubric', outcome: value, weight: 1 }],
+    };
+  }
+  const functionPrefixes = ['javascript:', 'fn:', 'eval:'];
+  const functionPrefix = functionPrefixes.find((prefix) => expected.startsWith(prefix));
+  if (functionPrefix) {
+    return {
+      type: 'inline-assert',
+      code: expected.slice(functionPrefix.length).trim(),
+    };
+  }
+  if (expected.startsWith('python:')) {
+    return {
+      type: 'code-grader',
+      command: ['uv', 'run', 'python', expected.slice('python:'.length).trim()],
+    };
+  }
+  if (expected.startsWith(FILE_PROTOCOL)) {
+    const filePath = stripFileProtocol(expected).trim();
+    if (!filePath.endsWith('.py')) {
+      throw new Error(
+        `Unsupported promptfoo __expected file assertion "${expected}". Only file://*.py code graders are supported.`,
+      );
+    }
+    const commandPath = path.isAbsolute(filePath)
+      ? filePath
+      : path.resolve(path.dirname(sourceFilePath), filePath);
+    return {
+      type: 'code-grader',
+      command: ['uv', 'run', 'python', commandPath],
+    };
+  }
+
+  const regexMatch = expected.match(
+    /^((?:not-)?[a-z][a-z0-9-]*)(?:\((\d+(?:\.\d+)?)\))?(?::([\s\S]*))?$/,
+  );
+  if (regexMatch) {
+    const [, rawType, thresholdText, rawValue] = regexMatch;
+    const negate = rawType.startsWith('not-');
+    const type = negate ? rawType.slice('not-'.length) : rawType;
+    const value = rawValue?.trim();
+    const parsedThreshold = thresholdText ? Number.parseFloat(thresholdText) : undefined;
+    const threshold =
+      parsedThreshold !== undefined && Number.isFinite(parsedThreshold)
+        ? parsedThreshold
+        : THRESHOLD_ASSERTION_TYPES.has(type)
+          ? DEFAULT_THRESHOLD
+          : undefined;
+    if (!SUPPORTED_ASSERTION_TYPES.has(type)) {
+      if (rawValue !== undefined || thresholdText !== undefined) {
+        throw new Error(
+          `Unsupported promptfoo __expected assertion "${type}". Supported assertion types: ${[
+            ...SUPPORTED_ASSERTION_TYPES,
+          ].join(', ')}`,
+        );
+      }
+      return { type: 'equals', value: expected };
+    }
+    if ((type === 'latency' || type === 'cost') && threshold === undefined) {
+      throw new Error(
+        `promptfoo __expected ${type} assertion requires a numeric limit, e.g. ${type}(1)`,
+      );
+    }
+    const assertion: Record<string, JsonValue> = {
+      type,
+    };
+    if (negate) {
+      assertion.negate = true;
+    }
+    if (
+      type === 'contains-any' ||
+      type === 'contains-all' ||
+      type === 'icontains-any' ||
+      type === 'icontains-all'
+    ) {
+      assertion.value = value ? value.split(',').map((item) => item.trim()) : [];
+    } else if (value !== undefined) {
+      assertion.value = value;
+    }
+    if (type === 'latency' && threshold !== undefined) {
+      assertion.threshold = threshold;
+    } else if (type === 'cost' && threshold !== undefined) {
+      assertion.budget = threshold;
+    } else if (threshold !== undefined) {
+      assertion.min_score = threshold;
+    }
+    return assertion;
+  }
+
+  return { type: 'equals', value: expected };
+}
+
+function parseMetadataValue(key: string, value: string): JsonValue | undefined {
+  if (value.trim() === '') {
+    return undefined;
+  }
+  if (key.endsWith('[]')) {
+    return value
+      .split(/(?<!\\),/)
+      .map((item) => item.trim().replaceAll('\\,', ','))
+      .filter((item) => item.length > 0);
+  }
+  return value;
+}
+
+function parseCsvCases(content: string, filePath: string): JsonObject[] {
+  return parseCsvRows(content, filePath).map((row, rowIndex) => {
+    const vars: Record<string, JsonValue> = {};
+    const metadata: Record<string, JsonValue> = {};
+    const assertions: JsonObject[] = [];
+    const assertionConfigs = new Map<number, Record<string, JsonValue>>();
+    let id: string | undefined;
+    let input: string | undefined;
+    let prefix = '';
+    let suffix = '';
+    let criteria: string | undefined;
+    let expectedOutput: string | undefined;
+    let metric: string | undefined;
+    let threshold: number | undefined;
+
+    for (const [rawKey, rawValue] of Object.entries(row)) {
+      const key = rawKey.trim();
+      const value = rawValue;
+      if (key === 'id') {
+        id = value;
+      } else if (key === 'input') {
+        input = value;
+      } else if (key.startsWith('__expected')) {
+        if (value.trim() !== '') {
+          assertions.push(parseAssertionFromString(value.trim(), filePath));
+        }
+      } else if (key === '__prefix') {
+        prefix = value;
+      } else if (key === '__suffix') {
+        suffix = value;
+      } else if (key === '__description') {
+        criteria = value;
+      } else if (key === '__provider_output' || key === '__providerOutput') {
+        expectedOutput = value;
+      } else if (key === '__metric') {
+        metric = value;
+      } else if (key === '__threshold') {
+        const parsedThreshold = Number.parseFloat(value);
+        if (Number.isFinite(parsedThreshold)) {
+          threshold = parsedThreshold;
+        }
+      } else if (key.startsWith('__metadata:')) {
+        const metadataKey = key.slice('__metadata:'.length);
+        const parsed = parseMetadataValue(metadataKey, value);
+        if (parsed !== undefined) {
+          metadata[metadataKey.endsWith('[]') ? metadataKey.slice(0, -2) : metadataKey] = parsed;
+        }
+      } else if (key.startsWith('__config:')) {
+        const [expectedKey, configKey] = key.slice('__config:'.length).split(':');
+        if (configKey !== 'threshold') {
+          throw new Error(`Invalid config key "${configKey}" in __config column: ${filePath}`);
+        }
+        const targetIndex =
+          expectedKey === '__expected'
+            ? 0
+            : /^__expected\d+$/.test(expectedKey)
+              ? Number.parseInt(expectedKey.slice('__expected'.length), 10) - 1
+              : undefined;
+        if (targetIndex === undefined || targetIndex < 0) {
+          throw new Error(`Invalid expected key "${expectedKey}" in __config column: ${filePath}`);
+        }
+        const parsedThreshold = Number.parseFloat(value);
+        if (!Number.isFinite(parsedThreshold)) {
+          throw new Error(`Invalid numeric value for ${configKey} in __config column: ${filePath}`);
+        }
+        assertionConfigs.set(targetIndex, { [configKey]: parsedThreshold });
+      } else if (key.length > 0) {
+        vars[key] = value;
+      }
+    }
+
+    const caseInput = input !== undefined ? `${prefix}${input}${suffix}` : undefined;
+    assertions.forEach((assertion, index) => {
+      if (metric) {
+        (assertion as Record<string, JsonValue>).metric = metric;
+        (assertion as Record<string, JsonValue>).name =
+          assertions.length === 1 ? metric : `${metric}-${index + 1}`;
+      }
+      const config = assertionConfigs.get(index);
+      if (config?.threshold !== undefined) {
+        (assertion as Record<string, JsonValue>).min_score = config.threshold;
+        metadata.threshold = config.threshold;
+      }
+    });
+
+    return {
+      id: id && id.trim() !== '' ? id : `row-${rowIndex + 1}`,
+      ...(caseInput !== undefined ? { input: caseInput } : {}),
+      ...(criteria ? { criteria } : {}),
+      ...(expectedOutput ? { expected_output: expectedOutput } : {}),
+      ...(assertions.length > 0 ? { assertions } : {}),
+      ...(threshold !== undefined ? { threshold } : {}),
+      ...(threshold !== undefined ? { execution: { threshold } } : {}),
+      ...(Object.keys(vars).length > 0 ? { vars } : {}),
+      ...(Object.keys(metadata).length > 0 ? { metadata } : {}),
+    };
+  });
+}
+
+function parseDatasetFunctionReference(filePath: string): {
+  readonly scriptPath: string;
+  readonly functionName?: string;
+} {
+  const extensionMatch = filePath.match(/\.(?:mjs|cjs|js|py)(?::([^/\\:]+))?$/i);
+  if (!extensionMatch) {
+    return { scriptPath: filePath };
+  }
+  return {
+    scriptPath: filePath.slice(
+      0,
+      filePath.length - (extensionMatch[1]?.length ?? 0) - (extensionMatch[1] ? 1 : 0),
+    ),
+    ...(extensionMatch[1] ? { functionName: extensionMatch[1] } : {}),
+  };
+}
+
+async function loadCasesFromJavaScriptFunction(
+  scriptPath: string,
+  functionName: string | undefined,
+): Promise<JsonObject[]> {
+  const module = (await import(pathToFileURL(scriptPath).href)) as Record<string, unknown>;
+  const candidate = functionName ? module[functionName] : (module.default ?? module.createTests);
+  if (typeof candidate !== 'function') {
+    throw new Error(
+      `JavaScript dataset file must export function '${functionName ?? 'default or createTests'}': ${scriptPath}`,
+    );
+  }
+  return assertJsonCases(await candidate(), scriptPath);
+}
+
+async function loadCasesFromPythonFunction(
+  scriptPath: string,
+  functionName: string | undefined,
+): Promise<JsonObject[]> {
+  const harness = [
+    'import importlib.util, json, pathlib, sys',
+    'script_path = pathlib.Path(sys.argv[1]).resolve()',
+    'function_name = sys.argv[2]',
+    'spec = importlib.util.spec_from_file_location("agentv_dataset_module", script_path)',
+    'assert spec and spec.loader', // must run before module_from_spec() uses spec
+    'module = importlib.util.module_from_spec(spec)',
+    'spec.loader.exec_module(module)',
+    'fn = getattr(module, function_name)',
+    'print(json.dumps(fn()))',
+  ].join('\n');
+  const { stdout, stderr, exitCode } = await runPythonDatasetHarness(
+    harness,
+    scriptPath,
+    functionName ?? 'create_tests',
+  );
+  if (exitCode !== 0) {
+    throw new Error(`Python dataset function failed: ${scriptPath}\n${stderr.trim()}`);
+  }
+  return parseJsonCases(stdout, scriptPath);
+}
+
+async function runPythonDatasetHarness(
+  harness: string,
+  scriptPath: string,
+  functionName: string,
+): Promise<{
+  readonly stdout: string;
+  readonly stderr: string;
+  readonly exitCode: number;
+}> {
+  const cwd = path.dirname(scriptPath);
+  const args = ['-c', harness, scriptPath, functionName];
+  const commands = [
+    ['uv', 'run', 'python', ...args],
+    ['python3', ...args],
+    ['python', ...args],
+  ];
+  let lastMissingError: unknown;
+
+  for (const command of commands) {
+    try {
+      return await execFileWithStdin(command, '', {
+        cwd,
+        timeoutMs: DATASET_SCRIPT_TIMEOUT_MS,
+      });
+    } catch (error) {
+      if (!isMissingExecutableError(error)) {
+        throw error;
+      }
+      lastMissingError = error;
+    }
+  }
+
+  const message =
+    lastMissingError instanceof Error ? lastMissingError.message : String(lastMissingError);
+  throw new Error(`Python dataset function failed: no Python runner available\n${message}`);
+}
+
+function isMissingExecutableError(error: unknown): boolean {
+  if (!isJsonObjectLike(error)) {
+    return false;
+  }
+  return error.code === 'ENOENT';
+}
+
+function isJsonObjectLike(value: unknown): value is { readonly [key: string]: unknown } {
+  return typeof value === 'object' && value !== null;
+}
+
 /**
  * Load test objects from a single external file (YAML or JSONL).
  */
 export async function loadCasesFromFile(filePath: string): Promise<JsonObject[]> {
-  const ext = path.extname(filePath).toLowerCase();
+  const { scriptPath, functionName } = parseDatasetFunctionReference(filePath);
+  const ext = path.extname(scriptPath).toLowerCase();
+  if (ext === '.js' || ext === '.mjs' || ext === '.cjs') {
+    return loadCasesFromJavaScriptFunction(scriptPath, functionName);
+  }
+  if (ext === '.py') {
+    return loadCasesFromPythonFunction(scriptPath, functionName);
+  }
+
   let content: string;
 
   try {
-    content = await readFile(filePath, 'utf8');
+    content = await readFile(scriptPath, 'utf8');
   } catch (error) {
     const message = error instanceof Error ? error.message : String(error);
-    throw new Error(`Cannot read external test file: ${filePath}\n  ${message}`);
+    throw new Error(`Cannot read external test file: ${scriptPath}\n  ${message}`);
   }
 
   if (content.trim() === '') {
     console.warn(
-      `${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET}`,
+      `${ANSI_YELLOW}Warning: External test file is empty, skipping: ${scriptPath}${ANSI_RESET}`,
     );
     return [];
   }
 
   if (ext === '.yaml' || ext === '.yml') {
-    return parseYamlCases(content, filePath);
+    return parseYamlCases(content, scriptPath);
   }
   if (ext === '.jsonl') {
-    return parseJsonlCases(content, filePath);
+    return parseJsonlCases(content, scriptPath);
+  }
+  if (ext === '.json') {
+    return parseJsonCases(content, scriptPath);
+  }
+  if (ext === '.csv') {
+    return parseCsvCases(content, scriptPath);
   }
 
   throw new Error(
-    `Unsupported external test file format '${ext}': ${filePath}. Supported: .yaml, .yml, .jsonl`,
+    `Unsupported external test file format '${ext}': ${scriptPath}. Supported: .csv, .json, .jsonl, .yaml, .yml, .js, .mjs, .cjs, .py`,
   );
 }
 
@@ -125,7 +579,7 @@ export async function resolveFileReference(
   ref: string,
   evalFileDir: string,
 ): Promise<JsonObject[]> {
-  const rawPath = extractFilePath(ref);
+  const rawPath = stripFileProtocol(ref);
   const absolutePattern = path.resolve(evalFileDir, rawPath);
 
   if (isGlobPattern(rawPath)) {
diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts
index 0edc477ef..1c0cc3ef9 100644
--- a/packages/core/src/evaluation/validation/eval-validator.ts
+++ b/packages/core/src/evaluation/validation/eval-validator.ts
@@ -57,7 +57,17 @@ const PROMPTFOO_ASSERTION_TYPES = new Set([
 ]);
 
 /** Valid file extensions for external test files. */
-const VALID_TEST_FILE_EXTENSIONS = new Set(['.yaml', '.yml', '.jsonl']);
+const VALID_TEST_FILE_EXTENSIONS = new Set([
+  '.csv',
+  '.cjs',
+  '.js',
+  '.json',
+  '.jsonl',
+  '.mjs',
+  '.py',
+  '.yaml',
+  '.yml',
+]);
 
 /** Known fields at the top level of an eval file. */
 const KNOWN_TOP_LEVEL_FIELDS = new Set([
@@ -1826,9 +1836,13 @@ function validateTestsStringPath(
   errors: ValidationError[],
   location = 'tests',
 ): boolean {
-  const normalizedPath = testsPath.startsWith('file://')
+  let normalizedPath = testsPath.startsWith('file://')
     ? testsPath.slice('file://'.length)
     : testsPath;
+  const scriptFunctionMatch = normalizedPath.match(/\.(?:mjs|cjs|js|py):[^/\\:]+$/i);
+  if (scriptFunctionMatch) {
+    normalizedPath = normalizedPath.slice(0, normalizedPath.lastIndexOf(':'));
+  }
   if (/\.eval\.ya?ml$/i.test(normalizedPath)) {
     errors.push({
       severity: 'error',
@@ -1880,7 +1894,10 @@ async function validateRawCaseImportPath(
 
     let caseIndex = 0;
     for (const casePath of caseFiles) {
-      const pathStat = await stat(casePath).catch(() => undefined);
+      const statPath = casePath.match(/\.(?:mjs|cjs|js|py):[^/\\:]+$/i)
+        ? casePath.slice(0, casePath.lastIndexOf(':'))
+        : casePath;
+      const pathStat = await stat(statPath).catch(() => undefined);
       const externalCases = pathStat?.isDirectory()
         ? await loadCasesFromDirectory(casePath)
         : await loadCasesFromFile(casePath);
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index 2914ce8df..12f25ed16 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -777,9 +777,14 @@ async function loadTestsFromParsedYamlValue(
           }
         }
       }
-      const testInputMessages = resolveInputMessages(inputCase, inputSuiteFiles);
+      const testInputMessages = resolveInputMessages(inputCase, inputSuiteFiles) ?? [];
       // Resolve expected_output with shorthand support
       const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
+      const effectiveSuiteInputValue =
+        rawSuiteInput && !skipDefaults
+          ? interpolateCaseField(rawSuiteInput, caseVars, nunjucksFilters)
+          : undefined;
+      const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
 
       // A test is complete when it has id, input, and at least one of: criteria, expected_output, assertions, or turns (conversation mode)
       const hasEvaluationSpec =
@@ -787,7 +792,10 @@ async function loadTestsFromParsedYamlValue(
         expectedMessages.length > 0 ||
         renderedCase.assertions !== undefined ||
         (Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0);
-      if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
+      const hasInputMessages =
+        testInputMessages.length > 0 ||
+        (effectiveSuiteInputMessages !== undefined && effectiveSuiteInputMessages.length > 0);
+      if (!id || !hasEvaluationSpec || !hasInputMessages) {
         logError(
           `Skipping incomplete test: ${id ?? 'unknown'}. Missing required fields: id, input or PROMPT.md, and at least one of criteria/expected_output/assertions/turns`,
         );
@@ -795,12 +803,6 @@ async function loadTestsFromParsedYamlValue(
       }
 
       // Prepend suite-level input to test input (respecting skip_defaults)
-      const effectiveSuiteInputValue =
-        rawSuiteInput && !skipDefaults
-          ? interpolateCaseField(rawSuiteInput, caseVars, nunjucksFilters)
-          : undefined;
-      const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
-
       // expected_output is optional - for outcome-only evaluation
       const hasExpectedMessages = expectedMessages.length > 0;
 
@@ -1502,8 +1504,11 @@ async function resolveIncludePaths(
   includePath: string,
   evalFileDir: string,
 ): Promise<readonly string[]> {
-  const absolutePattern = path.resolve(evalFileDir, includePath);
-  if (hasGlobMagic(includePath)) {
+  const normalizedPath = includePath.startsWith('file://')
+    ? includePath.slice('file://'.length)
+    : includePath;
+  const absolutePattern = path.resolve(evalFileDir, normalizedPath);
+  if (hasGlobMagic(normalizedPath)) {
     const matches = (await fg(absolutePattern.replaceAll('\\', '/'), {
       onlyFiles: true,
       absolute: true,
diff --git a/packages/core/test/evaluation/loaders/case-file-loader.test.ts b/packages/core/test/evaluation/loaders/case-file-loader.test.ts
index d580745f5..ddca6666e 100644
--- a/packages/core/test/evaluation/loaders/case-file-loader.test.ts
+++ b/packages/core/test/evaluation/loaders/case-file-loader.test.ts
@@ -9,6 +9,7 @@ import {
   loadCasesFromDirectory,
   resolveFileReference,
 } from '../../../src/evaluation/loaders/case-file-loader.js';
+import { parseGraders } from '../../../src/evaluation/loaders/grader-parser.js';
 import { loadTestSuite, loadTests } from '../../../src/evaluation/yaml-parser.js';
 
 describe('isFileReference', () => {
@@ -76,6 +77,120 @@ describe('resolveFileReference', () => {
     expect(cases[1].id).toBe('jsonl-2');
   });
 
+  it('loads test objects from a JSON file', async () => {
+    await writeFile(
+      path.join(tempDir, 'cases', 'tests.json'),
+      JSON.stringify([
+        { id: 'json-1', criteria: 'Goal 1', input: 'Query 1' },
+        { id: 'json-2', criteria: 'Goal 2', input: 'Query 2' },
+      ]),
+    );
+
+    const cases = await resolveFileReference('file://cases/tests.json', tempDir);
+
+    expect(cases).toHaveLength(2);
+    expect(cases[0].id).toBe('json-1');
+    expect(cases[1].id).toBe('json-2');
+  });
+
+  it('maps promptfoo CSV magic columns into AgentV raw cases', async () => {
+    await writeFile(
+      path.join(tempDir, 'cases', 'tests.csv'),
+      [
+        'id,input,__expected,__expected2,__prefix,__suffix,__description,__provider_output,__metric,__threshold,__metadata:category,__metadata:tags[],__config:__expected2:threshold,locale',
+        'csv-1,"What is 2+2?",equals:4,icontains:four,"Answer briefly:"," Thanks","Arithmetic case","4","accuracy",0.7,math,"smoke,regression",0.6,en-US',
+      ].join('\n'),
+    );
+
+    const cases = await resolveFileReference('file://cases/tests.csv', tempDir);
+
+    expect(cases).toHaveLength(1);
+    expect(cases[0]).toMatchObject({
+      id: 'csv-1',
+      input: 'Answer briefly:What is 2+2? Thanks',
+      expected_output: '4',
+      criteria: 'Arithmetic case',
+      threshold: 0.7,
+      vars: { locale: 'en-US' },
+      metadata: { category: 'math', tags: ['smoke', 'regression'], threshold: 0.6 },
+      assertions: [
+        { type: 'equals', value: '4', metric: 'accuracy' },
+        { type: 'icontains', value: 'four', metric: 'accuracy', min_score: 0.6 },
+      ],
+    });
+  });
+
+  it('maps supported promptfoo expected DSL forms to runnable AgentV assertions', async () => {
+    const graderPath = path.join(tempDir, 'cases', 'grader.py');
+    await writeFile(graderPath, 'print("ok")\n');
+    await writeFile(
+      path.join(tempDir, 'cases', 'expected-dsl.csv'),
+      [
+        'id,input,__expected,__expected2,__expected3',
+        'csv-assertions,Hello,latency(1000),cost(0.01),file://grader.py',
+      ].join('\n'),
+    );
+
+    const cases = await resolveFileReference('file://cases/expected-dsl.csv', tempDir);
+
+    expect(cases[0].assertions).toEqual([
+      { type: 'latency', threshold: 1000 },
+      { type: 'cost', budget: 0.01 },
+      { type: 'code-grader', command: ['uv', 'run', 'python', graderPath] },
+    ]);
+
+    const evaluators = await parseGraders(cases[0], undefined, [tempDir], 'csv-assertions');
+    expect(evaluators.map((evaluator) => evaluator.type)).toEqual([
+      'latency',
+      'cost',
+      'code-grader',
+    ]);
+  });
+
+  it('rejects unsupported promptfoo expected DSL forms clearly', async () => {
+    await writeFile(
+      path.join(tempDir, 'cases', 'unsupported-expected.csv'),
+      ['id,input,__expected', 'csv-similar,Hello,similar:hello'].join('\n'),
+    );
+
+    await expect(
+      resolveFileReference('file://cases/unsupported-expected.csv', tempDir),
+    ).rejects.toThrow(/Unsupported promptfoo __expected assertion "similar"/);
+  });
+
+  it('loads tests from explicit JavaScript function dataset files', async () => {
+    await writeFile(
+      path.join(tempDir, 'cases', 'dataset.mjs'),
+      `export function createTests() {
+  return [
+    { id: 'js-1', criteria: 'JS goal', input: 'JS input' },
+  ];
+}
+`,
+    );
+
+    const cases = await resolveFileReference('file://cases/dataset.mjs:createTests', tempDir);
+
+    expect(cases).toHaveLength(1);
+    expect(cases[0].id).toBe('js-1');
+  });
+
+  it('loads tests from explicit Python function dataset files', async () => {
+    await writeFile(
+      path.join(tempDir, 'cases', 'dataset.py'),
+      `def create_tests():
+    return [
+        {"id": "py-1", "criteria": "Python goal", "input": "Python input"},
+    ]
+`,
+    );
+
+    const cases = await resolveFileReference('file://cases/dataset.py:create_tests', tempDir);
+
+    expect(cases).toHaveLength(1);
+    expect(cases[0].id).toBe('py-1');
+  });
+
   it('resolves glob patterns to multiple files', async () => {
     await mkdir(path.join(tempDir, 'glob-cases'), { recursive: true });
     await writeFile(
@@ -353,6 +468,112 @@ tests: ./cases.jsonl
     expect(tests[1].id).toBe('ext-jsonl-2');
   });
 
+  it('loads tests from file:// string paths', async () => {
+    await writeFile(
+      path.join(tempDir, 'file-url-cases.json'),
+      JSON.stringify([{ id: 'file-url-json', criteria: 'JSON goal', input: 'Input' }]),
+    );
+
+    await writeFile(
+      path.join(tempDir, 'file-url-suite.yaml'),
+      `name: file-url-suite
+tests: file://file-url-cases.json
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'file-url-suite.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].id).toBe('file-url-json');
+  });
+
+  it('keeps imports.tests select working beside file-backed tests', async () => {
+    await writeFile(
+      path.join(tempDir, 'import-cases.yaml'),
+      `- id: imported-keep
+  criteria: "Imported keep"
+  input: "Imported keep input"
+  metadata:
+    group: keep
+- id: imported-drop
+  criteria: "Imported drop"
+  input: "Imported drop input"
+  metadata:
+    group: drop
+`,
+    );
+    await writeFile(
+      path.join(tempDir, 'direct-cases.jsonl'),
+      '{"id": "direct-case", "criteria": "Direct goal", "input": "Direct input"}\n',
+    );
+    await writeFile(
+      path.join(tempDir, 'imports-and-file-tests.yaml'),
+      `imports:
+  tests:
+    - path: import-cases.yaml
+      select:
+        metadata:
+          group: keep
+tests: file://direct-cases.jsonl
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'imports-and-file-tests.yaml'), tempDir);
+
+    expect(tests.map((test) => test.id)).toEqual(['imported-keep', 'direct-case']);
+  });
+
+  it('loads promptfoo CSV magic columns through the full suite parser', async () => {
+    await writeFile(
+      path.join(tempDir, 'magic-cases.csv'),
+      [
+        'id,input,__expected,__metric,__threshold,__metadata:category,__provider_output',
+        'magic-csv,Hello,contains:Hi,greeting,0.9,smoke,Hi there',
+      ].join('\n'),
+    );
+    await writeFile(
+      path.join(tempDir, 'magic-suite.yaml'),
+      `name: magic-suite
+tests: file://magic-cases.csv
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'magic-suite.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].id).toBe('magic-csv');
+    expect(tests[0].threshold).toBe(0.9);
+    expect(tests[0].metadata).toMatchObject({ category: 'smoke' });
+    expect(tests[0].reference_answer).toBe('Hi there');
+    expect(tests[0].assertions?.[0]).toMatchObject({
+      name: 'greeting',
+      type: 'contains',
+      value: 'Hi',
+    });
+  });
+
+  it('applies suite-level input to promptfoo CSV rows with vars and expected assertions', async () => {
+    await writeFile(
+      path.join(tempDir, 'promptfoo-vars.csv'),
+      ['id,topic,__expected', 'case,refund,contains:refund'].join('\n'),
+    );
+    await writeFile(
+      path.join(tempDir, 'promptfoo-vars-suite.yaml'),
+      `input: Answer about {{ topic }}
+tests: file://promptfoo-vars.csv
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'promptfoo-vars-suite.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0]).toMatchObject({
+      id: 'case',
+      input: [{ role: 'user', content: 'Answer about refund' }],
+    });
+    expect(tests[0].assertions?.[0]).toMatchObject({ type: 'contains', value: 'refund' });
+  });
+
   it('resolves relative path against eval file directory', async () => {
     // Create nested directory structure
     const dirA = path.join(tempDir, 'a');
diff --git a/packages/core/test/evaluation/validation/eval-validator.test.ts b/packages/core/test/evaluation/validation/eval-validator.test.ts
index 97236de02..63e54f97f 100644
--- a/packages/core/test/evaluation/validation/eval-validator.test.ts
+++ b/packages/core/test/evaluation/validation/eval-validator.test.ts
@@ -1579,6 +1579,87 @@ tests: "./cases-shorthand-workspace.yaml"
       );
       expect(extWarnings).toHaveLength(0);
     });
+
+    it('passes dataset loader extensions without unsupported-extension warnings', async () => {
+      const files = {
+        'cases.csv': 'id,input,__expected\ncsv-1,Hello,contains:Hi\n',
+        'cases.json': '[{"id":"json-1","criteria":"Goal","input":"Query"}]\n',
+        'cases.mjs': 'export function createTests() { return []; }\n',
+        'cases.py': 'def create_tests():\n    return []\n',
+      };
+      for (const [filename, content] of Object.entries(files)) {
+        await writeFile(path.join(tempDir, filename), content);
+      }
+
+      const filePath = path.join(tempDir, 'tests-dataset-extensions.yaml');
+      await writeFile(
+        filePath,
+        `imports:
+  tests:
+    - path: file://cases.csv
+    - path: cases.json
+    - path: cases.mjs:createTests
+    - path: cases.py:create_tests
+tests:
+  - id: inline
+    criteria: Goal
+    input: Query
+`,
+      );
+
+      const result = await validateEvalFile(filePath);
+
+      expect(result.valid).toBe(true);
+      const extWarnings = result.errors.filter(
+        (error) => error.severity === 'warning' && error.message.includes('extension'),
+      );
+      expect(extWarnings).toHaveLength(0);
+    });
+
+    it('passes promptfoo CSV rows that rely on suite-level input', async () => {
+      await writeFile(
+        path.join(tempDir, 'suite-input-cases.csv'),
+        'id,topic,__expected\ncase,refund,contains:refund\n',
+      );
+
+      const filePath = path.join(tempDir, 'suite-input-csv.yaml');
+      await writeFile(
+        filePath,
+        `input: Answer about {{ topic }}
+tests: file://suite-input-cases.csv
+`,
+      );
+
+      const result = await validateEvalFile(filePath);
+
+      expect(result.valid).toBe(true);
+      expect(result.errors).toHaveLength(0);
+    });
+
+    it('rejects unsupported promptfoo CSV expected DSL forms during validation', async () => {
+      await writeFile(
+        path.join(tempDir, 'unsupported-expected-cases.csv'),
+        'id,input,__expected\ncase,Hello,similar:hello\n',
+      );
+
+      const filePath = path.join(tempDir, 'unsupported-expected-csv.yaml');
+      await writeFile(
+        filePath,
+        `tests: file://unsupported-expected-cases.csv
+`,
+      );
+
+      const result = await validateEvalFile(filePath);
+
+      expect(result.valid).toBe(false);
+      expect(result.errors).toContainEqual(
+        expect.objectContaining({
+          severity: 'error',
+          location: 'tests',
+          message: expect.stringContaining('Unsupported promptfoo __expected assertion "similar"'),
+        }),
+      );
+    });
   });
 
   describe('suite-level input validation', () => {