From 0addbb7106814c322828692437d8b382dbd05eed Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 10 Jun 2026 11:15:58 +0800
Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Add real-world
 external adapter coverage contract","authority":"XY-864"}

---
 README.md                                     |   7 +-
 .../memory_projects_manifest.json             | 569 +++++++++++++++++
 .../src/bin/real_world_job_benchmark.rs       | 581 ++++++++++++++++++
 .../tests/real_world_job_benchmark.rs         | 110 ++++
 .../benchmarking/live_baseline_benchmark.md   |   7 +
 .../real_world_agent_memory_benchmark.md      |  45 ++
 .../research/comparison_external_projects.md  |   8 +
 .../external_memory_improvement_plan.md       |   5 +
 .../real_world_agent_memory_benchmark_v1.md   |  86 +++
 9 files changed, 1417 insertions(+), 1 deletion(-)
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json

diff --git a/README.md b/README.md
index c636f041..828d1821 100644
--- a/README.md
+++ b/README.md
@@ -164,7 +164,12 @@ Detailed evidence and interpretation:
   This contract defines job-level suites for agent work. Checked-in fixture runners now
   cover a smoke work-resume slice and proposal-only consolidation cases through
   `cargo make real-world-job-smoke` and `cargo make real-world-memory-consolidation`,
-  but those reports are fixture-level evidence and not live external-adapter wins.
+  and `cargo make real-world-memory` now reports the first external adapter coverage
+  manifest for ELF, qmd, agentmemory, mem0/OpenMemory, claude-mem, memsearch, and
+  OpenViking. Those real-world reports still distinguish fixture-backed and
+  live-baseline-only evidence from true live real-world adapter runs; no external
+  project has a live real-world suite win until an adapter actually executes
+  `real_world_job` prompts and scoring.
 
 Quick comparison snapshot (objective/high-level).
 This table compares capability coverage, not overall project quality.
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
new file mode 100644
index 00000000..c66ebd56
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -0,0 +1,569 @@
+{
+  "schema": "elf.real_world_external_adapter_manifest/v1",
+  "manifest_id": "real-world-memory-project-adapters-2026-06-10",
+  "docker_isolation": {
+    "default": true,
+    "compose_file": "docker-compose.baseline.yml",
+    "runner": "scripts/live-baseline-benchmark.sh",
+    "artifact_dir": "tmp/live-baseline/",
+    "host_global_installs_required": false,
+    "notes": [
+      "External project runs default to Docker Compose and Docker-managed caches.",
+      "Real-world job fixture reports and live baseline reports use separate schemas and claim boundaries."
+    ]
+  },
+  "adapters": [
+    {
+      "adapter_id": "elf_real_world_memory_fixture",
+      "project": "ELF",
+      "adapter_kind": "offline_fixture_response",
+      "evidence_class": "fixture_backed",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "wrong_result",
+      "setup": {
+        "status": "pass",
+        "evidence": "The checked-in real_world_memory fixtures parse and score through the ELF fixture runner.",
+        "command": "cargo make real-world-memory",
+        "artifact": "tmp/real-world-memory/real-world-memory-report.json"
+      },
+      "run": {
+        "status": "wrong_result",
+        "evidence": "The current fixture set reports 27 jobs, 25 pass, 1 wrong_result, and 1 not_encoded.",
+        "command": "cargo make real-world-memory",
+        "artifact": "tmp/real-world-memory/real-world-memory-report.json"
+      },
+      "result": {
+        "status": "wrong_result",
+        "evidence": "This is fixture-backed ELF scoring, not a live external adapter result.",
+        "artifact": "tmp/real-world-memory/real-world-memory-report.md"
+      },
+      "capabilities": [
+        {
+          "capability": "real_world_job_fixture_scoring",
+          "status": "real",
+          "evidence": "The runner scores checked-in real_world_job records with expected evidence, traps, and typed status output."
+        },
+        {
+          "capability": "live_external_adapter_execution",
+          "status": "not_encoded",
+          "evidence": "The ELF fixture response path does not exercise an external memory project runtime."
+        },
+        {
+          "capability": "docker_isolated_baseline",
+          "status": "pass",
+          "evidence": "ELF live baseline runs execute through docker-compose.baseline.yml for retrieval and lifecycle evidence."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "trust_source_of_truth",
+          "status": "pass",
+          "evidence": "Checked-in source-of-truth rebuild fixture is encoded and passing."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "pass",
+          "evidence": "Checked-in work-resume fixtures are encoded and passing."
+        },
+        {
+          "suite_id": "retrieval",
+          "status": "pass",
+          "evidence": "Checked-in retrieval fixtures are encoded; one deliberate operator-debug wrong-result case is reported under operator_debugging_ux."
+        },
+        {
+          "suite_id": "memory_evolution",
+          "status": "not_encoded",
+          "evidence": "The relation temporal-validity case is deliberately not_encoded until temporal graph validity is implemented."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "evidence": "The aggregate fixture set includes one deliberate wrong-result trace attribution case."
+        },
+        {
+          "suite_id": "capture_integration",
+          "status": "pass",
+          "evidence": "The redaction and capture-boundary fixture is encoded and passing."
+        },
+        {
+          "suite_id": "personalization",
+          "status": "pass",
+          "evidence": "The scoped preference fixture is encoded and passing."
+        },
+        {
+          "suite_id": "consolidation",
+          "status": "pass",
+          "evidence": "Proposal-only consolidation fixtures are encoded and passing without source mutation."
+        },
+        {
+          "suite_id": "knowledge_compilation",
+          "status": "pass",
+          "evidence": "Knowledge page fixtures are encoded and passing with citation and rebuild metrics."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "fixture_dir",
+          "ref": "apps/elf-eval/fixtures/real_world_memory/",
+          "status": "real"
+        },
+        {
+          "kind": "command",
+          "ref": "cargo make real-world-memory",
+          "status": "pass"
+        }
+      ],
+      "notes": [
+        "This adapter record exists to keep ELF fixture results separate from live external adapter results."
+      ],
+      "follow_up": {
+        "title": "[ELF benchmark vNext] Replace fixture-only ELF answers with live real-world adapter execution where appropriate",
+        "reason": "The current report proves fixture scoring, not an end-to-end live real-world memory service run."
+      }
+    },
+    {
+      "adapter_id": "qmd_live_baseline",
+      "project": "qmd",
+      "adapter_kind": "docker_cli_same_corpus",
+      "evidence_class": "live_baseline_only",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "pass",
+      "setup": {
+        "status": "pass",
+        "evidence": "The live-baseline Docker runner installs qmd inside the baseline container.",
+        "command": "ELF_BASELINE_PROJECTS=qmd cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/qmd.log"
+      },
+      "run": {
+        "status": "pass",
+        "evidence": "qmd same-corpus retrieval, update, delete, and cold-start checks are encoded in the live baseline runner.",
+        "command": "ELF_BASELINE_PROJECTS=qmd cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/live-baseline-report.json"
+      },
+      "result": {
+        "status": "pass",
+        "evidence": "The current evidence is same-corpus live-baseline evidence only; no real_world_job qmd adapter is encoded yet.",
+        "artifact": "docs/guide/benchmarking/live_baseline_benchmark.md"
+      },
+      "capabilities": [
+        {
+          "capability": "same_corpus_retrieval",
+          "status": "pass",
+          "evidence": "qmd has an encoded Docker same-corpus retrieval adapter."
+        },
+        {
+          "capability": "update_delete_cold_start",
+          "status": "pass",
+          "evidence": "qmd lifecycle smoke checks are encoded in the live-baseline runner."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No qmd adapter currently executes real_world_job prompts and answer scoring."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "retrieval",
+          "status": "not_encoded",
+          "evidence": "qmd is a retrieval-debug reference, but no real_world_job retrieval adapter run is encoded."
+        },
+        {
+          "suite_id": "memory_evolution",
+          "status": "not_encoded",
+          "evidence": "Live-baseline lifecycle checks exist, but no real_world_job memory_evolution run is encoded."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "qmd debug ergonomics are a reference dimension; no operator_debugging_ux fixture is executed against qmd."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "real"
+        },
+        {
+          "kind": "compose",
+          "ref": "docker-compose.baseline.yml",
+          "status": "real"
+        }
+      ],
+      "notes": [
+        "Do not claim a qmd real-world suite pass until a real_world_job adapter executes qmd and records job-level evidence."
+      ]
+    },
+    {
+      "adapter_id": "agentmemory_live_baseline",
+      "project": "agentmemory",
+      "adapter_kind": "docker_sdk_mock_same_corpus",
+      "evidence_class": "live_baseline_only",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "lifecycle_fail",
+      "setup": {
+        "status": "pass",
+        "evidence": "The live-baseline Docker runner installs and exercises agentmemory package APIs.",
+        "command": "ELF_BASELINE_PROJECTS=agentmemory cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/agentmemory.log"
+      },
+      "run": {
+        "status": "lifecycle_fail",
+        "evidence": "Same-corpus retrieval can run, but durable lifecycle behavior is not proven because the adapter uses an in-memory SDK/KV mock.",
+        "artifact": "tmp/live-baseline/live-baseline-report.json"
+      },
+      "result": {
+        "status": "lifecycle_fail",
+        "evidence": "agentmemory remains a reference for capture and continuity UX, but current Docker evidence is not a durable lifecycle pass.",
+        "artifact": "docs/guide/benchmarking/live_baseline_benchmark.md"
+      },
+      "capabilities": [
+        {
+          "capability": "same_corpus_retrieval",
+          "status": "pass",
+          "evidence": "The current adapter can run mem::remember and mem::search against the shared corpus."
+        },
+        {
+          "capability": "adapter_storage",
+          "status": "mocked",
+          "evidence": "The current adapter uses a process-local StateKV Map and in-memory index."
+        },
+        {
+          "capability": "durable_cold_start",
+          "status": "blocked",
+          "evidence": "A persistent upstream KV/index path or hosted runtime is needed before cold-start recovery can be fairly scored."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No agentmemory adapter currently executes real_world_job prompts and answer scoring."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "work_resume",
+          "status": "blocked",
+          "evidence": "A durable upstream agentmemory session/capture path is required before work-resume jobs can be compared fairly."
+        },
+        {
+          "suite_id": "capture_integration",
+          "status": "blocked",
+          "evidence": "The current fixture import boundary is offline and does not run live agentmemory hooks."
+        },
+        {
+          "suite_id": "memory_evolution",
+          "status": "blocked",
+          "evidence": "Durable update/supersede/delete history is not proven by the in-memory adapter."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "guide",
+          "ref": "docs/guide/research/agentmemory_adapter.md",
+          "status": "real"
+        },
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "mocked"
+        }
+      ],
+      "notes": [
+        "The offline agentmemory fixture adapter is an import/comparison boundary and must not be treated as live benchmark proof."
+      ],
+      "follow_up": {
+        "title": "[ELF benchmark P0] Make agentmemory adapter lifecycle-durable and fail-typed",
+        "reason": "A durable upstream agentmemory storage path is required before lifecycle and real-world job suites can be fairly scored."
+      }
+    },
+    {
+      "adapter_id": "mem0_openmemory_live_baseline",
+      "project": "mem0/OpenMemory",
+      "adapter_kind": "docker_sdk_same_corpus",
+      "evidence_class": "live_baseline_only",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "wrong_result",
+      "setup": {
+        "status": "pass",
+        "evidence": "The live-baseline Docker runner can install mem0 and configure local FastEmbed/Qdrant paths.",
+        "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/mem0.log"
+      },
+      "run": {
+        "status": "wrong_result",
+        "evidence": "The current same-corpus retrieval result is typed wrong_result or incomplete in the checked-in benchmark evidence.",
+        "artifact": "tmp/live-baseline/live-baseline-report.json"
+      },
+      "result": {
+        "status": "wrong_result",
+        "evidence": "No real_world_job mem0/OpenMemory adapter is encoded; local same-corpus evidence must not be upgraded to suite coverage.",
+        "artifact": "docs/guide/research/comparison_external_projects.md"
+      },
+      "capabilities": [
+        {
+          "capability": "local_storage",
+          "status": "real",
+          "evidence": "The adapter targets local FastEmbed, Qdrant path storage, and local history DB paths in Docker."
+        },
+        {
+          "capability": "same_corpus_retrieval",
+          "status": "wrong_result",
+          "evidence": "The checked-in smoke evidence did not prove a correct same-corpus result for mem0."
+        },
+        {
+          "capability": "openmemory_ui_readback",
+          "status": "not_encoded",
+          "evidence": "OpenMemory UI readback is not encoded in the Docker baseline or real-world job runner."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No mem0/OpenMemory adapter currently executes real_world_job prompts and answer scoring."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "memory_evolution",
+          "status": "incomplete",
+          "evidence": "mem0 lifecycle/history is a target dimension, but current Docker evidence has not produced a complete real-world job result."
+        },
+        {
+          "suite_id": "personalization",
+          "status": "not_encoded",
+          "evidence": "Entity-scoped personalization is not encoded as a real_world_job adapter run."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "OpenMemory inspection is not encoded in this runner."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "real"
+        }
+      ],
+      "notes": [
+        "Separate local OSS mem0 evidence from hosted Platform and OpenMemory UI claims."
+      ]
+    },
+    {
+      "adapter_id": "memsearch_live_baseline",
+      "project": "memsearch",
+      "adapter_kind": "docker_cli_same_corpus",
+      "evidence_class": "live_baseline_only",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "wrong_result",
+      "setup": {
+        "status": "pass",
+        "evidence": "The live-baseline Docker runner can install memsearch and run its CLI path.",
+        "command": "ELF_BASELINE_PROJECTS=memsearch cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/memsearch.log"
+      },
+      "run": {
+        "status": "wrong_result",
+        "evidence": "The current same-corpus retrieval evidence is not a clean pass for memsearch.",
+        "artifact": "tmp/live-baseline/live-baseline-report.json"
+      },
+      "result": {
+        "status": "wrong_result",
+        "evidence": "No real_world_job memsearch adapter is encoded; Markdown-first behavior remains a design reference.",
+        "artifact": "docs/guide/research/comparison_external_projects.md"
+      },
+      "capabilities": [
+        {
+          "capability": "canonical_markdown_store",
+          "status": "real",
+          "evidence": "memsearch is tracked as a Markdown-first source-of-truth reference."
+        },
+        {
+          "capability": "same_corpus_retrieval",
+          "status": "wrong_result",
+          "evidence": "The checked-in smoke evidence did not prove correct same-corpus retrieval."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No memsearch adapter currently executes real_world_job prompts and answer scoring."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "trust_source_of_truth",
+          "status": "incomplete",
+          "evidence": "The Markdown-first source model is relevant, but no real_world_job source-of-truth run is encoded."
+        },
+        {
+          "suite_id": "retrieval",
+          "status": "incomplete",
+          "evidence": "The live-baseline retrieval path is not a clean pass and no job-level run is encoded."
+        },
+        {
+          "suite_id": "memory_evolution",
+          "status": "incomplete",
+          "evidence": "Update/delete reindex semantics need a complete Docker evidence path before suite claims."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "real"
+        }
+      ],
+      "notes": [
+        "Do not mark memsearch worse solely because setup or local indexing is heavier; preserve the typed incomplete/wrong-result boundary."
+      ]
+    },
+    {
+      "adapter_id": "openviking_live_baseline",
+      "project": "OpenViking",
+      "adapter_kind": "docker_local_embed_same_corpus",
+      "evidence_class": "live_baseline_only",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "incomplete",
+      "setup": {
+        "status": "incomplete",
+        "evidence": "OpenViking local-embed setup can fail in Docker while building or importing local embedding dependencies.",
+        "command": "ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/OpenViking.log"
+      },
+      "run": {
+        "status": "incomplete",
+        "evidence": "The adapter cannot reliably reach same-corpus add_resource/find behavior until local embedding setup is pinned for Docker.",
+        "artifact": "tmp/live-baseline/live-baseline-report.json"
+      },
+      "result": {
+        "status": "incomplete",
+        "evidence": "No real_world_job OpenViking adapter is encoded; current blocker is dependency setup, not a quality claim.",
+        "artifact": "docs/guide/benchmarking/live_baseline_benchmark.md"
+      },
+      "capabilities": [
+        {
+          "capability": "local_embed_setup",
+          "status": "incomplete",
+          "evidence": "Docker local embedding dependency setup is not reliable in the current adapter."
+        },
+        {
+          "capability": "context_trajectory",
+          "status": "not_encoded",
+          "evidence": "OpenViking staged/hierarchical retrieval is a reference dimension but is not encoded as a real_world_job run."
+        },
+        {
+          "capability": "real_world_job_adapter",
+          "status": "not_encoded",
+          "evidence": "No OpenViking adapter currently executes real_world_job prompts and answer scoring."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "retrieval",
+          "status": "incomplete",
+          "evidence": "The local embedding install blocker prevents a fair retrieval job run."
+        },
+        {
+          "suite_id": "work_resume",
+          "status": "not_encoded",
+          "evidence": "Hierarchical context resume scenarios are not encoded for OpenViking."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "Stage trajectory readback is not encoded in this runner."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "incomplete"
+        }
+      ],
+      "notes": [
+        "Record OpenViking as incomplete until Docker-compatible local embeddings are pinned; do not treat setup weight as a negative quality result."
+      ],
+      "follow_up": {
+        "title": "[ELF benchmark adapter] Pin OpenViking Docker local embedding dependency path",
+        "reason": "The current adapter must reach add_resource/find before real-world job suites can be scored."
+      }
+    },
+    {
+      "adapter_id": "claude_mem_live_baseline",
+      "project": "claude-mem",
+      "adapter_kind": "docker_repository_same_corpus",
+      "evidence_class": "live_baseline_only",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "wrong_result",
+      "setup": {
+        "status": "pass",
+        "evidence": "The live-baseline Docker runner can install and build claude-mem.",
+        "command": "ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker",
+        "artifact": "tmp/live-baseline/claude-mem.log"
+      },
+      "run": {
+        "status": "wrong_result",
+        "evidence": "The current same-corpus SQLite repository search is not a clean pass for claude-mem and lifecycle checks are not encoded.",
+        "artifact": "tmp/live-baseline/live-baseline-report.json"
+      },
+      "result": {
+        "status": "wrong_result",
+        "evidence": "No real_world_job claude-mem adapter is encoded; progressive disclosure remains a design reference.",
+        "artifact": "docs/guide/research/comparison_external_projects.md"
+      },
+      "capabilities": [
+        {
+          "capability": "same_corpus_retrieval",
+          "status": "wrong_result",
+          "evidence": "The current Docker adapter did not prove correct same-corpus retrieval."
+        },
+        {
+          "capability": "durable_storage",
+          "status": "mocked",
+          "evidence": "The current adapter uses in-memory SQLite and does not reopen a durable store."
+        },
+        {
+          "capability": "progressive_disclosure_real_world_job",
+          "status": "not_encoded",
+          "evidence": "search -> timeline -> observation workflows are not encoded against real_world_job prompts."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "work_resume",
+          "status": "incomplete",
+          "evidence": "Hook-driven capture and progressive disclosure need a durable local repository run before work-resume suite claims."
+        },
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "not_encoded",
+          "evidence": "Local viewer/operator workflow is not encoded in the benchmark runner."
+        },
+        {
+          "suite_id": "capture_integration",
+          "status": "not_encoded",
+          "evidence": "claude-mem hooks are not executed by this runner."
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "runner",
+          "ref": "scripts/live-baseline-benchmark.sh",
+          "status": "mocked"
+        }
+      ],
+      "notes": [
+        "claude-mem remains a UX reference; current Docker evidence is not a real-world progressive-disclosure pass."
+      ]
+    }
+  ]
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index f5a5fee6..9ce9b4e3 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -18,9 +18,13 @@ use elf_cli::VERSION;
 
 const JOB_SCHEMA: &str = "elf.real_world_job/v1";
 const REPORT_SCHEMA: &str = "elf.real_world_job_report/v1";
+const EXTERNAL_ADAPTER_MANIFEST_SCHEMA: &str = "elf.real_world_external_adapter_manifest/v1";
+const EXTERNAL_ADAPTER_REPORT_SCHEMA: &str = "elf.real_world_external_adapter_report/v1";
 const DEFAULT_FIXTURE_PATH: &str = "apps/elf-eval/fixtures/real_world_memory/work_resume";
 const DEFAULT_REPORT_PATH: &str = "tmp/real-world-job/real-world-job-smoke-report.json";
 const DEFAULT_MARKDOWN_PATH: &str = "tmp/real-world-job/real-world-job-smoke-report.md";
+const DEFAULT_EXTERNAL_ADAPTER_MANIFEST_PATH: &str =
+	"apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json";
 const DEFAULT_RUN_ID: &str = "real-world-job-smoke";
 const DEFAULT_ADAPTER_ID: &str = "fixture_smoke";
 const DEFAULT_ADAPTER_NAME: &str = "ELF fixture smoke";
@@ -85,6 +89,12 @@ struct RunArgs {
 	/// Human-readable adapter name recorded in the generated report.
 	#[arg(long, default_value = DEFAULT_ADAPTER_NAME)]
 	adapter_name: String,
+	/// Real-world external adapter manifest to include in report coverage.
+	#[arg(long, value_name = "FILE", default_value = DEFAULT_EXTERNAL_ADAPTER_MANIFEST_PATH)]
+	external_adapter_manifest: PathBuf,
+	/// Skip loading the real-world external adapter coverage manifest.
+	#[arg(long)]
+	skip_external_adapter_manifest: bool,
 }
 
 #[derive(Debug, Parser)]
@@ -562,6 +572,8 @@ struct RealWorldReport {
 	runner_version: String,
 	corpus_profile: String,
 	adapter: AdapterReport,
+	#[serde(default)]
+	external_adapters: ExternalAdapterSection,
 	capture_integration: CaptureIntegrationReport,
 	summary: ReportSummary,
 	suites: Vec<SuiteReport>,
@@ -585,6 +597,133 @@ struct AdapterReport {
 	notes: String,
 }
 
+#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")] // JSON form is snake_case, e.g. WrongResult <-> "wrong_result"
+enum AdapterCoverageStatus {
+	Real, // derived Ord/PartialOrd follow declaration order, so reordering variants changes comparisons
+	Mocked,
+	Unsupported,
+	Blocked,
+	Incomplete,
+	WrongResult,
+	LifecycleFail,
+	Pass,
+	NotEncoded,
+}
+
+#[derive(Debug, Deserialize)]
+struct ExternalAdapterManifest {
+	schema: String,
+	manifest_id: String,
+	docker_isolation: ExternalDockerIsolation,
+	#[serde(default)]
+	adapters: Vec<ExternalAdapterReport>,
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct ExternalAdapterSection {
+	schema: String,
+	manifest_id: String,
+	docker_isolation: ExternalDockerIsolation,
+	summary: ExternalAdapterSummary,
+	#[serde(default)]
+	adapters: Vec<ExternalAdapterReport>,
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct ExternalDockerIsolation {
+	default: bool,
+	compose_file: String,
+	runner: String,
+	artifact_dir: String,
+	host_global_installs_required: bool,
+	#[serde(default)]
+	notes: Vec<String>,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct ExternalAdapterReport {
+	adapter_id: String,
+	project: String,
+	adapter_kind: String,
+	evidence_class: String,
+	docker_default: bool,
+	host_global_installs_required: bool,
+	overall_status: AdapterCoverageStatus,
+	setup: AdapterExecutionEvidence,
+	run: AdapterExecutionEvidence,
+	result: AdapterExecutionEvidence,
+	#[serde(default)]
+	capabilities: Vec<AdapterCapabilityCoverage>,
+	#[serde(default)]
+	suites: Vec<AdapterSuiteCoverage>,
+	#[serde(default)]
+	evidence: Vec<AdapterEvidencePointer>,
+	#[serde(default)]
+	notes: Vec<String>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	follow_up: Option<FollowUpInput>,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct AdapterExecutionEvidence {
+	status: AdapterCoverageStatus,
+	evidence: String,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	command: Option<String>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	artifact: Option<String>,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct AdapterCapabilityCoverage {
+	capability: String,
+	status: AdapterCoverageStatus,
+	evidence: String,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct AdapterSuiteCoverage {
+	suite_id: String,
+	status: AdapterCoverageStatus,
+	evidence: String,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct AdapterEvidencePointer {
+	kind: String,
+	#[serde(rename = "ref")] // (de)serialized under the JSON key "ref", a reserved word in Rust
+	reference: String,
+	status: AdapterCoverageStatus,
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct ExternalAdapterSummary {
+	adapter_count: usize,
+	external_project_count: usize,
+	docker_default_count: usize,
+	host_global_install_required_count: usize,
+	fixture_backed_count: usize,
+	live_baseline_only_count: usize,
+	live_real_world_count: usize,
+	overall_status_counts: AdapterStatusCounts,
+	capability_status_counts: AdapterStatusCounts,
+	suite_status_counts: AdapterStatusCounts,
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+struct AdapterStatusCounts {
+	real: usize,
+	mocked: usize,
+	unsupported: usize,
+	blocked: usize,
+	incomplete: usize,
+	wrong_result: usize,
+	lifecycle_fail: usize,
+	pass: usize,
+	not_encoded: usize,
+}
+
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 struct CaptureIntegrationReport {
 	#[serde(default)]
@@ -1826,6 +1965,10 @@ fn build_report(jobs: &[RealWorldJob], args: &RunArgs) -> Result<RealWorldReport
 	let summary = report_summary(&job_reports, &suites);
 	let evolution = evolution_summary(&job_reports);
 	let follow_ups = follow_up_reports(jobs);
+	let external_adapters = external_adapter_section(
+		&args.external_adapter_manifest,
+		args.skip_external_adapter_manifest,
+	)?;
 
 	Ok(RealWorldReport {
 		schema: REPORT_SCHEMA.to_string(),
@@ -1834,6 +1977,7 @@ fn build_report(jobs: &[RealWorldJob], args: &RunArgs) -> Result<RealWorldReport
 		runner_version: VERSION.to_string(),
 		corpus_profile: corpus_profile(jobs),
 		adapter: adapter_report(args),
+		external_adapters,
 		capture_integration: capture_integration_report(jobs),
 		summary,
 		suites,
@@ -3341,6 +3485,289 @@ fn adapter_report(args: &RunArgs) -> AdapterReport {
 	}
 }
 
+fn external_adapter_section(
+	manifest_path: &Path,
+	skip_manifest: bool,
+) -> Result<ExternalAdapterSection> {
+	if skip_manifest {
+		return Ok(empty_external_adapter_section("skipped"));
+	}
+
+	let manifest_path = resolve_external_adapter_manifest_path(manifest_path);
+
+	if !manifest_path.exists() {
+		return Ok(empty_external_adapter_section("missing"));
+	}
+
+	let raw = fs::read_to_string(&manifest_path)?;
+	let manifest = serde_json::from_str::<ExternalAdapterManifest>(&raw).map_err(|err| {
+		eyre::eyre!("Failed to parse external adapter manifest {}: {err}", manifest_path.display())
+	})?;
+
+	validate_external_adapter_manifest(&manifest, &manifest_path)?;
+
+	let summary = external_adapter_summary(&manifest.adapters);
+
+	Ok(ExternalAdapterSection {
+		schema: EXTERNAL_ADAPTER_REPORT_SCHEMA.to_string(),
+		manifest_id: manifest.manifest_id,
+		docker_isolation: manifest.docker_isolation,
+		summary,
+		adapters: manifest.adapters,
+	})
+}
+
+fn empty_external_adapter_section(reason: &str) -> ExternalAdapterSection {
+	ExternalAdapterSection {
+		schema: EXTERNAL_ADAPTER_REPORT_SCHEMA.to_string(),
+		manifest_id: reason.to_string(),
+		docker_isolation: ExternalDockerIsolation::default(),
+		summary: ExternalAdapterSummary::default(),
+		adapters: Vec::new(),
+	}
+}
+
+fn resolve_external_adapter_manifest_path(path: &Path) -> PathBuf {
+	if path.exists() || path.is_absolute() { // absolute paths are never re-rooted
+		return path.to_path_buf();
+	}
+
+	let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); // fixed at compile time
+	let Some(workspace_root) = manifest_dir.parent().and_then(Path::parent) else { // two up
+		return path.to_path_buf();
+	};
+	let workspace_candidate = workspace_root.join(path);
+
+	if workspace_candidate.exists() { workspace_candidate } else { path.to_path_buf() }
+}
+
+fn validate_external_adapter_manifest(
+	manifest: &ExternalAdapterManifest,
+	path: &Path,
+) -> Result<()> {
+	if manifest.schema != EXTERNAL_ADAPTER_MANIFEST_SCHEMA {
+		return Err(eyre::eyre!(
+			"{} has schema {}, expected {EXTERNAL_ADAPTER_MANIFEST_SCHEMA}.",
+			path.display(),
+			manifest.schema
+		));
+	}
+	if manifest.manifest_id.trim().is_empty() {
+		return Err(eyre::eyre!("{} has an empty manifest_id.", path.display()));
+	}
+
+	validate_external_docker_isolation(path, &manifest.docker_isolation)?;
+
+	validate_external_adapters(path, &manifest.adapters)
+}
+
+fn validate_external_docker_isolation(path: &Path, docker: &ExternalDockerIsolation) -> Result<()> {
+	if docker.compose_file.trim().is_empty()
+		|| docker.runner.trim().is_empty()
+		|| docker.artifact_dir.trim().is_empty()
+	{
+		return Err(eyre::eyre!("{} has incomplete docker_isolation metadata.", path.display()));
+	}
+	if !docker.default {
+		return Err(eyre::eyre!(
+			"{} external adapter manifest must default to Docker isolation.",
+			path.display()
+		));
+	}
+	if docker.host_global_installs_required {
+		return Err(eyre::eyre!(
+			"{} external adapter manifest must not require host-global installs by default.",
+			path.display()
+		));
+	}
+
+	Ok(())
+}
+
+fn validate_external_adapters(path: &Path, adapters: &[ExternalAdapterReport]) -> Result<()> {
+	if adapters.is_empty() {
+		return Err(eyre::eyre!("{} declares no external adapters.", path.display()));
+	}
+
+	let mut seen = BTreeSet::new();
+
+	for adapter in adapters {
+		validate_external_adapter(path, adapter)?;
+
+		if !seen.insert(adapter.adapter_id.as_str()) {
+			return Err(eyre::eyre!(
+				"{} declares duplicate adapter_id {}.",
+				path.display(),
+				adapter.adapter_id
+			));
+		}
+	}
+
+	Ok(())
+}
+
+fn validate_external_adapter(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
+	if adapter.adapter_id.trim().is_empty()
+		|| adapter.project.trim().is_empty()
+		|| adapter.adapter_kind.trim().is_empty()
+		|| adapter.evidence_class.trim().is_empty()
+	{
+		return Err(eyre::eyre!("{} has an incomplete external adapter.", path.display()));
+	}
+	if !matches!(
+		adapter.evidence_class.as_str(),
+		"fixture_backed" | "live_baseline_only" | "live_real_world"
+	) {
+		return Err(eyre::eyre!(
+			"{} adapter {} has unsupported evidence_class {}.",
+			path.display(),
+			adapter.adapter_id,
+			adapter.evidence_class
+		));
+	}
+	if adapter.docker_default && adapter.host_global_installs_required {
+		return Err(eyre::eyre!(
+			"{} adapter {} is Docker-default but requires host-global installs.",
+			path.display(),
+			adapter.adapter_id
+		));
+	}
+
+	validate_adapter_execution(path, adapter)?;
+	validate_adapter_capabilities(path, adapter)?;
+	validate_adapter_suites(path, adapter)?;
+	validate_adapter_evidence(path, adapter)?;
+
+	if let Some(follow_up) = &adapter.follow_up
+		&& (follow_up.title.trim().is_empty() || follow_up.reason.trim().is_empty())
+	{
+		return Err(eyre::eyre!(
+			"{} adapter {} has an incomplete follow_up.",
+			path.display(),
+			adapter.adapter_id
+		));
+	}
+
+	Ok(())
+}
+
+fn validate_adapter_execution(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
+	for evidence in [&adapter.setup, &adapter.run, &adapter.result] { // whitespace-only is blank
+		if evidence.evidence.trim().is_empty()
+			|| evidence.command.as_deref().is_some_and(|command| command.trim().is_empty())
+			|| evidence.artifact.as_deref().is_some_and(|artifact| artifact.trim().is_empty())
+		{
+			return Err(eyre::eyre!(
+				"{} adapter {} has incomplete setup/run/result evidence.",
+				path.display(),
+				adapter.adapter_id
+			));
+		}
+	}
+
+	Ok(())
+}
+
+fn validate_adapter_capabilities(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
+	for capability in &adapter.capabilities {
+		if capability.capability.trim().is_empty() || capability.evidence.trim().is_empty() {
+			return Err(eyre::eyre!(
+				"{} adapter {} has incomplete capability coverage.",
+				path.display(),
+				adapter.adapter_id
+			));
+		}
+	}
+
+	Ok(())
+}
+
+fn validate_adapter_suites(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
+	for suite in &adapter.suites {
+		if !SUITES.contains(&suite.suite_id.as_str()) {
+			return Err(eyre::eyre!(
+				"{} adapter {} references unknown suite {}.",
+				path.display(),
+				adapter.adapter_id,
+				suite.suite_id
+			));
+		}
+		if suite.evidence.trim().is_empty() {
+			return Err(eyre::eyre!(
+				"{} adapter {} has suite {} without evidence.",
+				path.display(),
+				adapter.adapter_id,
+				suite.suite_id
+			));
+		}
+	}
+
+	Ok(())
+}
+
+fn validate_adapter_evidence(path: &Path, adapter: &ExternalAdapterReport) -> Result<()> {
+	for evidence in &adapter.evidence {
+		if evidence.kind.trim().is_empty() || evidence.reference.trim().is_empty() {
+			return Err(eyre::eyre!(
+				"{} adapter {} has incomplete evidence pointers.",
+				path.display(),
+				adapter.adapter_id
+			));
+		}
+	}
+
+	Ok(())
+}
+
+fn external_adapter_summary(adapters: &[ExternalAdapterReport]) -> ExternalAdapterSummary {
+	// Distinct project names: several adapter records for one project count once.
+	let projects: BTreeSet<&str> = adapters.iter().map(|a| a.project.as_str()).collect();
+	let mut summary = ExternalAdapterSummary {
+		adapter_count: adapters.len(),
+		external_project_count: projects.len() - usize::from(projects.contains("ELF")),
+		..ExternalAdapterSummary::default()
+	};
+
+	adapters.iter().for_each(|adapter| accumulate_adapter_summary(&mut summary, adapter));
+
+	summary
+}
+
+fn accumulate_adapter_summary(
+	summary: &mut ExternalAdapterSummary,
+	adapter: &ExternalAdapterReport,
+) {
+	summary.docker_default_count += usize::from(adapter.docker_default);
+	summary.host_global_install_required_count +=
+		usize::from(adapter.host_global_installs_required);
+	summary.fixture_backed_count += usize::from(adapter.evidence_class == "fixture_backed");
+	summary.live_baseline_only_count += usize::from(adapter.evidence_class == "live_baseline_only");
+	summary.live_real_world_count += usize::from(adapter.evidence_class == "live_real_world");
+
+	increment_adapter_status_count(&mut summary.overall_status_counts, adapter.overall_status);
+
+	for capability in &adapter.capabilities {
+		increment_adapter_status_count(&mut summary.capability_status_counts, capability.status);
+	}
+	for suite in &adapter.suites {
+		increment_adapter_status_count(&mut summary.suite_status_counts, suite.status);
+	}
+}
+
+fn increment_adapter_status_count(counts: &mut AdapterStatusCounts, status: AdapterCoverageStatus) {
+	match status {
+		AdapterCoverageStatus::Real => counts.real += 1,
+		AdapterCoverageStatus::Mocked => counts.mocked += 1,
+		AdapterCoverageStatus::Unsupported => counts.unsupported += 1,
+		AdapterCoverageStatus::Blocked => counts.blocked += 1,
+		AdapterCoverageStatus::Incomplete => counts.incomplete += 1,
+		AdapterCoverageStatus::WrongResult => counts.wrong_result += 1,
+		AdapterCoverageStatus::LifecycleFail => counts.lifecycle_fail += 1,
+		AdapterCoverageStatus::Pass => counts.pass += 1,
+		AdapterCoverageStatus::NotEncoded => counts.not_encoded += 1,
+	}
+}
+
 fn capture_integration_report(jobs: &[RealWorldJob]) -> CaptureIntegrationReport {
 	let mut report = CaptureIntegrationReport::default();
 
@@ -3397,6 +3824,7 @@ fn render_markdown(report: &RealWorldReport, report_path: &Path) -> String {
 	let mut out = String::new();
 
 	render_markdown_header(&mut out, report, report_path.as_str());
+	render_markdown_external_adapters(&mut out, report);
 	render_markdown_capture_integration(&mut out, report);
 	render_markdown_suites(&mut out, report);
 	render_markdown_jobs(&mut out, report);
@@ -3446,6 +3874,91 @@ fn render_markdown_capture_integration(out: &mut String, report: &RealWorldRepor
 	out.push('\n');
 }
 
+fn render_markdown_external_adapters(out: &mut String, report: &RealWorldReport) {
+	out.push_str("## External Adapter Coverage\n\n");
+
+	if report.external_adapters.adapters.is_empty() {
+		out.push_str("No external adapter coverage manifest was loaded for this report.\n\n");
+
+		return;
+	}
+
+	let summary = &report.external_adapters.summary;
+
+	out.push_str("This section is manifest-backed. It records external adapter coverage and blockers, but it does not convert live-baseline retrieval results into real-world suite wins.\n\n");
+	out.push_str(&format!(
+		"- Manifest: `{}`\n",
+		md_inline(report.external_adapters.manifest_id.as_str())
+	));
+	out.push_str(&format!(
+		"- Docker default: `{}` via `{}`; artifact dir `{}`\n",
+		report.external_adapters.docker_isolation.default,
+		md_inline(report.external_adapters.docker_isolation.compose_file.as_str()),
+		md_inline(report.external_adapters.docker_isolation.artifact_dir.as_str())
+	));
+	out.push_str(&format!(
+		"- Adapter records: `{}` total, `{}` external project(s), `{}` Docker-default, `{}` requiring host-global installs\n",
+		summary.adapter_count,
+		summary.external_project_count,
+		summary.docker_default_count,
+		summary.host_global_install_required_count
+	));
+	out.push_str(&format!(
+		"- Evidence classes: `{}` fixture-backed, `{}` live-baseline-only, `{}` live real-world\n",
+		summary.fixture_backed_count,
+		summary.live_baseline_only_count,
+		summary.live_real_world_count
+	));
+	out.push_str(&format!(
+		"- Overall statuses: `{}`\n",
+		adapter_status_counts_display(&summary.overall_status_counts)
+	));
+	out.push_str(&format!(
+		"- Capability coverage statuses: `{}`\n",
+		adapter_status_counts_display(&summary.capability_status_counts)
+	));
+	out.push_str(&format!(
+		"- Real-world suite statuses: `{}`\n\n",
+		adapter_status_counts_display(&summary.suite_status_counts)
+	));
+	out.push_str("| Project | Adapter | Evidence Class | Overall | Setup | Run | Result | Docker | Suites | Evidence |\n");
+	out.push_str("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n");
+
+	for adapter in &report.external_adapters.adapters {
+		out.push_str(&format!(
+			"| {} | `{}` | `{}` | `{}` | `{}` | `{}` | `{}` | `{}` | {} | {} |\n",
+			md_cell(adapter.project.as_str()),
+			md_inline(adapter.adapter_id.as_str()),
+			md_inline(adapter.evidence_class.as_str()),
+			adapter_status_str(adapter.overall_status),
+			adapter_status_str(adapter.setup.status),
+			adapter_status_str(adapter.run.status),
+			adapter_status_str(adapter.result.status),
+			adapter.docker_default,
+			adapter_suite_cell(adapter.suites.as_slice()),
+			adapter_evidence_cell(adapter)
+		));
+	}
+
+	out.push_str("\n### Adapter Capability Details\n\n");
+	out.push_str("| Adapter | Capability | Status | Evidence |\n");
+	out.push_str("| --- | --- | --- | --- |\n");
+
+	for adapter in &report.external_adapters.adapters {
+		for capability in &adapter.capabilities {
+			out.push_str(&format!(
+				"| `{}` | {} | `{}` | {} |\n",
+				md_inline(adapter.adapter_id.as_str()),
+				md_cell(capability.capability.as_str()),
+				adapter_status_str(capability.status),
+				md_cell(capability.evidence.as_str())
+			));
+		}
+	}
+
+	out.push('\n');
+}
+
 fn render_markdown_header(out: &mut String, report: &RealWorldReport, report_path: &str) {
 	out.push_str("# Real-World Job Benchmark Report\n\n");
 	out.push_str(
@@ -4024,6 +4537,74 @@ fn status_str(status: TypedStatus) -> &'static str {
 	}
 }
 
+fn adapter_status_str(status: AdapterCoverageStatus) -> &'static str {
+	match status {
+		AdapterCoverageStatus::Real => "real",
+		AdapterCoverageStatus::Mocked => "mocked",
+		AdapterCoverageStatus::Unsupported => "unsupported",
+		AdapterCoverageStatus::Blocked => "blocked",
+		AdapterCoverageStatus::Incomplete => "incomplete",
+		AdapterCoverageStatus::WrongResult => "wrong_result",
+		AdapterCoverageStatus::LifecycleFail => "lifecycle_fail",
+		AdapterCoverageStatus::Pass => "pass",
+		AdapterCoverageStatus::NotEncoded => "not_encoded",
+	}
+}
+
+fn adapter_status_counts_display(counts: &AdapterStatusCounts) -> String {
+	[
+		("real", counts.real),
+		("mocked", counts.mocked),
+		("unsupported", counts.unsupported),
+		("blocked", counts.blocked),
+		("incomplete", counts.incomplete),
+		("wrong_result", counts.wrong_result),
+		("lifecycle_fail", counts.lifecycle_fail),
+		("pass", counts.pass),
+		("not_encoded", counts.not_encoded),
+	]
+	.into_iter()
+	.filter(|(_, count)| *count > 0)
+	.map(|(status, count)| format!("{status}={count}"))
+	.collect::<Vec<_>>()
+	.join(", ")
+}
+
+fn adapter_suite_cell(suites: &[AdapterSuiteCoverage]) -> String {
+	if suites.is_empty() {
+		return "`none`".to_string();
+	}
+
+	suites
+		.iter()
+		.map(|suite| {
+			format!(
+				"`{}`: `{}`",
+				md_inline(suite.suite_id.as_str()),
+				adapter_status_str(suite.status)
+			)
+		})
+		.collect::<Vec<_>>()
+		.join("<br>")
+}
+
+fn adapter_evidence_cell(adapter: &ExternalAdapterReport) -> String {
+	let setup = adapter
+		.setup
+		.command
+		.as_deref()
+		.or(adapter.setup.artifact.as_deref())
+		.unwrap_or(adapter.setup.evidence.as_str());
+	let result = adapter
+		.result
+		.artifact
+		.as_deref()
+		.or(adapter.result.command.as_deref())
+		.unwrap_or(adapter.result.evidence.as_str());
+
+	format!("setup: `{}`<br>result: `{}`", md_inline(setup), md_inline(result))
+}
+
 fn trace_failure_stage(trace: Option<&TraceExplainability>) -> Option<&str> {
 	trace.and_then(|trace| trace.failure_stage.as_deref())
 }
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index cc665cb4..bb158eb5 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -108,6 +108,14 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(6));
 	assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0));
+	assert_eq!(
+		report.pointer("/external_adapters/summary/adapter_count").and_then(Value::as_u64),
+		Some(7)
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/live_real_world_count").and_then(Value::as_u64),
+		Some(0)
+	);
 
 	let jobs = array_at(&report, "/jobs")?;
 	let job = find_by_field(jobs, "/job_id", "work-resume-stale-worktree-001")?;
@@ -150,6 +158,105 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 	Ok(())
 }
 
+#[test]
+fn real_world_report_includes_external_adapter_coverage_manifest() -> Result<()> {
+	let report = run_json_report_from(real_world_memory_fixture_dir())?;
+
+	assert_eq!(
+		report.pointer("/external_adapters/schema").and_then(Value::as_str),
+		Some("elf.real_world_external_adapter_report/v1")
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/manifest_id").and_then(Value::as_str),
+		Some("real-world-memory-project-adapters-2026-06-10")
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/docker_isolation/default").and_then(Value::as_bool),
+		Some(true)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/docker_isolation/host_global_installs_required")
+			.and_then(Value::as_bool),
+		Some(false)
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/adapter_count").and_then(Value::as_u64),
+		Some(7)
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/external_project_count").and_then(Value::as_u64),
+		Some(6)
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/fixture_backed_count").and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/live_baseline_only_count")
+			.and_then(Value::as_u64),
+		Some(6)
+	);
+	assert_eq!(
+		report.pointer("/external_adapters/summary/live_real_world_count").and_then(Value::as_u64),
+		Some(0)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/overall_status_counts/pass")
+			.and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/overall_status_counts/wrong_result")
+			.and_then(Value::as_u64),
+		Some(4)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/overall_status_counts/lifecycle_fail")
+			.and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/overall_status_counts/incomplete")
+			.and_then(Value::as_u64),
+		Some(1)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/capability_status_counts/mocked")
+			.and_then(Value::as_u64),
+		Some(2)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/suite_status_counts/blocked")
+			.and_then(Value::as_u64),
+		Some(3)
+	);
+
+	let adapters = array_at(&report, "/external_adapters/adapters")?;
+	let elf = find_by_field(adapters, "/adapter_id", "elf_real_world_memory_fixture")?;
+	let qmd = find_by_field(adapters, "/adapter_id", "qmd_live_baseline")?;
+	let agentmemory = find_by_field(adapters, "/adapter_id", "agentmemory_live_baseline")?;
+	let openviking = find_by_field(adapters, "/adapter_id", "openviking_live_baseline")?;
+
+	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("fixture_backed"));
+	assert_eq!(qmd.pointer("/overall_status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(qmd.pointer("/suites/0/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(
+		agentmemory.pointer("/capabilities/1/status").and_then(Value::as_str),
+		Some("mocked")
+	);
+	assert_eq!(openviking.pointer("/overall_status").and_then(Value::as_str), Some("incomplete"));
+
+	Ok(())
+}
+
 #[test]
 fn runner_discovers_nested_fixture_layout() -> Result<()> {
 	let report = run_json_report_from(fixture_root())?;
@@ -362,6 +469,9 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("# Real-World Job Benchmark Report"));
 	assert!(markdown.contains("work_resume"));
 	assert!(markdown.contains("Capture And Integration Coverage"));
+	assert!(markdown.contains("External Adapter Coverage"));
+	assert!(markdown.contains("live-baseline-only"));
+	assert!(markdown.contains("does not convert live-baseline retrieval results"));
 	assert!(markdown.contains("fixture-backed"));
 	assert!(markdown.contains("agentmemory-style hook capture"));
 	assert!(markdown.contains("xy844-current-worktree"));
diff --git a/docs/guide/benchmarking/live_baseline_benchmark.md b/docs/guide/benchmarking/live_baseline_benchmark.md
index 5d5f0387..d419af0c 100644
--- a/docs/guide/benchmarking/live_baseline_benchmark.md
+++ b/docs/guide/benchmarking/live_baseline_benchmark.md
@@ -290,6 +290,13 @@ the interpretation manually under `docs/guide/benchmarking/`.
 The live-baseline runner and real-world job runner publish separate report schemas.
 Live-baseline reports remain evidence for Docker retrieval and lifecycle checks only.
 They are not real-world suite wins.
+The real-world runner loads
+`apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`
+by default and records live-baseline-only external adapter evidence under
+`external_adapters`; those records preserve the typed setup/run evidence but still
+leave real-world suites as `not_encoded`, `blocked`, `incomplete`, `wrong_result`, or
+`lifecycle_fail` until an adapter actually executes `real_world_job` prompts and
+scoring.
 
 To run the checked-in real-world job smoke fixture and render its Markdown report:
 
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index 305ec553..ab8fa512 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -167,6 +167,51 @@ for stale blockers, unsupported prior claims, stale deleted facts, stale histori
 facts, cross-project preference leakage, private/redacted text leakage, obsolete
 retrieval context, and distractor context.
 
+The report also loads the checked-in external adapter coverage manifest by default:
+
+```text
+apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+```
+
+That manifest records the first memory-project set: ELF, qmd, agentmemory,
+mem0/OpenMemory, claude-mem, memsearch, and OpenViking. Its `external_adapters`
+report section distinguishes:
+
+- `fixture_backed`: checked-in real-world fixture scoring, such as the ELF fixture
+  response path.
+- `live_baseline_only`: Docker live-baseline retrieval/lifecycle evidence that is not
+  a real-world suite win.
+- `live_real_world`: future external adapters that actually execute `real_world_job`
+  prompts and scoring.
+
+Current state: no external project has a `live_real_world` adapter in this runner yet.
+qmd has Docker live-baseline pass evidence for the encoded same-corpus checks, but its
+real-world suites remain `not_encoded`. agentmemory is blocked on durable upstream
+storage for lifecycle proof. mem0/OpenMemory, memsearch, and claude-mem currently
+retain wrong-result or incomplete live-baseline states for the checked-in adapter
+evidence. OpenViking is incomplete until its local embedding setup is reliable inside
+Docker. These typed states describe benchmark coverage; do not treat them as broad
+project quality rankings.
+
+To run the fixture report without the manifest during local debugging:
+
+```sh
+cargo run -p elf-eval --bin real_world_job_benchmark -- \
+  run \
+  --fixtures apps/elf-eval/fixtures/real_world_memory \
+  --skip-external-adapter-manifest
+```
+
+To test an adapter-pack manifest before committing it:
+
+```sh
+cargo run -p elf-eval --bin real_world_job_benchmark -- \
+  run \
+  --fixtures apps/elf-eval/fixtures/real_world_memory \
+  --external-adapter-manifest path/to/manifest.json \
+  --out tmp/real-world-memory/adapter-contract-report.json
+```
+
 Narrow memory evolution increment:
 
 ```sh
diff --git a/docs/guide/research/comparison_external_projects.md b/docs/guide/research/comparison_external_projects.md
index 54be2ba7..9d8ae4f1 100644
--- a/docs/guide/research/comparison_external_projects.md
+++ b/docs/guide/research/comparison_external_projects.md
@@ -56,6 +56,14 @@ or could not prove durable lifecycle behavior; memsearch, mem0, OpenViking, and
 claude-mem retained `incomplete`, wrong-result, or not-encoded states. All broader suite
 fit below is research guidance, not a benchmark result.
 
+The real-world job runner now carries a separate external adapter coverage manifest:
+`apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`.
+That manifest is a contract and evidence ledger, not a leaderboard. It records which
+projects only have `live_baseline_only` Docker retrieval/lifecycle evidence, which
+capabilities are `mocked`, `blocked`, `unsupported`, `incomplete`, `wrong_result`, or
+`lifecycle_fail`, and which real-world suites remain `not_encoded`. No external project
+in the first manifest has `live_real_world` suite evidence yet.
+
 Benchmark suite labels:
 
 | Suite | Real-world job shape |
diff --git a/docs/guide/research/external_memory_improvement_plan.md b/docs/guide/research/external_memory_improvement_plan.md
index f288685e..bd37e8fc 100644
--- a/docs/guide/research/external_memory_improvement_plan.md
+++ b/docs/guide/research/external_memory_improvement_plan.md
@@ -231,12 +231,17 @@ Implementation shape:
 - For every external adapter, mark which behaviors are real, mocked, unsupported, or blocked.
 - Add lifecycle checks: update, delete/expire, cold-start reload, and same-corpus retrieval.
 - Keep failures typed with the terms in this document.
+- Use `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`
+  as the real-world adapter coverage contract so fixture-only, live-baseline-only, and
+  future live-real-world evidence stay separate.
 
 Acceptance:
 
 - agentmemory adapter either passes durable lifecycle checks or is explicitly marked blocked with evidence.
 - OpenViking incomplete state records a pinned dependency failure and retry path.
 - qmd smoke pass remains covered and gains scale/stress profiles.
+- Real-world reports include adapter coverage counters before any external adapter is
+  allowed to claim a real-world suite pass.
 
 Linear mapping:
 
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index d1aefae9..8591590c 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -125,6 +125,88 @@ Optional corpus fields:
 Private corpus fixtures MUST use sanitized inline text or local refs excluded from git.
 Reports MAY publish evidence ids and score summaries without publishing private text.
 
+### External Adapter Manifest
+
+Real-world reports MAY include an external adapter manifest. When present, the manifest
+MUST set its `schema` field to this schema id:
+
+```text
+elf.real_world_external_adapter_manifest/v1
+```
+
+The manifest is the stable adapter-pack contract for comparing external memory projects
+against `real_world_job` suites. It records what an adapter actually executed, which
+coverage is only fixture-backed or live-baseline-only, and which suites remain blocked,
+unsupported, incomplete, or not encoded. It MUST NOT be used to convert retrieval-only
+live-baseline evidence into a real-world suite win.
+
+Required manifest fields:
+
+- `manifest_id`: stable ASCII id for the checked-in or generated manifest.
+- `docker_isolation`: object describing the default execution boundary.
+- `adapters`: array of adapter records.
+
+`docker_isolation` MUST include:
+
+- `default`: boolean; MUST be `true` for repository-supported external adapter runs
+  unless a separate issue records why Docker is impossible.
+- `compose_file`: Docker Compose file used by the supported runner.
+- `runner`: script or command entrypoint used inside the Compose boundary.
+- `artifact_dir`: relative artifact directory for logs and reports.
+- `host_global_installs_required`: boolean; MUST be `false` for default external
+  runs.
+- `notes`: optional bounded explanatory strings.
+
+Each `adapters[]` record MUST include:
+
+- `adapter_id`: stable id unique within the manifest.
+- `project`: display name such as `qmd`, `agentmemory`, or `mem0/OpenMemory`.
+- `adapter_kind`: local execution shape, for example `docker_cli_same_corpus`,
+  `docker_sdk_same_corpus`, or `offline_fixture_response`.
+- `evidence_class`: one of `fixture_backed`, `live_baseline_only`, or
+  `live_real_world`.
+- `docker_default`: boolean.
+- `host_global_installs_required`: boolean.
+- `overall_status`: one adapter status from the table below.
+- `setup`, `run`, and `result`: evidence objects with `status`, `evidence`, and
+  optional `command` and `artifact`.
+- `capabilities`: array of capability coverage records with `capability`, `status`,
+  and `evidence`.
+- `suites`: array of real-world suite coverage records with `suite_id`, `status`, and
+  `evidence`.
+- `evidence`: array of evidence pointers with `kind`, `ref`, and `status`.
+- `notes`: optional bounded explanatory strings.
+- `follow_up`: optional `title` and `reason`.
+
+Adapter coverage status terms:
+
+| Term | Meaning |
+| --- | --- |
+| `real` | The adapter capability is exercised through the project's real local API, CLI, storage, or service surface. |
+| `mocked` | The adapter uses a mock, in-memory substitute, fixture replay, or other non-durable stand-in for the named capability. |
+| `unsupported` | The project or safe Docker profile does not expose the capability. This is not a quality penalty. |
+| `blocked` | The check cannot run safely without credentials, manual setup, durable runtime integration, private input, or host integration outside the run scope. |
+| `incomplete` | Setup, build, dependency, adapter wiring, parse, or runtime execution did not reach the behavioral check. |
+| `wrong_result` | The adapter reached execution but produced the wrong answer, memory, evidence, or action. |
+| `lifecycle_fail` | Retrieval may work, but encoded update, delete, expiry, cold-start, persistence, history, or supersession behavior failed. |
+| `pass` | The declared adapter check completed and met its encoded expectations. |
+| `not_encoded` | The capability, suite, or adapter path is not implemented in the runner, so no pass/fail claim is allowed. |
+
+Reports that load a manifest MUST emit an `external_adapters` section with schema id
+`elf.real_world_external_adapter_report/v1`, the manifest id, Docker isolation
+metadata, per-adapter records, and summary counters for:
+
+- adapter count, external project count, Docker-default count, host-global-install
+  count;
+- `fixture_backed`, `live_baseline_only`, and `live_real_world` evidence classes;
+- overall adapter statuses;
+- capability coverage statuses;
+- real-world suite coverage statuses.
+
+Adapter-pack issues SHOULD add new projects by appending adapter records to this
+manifest shape. They MUST NOT change these status meanings to make a project look
+better or worse.
+
 ### `timeline`
 
 `timeline` MUST model the user job as prior agent work, not just a bag of documents.
@@ -454,6 +536,10 @@ Reports MUST include:
 - capture/integration coverage classes when any fixture declares `capture_behaviors`,
   preserving the `real`, `fixture_backed`, `mocked`, `blocked`, and `not_encoded`
   distinction.
+- external adapter coverage when an external adapter manifest is loaded, preserving
+  `fixture_backed`, `live_baseline_only`, `live_real_world`, `real`, `mocked`,
+  `unsupported`, `blocked`, `incomplete`, `wrong_result`, `lifecycle_fail`, `pass`,
+  and `not_encoded` distinctions.
 
 Reports that encode `memory_evolution` jobs SHOULD also include stale-answer counts,
 conflict detection counts, update rationale availability, and temporal-validity