From f78661a2d3d236fc8c11637ac1d4e01269a5597e Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 00:58:32 +0800
Subject: [PATCH 1/7] {"schema":"decodex/commit/1","summary":"Expand
 first-generation OSS adapter benchmark coverage","authority":"XY-925"}

---
 Makefile.toml                                 |  52 ++++
 README.md                                     |  12 +-
 ...ntmemory_durable_capture_path_blocked.json | 208 ++++++++++++++
 .../claude_mem_hook_viewer_blocked.json       | 208 ++++++++++++++
 .../claude_mem_progressive_disclosure.json    | 215 +++++++++++++++
 .../claude_mem_retrieval_repair.json          | 192 +++++++++++++
 .../memsearch_markdown_rebuild_reload.json    | 192 +++++++++++++
 .../memsearch_retrieval_debug_prompt.json     | 254 ++++++++++++++++++
 .../memory_projects_manifest.json             | 144 ++++++++--
 .../tests/real_world_job_benchmark.rs         | 175 ++++++++++--
 ...-11-competitor-strength-adoption-report.md |  26 +-
 ...-11-competitor-strength-evidence-matrix.md |  16 +-
 ...tion-oss-continuity-source-store-report.md |  99 +++++++
 .../2026-06-11-measurement-coverage-audit.md  |   4 +-
 docs/guide/benchmarking/index.md              |   5 +
 ...1-competitor-strength-adoption-report.json |  34 ++-
 ...on-oss-continuity-source-store-report.json | 140 ++++++++++
 ...-11-xy-897-competitor-strength-matrix.json |  56 ++--
 18 files changed, 1920 insertions(+), 112 deletions(-)
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json
 create mode 100644 apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json
 create mode 100644 docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md
 create mode 100644 docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json

diff --git a/Makefile.toml b/Makefile.toml
index 42b2033c..9dcc099b 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -839,6 +839,9 @@ args = [
 # | real-world-memory-knowledge        | composite | |
 # | real-world-memory-knowledge-json   | command   | |
 # | real-world-memory-knowledge-report | command   | |
+# | real-world-first-generation-oss        | composite | |
+# | real-world-first-generation-oss-json   | command   | |
+# | real-world-first-generation-oss-report | command   | |
 # | ragflow-docker-smoke                    | command   | |
 # | lightrag-docker-context-smoke           | command   | |
 # | graphrag-docker-smoke                   | command   | |
@@ -933,6 +936,55 @@ args = [
 	"tmp/real-world-memory/knowledge-report.md",
 ]
 
+[tasks.real-world-first-generation-oss] # Composite entry point: defines no command, only dependencies.
+workspace = false
+dependencies = [
+	"real-world-first-generation-oss-report", # That task itself depends on the -json task, so both run.
+]
+
+[tasks.real-world-first-generation-oss-json] # Invokes the benchmark binary's `run` subcommand over the fixture directory.
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--", # Everything after this separator is passed to the binary rather than to cargo.
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss",
+	"--out",
+	"tmp/real-world-memory/first-generation-oss/report.json", # Same path the -report task passes to --report.
+	"--run-id",
+	"first-generation-oss-continuity-source-store",
+	"--adapter-id",
+	"fixture_first_generation_oss", # Same string as "adapter_id" in the fixtures' adapter_response objects.
+	"--adapter-name",
+	"First-generation OSS fixture coverage",
+]
+
+[tasks.real-world-first-generation-oss-report] # Invokes `publish`, reading report.json and writing report.md.
+workspace = false
+dependencies = [
+	"real-world-first-generation-oss-json", # Its --out path is the --report input used below.
+]
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--", # Everything after this separator is passed to the binary rather than to cargo.
+	"publish",
+	"--report",
+	"tmp/real-world-memory/first-generation-oss/report.json", # Input: the -json task's output file.
+	"--out",
+	"tmp/real-world-memory/first-generation-oss/report.md",
+]
+
 
 # External memory pattern radar
 # | task                               | type      | cwd |
diff --git a/README.md b/README.md
index f9ef9e1b..11319c42 100644
--- a/README.md
+++ b/README.md
@@ -172,6 +172,13 @@ provider-backed ELF evidence was required.
   command and repair-action clarity but is `wrong_result` for trace hydration and
   candidate-drop stage visibility. OpenMemory UI/export and claude-mem viewer flows
   remain blocked or not encoded, so this is not a broad viewer-product claim.
+- First-generation OSS continuity/source-store follow-up after XY-925: `cargo make
+  real-world-first-generation-oss` emits a fixture-backed external-adapter slice for
+  agentmemory, memsearch, and claude-mem with 6 jobs, 4 pass, 2 blocked, and full
+  evidence/source-ref/quote coverage. It selects agentmemory's durable local path,
+  adds memsearch canonical Markdown source-store and retrieval-debug prompt coverage,
+  and records claude-mem progressive-disclosure/retrieval-repair coverage while
+  keeping hook and viewer/operator workflows blocked.
 - Expanded adapter-pack coverage after XY-834: the real-world external adapter
   manifest now includes `research_gate` records for RAGFlow, LightRAG, GraphRAG,
   Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, and deeper
@@ -208,7 +215,8 @@ provider-backed ELF evidence was required.
   `cargo make baseline-backfill-10k-docker`,
   `cargo make baseline-backfill-100k-docker`,
   `cargo make baseline-soak-docker`, `cargo make baseline-live-report`,
-  `cargo make real-world-memory-live-adapters`, and
+  `cargo make real-world-memory-live-adapters`,
+  `cargo make real-world-first-generation-oss`, and
   `cargo make baseline-live-docker-clean`. Expensive 100k and long-soak profiles
   are opt-in and do not run in normal checks.
 
@@ -225,6 +233,7 @@ Detailed evidence and interpretation:
 - [Graph/RAG Scored Smoke Adapter Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md)
 - [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Capture/Write-Policy Live Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md)
+- [First-Generation OSS Continuity and Source-Store Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Single-User Production Runbook](docs/guide/single_user_production.md)
 - Benchmark contract:
@@ -303,6 +312,7 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [Graph/RAG Scored Smoke Adapter Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md)
 - [mem0/OpenMemory History and UI Export Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md)
 - [Capture/Write-Policy Live Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md)
+- [First-Generation OSS Continuity and Source-Store Report - June 11, 2026](docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md)
 - [Live Baseline Benchmark Runbook](docs/guide/benchmarking/live_baseline_benchmark.md)
 - [Real-World Agent Memory Benchmark](docs/guide/benchmarking/real_world_agent_memory_benchmark.md)
 - [External Memory Improvement Plan](docs/guide/research/external_memory_improvement_plan.md)
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json
new file mode 100644
index 00000000..68cc2395
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json
@@ -0,0 +1,208 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-agentmemory-durable-capture-blocked-001",
+  "suite": "capture_integration",
+  "title": "Select the durable agentmemory capture path before scoring hooks",
+  "encoding": {
+    "status": "blocked",
+    "reason": "agentmemory's current Docker baseline still uses a process-local SDK/KV mock, so work-resume and write-policy hook capture cannot be scored until a persistent local session, KV, and index path survives a fresh process.",
+    "follow_up": {
+      "title": "Wire agentmemory durable local session capture for work-resume jobs",
+      "reason": "The fair path is a Docker-contained adapter that persists the agentmemory observation log, KV store, and searchable index between capture and replay processes."
+    }
+  },
+  "corpus": {
+    "corpus_id": "first-generation-oss-agentmemory-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "agentmemory-selected-durable-path",
+        "kind": "adapter_plan",
+        "text": "Selected agentmemory path: run capture hooks into a Docker-local session directory, persist the SDK KV store and searchable index, restart a fresh process, then score work_resume and write-policy prompts against that recovered store.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "agentmemory_durable_capture_path_blocked",
+            "evidence_id": "agentmemory-selected-durable-path"
+          },
+          "locator": {
+            "quote": "persist the SDK KV store and searchable index"
+          }
+        },
+        "created_at": "2026-06-11T10:00:00Z"
+      },
+      {
+        "evidence_id": "agentmemory-mock-boundary",
+        "kind": "adapter_blocker",
+        "text": "Current blocker: the live-baseline adapter registers agentmemory functions against a process-local StateKV Map and in-memory index, so it cannot prove cold-start recovery or hook capture durability.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "agentmemory_durable_capture_path_blocked",
+            "evidence_id": "agentmemory-mock-boundary"
+          },
+          "locator": {
+            "quote": "process-local StateKV Map and in-memory index"
+          }
+        },
+        "created_at": "2026-06-11T10:01:00Z"
+      },
+      {
+        "evidence_id": "agentmemory-pass-decoy",
+        "kind": "adapter_state",
+        "text": "Decoy: agentmemory same-corpus retrieval passing through the mock proves durable coding-agent continuity and write-policy capture.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "agentmemory_durable_capture_path_blocked",
+            "evidence_id": "agentmemory-pass-decoy"
+          }
+        },
+        "created_at": "2026-06-11T09:59:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "agentmemory remains blocked for durable work-resume and write-policy hook capture. The selected local path is a Docker-contained session directory that persists the SDK KV store and searchable index across a fresh process; the current StateKV Map and in-memory index cannot prove that.",
+        "claims": [
+          {
+            "claim_id": "selected_durable_path",
+            "text": "The selected local path persists the SDK KV store and searchable index across a fresh process.",
+            "evidence_ids": ["agentmemory-selected-durable-path"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "current_mock_blocker",
+            "text": "The current StateKV Map and in-memory index cannot prove durable continuity.",
+            "evidence_ids": ["agentmemory-mock-boundary"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["agentmemory-selected-durable-path", "agentmemory-mock-boundary"],
+        "latency_ms": 1.0,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    },
+    "capture_behaviors": {
+      "blocked": [
+        "agentmemory durable hook capture waits for a persistent Docker-local session, KV, and index path."
+      ],
+      "notes": [
+        "Same-corpus mock retrieval is not promoted into work-resume or capture integration pass evidence."
+      ]
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "agentmemory-durable-path-selected",
+      "ts": "2026-06-11T10:00:00Z",
+      "actor": "benchmark",
+      "action": "selected_durable_adapter_path",
+      "evidence_ids": ["agentmemory-selected-durable-path"],
+      "summary": "The next fair agentmemory path must persist capture state across a fresh process."
+    },
+    {
+      "event_id": "agentmemory-mock-blocker-preserved",
+      "ts": "2026-06-11T10:01:00Z",
+      "actor": "benchmark",
+      "action": "kept_blocked_state",
+      "evidence_ids": ["agentmemory-mock-boundary"],
+      "summary": "The current in-memory adapter remains blocked for durable continuity."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "What local agentmemory path should be used for work-resume and write-policy capture, and can the current mock be scored?",
+    "job_mode": "operate",
+    "constraints": ["cite_evidence", "state_blockers", "do_not_promote_mock_smoke"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "selected_durable_path",
+        "text": "The selected local path persists the SDK KV store and searchable index across a fresh process."
+      },
+      {
+        "claim_id": "current_mock_blocker",
+        "text": "The current StateKV Map and in-memory index cannot prove durable continuity."
+      }
+    ],
+    "must_not_include": [
+      "same-corpus retrieval passing through the mock proves durable coding-agent continuity"
+    ],
+    "evidence_links": {
+      "selected_durable_path": ["agentmemory-selected-durable-path"],
+      "current_mock_blocker": ["agentmemory-mock-boundary"]
+    },
+    "answer_type": "blocked_plan",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "agentmemory-selected-durable-path",
+      "claim_id": "selected_durable_path",
+      "requirement": "cite",
+      "quote": "persist the SDK KV store and searchable index"
+    },
+    {
+      "evidence_id": "agentmemory-mock-boundary",
+      "claim_id": "current_mock_blocker",
+      "requirement": "cite",
+      "quote": "process-local StateKV Map and in-memory index"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "mock-smoke-durable-pass",
+      "type": "unsupported_prior",
+      "evidence_ids": ["agentmemory-pass-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "uncertainty_handling": {
+        "weight": 0.35,
+        "max_points": 1.0,
+        "criteria": "Keeps the durable path blocked until persistent state is proven."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Names the concrete local path needed for the next adapter."
+      },
+      "evidence_grounding": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Cites the selected path and the current mock boundary."
+      },
+      "trap_avoidance": {
+        "weight": 0.15,
+        "max_points": 1.0,
+        "criteria": "Does not promote the mock same-corpus smoke into durable continuity proof."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["external_adapter", "agentmemory", "capture_integration", "blocked", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json
new file mode 100644
index 00000000..49d0dc92
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json
@@ -0,0 +1,208 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-claude-mem-hook-viewer-blocked-001",
+  "suite": "capture_integration",
+  "title": "Keep claude-mem hook and viewer workflows blocked until Docker-contained",
+  "encoding": {
+    "status": "blocked",
+    "reason": "The current claude-mem Docker baseline exercises repository classes and durable SQLite only; it does not launch hooks, timeline capture, the local viewer, or an operator workflow over the same corpus.",
+    "follow_up": {
+      "title": "Encode claude-mem hook capture and viewer workflow in Docker",
+      "reason": "A fair UX comparison requires hook observations, timeline/viewer readback, and retrieval repair artifacts produced inside the same containerized run."
+    }
+  },
+  "corpus": {
+    "corpus_id": "first-generation-oss-claude-mem-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "claude-mem-hook-viewer-blocker",
+        "kind": "adapter_blocker",
+        "text": "claude-mem hook/viewer blocker: the current Docker runner uses repository classes only and does not execute hook capture, local viewer timeline readback, or operator repair workflows.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_hook_viewer_blocked",
+            "evidence_id": "claude-mem-hook-viewer-blocker"
+          },
+          "locator": {
+            "quote": "does not execute hook capture, local viewer timeline readback"
+          }
+        },
+        "created_at": "2026-06-11T10:50:00Z"
+      },
+      {
+        "evidence_id": "claude-mem-needed-docker-path",
+        "kind": "adapter_plan",
+        "text": "Needed claude-mem path: run hook capture and viewer/operator readback inside Docker against the same durable SQLite corpus, then emit timeline, detail hydration, and repair-command artifacts.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_hook_viewer_blocked",
+            "evidence_id": "claude-mem-needed-docker-path"
+          },
+          "locator": {
+            "quote": "run hook capture and viewer/operator readback inside Docker"
+          }
+        },
+        "created_at": "2026-06-11T10:51:00Z"
+      },
+      {
+        "evidence_id": "claude-mem-hook-pass-decoy",
+        "kind": "adapter_state",
+        "text": "Decoy: repository class tests prove claude-mem hook capture and viewer workflows pass.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_hook_viewer_blocked",
+            "evidence_id": "claude-mem-hook-pass-decoy"
+          }
+        },
+        "created_at": "2026-06-11T10:49:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "claude-mem hook capture and viewer/operator workflows remain blocked. The current runner uses repository classes only; the next comparable path must run hook capture plus viewer/operator readback inside Docker against the same durable SQLite corpus and emit timeline, hydration, and repair-command artifacts.",
+        "claims": [
+          {
+            "claim_id": "hook_viewer_blocked",
+            "text": "The current runner does not execute hook capture or local viewer timeline readback.",
+            "evidence_ids": ["claude-mem-hook-viewer-blocker"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "needed_docker_path",
+            "text": "The needed path is hook capture and viewer/operator readback inside Docker against the same durable SQLite corpus.",
+            "evidence_ids": ["claude-mem-needed-docker-path"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["claude-mem-hook-viewer-blocker", "claude-mem-needed-docker-path"],
+        "latency_ms": 1.0,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    },
+    "capture_behaviors": {
+      "blocked": [
+        "claude-mem hook capture and viewer/operator readback are not Docker-contained yet."
+      ],
+      "notes": [
+        "Repository class lifecycle and hydration evidence must not be reused as hook or viewer workflow proof."
+      ]
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "claude-mem-hook-viewer-blocker-recorded",
+      "ts": "2026-06-11T10:50:00Z",
+      "actor": "benchmark",
+      "action": "recorded_blocker",
+      "evidence_ids": ["claude-mem-hook-viewer-blocker"],
+      "summary": "Hook capture and local viewer readback are outside the current Docker runner."
+    },
+    {
+      "event_id": "claude-mem-needed-path-recorded",
+      "ts": "2026-06-11T10:51:00Z",
+      "actor": "benchmark",
+      "action": "selected_next_path",
+      "evidence_ids": ["claude-mem-needed-docker-path"],
+      "summary": "The next fair path must run hook capture and viewer/operator readback inside Docker."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Can claude-mem hook capture and viewer workflows be scored from the current Docker baseline?",
+    "job_mode": "operate",
+    "constraints": ["cite_evidence", "state_blockers", "avoid_repository_overclaim"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "hook_viewer_blocked",
+        "text": "The current runner does not execute hook capture or local viewer timeline readback."
+      },
+      {
+        "claim_id": "needed_docker_path",
+        "text": "The needed path is hook capture and viewer/operator readback inside Docker against the same durable SQLite corpus."
+      }
+    ],
+    "must_not_include": [
+      "repository class tests prove claude-mem hook capture and viewer workflows pass"
+    ],
+    "evidence_links": {
+      "hook_viewer_blocked": ["claude-mem-hook-viewer-blocker"],
+      "needed_docker_path": ["claude-mem-needed-docker-path"]
+    },
+    "answer_type": "blocked_plan",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "claude-mem-hook-viewer-blocker",
+      "claim_id": "hook_viewer_blocked",
+      "requirement": "cite",
+      "quote": "does not execute hook capture, local viewer timeline readback"
+    },
+    {
+      "evidence_id": "claude-mem-needed-docker-path",
+      "claim_id": "needed_docker_path",
+      "requirement": "explain",
+      "quote": "run hook capture and viewer/operator readback inside Docker"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "repository-class-hook-viewer-pass",
+      "type": "unsupported_prior",
+      "evidence_ids": ["claude-mem-hook-pass-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "uncertainty_handling": {
+        "weight": 0.35,
+        "max_points": 1.0,
+        "criteria": "Keeps hook/viewer workflow blocked until a Docker-contained run exists."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Names the next comparable Docker path."
+      },
+      "evidence_grounding": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Cites the current blocker and needed path."
+      },
+      "trap_avoidance": {
+        "weight": 0.15,
+        "max_points": 1.0,
+        "criteria": "Does not reuse repository class checks as hook/viewer proof."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["external_adapter", "claude-mem", "capture_integration", "blocked", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json
new file mode 100644
index 00000000..48bd8092
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json
@@ -0,0 +1,215 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-claude-mem-progressive-disclosure-001",
+  "suite": "operator_debugging_ux",
+  "title": "Preserve claude-mem progressive-disclosure evidence boundary",
+  "corpus": {
+    "corpus_id": "first-generation-oss-claude-mem-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "claude-mem-detail-hydration",
+        "kind": "adapter_artifact",
+        "text": "claude-mem progressive evidence: the Docker repository path verified search result to getById detail hydration plus listSources source evidence on a durable SQLite repository.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_progressive_disclosure",
+            "evidence_id": "claude-mem-detail-hydration"
+          },
+          "locator": {
+            "quote": "getById detail hydration plus listSources source evidence"
+          }
+        },
+        "created_at": "2026-06-11T10:30:00Z"
+      },
+      {
+        "evidence_id": "claude-mem-progressive-boundary",
+        "kind": "claim_boundary",
+        "text": "claude-mem boundary: repository search-to-detail hydration is useful progressive-disclosure evidence, but it does not execute hooks, timeline capture, viewer workflows, or real-world prompt scoring.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_progressive_disclosure",
+            "evidence_id": "claude-mem-progressive-boundary"
+          },
+          "locator": {
+            "quote": "does not execute hooks, timeline capture, viewer workflows"
+          }
+        },
+        "created_at": "2026-06-11T10:31:00Z"
+      },
+      {
+        "evidence_id": "claude-mem-viewer-decoy",
+        "kind": "adapter_state",
+        "text": "Decoy: repository detail hydration proves claude-mem viewer and hook workflows pass.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_progressive_disclosure",
+            "evidence_id": "claude-mem-viewer-decoy"
+          }
+        },
+        "created_at": "2026-06-11T10:29:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "claude-mem has Docker-contained progressive-disclosure evidence at the repository layer: search results can be hydrated through getById and listSources on durable SQLite. That should stay separate from hook, timeline, viewer, and real-world prompt scoring, which are not executed by the current runner.",
+        "claims": [
+          {
+            "claim_id": "repository_progressive_evidence",
+            "text": "claude-mem search results can be hydrated through getById and listSources on durable SQLite.",
+            "evidence_ids": ["claude-mem-detail-hydration"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "viewer_hook_boundary",
+            "text": "Hook, timeline, viewer, and real-world prompt scoring are not executed by the current runner.",
+            "evidence_ids": ["claude-mem-progressive-boundary"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["claude-mem-detail-hydration", "claude-mem-progressive-boundary"],
+        "latency_ms": 1.3,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "claude-mem-detail-hydration-recorded",
+      "ts": "2026-06-11T10:30:00Z",
+      "actor": "benchmark",
+      "action": "recorded_progressive_disclosure_evidence",
+      "evidence_ids": ["claude-mem-detail-hydration"],
+      "summary": "The Docker repository path exposes search-to-detail/source hydration."
+    },
+    {
+      "event_id": "claude-mem-viewer-boundary-recorded",
+      "ts": "2026-06-11T10:31:00Z",
+      "actor": "benchmark",
+      "action": "preserved_viewer_hook_boundary",
+      "evidence_ids": ["claude-mem-progressive-boundary"],
+      "summary": "Repository hydration is not promoted into hook or viewer pass evidence."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "What claude-mem progressive-disclosure evidence is measured, and what remains outside the Docker-contained path?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence", "separate_repository_from_viewer", "avoid_hook_claims"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "repository_progressive_evidence",
+        "text": "claude-mem search results can be hydrated through getById and listSources on durable SQLite."
+      },
+      {
+        "claim_id": "viewer_hook_boundary",
+        "text": "Hook, timeline, viewer, and real-world prompt scoring are not executed by the current runner."
+      }
+    ],
+    "must_not_include": [
+      "repository detail hydration proves claude-mem viewer and hook workflows pass"
+    ],
+    "evidence_links": {
+      "repository_progressive_evidence": ["claude-mem-detail-hydration"],
+      "viewer_hook_boundary": ["claude-mem-progressive-boundary"]
+    },
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "claude-mem-detail-hydration",
+      "claim_id": "repository_progressive_evidence",
+      "requirement": "cite",
+      "quote": "getById detail hydration plus listSources source evidence"
+    },
+    {
+      "evidence_id": "claude-mem-progressive-boundary",
+      "claim_id": "viewer_hook_boundary",
+      "requirement": "cite",
+      "quote": "does not execute hooks, timeline capture, viewer workflows"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "repository-hydration-viewer-pass",
+      "type": "unsupported_prior",
+      "evidence_ids": ["claude-mem-viewer-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Explains the measured progressive-disclosure path."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites detail hydration and boundary evidence."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Separates repository evidence from viewer/hook follow-up."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Does not promote repository hydration into viewer or hook claims."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "viewer_hook_workflow_not_encoded",
+    "trace_id": "claude-mem-repository-detail",
+    "root_cause": "The Docker-contained evidence stops at repository detail/source hydration and does not run the product viewer or hooks.",
+    "steps_to_root_cause": 2,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "repository search result can be hydrated to detail and source rows",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "trace_available": true,
+    "replay_command_available": true,
+    "replay_command": "ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker",
+    "replay_artifact": "tmp/live-baseline/claude-mem.log",
+    "viewer_panels": ["Repository Search Result", "Memory Item Detail", "Source List"],
+    "cli_steps": [
+      "run the claude-mem Docker baseline",
+      "inspect getById detail hydration",
+      "inspect listSources evidence",
+      "keep hook and viewer workflows blocked until separately encoded"
+    ],
+    "trace_evidence": ["claude-mem-detail-hydration", "claude-mem-progressive-boundary"],
+    "ux_gaps": []
+  },
+  "tags": ["external_adapter", "claude-mem", "operator_debugging_ux", "progressive_disclosure", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json
new file mode 100644
index 00000000..4fb20191
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json
@@ -0,0 +1,192 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-claude-mem-retrieval-repair-001",
+  "suite": "retrieval",
+  "title": "Preserve claude-mem retrieval repair evidence after same-corpus miss",
+  "corpus": {
+    "corpus_id": "first-generation-oss-claude-mem-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "claude-mem-same-corpus-miss",
+        "kind": "adapter_artifact",
+        "text": "claude-mem retrieval repair evidence: the Docker baseline built the durable SQLite repository but same-corpus retrieval returned 0 of 3 expected query checks, so retrieval quality remains wrong_result.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_retrieval_repair",
+            "evidence_id": "claude-mem-same-corpus-miss"
+          },
+          "locator": {
+            "quote": "same-corpus retrieval returned 0 of 3 expected query checks"
+          }
+        },
+        "created_at": "2026-06-11T10:40:00Z"
+      },
+      {
+        "evidence_id": "claude-mem-repair-command",
+        "kind": "debug_command",
+        "text": "claude-mem repair command: rerun ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker, then inspect tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json before changing retrieval scoring.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_retrieval_repair",
+            "evidence_id": "claude-mem-repair-command"
+          },
+          "locator": {
+            "quote": "inspect tmp/live-baseline/claude-mem.log"
+          }
+        },
+        "created_at": "2026-06-11T10:41:00Z"
+      },
+      {
+        "evidence_id": "claude-mem-retrieval-pass-decoy",
+        "kind": "adapter_state",
+        "text": "Decoy: because claude-mem repository lifecycle passed, same-corpus retrieval also passed.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "claude_mem_retrieval_repair",
+            "evidence_id": "claude-mem-retrieval-pass-decoy"
+          }
+        },
+        "created_at": "2026-06-11T10:39:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "claude-mem retrieval remains wrong_result: the durable SQLite repository built, but same-corpus retrieval returned 0 of 3 expected query checks. The repair path is to rerun the claude-mem baseline, inspect tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json, then fix retrieval before any pass claim.",
+        "claims": [
+          {
+            "claim_id": "retrieval_wrong_result",
+            "text": "claude-mem same-corpus retrieval returned 0 of 3 expected query checks.",
+            "evidence_ids": ["claude-mem-same-corpus-miss"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "repair_artifact_path",
+            "text": "The repair path is to inspect tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json.",
+            "evidence_ids": ["claude-mem-repair-command"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["claude-mem-same-corpus-miss", "claude-mem-repair-command"],
+        "latency_ms": 1.4,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "claude-mem-wrong-result-recorded",
+      "ts": "2026-06-11T10:40:00Z",
+      "actor": "benchmark",
+      "action": "recorded_same_corpus_wrong_result",
+      "evidence_ids": ["claude-mem-same-corpus-miss"],
+      "summary": "The same-corpus result remains wrong_result despite durable repository lifecycle evidence."
+    },
+    {
+      "event_id": "claude-mem-repair-artifact-recorded",
+      "ts": "2026-06-11T10:41:00Z",
+      "actor": "benchmark",
+      "action": "recorded_repair_artifact_path",
+      "evidence_ids": ["claude-mem-repair-command"],
+      "summary": "The repair path points at the reproducible Docker baseline and logs."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Did claude-mem retrieval pass, and what artifact should I inspect to repair the miss?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence", "preserve_wrong_result", "name_repair_artifact"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "retrieval_wrong_result",
+        "text": "claude-mem same-corpus retrieval returned 0 of 3 expected query checks."
+      },
+      {
+        "claim_id": "repair_artifact_path",
+        "text": "The repair path is to inspect tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json."
+      }
+    ],
+    "must_not_include": [
+      "same-corpus retrieval also passed"
+    ],
+    "evidence_links": {
+      "retrieval_wrong_result": ["claude-mem-same-corpus-miss"],
+      "repair_artifact_path": ["claude-mem-repair-command"]
+    },
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "claude-mem-same-corpus-miss",
+      "claim_id": "retrieval_wrong_result",
+      "requirement": "cite",
+      "quote": "same-corpus retrieval returned 0 of 3 expected query checks"
+    },
+    {
+      "evidence_id": "claude-mem-repair-command",
+      "claim_id": "repair_artifact_path",
+      "requirement": "explain",
+      "quote": "inspect tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "lifecycle-pass-implies-retrieval-pass",
+      "type": "unsupported_prior",
+      "evidence_ids": ["claude-mem-retrieval-pass-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Keeps same-corpus retrieval as wrong_result."
+      },
+      "evidence_grounding": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Cites the wrong-result artifact and repair command."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Names the concrete artifact path for repair."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Does not infer retrieval pass from lifecycle pass."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["external_adapter", "claude-mem", "retrieval", "wrong_result", "repair"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json
new file mode 100644
index 00000000..c94b9486
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json
@@ -0,0 +1,192 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-memsearch-markdown-rebuild-reload-001",
+  "suite": "trust_source_of_truth",
+  "title": "Verify memsearch canonical Markdown rebuild and reload boundary",
+  "corpus": {
+    "corpus_id": "first-generation-oss-memsearch-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "memsearch-canonical-markdown-store",
+        "kind": "source_store",
+        "text": "memsearch source-store evidence: the canonical Markdown corpus file is the source of truth, and the index is rebuilt by rerunning memsearch index over the file tree.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "memsearch_markdown_rebuild_reload",
+            "evidence_id": "memsearch-canonical-markdown-store"
+          },
+          "locator": {
+            "quote": "canonical Markdown corpus file is the source of truth"
+          }
+        },
+        "created_at": "2026-06-11T10:10:00Z"
+      },
+      {
+        "evidence_id": "memsearch-reload-proof",
+        "kind": "adapter_artifact",
+        "text": "memsearch reload proof: the Docker baseline rewrote auth-memory.md, deleted another corpus file, reran memsearch index, and a fresh memsearch search process retrieved the replacement marker while suppressing deleted evidence.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "memsearch_markdown_rebuild_reload",
+            "evidence_id": "memsearch-reload-proof"
+          },
+          "locator": {
+            "quote": "a fresh memsearch search process retrieved the replacement marker"
+          }
+        },
+        "created_at": "2026-06-11T10:11:00Z"
+      },
+      {
+        "evidence_id": "memsearch-suite-pass-decoy",
+        "kind": "claim_boundary",
+        "text": "Decoy: because memsearch reload passed a Docker smoke, memsearch has passed the full real-world source-of-truth suite.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "memsearch_markdown_rebuild_reload",
+            "evidence_id": "memsearch-suite-pass-decoy"
+          }
+        },
+        "created_at": "2026-06-11T10:09:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "memsearch's comparable source-store path is the canonical Markdown corpus file, with the derived index rebuilt by rerunning memsearch index. The Docker smoke proves rewrite, delete, reindex, and fresh-process reload behavior, but it must not be promoted to a full real-world suite pass.",
+        "claims": [
+          {
+            "claim_id": "markdown_is_source_store",
+            "text": "The canonical Markdown corpus file is the source of truth for memsearch.",
+            "evidence_ids": ["memsearch-canonical-markdown-store"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "rebuild_reload_smoke",
+            "text": "The Docker smoke proves rewrite, delete, reindex, and fresh-process reload behavior.",
+            "evidence_ids": ["memsearch-reload-proof"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["memsearch-canonical-markdown-store", "memsearch-reload-proof"],
+        "latency_ms": 1.2,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "memsearch-markdown-store-selected",
+      "ts": "2026-06-11T10:10:00Z",
+      "actor": "benchmark",
+      "action": "selected_canonical_markdown_store",
+      "evidence_ids": ["memsearch-canonical-markdown-store"],
+      "summary": "The memsearch comparable source-store job uses the Markdown corpus as authoritative state."
+    },
+    {
+      "event_id": "memsearch-reload-artifact-recorded",
+      "ts": "2026-06-11T10:11:00Z",
+      "actor": "benchmark",
+      "action": "recorded_reindex_reload_smoke",
+      "evidence_ids": ["memsearch-reload-proof"],
+      "summary": "The Docker smoke supplies command-level reindex/reload evidence."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "What is the comparable memsearch source-of-truth path, and what does the rebuild/reload evidence prove?",
+    "job_mode": "answer",
+    "constraints": ["cite_evidence", "state_claim_boundary", "avoid_suite_promotion"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "markdown_is_source_store",
+        "text": "The canonical Markdown corpus file is the source of truth for memsearch."
+      },
+      {
+        "claim_id": "rebuild_reload_smoke",
+        "text": "The Docker smoke proves rewrite, delete, reindex, and fresh-process reload behavior."
+      }
+    ],
+    "must_not_include": [
+      "memsearch has passed the full real-world source-of-truth suite"
+    ],
+    "evidence_links": {
+      "markdown_is_source_store": ["memsearch-canonical-markdown-store"],
+      "rebuild_reload_smoke": ["memsearch-reload-proof"]
+    },
+    "answer_type": "direct_answer",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "memsearch-canonical-markdown-store",
+      "claim_id": "markdown_is_source_store",
+      "requirement": "cite",
+      "quote": "canonical Markdown corpus file is the source of truth"
+    },
+    {
+      "evidence_id": "memsearch-reload-proof",
+      "claim_id": "rebuild_reload_smoke",
+      "requirement": "cite",
+      "quote": "a fresh memsearch search process retrieved the replacement marker"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "memsearch-smoke-suite-pass",
+      "type": "unsupported_prior",
+      "evidence_ids": ["memsearch-suite-pass-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "answer_correctness": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Identifies Markdown as source store and index as rebuildable derived state."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites source-store and reload proof evidence."
+      },
+      "lifecycle_behavior": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Explains rewrite, delete, reindex, and fresh-process reload behavior."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Does not promote smoke evidence into full suite pass evidence."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "tags": ["external_adapter", "memsearch", "source_store", "markdown", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json
new file mode 100644
index 00000000..e3dbacdc
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json
@@ -0,0 +1,254 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "first-gen-memsearch-retrieval-debug-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug memsearch retrieval through Markdown file and index artifacts",
+  "corpus": {
+    "corpus_id": "first-generation-oss-memsearch-2026-06-11",
+    "profile": "external_adapter",
+    "items": [
+      {
+        "evidence_id": "memsearch-debug-command",
+        "kind": "debug_command",
+        "text": "memsearch retrieval-debug evidence: rerun memsearch search with --top-k, inspect the matching Markdown file, and rerun memsearch index after any file rewrite or delete.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "memsearch_retrieval_debug_prompt",
+            "evidence_id": "memsearch-debug-command"
+          },
+          "locator": {
+            "quote": "inspect the matching Markdown file"
+          }
+        },
+        "created_at": "2026-06-11T10:20:00Z"
+      },
+      {
+        "evidence_id": "memsearch-debug-boundary",
+        "kind": "claim_boundary",
+        "text": "memsearch debug boundary: the current adapter exposes CLI search output and canonical Markdown files, but it does not emit staged query-expansion, fusion, rerank, or candidate-drop trace bundles.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "memsearch_retrieval_debug_prompt",
+            "evidence_id": "memsearch-debug-boundary"
+          },
+          "locator": {
+            "quote": "does not emit staged query-expansion, fusion, rerank, or candidate-drop trace bundles"
+          }
+        },
+        "created_at": "2026-06-11T10:21:00Z"
+      },
+      {
+        "evidence_id": "memsearch-trace-decoy",
+        "kind": "adapter_state",
+        "text": "Decoy: memsearch exposes the same staged retrieval trajectory and candidate-drop trace bundle as ELF.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "memsearch_retrieval_debug_prompt",
+            "evidence_id": "memsearch-trace-decoy"
+          }
+        },
+        "created_at": "2026-06-11T10:19:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_first_generation_oss",
+      "answer": {
+        "content": "For memsearch retrieval debugging, rerun memsearch search with --top-k, inspect the matching Markdown file, and rerun memsearch index after file changes. The useful debug surface is source-file transparency plus CLI replay; staged expansion, fusion, rerank, and candidate-drop trace bundles are not emitted by the current adapter.",
+        "claims": [
+          {
+            "claim_id": "debug_replay_path",
+            "text": "Rerun memsearch search with --top-k and inspect the matching Markdown file.",
+            "evidence_ids": ["memsearch-debug-command"],
+            "confidence": "high"
+          },
+          {
+            "claim_id": "trace_boundary",
+            "text": "The current adapter does not emit staged expansion, fusion, rerank, or candidate-drop trace bundles.",
+            "evidence_ids": ["memsearch-debug-boundary"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["memsearch-debug-command", "memsearch-debug-boundary"],
+        "latency_ms": 1.1,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        },
+        "trace_explainability": {
+          "trace_id": "memsearch-cli-debug",
+          "failure_stage": "trace_bundle",
+          "failure_reason": "memsearch exposes CLI replay and source Markdown inspection, not staged retrieval trace bundles.",
+          "stages": [
+            {
+              "stage_name": "cli.search",
+              "kept_evidence": ["memsearch-debug-command"],
+              "dropped_evidence": [],
+              "demoted_evidence": [],
+              "distractor_evidence": ["memsearch-trace-decoy"],
+              "notes": "CLI replay can reproduce the visible result set."
+            },
+            {
+              "stage_name": "source.markdown",
+              "kept_evidence": ["memsearch-debug-command"],
+              "dropped_evidence": [],
+              "demoted_evidence": [],
+              "distractor_evidence": [],
+              "notes": "The Markdown file remains inspectable as canonical source."
+            },
+            {
+              "stage_name": "trace_bundle",
+              "kept_evidence": ["memsearch-debug-boundary"],
+              "dropped_evidence": [],
+              "demoted_evidence": [],
+              "distractor_evidence": ["memsearch-trace-decoy"],
+              "notes": "Candidate-drop trace bundles are not encoded for memsearch."
+            }
+          ]
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "memsearch-debug-path-recorded",
+      "ts": "2026-06-11T10:20:00Z",
+      "actor": "benchmark",
+      "action": "recorded_debug_path",
+      "evidence_ids": ["memsearch-debug-command"],
+      "summary": "The retrieval-debug job points at CLI replay and source Markdown inspection."
+    },
+    {
+      "event_id": "memsearch-trace-boundary-recorded",
+      "ts": "2026-06-11T10:21:00Z",
+      "actor": "benchmark",
+      "action": "recorded_trace_gap",
+      "evidence_ids": ["memsearch-debug-boundary"],
+      "summary": "The job keeps staged trace bundles as not encoded for memsearch."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "How should I debug a wrong memsearch retrieval result, and what trace visibility is not available?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence", "identify_debug_surface", "avoid_trace_overclaim"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "debug_replay_path",
+        "text": "Rerun memsearch search with --top-k and inspect the matching Markdown file."
+      },
+      {
+        "claim_id": "trace_boundary",
+        "text": "The current adapter does not emit staged expansion, fusion, rerank, or candidate-drop trace bundles."
+      }
+    ],
+    "must_not_include": [
+      "memsearch exposes the same staged retrieval trajectory and candidate-drop trace bundle as ELF"
+    ],
+    "evidence_links": {
+      "debug_replay_path": ["memsearch-debug-command"],
+      "trace_boundary": ["memsearch-debug-boundary"]
+    },
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": true,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "memsearch-debug-command",
+      "claim_id": "debug_replay_path",
+      "requirement": "explain",
+      "quote": "inspect the matching Markdown file"
+    },
+    {
+      "evidence_id": "memsearch-debug-boundary",
+      "claim_id": "trace_boundary",
+      "requirement": "explain",
+      "quote": "does not emit staged query-expansion, fusion, rerank, or candidate-drop trace bundles"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "memsearch-full-trace-decoy",
+      "type": "unsupported_prior",
+      "evidence_ids": ["memsearch-trace-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {
+        "weight": 0.35,
+        "max_points": 1.0,
+        "criteria": "Names the available memsearch debug path."
+      },
+      "evidence_grounding": {
+        "weight": 0.25,
+        "max_points": 1.0,
+        "criteria": "Cites CLI/source debug and trace-boundary evidence."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Provides a concrete replay and reindex sequence."
+      },
+      "trap_avoidance": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Does not overclaim staged trace visibility."
+      }
+    },
+    "pass_threshold": 0.85,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": [],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "memsearch_trace_bundle_not_encoded",
+    "trace_id": "memsearch-cli-debug",
+    "root_cause": "memsearch debugging is available through CLI replay and canonical Markdown inspection, while staged candidate-drop trace bundles are not encoded.",
+    "steps_to_root_cause": 3,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "not encoded; inspect CLI search output and Markdown source instead",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "trace_available": false,
+    "replay_command_available": true,
+    "replay_command": "memsearch search '<query>' --top-k 10 && memsearch index <markdown-corpus>",
+    "replay_artifact": "tmp/live-baseline/memsearch.log",
+    "viewer_panels": ["CLI Search Output", "Markdown Source File", "Index Rebuild Log"],
+    "cli_steps": [
+      "rerun memsearch search with --top-k",
+      "open the matching Markdown file",
+      "edit or delete the canonical file if needed",
+      "rerun memsearch index",
+      "rerun search from a fresh process"
+    ],
+    "trace_evidence": ["memsearch-debug-command", "memsearch-debug-boundary"],
+    "ux_gaps": [
+      {
+        "gap_id": "staged-trace-bundle-not-encoded",
+        "severity": "medium",
+        "description": "No staged expansion/fusion/rerank/candidate-drop bundle is emitted by the current memsearch adapter.",
+        "follow_up_issue": "XY-925"
+      }
+    ]
+  },
+  "tags": ["external_adapter", "memsearch", "operator_debugging_ux", "retrieval_debug", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index c6074d60..33cbf264 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -1,6 +1,6 @@
 {
   "schema": "elf.real_world_external_adapter_manifest/v1",
-  "manifest_id": "real-world-memory-project-adapters-2026-06-11-openmemory-ui-export",
+  "manifest_id": "real-world-memory-project-adapters-2026-06-11-first-generation-continuity-source-store",
   "docker_isolation": {
     "default": true,
     "compose_file": "docker-compose.baseline.yml",
@@ -806,10 +806,20 @@
           "status": "blocked",
           "evidence": "A persistent upstream KV/index path or hosted runtime is needed before cold-start recovery can be fairly scored."
         },
+        {
+          "capability": "durable_work_resume_capture_path",
+          "status": "blocked",
+          "evidence": "XY-925 selects the next local path as a Docker-contained agentmemory session directory with persisted SDK KV store, observation log, and searchable index across a fresh process; the current StateKV Map and in-memory index still block scoring."
+        },
+        {
+          "capability": "write_policy_hook_capture",
+          "status": "blocked",
+          "evidence": "Capture/write-policy jobs require live agentmemory hook observations plus persisted write-policy audit evidence. The current adapter does not execute those hooks."
+        },
         {
           "capability": "real_world_job_adapter",
-          "status": "not_encoded",
-          "evidence": "No agentmemory adapter currently executes real_world_job prompts and answer scoring."
+          "status": "blocked",
+          "evidence": "XY-925 adds fixture-backed blocked prompt coverage for the required durable path, but no live agentmemory real_world_job adapter executes prompts until the persistent local store exists."
         }
       ],
       "suites": [
@@ -835,6 +845,7 @@
           "suite_id": "retrieval",
           "status": "pass",
           "elf_position": "untested",
+          "comparison_outcome": "not_tested",
           "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports agentmemory retrieval_pass with 3/3 same-corpus retrieval checks through mem::remember and mem::search. This is live-baseline-only evidence through an in-memory mock, not a real_world_job suite pass.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
@@ -844,6 +855,7 @@
           "suite_id": "memory_evolution",
           "status": "lifecycle_fail",
           "elf_position": "wins",
+          "comparison_outcome": "win",
           "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF passing 8/8 local lifecycle checks, while agentmemory update_replaces_note_text is lifecycle_fail and cold_start_recovery_search is blocked because the harness uses an in-memory SDK/KV mock. This is an ELF baseline win only at the local lifecycle-smoke evidence class.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
@@ -853,8 +865,20 @@
           "suite_id": "work_resume",
           "status": "blocked",
           "elf_position": "untested",
-          "evidence": "agentmemory's relevant strength is durable coding-agent continuity and capture, but the Docker harness has not proven a persistent session/capture path. Keep work_resume and capture claims blocked until a durable local adapter path exists.",
-          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          "comparison_outcome": "blocked",
+          "evidence": "agentmemory's relevant strength is durable coding-agent continuity and capture, but the Docker harness has not proven a persistent session/capture path. XY-925 selects the durable local path as a Docker-contained session directory that persists the SDK KV store and searchable index across a fresh process; keep work_resume and capture claims blocked until that path exists.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "tmp/real-world-memory/first-generation-oss/report.json"
+        },
+        {
+          "scenario_id": "durable_work_resume_local_path",
+          "suite_id": "work_resume",
+          "status": "blocked",
+          "elf_position": "untested",
+          "comparison_outcome": "blocked",
+          "evidence": "The selected comparable path is explicit: capture into a Docker-local agentmemory session directory, persist the SDK KV/index and observation log, restart a fresh process, then score work_resume prompts. The checked-in fixture records this as blocked rather than scoring the current mock.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json"
         },
         {
           "scenario_id": "capture_write_policy_hooks",
@@ -862,8 +886,9 @@
           "status": "blocked",
           "elf_position": "untested",
           "comparison_outcome": "blocked",
-          "evidence": "agentmemory capture breadth is blocked for comparison because the current Docker baseline uses a process-local StateKV Map and in-memory index; no durable local session/capture path stores source ids, exclusions, write-policy audit, or evidence-bound capture output.",
-          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          "evidence": "agentmemory capture/write-policy comparison needs live hook observations and write-policy audit evidence persisted through the selected local store. The fixture preserves this as a typed blocker and does not convert the mem::remember smoke into capture proof.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json"
         }
       ],
       "evidence": [
@@ -1120,19 +1145,24 @@
         {
           "capability": "real_world_job_adapter",
           "status": "not_encoded",
-          "evidence": "No memsearch adapter currently executes real_world_job prompts and answer scoring."
+          "evidence": "XY-925 adds fixture-backed prompt coverage for the Markdown source-store and retrieval-debug jobs, but no live memsearch runtime adapter executes real_world_job prompts and answer scoring."
+        },
+        {
+          "capability": "markdown_source_store_prompt_jobs",
+          "status": "pass",
+          "evidence": "The first-generation OSS fixture slice encodes source-of-truth rebuild/reload and retrieval-debug prompts over the canonical Markdown store while preserving the live-baseline-only evidence boundary."
         }
       ],
       "suites": [
         {
           "suite_id": "trust_source_of_truth",
-          "status": "not_encoded",
-          "evidence": "The Markdown-first source model passed the local reindex/reload smoke, but no real_world_job source-of-truth prompt run is encoded."
+          "status": "pass",
+          "evidence": "The Markdown-first source model passed the local reindex/reload smoke, and XY-925 adds fixture-backed source-of-truth prompt coverage over the canonical Markdown store. No live memsearch runtime adapter executes prompt scoring yet."
         },
         {
           "suite_id": "retrieval",
-          "status": "not_encoded",
-          "evidence": "The Docker same-corpus check now passes, but no job-level real_world retrieval run is encoded for memsearch."
+          "status": "pass",
+          "evidence": "The Docker same-corpus check passes, and XY-925 adds fixture-backed retrieval-debug prompt coverage over memsearch CLI replay and Markdown source inspection. No live memsearch runtime adapter executes retrieval prompt scoring yet."
         },
         {
           "suite_id": "memory_evolution",
@@ -1146,15 +1176,37 @@
           "suite_id": "trust_source_of_truth",
           "status": "pass",
           "elf_position": "untested",
+          "comparison_outcome": "not_tested",
           "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports memsearch passed same-corpus retrieval, update reindex, delete suppression, and cold-start reload over a canonical Markdown corpus. ELF has no directly comparable canonical Markdown source-store scenario in this baseline, so the ELF position remains untested.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
         },
+        {
+          "scenario_id": "markdown_source_store_rebuild_reload_prompt",
+          "suite_id": "trust_source_of_truth",
+          "status": "pass",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "XY-925 adds a checked-in real_world_job prompt fixture that asks for the memsearch source-of-truth path and rebuild/reload boundary: canonical Markdown files are authoritative, while the index is derived by rerunning memsearch index. This is fixture-backed scenario coverage plus baseline artifact evidence, not a memsearch live real_world_job suite pass.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json"
+        },
+        {
+          "scenario_id": "markdown_retrieval_debug_prompt",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "XY-925 adds a checked-in retrieval-debug prompt over memsearch's canonical Markdown store. The expected debug surface is CLI replay plus Markdown source inspection and reindexing; staged expansion/fusion/rerank/candidate-drop trace bundles remain not encoded for memsearch.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json"
+        },
         {
           "scenario_id": "ttl_expiry_lifecycle",
           "suite_id": "memory_evolution",
           "status": "unsupported",
           "elf_position": "untested",
+          "comparison_outcome": "non_goal",
           "evidence": "The encoded memsearch CLI path supports reindex/delete but no TTL or expiry behavior. Unsupported TTL behavior is preserved as unsupported competitor evidence and does not create an ELF win/loss claim without a directly comparable scenario artifact.",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
         },
@@ -1163,7 +1215,8 @@
           "suite_id": "retrieval",
           "status": "not_encoded",
           "elf_position": "untested",
-          "evidence": "No memsearch adapter currently executes real_world_job prompts and answer scoring; baseline retrieval/reindex evidence must stay separate from suite pass claims.",
+          "comparison_outcome": "not_tested",
+          "evidence": "No live memsearch runtime adapter currently executes real_world_job prompts and answer scoring. XY-925 fixture-backed prompt jobs document the source-store and retrieval-debug shape, while baseline retrieval/reindex evidence remains separate from suite pass claims.",
           "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
         }
       ],
@@ -1325,25 +1378,35 @@
         },
         {
           "capability": "progressive_disclosure_real_world_job",
-          "status": "not_encoded",
-          "evidence": "Hook, timeline, viewer, and observation workflows are not encoded against real_world_job prompts."
+          "status": "pass",
+          "evidence": "XY-925 adds fixture-backed prompt coverage for the Docker-contained repository progressive-disclosure path: search result to getById detail hydration and listSources evidence on durable SQLite. Hook, timeline, and viewer workflows remain blocked separately."
+        },
+        {
+          "capability": "retrieval_repair_artifact",
+          "status": "wrong_result",
+          "evidence": "The same-corpus retrieval smoke remains wrong_result, and XY-925 records a repair prompt that tells operators to rerun ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker before inspecting tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json."
+        },
+        {
+          "capability": "hook_capture_viewer_workflow",
+          "status": "blocked",
+          "evidence": "The current Docker runner does not launch claude-mem hooks, timeline capture, local viewer readback, or an operator workflow over the same corpus."
         }
       ],
       "suites": [
         {
           "suite_id": "work_resume",
-          "status": "wrong_result",
+          "status": "not_encoded",
           "evidence": "The durable repository run is encoded, but hook-driven capture and real_world_job work-resume prompts are not proven by that local repository check."
         },
         {
           "suite_id": "operator_debugging_ux",
-          "status": "not_encoded",
-          "evidence": "Local viewer/operator workflow is not encoded in the benchmark runner."
+          "status": "blocked",
+          "evidence": "XY-925 adds fixture-backed progressive-disclosure and retrieval-repair prompt coverage, but local viewer/operator workflow remains blocked until a Docker-contained viewer or equivalent readback runner exists."
         },
         {
           "suite_id": "capture_integration",
-          "status": "not_encoded",
-          "evidence": "claude-mem hooks are not executed by this runner."
+          "status": "blocked",
+          "evidence": "claude-mem hook capture remains blocked because hooks, timeline capture, and observation workflows are not executed by this runner."
         }
       ],
       "scenarios": [
@@ -1352,15 +1415,27 @@
           "suite_id": "retrieval",
           "status": "wrong_result",
           "elf_position": "wins",
+          "comparison_outcome": "win",
           "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF retrieval_pass and claude-mem same_corpus_retrieval as wrong_result with 0/3 expected query checks passing, while its durable repository setup completed. This is an ELF baseline win for the narrow retrieval smoke scenario.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
         },
+        {
+          "scenario_id": "retrieval_repair_artifact_path",
+          "suite_id": "retrieval",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "XY-925 adds a checked-in repair prompt that preserves the claude-mem wrong_result and names rerun/inspection targets from the reproducible Docker baseline: tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json. This is repair evidence for a miss, not a retrieval pass.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json"
+        },
         {
           "scenario_id": "repository_lifecycle_reload",
           "suite_id": "memory_evolution",
           "status": "pass",
           "elf_position": "ties",
+          "comparison_outcome": "tie",
           "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF passing local lifecycle checks and claude-mem update, delete, and cold-start reload checks passing over a durable Docker-local SQLite repository. This is a local lifecycle-smoke tie, not a hook-driven work-resume or full progressive-disclosure job pass.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
@@ -1370,17 +1445,40 @@
           "suite_id": "operator_debugging_ux",
           "status": "pass",
           "elf_position": "untested",
+          "comparison_outcome": "not_tested",
           "evidence": "claude-mem passed the repository-level search-to-detail/source hydration check, which is a useful progressive-disclosure signal. ELF does not have a directly comparable claude-mem-style progressive-disclosure scenario in this baseline, so the ELF position remains untested rather than a loss claim.",
           "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
           "artifact": "tmp/live-baseline/live-baseline-report.json"
         },
+        {
+          "scenario_id": "progressive_disclosure_prompt",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "untested",
+          "comparison_outcome": "not_tested",
+          "evidence": "XY-925 adds fixture-backed prompt coverage that asks for the measured claude-mem progressive-disclosure boundary: repository search results hydrate through getById and listSources on durable SQLite, but hooks, timeline, viewer, and live prompt scoring are not executed.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json"
+        },
         {
           "scenario_id": "hook_capture_viewer_workflow",
           "suite_id": "capture_integration",
-          "status": "not_encoded",
+          "status": "blocked",
           "elf_position": "untested",
-          "evidence": "The Docker baseline uses repository classes only. claude-mem hooks, timeline, observations, viewer capture, and automatic capture review workflows are not executed by the runner, so capture breadth remains untested rather than an ELF win/loss.",
-          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          "comparison_outcome": "blocked",
+          "evidence": "The Docker baseline uses repository classes only. claude-mem hooks, viewer, timeline, and observation workflows are not executed by the runner, so XY-925 preserves this as a typed blocker rather than not_encoded prose.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json"
+        },
+        {
+          "scenario_id": "viewer_operator_workflow",
+          "suite_id": "operator_debugging_ux",
+          "status": "blocked",
+          "elf_position": "untested",
+          "comparison_outcome": "blocked",
+          "evidence": "A fair claude-mem viewer/operator comparison needs a Docker-contained run that opens the local viewer or equivalent readback over the same durable SQLite corpus and emits timeline, detail hydration, and repair-command artifacts. That path is not available in the current runner.",
+          "command": "cargo make real-world-first-generation-oss",
+          "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json"
         }
       ],
       "evidence": [
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 9b39fd6a..d1ac86e5 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -393,6 +393,7 @@ fn external_adapter_run_summarizes_nonzero_scenario_losses() -> Result<()> {
 		.ok_or_else(|| eyre::eyre!("missing agentmemory adapter"))?;
 
 	set_json_pointer(adapter, "/scenarios/0/elf_position", serde_json::json!("loses"))?;
+	set_json_pointer(adapter, "/scenarios/0/comparison_outcome", serde_json::json!("loss"))?;
 
 	let temp_dir =
 		env::temp_dir().join(format!("elf-real-world-loss-manifest-test-{}", process::id()));
@@ -429,7 +430,7 @@ fn external_adapter_run_summarizes_nonzero_scenario_losses() -> Result<()> {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/untested")
 			.and_then(Value::as_u64),
-		Some(11)
+		Some(16)
 	);
 	assert_eq!(
 		report
@@ -462,7 +463,9 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/manifest_id").and_then(Value::as_str),
-		Some("real-world-memory-project-adapters-2026-06-11-openmemory-ui-export")
+		Some(
+			"real-world-memory-project-adapters-2026-06-11-first-generation-continuity-source-store"
+		)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/docker_isolation/default").and_then(Value::as_bool),
@@ -500,6 +503,12 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report.pointer("/external_adapters/summary/research_gate_count").and_then(Value::as_u64),
 		Some(11)
 	);
+
+	assert_external_adapter_manifest_status_summary(report);
+	assert_external_adapter_manifest_scenario_summary(report);
+}
+
+fn assert_external_adapter_manifest_status_summary(report: &Value) {
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/pass")
@@ -552,7 +561,13 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(16)
+		Some(18)
+	);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/suite_status_counts/pass")
+			.and_then(Value::as_u64),
+		Some(24)
 	);
 	assert_eq!(
 		report
@@ -560,8 +575,12 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 			.and_then(Value::as_u64),
 		Some(0)
 	);
-
-	assert_external_adapter_manifest_scenario_summary(report);
+	assert_eq!(
+		report
+			.pointer("/external_adapters/summary/suite_status_counts/not_encoded")
+			.and_then(Value::as_u64),
+		Some(38)
+	);
 }
 
 fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
@@ -587,7 +606,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(6)
 	);
 	assert_eq!(
 		report
@@ -599,7 +618,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/wrong_result")
 			.and_then(Value::as_u64),
-		Some(4)
+		Some(5)
 	);
 	assert_eq!(
 		report
@@ -611,19 +630,19 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/pass")
 			.and_then(Value::as_u64),
-		Some(17)
+		Some(20)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(2)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/wins")
 			.and_then(Value::as_u64),
-		Some(8)
+		Some(9)
 	);
 	assert_eq!(
 		report
@@ -641,13 +660,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/untested")
 			.and_then(Value::as_u64),
-		Some(12)
+		Some(17)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/win")
 			.and_then(Value::as_u64),
-		Some(8)
+		Some(9)
 	);
 	assert_eq!(
 		report
@@ -671,13 +690,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/blocked")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(6)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/non_goal")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(3)
 	);
 }
 
@@ -964,6 +983,13 @@ fn assert_first_generation_adapter_records(
 	memsearch: &Value,
 	claude_mem: &Value,
 ) {
+	assert_agentmemory_first_generation_records(agentmemory);
+	assert_mem0_first_generation_records(mem0);
+	assert_memsearch_first_generation_records(memsearch);
+	assert_claude_mem_first_generation_records(claude_mem);
+}
+
+fn assert_agentmemory_first_generation_records(agentmemory: &Value) {
 	assert_eq!(
 		agentmemory.pointer("/scenarios/1/status").and_then(Value::as_str),
 		Some("lifecycle_fail")
@@ -973,6 +999,9 @@ fn assert_first_generation_adapter_records(
 		Some("wins")
 	);
 	assert_eq!(agentmemory.pointer("/scenarios/2/status").and_then(Value::as_str), Some("blocked"));
+}
+
+fn assert_mem0_first_generation_records(mem0: &Value) {
 	assert_eq!(
 		mem0.pointer("/capabilities/2/capability").and_then(Value::as_str),
 		Some("local_lifecycle_update_delete_reload")
@@ -1027,6 +1056,9 @@ fn assert_first_generation_adapter_records(
 		mem0.pointer("/scenarios/6/comparison_outcome").and_then(Value::as_str),
 		Some("non_goal")
 	);
+}
+
+fn assert_memsearch_first_generation_records(memsearch: &Value) {
 	assert_eq!(
 		memsearch.pointer("/capabilities/2/capability").and_then(Value::as_str),
 		Some("reindex_update_delete_reload")
@@ -1040,28 +1072,83 @@ fn assert_first_generation_adapter_records(
 		memsearch.pointer("/scenarios/0/elf_position").and_then(Value::as_str),
 		Some("untested")
 	);
+	assert_eq!(memsearch.pointer("/suites/0/status").and_then(Value::as_str), Some("pass"));
+	assert!(memsearch.pointer("/suites/0/evidence").and_then(Value::as_str).is_some_and(
+		|evidence| evidence.contains("fixture-backed source-of-truth prompt coverage")
+			&& evidence.contains("No live memsearch runtime adapter executes prompt scoring yet.")
+	));
+	assert_eq!(memsearch.pointer("/suites/1/status").and_then(Value::as_str), Some("pass"));
+	assert!(memsearch.pointer("/suites/1/evidence").and_then(Value::as_str).is_some_and(
+		|evidence| evidence.contains("fixture-backed retrieval-debug prompt coverage")
+			&& evidence.contains(
+				"No live memsearch runtime adapter executes retrieval prompt scoring yet."
+			)
+	));
+	assert_eq!(memsearch.pointer("/scenarios/1/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		memsearch.pointer("/scenarios/1/elf_position").and_then(Value::as_str),
+		Some("untested")
+	);
 	assert_eq!(
-		memsearch.pointer("/scenarios/1/status").and_then(Value::as_str),
+		memsearch.pointer("/scenarios/3/status").and_then(Value::as_str),
 		Some("unsupported")
 	);
 	assert_eq!(
-		memsearch.pointer("/scenarios/1/elf_position").and_then(Value::as_str),
-		Some("untested")
+		memsearch.pointer("/capabilities/4/capability").and_then(Value::as_str),
+		Some("markdown_source_store_prompt_jobs")
 	);
+	assert_eq!(memsearch.pointer("/capabilities/4/status").and_then(Value::as_str), Some("pass"));
+}
+
+fn assert_claude_mem_first_generation_records(claude_mem: &Value) {
 	assert_eq!(claude_mem.pointer("/capabilities/1/status").and_then(Value::as_str), Some("real"));
 	assert_eq!(
 		claude_mem.pointer("/capabilities/3/capability").and_then(Value::as_str),
 		Some("repository_progressive_disclosure")
 	);
+	assert_eq!(claude_mem.pointer("/capabilities/4/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
-		claude_mem.pointer("/capabilities/4/status").and_then(Value::as_str),
-		Some("not_encoded")
+		claude_mem.pointer("/capabilities/6/status").and_then(Value::as_str),
+		Some("blocked")
+	);
+	assert_eq!(claude_mem.pointer("/suites/0/status").and_then(Value::as_str), Some("not_encoded"));
+	assert_eq!(claude_mem.pointer("/suites/1/status").and_then(Value::as_str), Some("blocked"));
+	assert!(
+		claude_mem
+			.pointer("/suites/1/evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|evidence| evidence.contains("fixture-backed progressive-disclosure")
+				&& evidence.contains("viewer/operator workflow remains blocked"))
+	);
+	assert_eq!(claude_mem.pointer("/suites/2/status").and_then(Value::as_str), Some("blocked"));
+	assert!(
+		claude_mem
+			.pointer("/suites/2/evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|evidence| evidence.contains("hook capture remains blocked"))
 	);
 	assert_eq!(
 		claude_mem.pointer("/scenarios/0/status").and_then(Value::as_str),
 		Some("wrong_result")
 	);
-	assert_eq!(claude_mem.pointer("/scenarios/1/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		claude_mem.pointer("/scenarios/1/scenario_id").and_then(Value::as_str),
+		Some("retrieval_repair_artifact_path")
+	);
+	assert_eq!(
+		claude_mem.pointer("/scenarios/1/status").and_then(Value::as_str),
+		Some("wrong_result")
+	);
+	assert!(
+		claude_mem
+			.pointer("/scenarios/1/evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|evidence| evidence.contains("rerun/inspection targets")
+				&& evidence.contains("tmp/live-baseline/claude-mem-checks.json"))
+	);
+	assert_eq!(claude_mem.pointer("/scenarios/2/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(claude_mem.pointer("/scenarios/4/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(claude_mem.pointer("/scenarios/5/status").and_then(Value::as_str), Some("blocked"));
 }
 
 fn assert_graphiti_zep_adapter(adapter: &Value) {
@@ -1901,6 +1988,8 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 		competitor_matrix
 			.contains("broader live suites remain `wrong_result`, `blocked`, or `not_encoded`")
 	);
+	assert!(competitor_matrix.contains("claude-mem work_resume remains `not_encoded`"));
+	assert!(!competitor_matrix.contains("claude-mem `wrong_result`, OpenViking work_resume"));
 	assert!(external_manifest.contains(
 		"The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible."
 	));
@@ -2195,15 +2284,20 @@ fn assert_trace_replay_adoption_json(adoption: &Value) -> Result<()> {
 
 fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	let projects = array_at(matrix, "/project_matrix")?;
-	let qmd = find_by_field(projects, "/project", "qmd")?;
-	let mem0 = find_by_field(projects, "/project", "mem0/OpenMemory")?;
-	let openviking = find_by_field(projects, "/project", "OpenViking")?;
 	let scenarios = array_at(matrix, "/scenario_matrix")?;
-	let retrieval_debug = find_by_field(scenarios, "/scenario_id", "retrieval_debug")?;
-	let operator_debug = find_by_field(scenarios, "/scenario_id", "operator_debugging")?;
-	let context_trajectory = find_by_field(scenarios, "/scenario_id", "context_trajectory")?;
 
 	assert_competitor_strength_matrix_manifest_counts(matrix);
+	assert_competitor_strength_matrix_project_json(projects)?;
+	assert_competitor_strength_matrix_scenario_json(scenarios)?;
+
+	Ok(())
+}
+
+fn assert_competitor_strength_matrix_project_json(projects: &[Value]) -> Result<()> {
+	let qmd = find_by_field(projects, "/project", "qmd")?;
+	let mem0 = find_by_field(projects, "/project", "mem0/OpenMemory")?;
+	let claude_mem = find_by_field(projects, "/project", "claude-mem")?;
+	let openviking = find_by_field(projects, "/project", "OpenViking")?;
 
 	assert_eq!(
 		qmd.pointer("/current_evidence_class").and_then(Value::as_str),
@@ -2237,6 +2331,13 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 			.and_then(Value::as_str)
 			.is_some_and(|claim| claim.contains("OpenMemory product app import/export"))
 	);
+	assert!(
+		claude_mem
+			.pointer("/unsupported_or_blocked_status/details")
+			.and_then(Value::as_str)
+			.is_some_and(|details| details.contains("rerun/inspection targets")
+				&& details.contains("tmp/live-baseline/claude-mem-checks.json"))
+	);
 	assert_eq!(
 		openviking.pointer("/current_evidence_class").and_then(Value::as_str),
 		Some("live_baseline_only")
@@ -2261,6 +2362,16 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 			.and_then(Value::as_str)
 			.is_some_and(|claim| claim.contains("evidence-bearing same-corpus output pass"))
 	);
+
+	Ok(())
+}
+
+fn assert_competitor_strength_matrix_scenario_json(scenarios: &[Value]) -> Result<()> {
+	let retrieval_debug = find_by_field(scenarios, "/scenario_id", "retrieval_debug")?;
+	let work_resume = find_by_field(scenarios, "/scenario_id", "work_resume")?;
+	let operator_debug = find_by_field(scenarios, "/scenario_id", "operator_debugging")?;
+	let context_trajectory = find_by_field(scenarios, "/scenario_id", "context_trajectory")?;
+
 	assert!(
 		retrieval_debug
 			.pointer("/current_state")
@@ -2270,6 +2381,13 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	assert!(retrieval_debug.pointer("/current_state").and_then(Value::as_str).is_some_and(
 		|state| state.contains("qmd remains stronger on local debug ergonomics not fully scored")
 	));
+	assert!(
+		work_resume
+			.pointer("/current_competitor_evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|claim| claim.contains("claude-mem work_resume remains not_encoded")
+				&& !claim.contains("claude-mem is wrong_result"))
+	);
 	assert!(
 		operator_debug
 			.pointer("/current_elf_evidence")
@@ -2792,9 +2910,9 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("xy844-current-worktree"));
 	assert!(markdown.contains("Existing live-baseline reports remain valid"));
 	assert!(markdown.contains("### Adapter Scenario Judgments"));
-	assert!(markdown.contains("ELF scenario positions: `wins=8, ties=9, loses=1, untested=12`"));
+	assert!(markdown.contains("ELF scenario positions: `wins=9, ties=9, loses=1, untested=17`"));
 	assert!(markdown.contains(
-		"Scenario comparison outcomes: `win=8, tie=9, loss=1, not_tested=8, blocked=2, non_goal=2`"
+		"Scenario comparison outcomes: `win=9, tie=9, loss=1, not_tested=8, blocked=6, non_goal=3`"
 	));
 	assert!(markdown.contains("| `claude_mem_live_baseline` | `same_corpus_retrieval`"));
 	assert!(markdown.contains("| `memsearch_live_baseline` | `ttl_expiry_lifecycle`"));
@@ -2818,6 +2936,7 @@ fn external_adapter_markdown_renders_nonzero_scenario_losses() -> Result<()> {
 		.ok_or_else(|| eyre::eyre!("missing agentmemory adapter"))?;
 
 	set_json_pointer(adapter, "/scenarios/0/elf_position", serde_json::json!("loses"))?;
+	set_json_pointer(adapter, "/scenarios/0/comparison_outcome", serde_json::json!("loss"))?;
 	set_json_pointer(
 		&mut report,
 		"/external_adapters/summary/scenario_position_counts",
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 000e7dd1..07ef05ad 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -9,7 +9,8 @@ Inputs: `2026-06-11-measurement-coverage-audit.md`,
 `2026-06-11-qmd-openviking-strength-profile-report.md`,
 `2026-06-11-temporal-history-competitor-gap-report.md`,
 `2026-06-11-graph-rag-scored-smoke-adapter-report.md`,
-`2026-06-11-mem0-openmemory-history-ui-export-report.md`, and
+`2026-06-11-mem0-openmemory-history-ui-export-report.md`,
+`2026-06-11-first-generation-oss-continuity-source-store-report.md`, and
 `2026-06-10-production-adoption-refresh.md`.
 Depends on: `docs/spec/real_world_agent_memory_benchmark_v1.md` and the current
 external adapter manifest.
@@ -47,10 +48,14 @@ The remaining caveats are material:
   ergonomics as stronger than ELF's default stress report, while expansion, fusion,
   and rerank remain untested. XY-932 adds a narrow live operator-debug slice where
   ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory
-  UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-933
-  adds an ELF live capture/write-policy self-check, but agentmemory capture breadth
-  is blocked by mocked/in-memory storage and claude-mem hook/viewer capture remains
-  untested.
+  UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-925
+  now adds fixture-backed first-generation OSS prompt coverage and typed blockers for
+  agentmemory durable continuity, memsearch Markdown source-store/debug jobs, and
+  claude-mem progressive-disclosure, retrieval-repair, hook, and viewer/operator
+  surfaces; those rows still do not create live external real-world suite passes.
+  XY-933 adds an ELF live capture/write-policy self-check, but agentmemory capture
+  breadth is blocked by mocked/in-memory storage and claude-mem hook/viewer capture
+  remains blocked until Docker-contained hook/viewer evidence exists.
 
 ## Evidence Classes
 
@@ -80,6 +85,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | `cargo make real-world-memory-live-adapters` | `2026-06-11-capture-write-policy-live-report.md` | ELF live capture/write-policy jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage; qmd remains not_encoded, agentmemory is blocked, and claude-mem is untested for capture breadth. |
 | `cargo make real-world-job-operator-ux-live-adapters` | `tmp/real-world-job/operator-ux-live-adapters/summary.json` | The narrow live operator-debug slice scores ELF as pass and qmd as wrong_result: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; both systems expose replay commands and repair-action guidance. |
 | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `2026-06-11-first-generation-oss-adapter-promotion-report.md` | mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result. |
+| `cargo make real-world-first-generation-oss` | `2026-06-11-first-generation-oss-continuity-source-store-report.md` | First-generation OSS fixture slice reports 6 jobs: 4 pass, 2 blocked, full evidence/source-ref/quote coverage, and manifest scenario outcomes across win, tie, loss, not_tested, blocked, and non_goal without promoting smoke evidence into live suite passes. |
 | `cargo make openmemory-ui-export-readback` | `2026-06-11-mem0-openmemory-history-ui-export-report.md` | mem0 local OSS passes preference correction history, entity-scoped personalization, local `get_all` export-style readback, and deletion audit history; OpenMemory export-helper setup emits a separate blocked artifact with `DOCKER_UNAVAILABLE_IN_BASELINE_RUNNER`, and hosted Platform export remains non-goal. |
 | `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke` | `2026-06-11-temporal-history-competitor-gap-report.md` | Graphiti/Zep temporal smoke remains blocked by `provider_api_key_missing`. |
 | `cargo make graphify-docker-graph-report-smoke` | `2026-06-11-graph-rag-scored-smoke-adapter-report.md` | graphify reaches tiny Docker graph/report scoring but remains wrong_result. |
@@ -91,15 +97,15 @@ results, or lifecycle failures into one aggregate leaderboard.
 | Scenario | ELF outcome | Evidence classes | Measured claim | Follow-up |
 | --- | --- | --- | --- | --- |
 | Source-of-truth rebuild and evidence-bound writes | `win` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF has the strongest measured source-of-truth and rebuild story: Postgres is authoritative, Qdrant is rebuildable, trust-source jobs pass, and production restore/rebuild proof exists. | None |
-| Work resume and coding-agent continuity | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF and qmd both pass encoded live `work_resume` jobs; agentmemory, claude-mem, and OpenViking continuity strengths remain blocked or not encoded. | XY-925, XY-928 |
+| Work resume and coding-agent continuity | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF and qmd both pass encoded live `work_resume` jobs. XY-925 selects agentmemory's next durable local path but keeps it blocked until the SDK KV/index and observation log survive a fresh process; claude-mem and OpenViking continuity strengths remain blocked or not encoded. | XY-928 |
 | Project decisions and reversals | `tie` | `fixture_backed`, `live_real_world`, `research_gate`, `not_encoded` | ELF and qmd both pass encoded `project_decisions` jobs; Letta-style core/archival decision memory is not tested. | XY-927 |
 | Retrieval quality | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF and qmd both pass encoded live retrieval and stress/same-corpus retrieval evidence. | XY-923 |
 | Retrieval quality and local debug UX | `loss` | `live_baseline_only`, `research_gate`, `wrong_result`, `not_encoded` | The XY-923 trace/replay report scores qmd stronger on immediate top-10 candidate artifacts and short CLI replay commands. ELF keeps useful service trace/admin replay surfaces, and expansion, fusion, rerank-on, and candidate-drop diagnostics remain untested. | XY-923 |
 | Memory evolution and temporal history | `loss` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `wrong_result`, `blocked` | ELF fixture memory evolution passes, but live ELF passes only delete/TTL and reports five wrong_result jobs where current-vs-historical state is not reconciled. The mem0 local OSS preference-correction history scenario is now measured and is also an ELF loss. | XY-905 |
 | Consolidation/proposal review | `not_tested` | `fixture_backed`, `not_encoded` | ELF fixture consolidation passes, but live consolidation proposal generation and review-action scoring are not encoded. | XY-926 |
 | Knowledge page compilation | `not_tested` | `fixture_backed`, `live_real_world`, `wrong_result`, `research_gate`, `not_encoded` | ELF fixture knowledge pages pass, but live knowledge compilation is not encoded; graphify reaches a tiny scored smoke and remains wrong_result. | XY-926, XY-929 |
-| Operator debugging/viewer UX | `win` | `fixture_backed`, `live_real_world`, `blocked`, `not_encoded` | ELF now has a narrow live operator-debug win over qmd on trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence. ELF ties qmd on replay-command availability and repair-action clarity. OpenMemory UI/export remains blocked and claude-mem UI remains not encoded, so this is not a broad viewer-product superiority claim. | XY-926 |
-| Capture/write policy and redaction | `not_tested` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF live capture/write-policy self-check jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage. qmd remains `not_encoded`; agentmemory comparison is `blocked`; claude-mem capture breadth is `not_encoded`, so no broad capture-hook superiority claim is allowed. | XY-933, XY-925 |
+| Operator debugging/viewer UX | `win` | `fixture_backed`, `live_real_world`, `blocked`, `not_encoded` | ELF now has a narrow live operator-debug win over qmd on trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence. ELF ties qmd on replay-command availability and repair-action clarity. XY-925 adds claude-mem progressive-disclosure and retrieval-repair prompt coverage, but claude-mem viewer/operator workflows and OpenMemory UI/export remain blocked, so this is not a broad viewer-product superiority claim. | XY-926 |
+| Capture/write policy and redaction | `not_tested` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF live capture/write-policy self-check jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage. qmd remains `not_encoded`; agentmemory and claude-mem hook-capture comparisons remain `blocked` until Docker-contained hook observations and write-policy/viewer readback artifacts exist, so no broad capture-hook superiority claim is allowed. | XY-933, XY-925 |
 | Production ops, restore, backfill, and rebuild | `win` | `live_baseline_only`, `blocked` | ELF has the strongest measured local production-operation story: provider synthetic, stress, resumable backfill, backup/restore, and Qdrant rebuild evidence. | XY-930 |
 | Private corpus and provider boundaries | `blocked` | `blocked` | Private production profile fails closed without an operator-owned manifest; provider-backed production-ops gates require explicit credentials. | XY-930 |
 | Personalization and scoped preferences | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `not_encoded` | ELF and qmd both pass the single encoded live personalization job. mem0 local OSS now passes entity-scoped personalization, so scoped preference behavior is a measured tie; preference correction history remains a separate ELF loss. | XY-927 |
@@ -114,9 +120,9 @@ results, or lifecycle failures into one aggregate leaderboard.
 | XY-905 | P0 | Backlog | Live temporal reconciliation answer and trace contract. |
 | XY-923 | P0 | Backlog | qmd trace-level replay and wrong-result diagnostics. |
 | XY-924/XY-931 | P0 | Encoded local OSS history; UI/export setup blocker measured | mem0/OpenMemory local OSS history and SDK export-style readback are measured; OpenMemory UI/export has a blocked export-helper setup probe and still needs a dedicated compose/import path before any product-UX comparison. |
-| XY-925 | P1 | Backlog | First-generation OSS continuity and source-store adapters. |
+| XY-925 | P1 | Fixture slice encoded; runtime paths still blocked | First-generation OSS prompt coverage and typed blockers are recorded for agentmemory, memsearch, and claude-mem; durable agentmemory hooks and claude-mem viewer/operator runs still need runtime adapters. |
 | XY-926 | P1 | Backlog | Live consolidation and knowledge-page suites; broad operator-debugging remains dependent on OpenMemory and claude-mem UI runners. |
-| XY-933 | P1 | Live ELF self-check encoded | Capture/write-policy redaction, exclusion, source-id, evidence-binding, and no-leak scoring for ELF; durable agentmemory/claude-mem capture-hook comparison remains blocked or untested. |
+| XY-933 | P1 | Live ELF self-check encoded | Capture/write-policy redaction, exclusion, source-id, evidence-binding, and no-leak scoring for ELF; durable agentmemory/claude-mem capture-hook comparison remains blocked. |
 | XY-927 | P1 | Backlog | Letta-style core-vs-archival memory comparison. |
 | XY-928 | P1 | Encoded blocked fixtures | OpenViking context-trajectory and hierarchy benchmark is encoded but blocked until evidence-bearing same-corpus and staged artifacts exist. |
 | XY-929 | P2 | Backlog | Graph/RAG adapters beyond scored smokes. |
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index c2cdc983..4fb3b15e 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -77,9 +77,9 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | qmd | Local retrieval-debug workflow with transparent CLI indexing, querying, expansion, fusion, and rerank ergonomics. | `live_real_world`; supporting `live_baseline_only` and `research_gate`. | `wrong_result` full live sweep: `cargo make real-world-memory-live-adapters`, `tmp/real-world-memory/live-adapters/qmd-report.md`; targeted retrieval suites pass; the narrow operator-debug slice ties replay commands but is `wrong_result` for trace hydration and candidate-drop visibility. | `not_encoded`: deep profile and non-retrieval live behavior are not encoded; memory_evolution is `wrong_result`. | Keep qmd deep retrieval/debug profiling separate from the narrow operator-debug live slice; no broad ELF-over-qmd or qmd-over-ELF claim is allowed until comparable stage artifacts exist. | Weighted fusion, rerank explanation, local debug knobs, and command-line replay. |
 | agentmemory | Coding-agent continuity, MCP/REST packaging, viewer workflow, and durable cross-agent memory lifecycle. | `live_baseline_only`. | `lifecycle_fail`: `ELF_BASELINE_PROJECTS=agentmemory cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`. | `blocked`: durable cold-start, capture-hook persistence, and real-world adapter coverage are missing; current Docker baseline uses a process-local StateKV Map and in-memory index. | Durable local adapter with update, delete, cold-start reload, work_resume, capture/write-policy, and lifecycle-staleness jobs. | Cross-agent hooks, packaging, continuity scenarios, and viewer affordances. |
 | mem0/OpenMemory | Memory lifecycle, personalization, hosted/OpenMemory UI ergonomics, and optional graph memory. | `live_baseline_only`. | `pass`: fresh scoped run `cargo make openmemory-ui-export-readback`, `tmp/live-baseline/live-baseline-report.json`, with mem0 `8/8` local SDK checks passing; `blocked`: OpenMemory export-helper setup probe emits `tmp/live-baseline/mem0-openmemory-ui-export.json` with `DOCKER_UNAVAILABLE_IN_BASELINE_RUNNER`. | `blocked`: OpenMemory UI/export cannot be compared until a compose/import path loads the same corpus into the product app; `unsupported`: hosted Platform export; `not_encoded`: optional graph memory and real-world prompt adapter coverage. | Add a Docker-contained OpenMemory product app import/export path, then score browser/API readback separately from SDK `get_all`; keep hosted Platform and graph memory opt-in/non-goal unless explicitly enabled. | Entity-scoped history, lifecycle surfaces, async update ergonomics, and OpenMemory inspection UX. |
-| memsearch | Markdown-first canonical store with rebuildable local index and practical hybrid retrieval. | `live_baseline_only`. | `pass`: fresh scoped run `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`, with memsearch `4/4` local checks passing. | `not_encoded`: real-world source-of-truth, retrieval, and memory-evolution prompt adapters are not encoded; TTL/expiry is unsupported by the current CLI path. | Score source-of-truth and retrieval-debug real-world jobs over the canonical Markdown store; keep TTL/expiry as unsupported unless a comparable path exists. | Canonical markdown store, local reindex clarity, and user-inspectable source files. |
+| memsearch | Markdown-first canonical store with rebuildable local index and practical hybrid retrieval. | `live_baseline_only`; XY-925 `fixture_backed`. | `pass`: fresh scoped run `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`, with memsearch `4/4` local checks passing. XY-925 adds fixture-backed source-store and retrieval-debug prompts through `cargo make real-world-first-generation-oss`, `tmp/real-world-memory/first-generation-oss/report.json`. | `not_encoded`: no live memsearch runtime adapter executes real-world prompt scoring; memory-evolution prompt adapters remain not encoded; TTL/expiry is unsupported by the current CLI path. | Promote the fixture-backed source-store and retrieval-debug prompts into a live memsearch real-world adapter before any suite-level win/loss claim; keep TTL/expiry as unsupported unless a comparable path exists. | Canonical markdown store, local reindex clarity, and user-inspectable source files. |
 | OpenViking | Filesystem-like context trajectory, hierarchical retrieval, and staged context loading. | `live_baseline_only`; supporting `fixture_backed` and `research_gate`. | `wrong_result`: `ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`; `blocked`: checked-in `context_trajectory` fixtures cover staged retrieval, hierarchy selection, and recursive/context expansion gates. | `blocked`: hierarchical context trajectory is encoded but blocked until same-corpus evidence ids match and staged artifacts are materialized. | Make evidence-bearing same-corpus output pass, then score staged trajectory and hierarchy expansion. | `viking://`-style context model, trajectory readback, and staged retrieval planning. |
-| claude-mem | Progressive disclosure, automatic capture loop, repository-local lifecycle, and local viewer workflow. | `live_baseline_only`. | `wrong_result`: `ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`. | `not_encoded`: progressive-disclosure and hook/viewer capture real-world jobs are not encoded. | Durable repository-backed work_resume, operator_debugging_ux, capture/write-policy, and progressive-disclosure jobs. | Progressive disclosure, automatic capture review loops, and local viewer/operator comfort. |
+| claude-mem | Progressive disclosure, automatic capture loop, repository-local lifecycle, and local viewer workflow. | `live_baseline_only`; XY-925 `fixture_backed`. | `wrong_result`: `ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`. XY-925 adds fixture-backed progressive-disclosure and retrieval-repair prompts through `cargo make real-world-first-generation-oss`, `tmp/real-world-memory/first-generation-oss/report.json`. | `blocked`: hook capture and viewer/operator workflows still lack a Docker-contained runner; retrieval remains `wrong_result`, and the repair prompt lists rerun/inspection targets `tmp/live-baseline/claude-mem.log` and `tmp/live-baseline/claude-mem-checks.json`. | Promote durable repository-backed work_resume, operator_debugging_ux, capture/write-policy, and progressive-disclosure prompts into a live claude-mem adapter before any broader UX claim. | Progressive disclosure, automatic capture review loops, and local viewer/operator comfort. |
 | RAGFlow | Full RAG application workflow with document, chunk, and reference evidence handles. | `research_gate`. | `blocked`: `ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make ragflow-docker-smoke`, `tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json`. | `blocked`: Docker resource envelope and adapter output mapping still need proof. | XY-885 tiny Docker evidence-smoke adapter mapping `reference.chunks` to scored evidence. | Document/chunk references, resource-envelope reporting, and RAG app evidence handles. |
 | LightRAG | Lightweight graph/RAG context export with source file-path citation shape. | `research_gate`. | `blocked`: `ELF_LIGHTRAG_CONTEXT_START=1 cargo make lightrag-docker-context-smoke`, `tmp/real-world-memory/lightrag-context/summary.json`. | `blocked`: Docker service setup and context export are not proven. | XY-886 Docker context-export adapter with explicit provider config and source citation mapping. | Context-only query modes, graph-aware retrieval layout, and file-path citation readback. |
 | GraphRAG | GraphRAG indexing, graph summaries, and document/text-unit evidence tables. | `research_gate`. | `blocked`: `ELF_GRAPHRAG_SMOKE_RUN=1 cargo make graphrag-docker-smoke`, `tmp/real-world-memory/graphrag-smoke/summary.json`. | `blocked`: indexing resource envelope and source citation mapping are not proven. | XY-887 cost-bounded Docker adapter over a tiny corpus and scored output tables. | Graph summary artifacts, local/global search separation, and source table evidence mapping. |
@@ -96,14 +96,14 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | Scenario | Current ELF evidence | Strongest competitor/reference | Current competitor evidence | Next measurement before claim |
 | --- | --- | --- | --- | --- |
 | Retrieval/debug | Fixture retrieval passes; live retrieval passes. | qmd. | qmd live retrieval passes and live baseline passes, but full-suite live status is `wrong_result`. | Run qmd deep profile and ELF/qmd trace-level replay with expansion, fusion, rerank, and candidate-drop diagnostics. |
-| Work resume | Fixture and live work_resume pass. | agentmemory, claude-mem, OpenViking. | agentmemory `lifecycle_fail`, claude-mem `wrong_result`, OpenViking work_resume `not_encoded`. | Encode durable work_resume adapters or keep each blocked with lifecycle/setup evidence. |
+| Work resume | Fixture and live work_resume pass. | agentmemory, claude-mem, OpenViking. | agentmemory `lifecycle_fail`; claude-mem work_resume remains `not_encoded` pending a durable repository-backed adapter; OpenViking work_resume is `not_encoded`. | Encode durable work_resume adapters or keep each blocked with lifecycle/setup evidence. |
 | Project decisions | Fixture and live project_decisions pass. | qmd, Letta. | qmd live project_decisions pass; Letta is `research_gate` `not_encoded`. | Add Letta core/archival decision jobs only after a contained export path exists. |
-| Source-of-truth | Fixture and live trust_source_of_truth pass. | memsearch. | memsearch canonical-store, reindex, delete, and reload smoke now passes, but source-of-truth real_world_job prompts are `not_encoded`. | Score memsearch source-of-truth rebuild/reload jobs before any suite-level win/loss claim. |
+| Source-of-truth | Fixture and live trust_source_of_truth pass. | memsearch. | memsearch canonical-store, reindex, delete, and reload smoke passes; XY-925 fixture-backed source-of-truth prompts now cover the canonical Markdown rebuild/reload boundary, but no live memsearch prompt adapter pass is claimed. | Promote memsearch source-of-truth rebuild/reload prompts into a live adapter before any suite-level win/loss claim. |
 | Temporal/current-vs-historical memory | Fixture memory_evolution passes; live memory_evolution is `wrong_result`. | Graphiti/Zep, mem0/OpenMemory. | Graphiti/Zep is `research_gate` `blocked`; mem0/OpenMemory local OSS preference history, entity scope, deletion audit, and SDK `get_all` now pass; OpenMemory UI/export is blocked by the export-helper setup probe; graph-memory scenarios are `not_encoded`. | Fix ELF/qmd live memory_evolution evidence links, add OpenMemory product app import/export readback, and run XY-888. |
 | Consolidation | Fixture consolidation passes; live consolidation is `not_encoded`. | agentmemory, managed-memory references, llm-wiki. | No manifest project has live consolidation scoring. | Run reviewable consolidation proposal generation with source refs, unsupported-claim flags, and audit transitions. |
 | Knowledge pages | Fixture knowledge_compilation passes; live knowledge_compilation is `not_encoded`. | llm-wiki, gbrain, GraphRAG, graphify. | llm-wiki and gbrain are `research_gate` `not_encoded` or `blocked`; GraphRAG is `blocked`; graphify has a tiny scored smoke `wrong_result`. | Encode live derived-page rebuild/lint scoring and run contained knowledge/RAG adapters only after setup proof. |
-| Operator debugging | Fixture operator_debugging_ux passes, and the narrow live operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity. | qmd, claude-mem, OpenMemory. | qmd ties replay-command availability and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence; claude-mem and OpenMemory UX remain `not_encoded` or blocked. | Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
-| Capture/write policy | Fixture capture_integration passes; ELF live capture_integration passes 4/4 with zero redaction leaks, source ids, write-policy audit, and evidence binding. | agentmemory, claude-mem. | agentmemory capture is `blocked` by mocked/in-memory storage; claude-mem hook/viewer capture is `not_encoded`. | Run durable agentmemory and claude-mem capture-hook jobs proving redaction, exclusion, evidence binding, source ids, and no secret leakage. |
+| Operator debugging | Fixture operator_debugging_ux passes, and the narrow live operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity. | qmd, claude-mem, OpenMemory. | qmd ties replay-command availability and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence. XY-925 adds claude-mem progressive-disclosure and retrieval-repair prompt coverage, while claude-mem viewer/operator and OpenMemory UI/export remain blocked. | Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
+| Capture/write policy | Fixture capture_integration passes; ELF live capture_integration passes 4/4 with zero redaction leaks, source ids, write-policy audit, and evidence binding. | agentmemory, claude-mem. | agentmemory and claude-mem hook capture remain `blocked` until Docker-contained hook observations and write-policy/viewer readback artifacts exist. | Run durable agentmemory and claude-mem capture-hook jobs proving redaction, exclusion, evidence binding, source ids, and no secret leakage. |
 | Production ops | Fixture production_ops has 4 pass and 2 blocked; live production_ops is `blocked`; production adoption has provider/backfill/restore evidence. | ELF production gate, qmd, RAG/RAGFlow resource gates. | qmd live production_ops is `blocked`; RAG/resource gates are `research_gate` `blocked`. | Rerun private-corpus and credentialed gates only when operator-owned manifest and credentials exist. |
 | Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory and Letta personalization are `not_encoded`. | Encode scoped preference readback for mem0/OpenMemory and Letta before personalization superiority claims. |
 | Context trajectory | ELF has trace direction but no comparable staged trajectory scenario. | OpenViking. | OpenViking setup is pinned, same-corpus retrieval is `wrong_result`, and staged/hierarchy/recursive trajectory jobs are encoded as `blocked`. | Make OpenViking evidence-bearing retrieval pass, then score staged context trajectory outputs. |
@@ -121,9 +121,9 @@ now explicit:
 | agentmemory durable lifecycle adapter | `[ELF benchmark P0] Make external adapters lifecycle-durable and fail-typed` | yes | Durable local adapter path selection. | Update, delete, cold-start reload, work_resume, and capture/write-policy jobs. |
 | agentmemory/claude-mem capture-hook breadth | Follow-up after XY-933 | yes | Docker-contained hook/viewer capture path with durable artifacts. | Source ids, redaction/exclusion audit, evidence-bound output, and typed blocker reporting. |
 | mem0/OpenMemory history and UI coverage | New adapter repair issue | yes | Comparable local OSS path for history/UI/readback evidence. | Preference/entity history, deletion audit readback, personalization, OpenMemory inspection/export, and optional graph-context jobs. |
-| memsearch source-of-truth real-world coverage | New adapter repair issue | yes | Real-world prompt adapter over the canonical Markdown store. | Source-of-truth rebuild/reload jobs and retrieval-debug jobs that preserve baseline reindex/update/delete evidence without converting it into suite pass claims. |
+| memsearch source-of-truth live adapter coverage | New adapter repair issue | yes | Fixture-backed source-store and retrieval-debug prompts are encoded by XY-925; live prompt execution remains missing. | Runtime adapter execution for the existing source-of-truth rebuild/reload and retrieval-debug prompt jobs without converting baseline smoke into suite pass claims. |
 | OpenViking context trajectory | XY-928 encoded blocked fixtures | yes | Evidence-bearing same-corpus retrieval output and staged artifacts. | Hierarchical expansion, staged trajectory, recursive/context expansion, and comparable ELF trace/session evidence jobs. |
-| claude-mem progressive disclosure | New adapter issue | yes | Durable repository path and progressive-disclosure output contract. | Work resume, operator debugging, capture/write-policy, and progressive disclosure jobs. |
+| claude-mem hook/viewer runtime coverage | New adapter issue | yes | Fixture-backed progressive-disclosure and retrieval-repair prompts are encoded by XY-925; hook capture and viewer/operator workflows remain blocked. | Work resume, operator debugging, capture/write-policy, viewer/operator, and live progressive-disclosure adapter execution. |
 | RAGFlow evidence smoke | XY-885 | yes | Resource envelope accepted for tiny Docker smoke. | `reference.chunks` to benchmark evidence mapping. |
 | LightRAG context export | XY-886 | yes | Docker service setup and explicit provider config. | Retrieved context export and source file-path citations. |
 | GraphRAG cost-bounded adapter | XY-887 | yes | Tiny corpus cost/resource envelope. | Document, text-unit, graph-summary, and citation output tables. |
diff --git a/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md b/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md
new file mode 100644
index 00000000..1484abcf
--- /dev/null
+++ b/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md
@@ -0,0 +1,99 @@
+# First-Generation OSS Continuity and Source-Store Report - June 11, 2026
+
+Goal: Expand first-generation OSS adapter coverage for durable continuity,
+canonical source-store, retrieval-debug, progressive-disclosure, hook capture, and
+viewer/operator surfaces without promoting smoke evidence into real-world suite pass
+evidence.
+Read this when: You need the XY-925 result for agentmemory, memsearch, and
+claude-mem after the XY-898 first-generation adapter promotion.
+Inputs: `cargo make real-world-first-generation-oss`, the external adapter manifest,
+and the June 11 first-generation OSS adapter promotion report.
+Outputs: Fixture-backed prompt coverage, scenario-level comparison outcomes, typed
+blockers, and updated claim boundaries.
+
+## Scope Boundary
+
+This is benchmark/report coverage only. It does not change ELF retrieval behavior,
+external project code, or baseline adapter runtime behavior.
+
+The new first-generation fixture slice lives outside
+`apps/elf-eval/fixtures/real_world_memory/`, so it is not counted in the aggregate ELF
+real-world suite. The slice exists to encode comparable prompt shapes and blockers for
+external OSS adapter surfaces while the external adapter manifest keeps evidence
+classes explicit.
+
+## Fresh Run
+
+| Command | Result | Artifact |
+| --- | --- | --- |
+| `cargo make real-world-first-generation-oss` | pass | `tmp/real-world-memory/first-generation-oss/report.json` |
+
+Generated report summary:
+
+| Metric | Value |
+| --- | ---: |
+| Jobs | 6 |
+| Encoded suites | 4 |
+| Pass | 4 |
+| Blocked | 2 |
+| Evidence coverage | 12/12 |
+| Source-ref coverage | 12/12 |
+| Quote coverage | 12/12 |
+| Operator-debug jobs | 2 |
+| Raw SQL needed | 0 |
+
+External adapter manifest scenario outcomes now preserve every normalized outcome:
+
+| Outcome | Count |
+| --- | ---: |
+| win | 9 |
+| tie | 8 |
+| loss | 1 |
+| not_tested | 8 |
+| blocked | 6 |
+| non_goal | 3 |
+
+## Scenario Additions
+
+| Project | Scenario | Status | Outcome | Evidence |
+| --- | --- | --- | --- | --- |
+| agentmemory | `durable_work_resume_local_path` | `blocked` | `blocked` | The selected comparable path is a Docker-local session directory that must persist the SDK KV/index and observation log across a fresh process; the row stays blocked until that survival is demonstrated. |
+| agentmemory | `capture_write_policy_hooks` | `blocked` | `blocked` | Live hook observations and write-policy audit evidence are required before scoring capture/write-policy jobs. |
+| memsearch | `markdown_source_store_rebuild_reload_prompt` | `pass` | `not_tested` | The prompt fixture covers canonical Markdown as source of truth and `memsearch index` as derived rebuild/reload behavior. |
+| memsearch | `markdown_retrieval_debug_prompt` | `pass` | `not_tested` | The prompt fixture covers CLI replay plus Markdown source inspection while keeping staged trace bundles not encoded. |
+| claude-mem | `retrieval_repair_artifact_path` | `wrong_result` | `win` | The repair prompt preserves the same-corpus retrieval miss and names rerun/inspection targets `tmp/live-baseline/claude-mem.log` and `tmp/live-baseline/claude-mem-checks.json`. |
+| claude-mem | `progressive_disclosure_prompt` | `pass` | `not_tested` | The prompt fixture covers repository search-to-detail/source hydration on durable SQLite. |
+| claude-mem | `hook_capture_viewer_workflow` | `blocked` | `blocked` | The current Docker baseline uses repository classes only and does not execute hooks, timeline capture, or viewer workflows. |
+| claude-mem | `viewer_operator_workflow` | `blocked` | `blocked` | A fair viewer/operator comparison needs Docker-contained readback over the same durable SQLite corpus. |
+
+## Claim Boundaries
+
+Allowed:
+
+- agentmemory has a selected durable local path for future work-resume and
+  capture/write-policy scoring.
+- memsearch now has checked-in source-store and retrieval-debug prompt coverage over
+  the canonical Markdown store.
+- claude-mem has checked-in progressive-disclosure and retrieval-repair prompt
+  coverage for the Docker-contained repository path.
+- claude-mem hook capture and viewer/operator workflows remain typed blockers.
+
+Not allowed:
+
+- Do not claim agentmemory durable continuity from the in-memory same-corpus smoke.
+- Do not claim memsearch full real-world suite parity from Markdown reindex/reload
+  smoke or fixture-backed prompt coverage.
+- Do not claim claude-mem retrieval passed; same-corpus retrieval remains
+  `wrong_result`.
+- Do not claim claude-mem hooks or viewer workflows pass from repository
+  class-level hydration evidence.
+
+## Touched Artifacts
+
+- `Makefile.toml`: adds `cargo make real-world-first-generation-oss`.
+- `apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/`:
+  checked-in prompt and blocker fixtures.
+- `apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json`:
+  updated scenario rows and explicit `comparison_outcome` values.
+- `docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json`:
+  machine-readable companion report.
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index efd546a1..0974dcb6 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -165,9 +165,9 @@ records `unique_project_names: 17` for the full project list including ELF.
 | qmd | `live_real_world` plus `live_baseline_only` | Fresh full sweep is five passes behind ELF because qmd misses the delete/TTL tombstone job and keeps capture/write-policy jobs typed `not_encoded`; same-corpus baseline passes; narrow operator-debug live slice ties replay commands but is `wrong_result` for trace hydration and candidate-drop visibility. | Deep retrieval-debug ergonomics and trace replay beyond the narrow operator-debug slice. | qmd/ELF deep retrieval-debug profile with expansion, fusion, rerank, and dropped-candidate traces. |
 | agentmemory | `live_baseline_only` | `lifecycle_fail`; capture comparison is `blocked` because the Docker baseline uses a process-local StateKV Map and in-memory index, with no durable local session/capture path for source ids, exclusions, write-policy audit, or evidence-bound output. | Durable coding-agent continuity and capture hooks. | Durable lifecycle and work-resume/capture adapter report. |
 | mem0/OpenMemory | `live_baseline_only` | Basic local smoke now passes; history/UI/hosted/graph behavior remains `not_encoded`. | Entity history, lifecycle UI, OpenMemory inspection. | Entity-history, deletion-audit, and UI/export readback report. |
-| memsearch | `live_baseline_only` | Basic canonical Markdown reindex/reload smoke now passes; real-world prompt coverage remains `not_encoded`. | Markdown canonical store and local reindex clarity. | Source-of-truth and retrieval-debug real-world adapter report. |
+| memsearch | `live_baseline_only`; XY-925 `fixture_backed` | Basic canonical Markdown reindex/reload smoke passes, and XY-925 adds fixture-backed source-store and retrieval-debug prompts without claiming a live memsearch adapter pass. | Markdown canonical store and local reindex clarity. | Runtime source-of-truth and retrieval-debug adapter execution over the existing prompt jobs. |
 | OpenViking | `live_baseline_only` plus `fixture_backed` and `research_gate` | Same-corpus retrieval is `wrong_result`; staged retrieval, hierarchy selection, and recursive/context expansion are encoded as blocked fixtures. | Hierarchical staged context trajectory. | Evidence-bearing retrieval fix, then materialized staged trajectory report. |
-| claude-mem | `live_baseline_only` | `wrong_result`; capture breadth is `not_encoded` because hooks, timeline, observations, viewer capture, and automatic capture review were not run against real-world jobs. | Progressive disclosure and automatic capture review. | Work-resume, operator-debugging, and capture/write-policy report. |
+| claude-mem | `live_baseline_only`; XY-925 `fixture_backed` | Same-corpus retrieval remains `wrong_result`; XY-925 adds fixture-backed progressive-disclosure and retrieval-repair prompts, with hook capture and viewer/operator workflows still blocked. | Progressive disclosure and automatic capture review. | Work-resume, operator-debugging, capture/write-policy, and viewer/operator runtime report. |
 | RAGFlow | `research_gate` | `blocked`. | RAG app workflow with document/chunk references. | Tiny Docker evidence-smoke with `reference.chunks` mapped to evidence ids. |
 | LightRAG | `research_gate` | `blocked`. | Graph/RAG context export with source-path citations. | Docker context-export report with explicit provider config and source citation mapping. |
 | GraphRAG | `research_gate` | `blocked`. | Graph summaries and document/text-unit evidence tables. | Cost-bounded Docker adapter report over a tiny corpus. |
diff --git a/docs/guide/benchmarking/index.md b/docs/guide/benchmarking/index.md
index 34fbe8b1..1668aa31 100644
--- a/docs/guide/benchmarking/index.md
+++ b/docs/guide/benchmarking/index.md
@@ -84,6 +84,11 @@ cleanup, use `docs/guide/single_user_production.md`.
   mem0/OpenMemory, memsearch, and claude-mem with fresh scenario-level baseline
   evidence and ELF win/tie/loss/untested positions without converting baseline-only
   evidence into real-world suite wins.
+- `2026-06-11-first-generation-oss-continuity-source-store-report.md`: XY-925
+  follow-up report that adds first-generation OSS fixture-backed prompt coverage and
+  typed blockers for agentmemory durable continuity, memsearch canonical Markdown
+  source-store/debug jobs, and claude-mem progressive-disclosure, retrieval-repair,
+  hook, and viewer/operator surfaces.
 - `2026-06-11-graph-rag-scored-smoke-adapter-report.md`: XY-900 graph/RAG
   scored-smoke adapter report that promotes RAGFlow, LightRAG, GraphRAG,
   Graphiti/Zep, and graphify smoke contracts into scored or typed non-pass
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 5426b5cb..689132a6 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -12,7 +12,7 @@
       "Live temporal reconciliation remains wrong_result for five of six memory_evolution jobs.",
       "Private-corpus production quality is blocked until an operator-owned manifest exists.",
       "Credentialed provider production-ops gates are blocked until explicit provider setup exists.",
-      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures behind same-corpus evidence output and missing staged artifacts. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-933 adds an ELF live capture/write-policy self-check, but agentmemory capture breadth is blocked by mocked/in-memory storage and claude-mem hook/viewer capture remains untested."
+      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and Letta core-vs-archival memory plus graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures behind same-corpus evidence output and missing staged artifacts. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked. XY-925 adds fixture-backed first-generation OSS prompt coverage and typed blockers for agentmemory durable continuity, memsearch Markdown source-store/debug jobs, and claude-mem progressive-disclosure, retrieval-repair, hook, and viewer/operator surfaces without creating live external real-world suite passes. XY-933 adds an ELF live capture/write-policy self-check, but agentmemory and claude-mem hook-capture breadth remain blocked until Docker-contained hook/viewer evidence exists."
     ]
   },
   "evidence_class_terms": [
@@ -61,6 +61,11 @@
       "artifact": "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md",
       "claim": "mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result on same-corpus retrieval."
     },
+    {
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md",
+      "claim": "First-generation OSS fixture slice reports 6 jobs: 4 pass, 2 blocked, full evidence/source-ref/quote coverage, and manifest scenario outcomes across win, tie, loss, not_tested, blocked, and non_goal without promoting smoke evidence into live suite passes."
+    },
     {
       "command": "cargo make openmemory-ui-export-readback",
       "artifact": "docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md",
@@ -103,7 +108,7 @@
         "docs/guide/benchmarking/2026-06-10-production-adoption-refresh.md"
       ],
       "follow_up_issues": [],
-      "caveat": "memsearch canonical Markdown reindex/reload is a useful ergonomics reference, but real-world source-of-truth prompts are not encoded."
+      "caveat": "XY-925 encodes fixture-backed memsearch canonical Markdown source-store prompts, but no live memsearch real_world_job runtime adapter pass is claimed."
     },
     {
       "scenario_id": "work_resume_coding_agent_continuity",
@@ -116,13 +121,13 @@
         "blocked",
         "not_encoded"
       ],
-      "measured_claim": "ELF and qmd both pass the encoded live work_resume jobs. agentmemory, claude-mem, and OpenViking continuity strengths remain blocked or not encoded.",
+      "measured_claim": "ELF and qmd both pass the encoded live work_resume jobs. XY-925 selects agentmemory's durable local path but keeps it blocked until the SDK KV/index and observation log survive a fresh process; claude-mem and OpenViking continuity strengths remain blocked or not encoded.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
-        "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md"
+        "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md",
+        "docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md"
       ],
       "follow_up_issues": [
-        "XY-925",
         "XY-928"
       ],
       "caveat": "The tie is only for encoded live work_resume behavior, not for broad capture hooks or staged context."
@@ -256,17 +261,18 @@
         "blocked",
         "not_encoded"
       ],
-      "measured_claim": "ELF now has a narrow live operator-debug win over qmd on trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence. ELF ties qmd on replay-command availability and repair-action clarity. OpenMemory UI/export remains blocked and claude-mem UI remains not encoded, so this is not a broad viewer-product superiority claim.",
+      "measured_claim": "ELF now has a narrow live operator-debug win over qmd on trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence. ELF ties qmd on replay-command availability and repair-action clarity. XY-925 adds claude-mem progressive-disclosure and retrieval-repair prompt coverage, but claude-mem viewer/operator workflows and OpenMemory UI/export remain blocked, so this is not a broad viewer-product superiority claim.",
       "command_artifacts": [
         "tmp/real-world-job/operator-ux-live-adapters/summary.json",
         "tmp/real-world-job/operator-ux-live-adapters/elf-report.json",
         "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json",
-        "docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md"
+        "docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md",
+        "docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md"
       ],
       "follow_up_issues": [
         "XY-926"
       ],
-      "caveat": "The live slice compares ELF and qmd only; OpenMemory UI/export and claude-mem viewer workflows remain typed blocked or not_encoded until a bounded local runner exists."
+      "caveat": "The live slice compares ELF and qmd only; OpenMemory UI/export and claude-mem viewer workflows remain typed blocked until a bounded local runner exists."
     },
     {
       "scenario_id": "capture_write_policy_redaction",
@@ -279,15 +285,17 @@
         "blocked",
         "not_encoded"
       ],
-      "measured_claim": "ELF live capture/write-policy self-check jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage. qmd remains not_encoded; agentmemory comparison is blocked by mocked/in-memory storage; claude-mem capture breadth is not_encoded because hooks, timeline, observations, viewer capture, and automatic capture review were not run against real-world jobs.",
+      "measured_claim": "ELF live capture/write-policy self-check jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage. qmd remains not_encoded; XY-925 records agentmemory and claude-mem hook capture as typed blockers until Docker-contained hook observations and write-policy/viewer readback artifacts exist.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md",
-        "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md"
+        "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md",
+        "docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md"
       ],
       "follow_up_issues": [
         "XY-933",
-        "XY-925"
+        "XY-925",
+        "XY-926"
       ],
       "caveat": "This is an ELF self-check and qmd not_encoded delta, not a broad capture-breadth win over agentmemory or claude-mem."
     },
@@ -427,8 +435,8 @@
     {
       "issue": "XY-925",
       "priority": "P1",
-      "state": "Backlog",
-      "gap": "First-generation OSS continuity and source-store adapters."
+      "state": "Fixture slice encoded; runtime paths still blocked",
+      "gap": "First-generation OSS prompt coverage and typed blockers are recorded for agentmemory, memsearch, and claude-mem; durable agentmemory hooks and claude-mem viewer/operator runs still need runtime adapters."
     },
     {
       "issue": "XY-926",
diff --git a/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json b/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json
new file mode 100644
index 00000000..f69909b6
--- /dev/null
+++ b/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json
@@ -0,0 +1,140 @@
+{
+  "schema": "elf.first_generation_oss_continuity_source_store_report/v1",
+  "report_id": "xy-925-first-generation-oss-continuity-source-store-2026-06-11",
+  "authority": "XY-925",
+  "created_at": "2026-06-11T00:00:00Z",
+  "scope": "Fixture-backed first-generation OSS prompt coverage and typed blockers for agentmemory, memsearch, and claude-mem without promoting smoke evidence into real-world suite pass evidence.",
+  "validation": {
+    "command": "cargo make real-world-first-generation-oss",
+    "status": "pass",
+    "json_artifact": "tmp/real-world-memory/first-generation-oss/report.json",
+    "markdown_artifact": "tmp/real-world-memory/first-generation-oss/report.md",
+    "summary": {
+      "job_count": 6,
+      "encoded_suite_count": 4,
+      "pass": 4,
+      "blocked": 2,
+      "evidence_coverage": 1.0,
+      "source_ref_coverage": 1.0,
+      "quote_coverage": 1.0,
+      "operator_debug_job_count": 2,
+      "raw_sql_needed_count": 0
+    }
+  },
+  "manifest": {
+    "path": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json",
+    "manifest_id": "real-world-memory-project-adapters-2026-06-11-first-generation-continuity-source-store",
+    "scenario_outcome_counts": {
+      "win": 9,
+      "tie": 8,
+      "loss": 1,
+      "not_tested": 8,
+      "blocked": 6,
+      "non_goal": 3
+    },
+    "scenario_status_counts": {
+      "unsupported": 2,
+      "blocked": 6,
+      "wrong_result": 5,
+      "lifecycle_fail": 1,
+      "pass": 19,
+      "not_encoded": 2
+    }
+  },
+  "scenario_judgments": [
+    {
+      "project": "agentmemory",
+      "scenario_id": "durable_work_resume_local_path",
+      "suite_id": "work_resume",
+      "status": "blocked",
+      "comparison_outcome": "blocked",
+      "evidence": "The selected local path is a Docker-contained session directory that must persist the SDK KV/index and observation log across a fresh process; scoring stays blocked until that persistence is demonstrated.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json"
+    },
+    {
+      "project": "agentmemory",
+      "scenario_id": "capture_write_policy_hooks",
+      "suite_id": "capture_integration",
+      "status": "blocked",
+      "comparison_outcome": "blocked",
+      "evidence": "Live agentmemory hook observations and persisted write-policy audit evidence are required before capture/write-policy scoring.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json"
+    },
+    {
+      "project": "memsearch",
+      "scenario_id": "markdown_source_store_rebuild_reload_prompt",
+      "suite_id": "trust_source_of_truth",
+      "status": "pass",
+      "comparison_outcome": "not_tested",
+      "evidence": "The prompt fixture covers canonical Markdown files as source of truth and memsearch index as derived rebuild/reload behavior.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json"
+    },
+    {
+      "project": "memsearch",
+      "scenario_id": "markdown_retrieval_debug_prompt",
+      "suite_id": "operator_debugging_ux",
+      "status": "pass",
+      "comparison_outcome": "not_tested",
+      "evidence": "The prompt fixture covers CLI replay, Markdown source inspection, and reindexing while keeping staged trace bundles not encoded.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json"
+    },
+    {
+      "project": "claude-mem",
+      "scenario_id": "retrieval_repair_artifact_path",
+      "suite_id": "retrieval",
+      "status": "wrong_result",
+      "comparison_outcome": "win",
+      "evidence": "The prompt fixture preserves claude-mem same-corpus retrieval as wrong_result and names rerun/inspection targets tmp/live-baseline/claude-mem.log plus tmp/live-baseline/claude-mem-checks.json.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json"
+    },
+    {
+      "project": "claude-mem",
+      "scenario_id": "progressive_disclosure_prompt",
+      "suite_id": "operator_debugging_ux",
+      "status": "pass",
+      "comparison_outcome": "not_tested",
+      "evidence": "The prompt fixture covers repository search-to-detail/source hydration on durable SQLite and separates it from hook/viewer claims.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json"
+    },
+    {
+      "project": "claude-mem",
+      "scenario_id": "hook_capture_viewer_workflow",
+      "suite_id": "capture_integration",
+      "status": "blocked",
+      "comparison_outcome": "blocked",
+      "evidence": "The current Docker baseline uses repository classes only and does not execute hooks, timeline capture, or viewer workflows.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json"
+    },
+    {
+      "project": "claude-mem",
+      "scenario_id": "viewer_operator_workflow",
+      "suite_id": "operator_debugging_ux",
+      "status": "blocked",
+      "comparison_outcome": "blocked",
+      "evidence": "A fair viewer/operator comparison needs Docker-contained readback over the same durable SQLite corpus.",
+      "command": "cargo make real-world-first-generation-oss",
+      "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json"
+    }
+  ],
+  "claim_boundaries": {
+    "allowed": [
+      "agentmemory has a selected durable local path for future work-resume and capture/write-policy scoring.",
+      "memsearch has checked-in source-store and retrieval-debug prompt coverage over the canonical Markdown store.",
+      "claude-mem has checked-in progressive-disclosure and retrieval-repair prompt coverage for the Docker-contained repository path.",
+      "claude-mem hook capture and viewer/operator workflows remain typed blockers."
+    ],
+    "not_allowed": [
+      "Do not claim agentmemory durable continuity from the in-memory same-corpus smoke.",
+      "Do not claim memsearch full real-world suite parity from Markdown reindex/reload smoke or fixture-backed prompt coverage.",
+      "Do not claim claude-mem retrieval passed; same-corpus retrieval remains wrong_result.",
+      "Do not claim claude-mem hooks or viewer workflows pass from repository class-level hydration evidence."
+    ]
+  }
+}
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index b2760325..82ac877e 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -167,19 +167,20 @@
       "strongest_user_facing_scenario": "Markdown-first canonical store with rebuildable local index and practical hybrid retrieval.",
       "current_evidence_class": "live_baseline_only",
       "supporting_evidence_classes": [
-        "live_baseline_only"
+        "live_baseline_only",
+        "fixture_backed"
       ],
       "measured_status": "pass",
       "proof": {
-        "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
-        "artifact": "tmp/live-baseline/live-baseline-report.json"
+        "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker; cargo make real-world-first-generation-oss",
+        "artifact": "tmp/live-baseline/live-baseline-report.json; tmp/real-world-memory/first-generation-oss/report.json"
       },
       "unsupported_or_blocked_status": {
         "state": "not_encoded",
-        "typed_reason": "source_of_truth_and_reindex_real_world_jobs_not_encoded",
-        "details": "Basic canonical Markdown same-corpus/reindex/update/delete/reload smoke now passes, but source-of-truth, retrieval-debug, and memory-evolution real-world prompt adapters are not encoded."
+        "typed_reason": "live_prompt_runtime_adapter_not_encoded",
+        "details": "Basic canonical Markdown same-corpus/reindex/update/delete/reload smoke passes, and XY-925 adds fixture-backed source-store and retrieval-debug prompts. No live memsearch runtime adapter executes prompt scoring yet; memory-evolution prompt adapters remain not encoded and TTL/expiry is unsupported by the current CLI path."
       },
-      "benchmark_before_claim": "Score source-of-truth and retrieval-debug real-world jobs over the canonical Markdown store; keep TTL/expiry unsupported unless a comparable path exists.",
+      "benchmark_before_claim": "Promote the fixture-backed source-store and retrieval-debug prompts into a live memsearch real-world adapter before any suite-level win/loss claim; keep TTL/expiry unsupported unless a comparable path exists.",
       "borrow_if_stronger": "Borrow the canonical markdown-store ergonomics, local reindex clarity, and user-inspectable source files."
     },
     {
@@ -209,19 +210,20 @@
       "strongest_user_facing_scenario": "Progressive disclosure, automatic capture loop, repository-local lifecycle, and practical local viewer workflow.",
       "current_evidence_class": "live_baseline_only",
       "supporting_evidence_classes": [
-        "live_baseline_only"
+        "live_baseline_only",
+        "fixture_backed"
       ],
       "measured_status": "wrong_result",
       "proof": {
-        "command": "ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker",
-        "artifact": "tmp/live-baseline/live-baseline-report.json"
+        "command": "ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker; cargo make real-world-first-generation-oss",
+        "artifact": "tmp/live-baseline/live-baseline-report.json; tmp/real-world-memory/first-generation-oss/report.json"
       },
       "unsupported_or_blocked_status": {
-        "state": "not_encoded",
-        "typed_reason": "progressive_disclosure_and_capture_real_world_jobs_not_encoded",
-        "details": "Current Docker evidence is not a clean retrieval pass, and progressive-disclosure plus hook/viewer capture jobs are not encoded."
+        "state": "blocked",
+        "typed_reason": "hook_viewer_runtime_paths_blocked",
+        "details": "Same-corpus retrieval remains wrong_result; XY-925 adds fixture-backed progressive-disclosure and retrieval-repair prompts. Hook capture and viewer/operator workflows still lack a Docker-contained runner, and the repair prompt lists rerun/inspection targets tmp/live-baseline/claude-mem.log plus tmp/live-baseline/claude-mem-checks.json."
       },
-      "benchmark_before_claim": "Add durable repository-backed work_resume, operator_debugging_ux, capture/write-policy, and progressive-disclosure jobs.",
+      "benchmark_before_claim": "Promote durable repository-backed work_resume, operator_debugging_ux, capture/write-policy, viewer/operator, and progressive-disclosure prompts into a live claude-mem adapter before any broader UX claim.",
       "borrow_if_stronger": "Borrow progressive disclosure, automatic capture review loops, and local viewer/operator comfort."
     },
     {
@@ -440,7 +442,7 @@
       "scenario": "work resume",
       "current_elf_evidence": "ELF fixture-backed work_resume passes and ELF live_real_world work_resume passes.",
       "strongest_competitor_or_reference": "agentmemory, claude-mem, OpenViking",
-      "current_competitor_evidence": "agentmemory is live_baseline_only with lifecycle_fail; claude-mem is wrong_result; OpenViking work_resume is not_encoded.",
+      "current_competitor_evidence": "agentmemory is live_baseline_only with lifecycle_fail, and XY-925 keeps its durable work_resume path typed blocked; claude-mem work_resume remains not_encoded pending a durable repository-backed adapter; OpenViking work_resume is not_encoded.",
       "current_state": "ELF and qmd have current encoded live pass evidence, but continuity-oriented competitors remain undermeasured.",
       "next_measurement": "Encode durable agentmemory, claude-mem, and OpenViking work_resume adapters or declare each blocked with lifecycle/setup evidence."
     },
@@ -458,9 +460,9 @@
       "scenario": "source-of-truth",
       "current_elf_evidence": "ELF fixture-backed trust_source_of_truth passes and ELF live_real_world trust_source_of_truth passes.",
       "strongest_competitor_or_reference": "memsearch",
-      "current_competitor_evidence": "memsearch has live_baseline_only canonical store evidence and now passes same-corpus retrieval, reindex/update/delete, and cold-start reload smoke, but trust_source_of_truth real-world prompts are not_encoded.",
-      "current_state": "ELF has stronger measured real-world source-of-truth evidence; memsearch now ties the local canonical-store reindex/reload smoke and remains a local-store ergonomics reference.",
-      "next_measurement": "Run memsearch source-of-truth rebuild and reload real_world_job prompts before any suite-level win/loss claim."
+      "current_competitor_evidence": "memsearch canonical-store, reindex, delete, and reload smoke passes; XY-925 fixture-backed source-of-truth prompts now cover the canonical Markdown rebuild/reload boundary, but no live memsearch prompt adapter pass is claimed.",
+      "current_state": "ELF has stronger measured live real-world source-of-truth evidence; memsearch now ties the local canonical-store reindex/reload smoke and has fixture-backed prompt coverage as a local-store ergonomics reference.",
+      "next_measurement": "Promote memsearch source-of-truth rebuild/reload prompts into a live adapter before any suite-level win/loss claim."
     },
     {
       "scenario_id": "temporal_current_historical",
@@ -494,8 +496,8 @@
       "scenario": "operator debugging",
       "current_elf_evidence": "ELF fixture-backed operator_debugging_ux passes, and the narrow live_real_world operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity.",
       "strongest_competitor_or_reference": "qmd, claude-mem, OpenMemory",
-      "current_competitor_evidence": "qmd now has a narrow live_real_world operator-debug slice: replay-command availability and repair-action clarity pass, but trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence are wrong_result. claude-mem and OpenMemory UX remain not_encoded or blocked.",
-      "current_state": "ELF has a narrow comparable live win over qmd for trace hydration and candidate-drop visibility, while OpenMemory and claude-mem UI workflows remain unmeasured.",
+      "current_competitor_evidence": "qmd now has a narrow live_real_world operator-debug slice: replay-command availability and repair-action clarity pass, but trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence are wrong_result. XY-925 adds claude-mem progressive-disclosure and retrieval-repair prompt coverage, while claude-mem viewer/operator and OpenMemory UI/export remain blocked.",
+      "current_state": "ELF has a narrow comparable live win over qmd for trace hydration and candidate-drop visibility, while OpenMemory and claude-mem viewer/operator workflows remain blocked for broad UX claims.",
       "next_measurement": "Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim."
     },
     {
@@ -503,8 +505,8 @@
       "scenario": "capture/write policy",
       "current_elf_evidence": "ELF fixture-backed capture_integration passes, and ELF live_real_world capture_integration passes 4/4 with zero redaction leaks, source ids, write-policy audit, and evidence binding.",
       "strongest_competitor_or_reference": "agentmemory, claude-mem",
-      "current_competitor_evidence": "agentmemory capture_integration is blocked by mocked/in-memory storage and claude-mem hook/viewer capture is not_encoded.",
-      "current_state": "ELF has live capture/write-policy self-check evidence, but agentmemory and claude-mem capture-breadth comparisons remain blocked or untested.",
+      "current_competitor_evidence": "agentmemory and claude-mem hook capture remain blocked until Docker-contained hook observations and write-policy/viewer readback artifacts exist.",
+      "current_state": "ELF has live capture/write-policy self-check evidence, but agentmemory and claude-mem capture-breadth comparisons remain blocked.",
       "next_measurement": "Run durable agentmemory and claude-mem capture-hook jobs that prove redaction, exclusion, evidence binding, source ids, and no secret leakage."
     },
     {
@@ -583,11 +585,11 @@
       "measurement": "Preference/entity history, deletion audit readback, personalization, OpenMemory inspection/export, and optional graph-context jobs."
     },
     {
-      "workstream": "memsearch source-of-truth real-world coverage",
+      "workstream": "memsearch source-of-truth live adapter coverage",
       "issue_or_candidate": "new adapter repair issue",
       "parallelizable": true,
-      "blocked_by": "Real-world prompt adapter over the canonical Markdown store.",
-      "measurement": "Source-of-truth rebuild/reload jobs and retrieval-debug jobs that preserve baseline reindex/update/delete evidence without converting it into suite pass claims."
+      "blocked_by": "Fixture-backed source-store and retrieval-debug prompts are encoded by XY-925; live prompt execution remains missing.",
+      "measurement": "Runtime adapter execution for the existing source-of-truth rebuild/reload and retrieval-debug prompt jobs without converting baseline smoke into suite pass claims."
     },
     {
       "workstream": "OpenViking context trajectory",
@@ -597,11 +599,11 @@
       "measurement": "Hierarchical expansion, staged trajectory, and resume/retrieval evidence jobs."
     },
     {
-      "workstream": "claude-mem progressive disclosure",
+      "workstream": "claude-mem hook/viewer runtime coverage",
       "issue_or_candidate": "new adapter issue",
       "parallelizable": true,
-      "blocked_by": "Durable repository path and progressive-disclosure output contract.",
-      "measurement": "Work resume, operator debugging, capture/write-policy, and progressive disclosure jobs."
+      "blocked_by": "Fixture-backed progressive-disclosure and retrieval-repair prompts are encoded by XY-925; hook capture and viewer/operator workflows remain blocked.",
+      "measurement": "Work resume, operator debugging, capture/write-policy, viewer/operator, and live progressive-disclosure adapter execution."
     },
     {
       "workstream": "RAGFlow evidence smoke",

From 1b893f6ab2be291834989075276195080df45c5d Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 01:29:07 +0800
Subject: [PATCH 2/7] {"schema":"decodex/commit/1","summary":"Align
 first-generation OSS benchmark assertions","authority":"XY-925"}

---
 apps/elf-eval/tests/real_world_job_benchmark.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index d1ac86e5..46b4a2e1 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -1465,8 +1465,9 @@ fn live_adapter_supports_elf_capture_write_policy_without_external_hook_claims()
 	assert!(manifest.contains("\"scenario_id\": \"capture_write_policy_hooks\""));
 	assert!(manifest.contains("\"comparison_outcome\": \"blocked\""));
 	assert!(manifest.contains("Four redaction, exclusion, source-id, evidence-binding"));
-	assert!(manifest.contains("no durable local session/capture path stores source ids"));
-	assert!(manifest.contains("hooks, timeline, observations, viewer capture"));
+	assert!(manifest.contains("durable upstream agentmemory session/capture path"));
+	assert!(manifest.contains("Docker-contained session directory"));
+	assert!(manifest.contains("claude-mem hooks, viewer, timeline, and observation workflows"));
 
 	Ok(())
 }

From 38ded160ac97bf40cb4b53e425f891716b51e37a Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 01:41:41 +0800
Subject: [PATCH 3/7] {"schema":"decodex/commit/1","summary":"Align
 first-generation OSS report counts","authority":"XY-925"}

---
 ...-11-first-generation-oss-continuity-source-store-report.md | 2 +-
 ...1-first-generation-oss-continuity-source-store-report.json | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md b/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md
index 1484abcf..80e944cc 100644
--- a/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md
+++ b/docs/guide/benchmarking/2026-06-11-first-generation-oss-continuity-source-store-report.md
@@ -47,7 +47,7 @@ External adapter manifest scenario outcomes now preserve every normalized outcom
 | Outcome | Count |
 | --- | ---: |
 | win | 9 |
-| tie | 8 |
+| tie | 9 |
 | loss | 1 |
 | not_tested | 8 |
 | blocked | 6 |
diff --git a/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json b/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json
index f69909b6..f5d38617 100644
--- a/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json
+++ b/docs/research/2026-06-11-first-generation-oss-continuity-source-store-report.json
@@ -26,7 +26,7 @@
     "manifest_id": "real-world-memory-project-adapters-2026-06-11-first-generation-continuity-source-store",
     "scenario_outcome_counts": {
       "win": 9,
-      "tie": 8,
+      "tie": 9,
       "loss": 1,
       "not_tested": 8,
       "blocked": 6,
@@ -37,7 +37,7 @@
       "blocked": 6,
       "wrong_result": 5,
       "lifecycle_fail": 1,
-      "pass": 19,
+      "pass": 20,
       "not_encoded": 2
     }
   },

From 6b742038426089ea8c61973f82ebd9966659e899 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 02:19:57 +0800
Subject: [PATCH 4/7] {"schema":"decodex/commit/1","summary":"Constrain
 first-generation suite evidence claims","authority":"XY-925"}

---
 .../memory_projects_manifest.json                 |  8 ++++----
 apps/elf-eval/tests/real_world_job_benchmark.rs   | 15 ++++++++-------
 ...6-06-11-competitor-strength-adoption-report.md |  2 +-
 ...06-11-competitor-strength-adoption-report.json |  2 +-
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 33cbf264..61fbcf7f 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -1156,13 +1156,13 @@
       "suites": [
         {
           "suite_id": "trust_source_of_truth",
-          "status": "pass",
-          "evidence": "The Markdown-first source model passed the local reindex/reload smoke, and XY-925 adds fixture-backed source-of-truth prompt coverage over the canonical Markdown store. No live memsearch runtime adapter executes prompt scoring yet."
+          "status": "not_encoded",
+          "evidence": "The Markdown-first source model passed the local reindex/reload smoke, and XY-925 adds fixture-backed source-of-truth prompt coverage over the canonical Markdown store. No live memsearch runtime adapter executes prompt scoring yet, so this is not a suite pass."
         },
         {
           "suite_id": "retrieval",
-          "status": "pass",
-          "evidence": "The Docker same-corpus check passes, and XY-925 adds fixture-backed retrieval-debug prompt coverage over memsearch CLI replay and Markdown source inspection. No live memsearch runtime adapter executes retrieval prompt scoring yet."
+          "status": "not_encoded",
+          "evidence": "The Docker same-corpus check passes, and XY-925 adds fixture-backed retrieval-debug prompt coverage over memsearch CLI replay and Markdown source inspection. No live memsearch runtime adapter executes retrieval prompt scoring yet, so this is not a suite pass."
         },
         {
           "suite_id": "memory_evolution",
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 46b4a2e1..99aca745 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -567,7 +567,7 @@ fn assert_external_adapter_manifest_status_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/pass")
 			.and_then(Value::as_u64),
-		Some(24)
+		Some(22)
 	);
 	assert_eq!(
 		report
@@ -579,7 +579,7 @@ fn assert_external_adapter_manifest_status_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/suite_status_counts/not_encoded")
 			.and_then(Value::as_u64),
-		Some(38)
+		Some(40)
 	);
 }
 
@@ -1072,17 +1072,18 @@ fn assert_memsearch_first_generation_records(memsearch: &Value) {
 		memsearch.pointer("/scenarios/0/elf_position").and_then(Value::as_str),
 		Some("untested")
 	);
-	assert_eq!(memsearch.pointer("/suites/0/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(memsearch.pointer("/suites/0/status").and_then(Value::as_str), Some("not_encoded"));
 	assert!(memsearch.pointer("/suites/0/evidence").and_then(Value::as_str).is_some_and(
 		|evidence| evidence.contains("fixture-backed source-of-truth prompt coverage")
-			&& evidence.contains("No live memsearch runtime adapter executes prompt scoring yet.")
+			&& evidence.contains("No live memsearch runtime adapter executes prompt scoring yet")
+			&& evidence.contains("not a suite pass")
 	));
-	assert_eq!(memsearch.pointer("/suites/1/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(memsearch.pointer("/suites/1/status").and_then(Value::as_str), Some("not_encoded"));
 	assert!(memsearch.pointer("/suites/1/evidence").and_then(Value::as_str).is_some_and(
 		|evidence| evidence.contains("fixture-backed retrieval-debug prompt coverage")
 			&& evidence.contains(
-				"No live memsearch runtime adapter executes retrieval prompt scoring yet."
-			)
+				"No live memsearch runtime adapter executes retrieval prompt scoring yet"
+			) && evidence.contains("not a suite pass")
 	));
 	assert_eq!(memsearch.pointer("/scenarios/1/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 07ef05ad..6a63a1e1 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -82,7 +82,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | --- | --- | --- |
 | `cargo make real-world-memory` | `2026-06-11-measurement-coverage-audit.md` | ELF fixture aggregate covers 43 jobs across 12 suites with 38 pass and 5 blocked production-ops or OpenViking context-trajectory measurement gates. |
 | `cargo make real-world-memory-live-adapters` | `2026-06-11-measurement-coverage-audit.md` | ELF live service adapter reports 22 pass, 5 wrong_result, 2 blocked, and 11 not_encoded jobs; qmd reports 17 pass, 6 wrong_result, 2 blocked, and 15 not_encoded jobs. |
-| `cargo make real-world-memory-live-adapters` | `2026-06-11-capture-write-policy-live-report.md` | ELF live capture/write-policy jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage; qmd remains not_encoded, agentmemory is blocked, and claude-mem is untested for capture breadth. |
+| `cargo make real-world-memory-live-adapters` | `2026-06-11-capture-write-policy-live-report.md` | ELF live capture/write-policy jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage; qmd remains not_encoded, while agentmemory and claude-mem capture breadth are blocked until durable hook/viewer evidence exists. |
 | `cargo make real-world-job-operator-ux-live-adapters` | `tmp/real-world-job/operator-ux-live-adapters/summary.json` | The narrow live operator-debug slice scores ELF as pass and qmd as wrong_result: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; both systems expose replay commands and repair-action guidance. |
 | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `2026-06-11-first-generation-oss-adapter-promotion-report.md` | mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result. |
 | `cargo make real-world-first-generation-oss` | `2026-06-11-first-generation-oss-continuity-source-store-report.md` | First-generation OSS fixture slice reports 6 jobs: 4 pass, 2 blocked, full evidence/source-ref/quote coverage, and manifest scenario outcomes across win, tie, loss, not_tested, blocked, and non_goal without promoting smoke evidence into live suite passes. |
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 689132a6..cb69967b 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -49,7 +49,7 @@
     {
       "command": "cargo make real-world-memory-live-adapters",
       "artifact": "docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md",
-      "claim": "ELF live capture/write-policy jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage; qmd remains not_encoded, agentmemory is blocked, and claude-mem is untested for capture breadth."
+      "claim": "ELF live capture/write-policy jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage; qmd remains not_encoded, while agentmemory and claude-mem capture breadth are blocked until durable hook/viewer evidence exists."
     },
     {
       "command": "cargo make real-world-job-operator-ux-live-adapters",

From b5c18e80f648570fb34f01f53809d722bf4cf99e Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 02:34:50 +0800
Subject: [PATCH 5/7] {"schema":"decodex/commit/1","summary":"Normalize
 first-generation evidence summaries","authority":"XY-925"}

---
 README.md                                     |  3 +-
 .../tests/real_world_job_benchmark.rs         | 52 ++++++++++++++++++-
 ...-06-11-capture-write-policy-live-report.md |  4 +-
 ...-11-competitor-strength-adoption-report.md |  2 +-
 ...-11-competitor-strength-evidence-matrix.md |  4 +-
 .../2026-06-11-measurement-coverage-audit.md  |  8 +--
 ...6-11-capture-write-policy-live-report.json |  6 +--
 ...1-competitor-strength-adoption-report.json |  4 +-
 ...-11-xy-897-competitor-strength-matrix.json |  6 +--
 9 files changed, 69 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 11319c42..22df99ec 100644
--- a/README.md
+++ b/README.md
@@ -208,7 +208,8 @@ provider-backed ELF evidence was required.
   source refs, write-policy redaction audit counts, evidence binding, and no secret
   leakage. qmd remains `not_encoded` for this suite. agentmemory capture comparison is
   blocked by mocked/in-memory storage, and claude-mem hook/viewer capture remains
-  untested, so no broad capture-breadth superiority claim is allowed.
+  blocked until Docker-contained hook/viewer capture evidence exists, so no broad
+  capture-breadth superiority claim is allowed.
 - The benchmark runner and report publisher are checked in and Docker-isolated:
   `cargo make baseline-live-docker`, `cargo make baseline-backfill-docker`,
   `cargo make baseline-production-private-addendum`,
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 99aca745..792ffef4 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -1565,20 +1565,30 @@ fn capture_write_policy_live_report_preserves_competitor_boundaries() -> Result<
 	assert!(agentmemory.pointer("/reason").and_then(Value::as_str).is_some_and(|reason| {
 		reason.contains("process-local StateKV Map") && reason.contains("in-memory index")
 	}));
-	assert_eq!(claude_mem.pointer("/position").and_then(Value::as_str), Some("untested"));
+	assert_eq!(claude_mem.pointer("/position").and_then(Value::as_str), Some("blocked"));
 	assert!(
 		claude_mem
 			.pointer("/reason")
 			.and_then(Value::as_str)
-			.is_some_and(|reason| reason.contains("hooks, timeline, observations"))
+			.is_some_and(|reason| reason.contains("hooks, timeline, observations")
+				&& reason.contains("Docker-contained hook/viewer runner"))
 	);
 	assert!(markdown.contains("ELF now has live capture/write-policy self-check evidence"));
 	assert!(markdown.contains("not an ELF-over-qmd win"));
+	assert!(markdown.contains("| claude-mem capture/viewer flows | `blocked` |"));
+	assert!(!markdown.contains("claude-mem capture breadth is untested"));
 	assert!(markdown.contains("runtime `source_ref` metadata returned by search"));
 	assert!(markdown.contains("Do not claim ELF broadly beats agentmemory or claude-mem"));
 	assert!(benchmarking_index.contains("2026-06-11-capture-write-policy-live-report.md"));
 	assert!(readme.contains("Capture/Write-Policy Live Report - June 11, 2026"));
 
+	let readme_normalized = readme.split_whitespace().collect::<Vec<_>>().join(" ");
+
+	assert!(
+		readme_normalized
+			.contains("claude-mem hook/viewer capture remains blocked until Docker-contained")
+	);
+
 	Ok(())
 }
 
@@ -1985,6 +1995,7 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 	);
 
 	assert_measurement_audit_adapter_status_counts(&measurement_audit);
+	assert_first_generation_current_summary_boundaries(&measurement_audit, &competitor_matrix);
 
 	assert!(
 		competitor_matrix
@@ -2069,6 +2080,26 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 	Ok(())
 }
 
+fn assert_first_generation_current_summary_boundaries(
+	measurement_audit: &str,
+	competitor_matrix: &str,
+) {
+	// Match on whitespace-collapsed text so re-wrapping the matrix Markdown cannot break these checks.
+	let matrix = competitor_matrix.split_whitespace().collect::<Vec<_>>().join(" ");
+
+	assert!(measurement_audit.contains("claude-mem hook/viewer capture is `blocked`"));
+	assert!(!measurement_audit.contains("claude-mem hook/viewer capture remains untested"));
+	assert!(!measurement_audit.contains("blocked or untested"));
+	assert!(matrix.contains(
+		"Overall adapter-status counts: 4 `pass`, 6 `wrong_result`, 1 `lifecycle_fail`, 6 `blocked`, and 6 `not_encoded`."
+	));
+	assert!(!competitor_matrix.contains("5 `blocked`, and 7 `not_encoded`"));
+	assert!(matrix.contains("mem0/OpenMemory local OSS entity-scoped personalization now passes"));
+	assert!(
+		!competitor_matrix.contains("mem0/OpenMemory and Letta personalization are `not_encoded`")
+	);
+}
+
 #[test]
 fn qmd_trace_replay_diagnostics_report_preserves_claim_boundaries() -> Result<()> {
 	let report = serde_json::from_str::<Value>(&fs::read_to_string(
@@ -2408,6 +2439,23 @@ fn assert_competitor_strength_matrix_scenario_json(scenarios: &[Value]) -> Resul
 			.and_then(Value::as_str)
 			.is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export"))
 	);
+
+	let personalization = find_by_field(scenarios, "/scenario_id", "personalization")?;
+
+	assert!(
+		personalization
+			.pointer("/current_competitor_evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|claim| claim
+				.contains("mem0/OpenMemory local OSS entity-scoped personalization now passes")
+				&& claim.contains("Letta personalization is research_gate not_encoded"))
+	);
+	assert!(
+		personalization
+			.pointer("/current_state")
+			.and_then(Value::as_str)
+			.is_some_and(|state| state.contains("scoped personalization is a tie"))
+	);
 	assert!(
 		context_trajectory
 			.pointer("/current_state")
diff --git a/docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md b/docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md
index cb6ff281..185ab65b 100644
--- a/docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md
+++ b/docs/guide/benchmarking/2026-06-11-capture-write-policy-live-report.md
@@ -53,7 +53,7 @@ The ELF materialization artifact records:
 | --- | --- | --- |
 | qmd live real-world adapter | `untested` | ELF executes and passes 4/4 live capture jobs; qmd keeps the same jobs typed `not_encoded`, so this remains an ELF self-check rather than a qmd comparison result. |
 | agentmemory capture hooks | `blocked` | The current Docker baseline uses a process-local StateKV Map and in-memory index. No durable local session/capture path stores source ids, exclusions, write-policy audit, or evidence-bound output. |
-| claude-mem capture/viewer flows | `untested` | The checked evidence exercises repository storage, lifecycle, progressive disclosure, and same-corpus retrieval only. Hooks, timeline, observations, viewer capture, and automatic capture review are not run against real-world jobs. |
+| claude-mem capture/viewer flows | `blocked` | The checked evidence exercises repository storage, lifecycle, progressive disclosure, and same-corpus retrieval only. Hooks, timeline, observations, viewer capture, and automatic capture review need a Docker-contained hook/viewer runner before scoring. |
 
 ## Claims Allowed
 
@@ -62,7 +62,7 @@ The ELF materialization artifact records:
 - qmd remains `not_encoded` for capture/write-policy jobs in the full live sweep.
 - agentmemory capture comparison is blocked by mocked/in-memory storage and lack of a
   durable local capture artifact.
-- claude-mem capture breadth is untested until a Docker-contained hook/viewer capture
+- claude-mem capture breadth is blocked until a Docker-contained hook/viewer capture
   runner exists.
 
 ## Claims Not Allowed
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 6a63a1e1..4aa963e4 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -158,7 +158,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 - Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow
   ELF/qmd operator-debug slice.
 - Do not claim ELF broadly beats agentmemory or claude-mem on capture breadth; the
-  current comparison is blocked or untested for their hook/viewer capture paths.
+  current comparison is blocked for their hook/viewer capture paths.
 - Do not claim ELF beats OpenViking on staged context trajectory.
 - Do not claim ELF beats Letta on core-vs-archival memory.
 - Do not claim graph/RAG parity from smoke-only evidence.
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index 4fb3b15e..40c4c53a 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -46,7 +46,7 @@ Current boundary:
 The current manifest has 23 adapter records across 16 external projects plus ELF.
 Evidence-class counts: 1 `fixture_backed`, 6 `live_baseline_only`, 5
 `live_real_world`, and 11 `research_gate`. Overall adapter-status counts: 4 `pass`,
-6 `wrong_result`, 1 `lifecycle_fail`, 5 `blocked`, and 7 `not_encoded`.
+6 `wrong_result`, 1 `lifecycle_fail`, 6 `blocked`, and 6 `not_encoded`.
 
 ## State Taxonomy
 
@@ -105,7 +105,7 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | Operator debugging | Fixture operator_debugging_ux passes, and the narrow live operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity. | qmd, claude-mem, OpenMemory. | qmd ties replay-command availability and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence. XY-925 adds claude-mem progressive-disclosure and retrieval-repair prompt coverage, while claude-mem viewer/operator and OpenMemory UI/export remain blocked. | Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | Fixture capture_integration passes; ELF live capture_integration passes 4/4 with zero redaction leaks, source ids, write-policy audit, and evidence binding. | agentmemory, claude-mem. | agentmemory and claude-mem hook capture remain `blocked` until Docker-contained hook observations and write-policy/viewer readback artifacts exist. | Run durable agentmemory and claude-mem capture-hook jobs proving redaction, exclusion, evidence binding, source ids, and no secret leakage. |
 | Production ops | Fixture production_ops has 4 pass and 2 blocked; live production_ops is `blocked`; production adoption has provider/backfill/restore evidence. | ELF production gate, qmd, RAG/RAGFlow resource gates. | qmd live production_ops is `blocked`; RAG/resource gates are `research_gate` `blocked`. | Rerun private-corpus and credentialed gates only when operator-owned manifest and credentials exist. |
-| Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory and Letta personalization are `not_encoded`. | Encode scoped preference readback for mem0/OpenMemory and Letta before personalization superiority claims. |
+| Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory local OSS entity-scoped personalization now passes, so scoped preference behavior is a measured tie; OpenMemory UI/export remains blocked, hosted Platform export is non-goal, optional graph memory remains outside local OSS scoring, and Letta personalization is `research_gate` `not_encoded`. | Add OpenMemory product app import/export and contained Letta scoped-preference readback before broader personalization superiority claims. |
 | Context trajectory | ELF has trace direction but no comparable staged trajectory scenario. | OpenViking. | OpenViking setup is pinned, same-corpus retrieval is `wrong_result`, and staged/hierarchy/recursive trajectory jobs are encoded as `blocked`. | Make OpenViking evidence-bearing retrieval pass, then score staged context trajectory outputs. |
 | Core-vs-archival memory | ELF core-block semantics exist in the service contract, but comparative benchmark coverage is not encoded here. | Letta. | Letta is `research_gate` `not_encoded` until contained export proof exists. | Add ELF core-block versus archival-search jobs; compare Letta only after contained export proof. |
 | Graph/RAG navigation | ELF relation context is not enough to claim graph/RAG navigation parity. | RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, graphify. | RAGFlow, LightRAG, GraphRAG, and Graphiti/Zep remain `research_gate` blocked/incomplete without explicit setup; graphify has only a tiny scored smoke `wrong_result`. | Run larger contained graph/RAG adapters with evidence-linked outputs before any ELF graph/RAG win, tie, or loss claim. |
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index 0974dcb6..3174aeed 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -34,8 +34,8 @@ What is proven today:
 - ELF now has live capture/write-policy self-check evidence for redaction, exclusions,
   source ids, evidence binding, and no secret leakage. This is not a broad
   capture-hook win over agentmemory or claude-mem: agentmemory comparison is blocked
-  by mocked/in-memory storage, and claude-mem hook/viewer capture remains untested in
-  the Docker real-world job runner.
+  by mocked/in-memory storage, and claude-mem hook/viewer capture remains blocked
+  until Docker-contained hook/viewer evidence exists.
 - ELF is ahead on production-operation evidence among tracked systems because it has
   checked-in provider synthetic, stress, backfill, backup/restore, and Qdrant rebuild
   evidence.
@@ -191,7 +191,7 @@ records `unique_project_names: 17` for the full project list including ELF.
 | Consolidation | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | Live proposal generation with lineage, confidence, and review-action audit. |
 | Knowledge pages | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | Live page rebuild/lint plus llm-wiki, gbrain, GraphRAG, and graphify comparisons. |
 | Operator debugging | Fixture aggregate passes; narrow ELF/qmd live operator-debug slice is scored with ELF `pass` and qmd `wrong_result`. | Narrow ELF/qmd live claim only: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; replay-command and repair-action clarity are tied. | OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
-| Capture/write policy | Fixture aggregate passes; ELF live service adapter passes 4/4 capture jobs with zero redaction leaks; qmd is `not_encoded`; agentmemory is `blocked`; claude-mem is `not_encoded`. | ELF has live self-check evidence for redaction, exclusions, source ids, evidence binding, and no secret leakage. Against agentmemory/claude-mem capture breadth, the comparison remains blocked or untested. | Durable agentmemory and claude-mem capture-hook runners with evidence-bound output. |
+| Capture/write policy | Fixture aggregate passes; ELF live service adapter passes 4/4 capture jobs with zero redaction leaks; qmd is `not_encoded`; agentmemory is `blocked`; claude-mem hook/viewer capture is `blocked`. | ELF has live self-check evidence for redaction, exclusions, source ids, evidence binding, and no secret leakage. Against agentmemory/claude-mem capture breadth, the comparison remains blocked until durable hook/viewer evidence exists. | Durable agentmemory and claude-mem capture-hook runners with evidence-bound output. |
 | Production ops | ELF has separate production-provider/backfill/restore evidence; live sweep is not a full production-ops pass. | Bounded personal-production adoption claim with caveats. | Private corpus manifest and credentialed provider gates. |
 | Personalization | ELF and qmd live pass one scoped preference job. | Narrow encoded pass only. | mem0/OpenMemory and Letta entity/preference history comparison. |
 | Context trajectory | Not comparable. | No claim. | OpenViking staged hierarchy/trajectory scoring. |
@@ -216,7 +216,7 @@ Order these by decision value, not implementation convenience:
 
 3. External capture-hook report for agentmemory and claude-mem
    - Why: ELF now has a live capture/write-policy self-check, but the strongest
-     agentmemory and claude-mem capture-breadth claims are still blocked or untested.
+     agentmemory and claude-mem capture-breadth claims are still blocked.
    - Output: durable local capture artifacts, source ids, redaction/exclusion audit,
      and typed blocker reasons when hooks or viewer capture cannot run in Docker.
 
diff --git a/docs/research/2026-06-11-capture-write-policy-live-report.json b/docs/research/2026-06-11-capture-write-policy-live-report.json
index a00e9a5e..574e1cc1 100644
--- a/docs/research/2026-06-11-capture-write-policy-live-report.json
+++ b/docs/research/2026-06-11-capture-write-policy-live-report.json
@@ -199,8 +199,8 @@
     },
     {
       "project": "claude-mem",
-      "position": "untested",
-      "reason": "Repository storage, lifecycle, progressive disclosure, and same-corpus retrieval are checked; hooks, timeline, observations, viewer capture, and automatic capture review are not run against real-world jobs."
+      "position": "blocked",
+      "reason": "Repository storage, lifecycle, progressive disclosure, and same-corpus retrieval are checked; hooks, timeline, observations, viewer capture, and automatic capture review need a Docker-contained hook/viewer runner before scoring."
     }
   ],
   "claim_boundary": {
@@ -208,7 +208,7 @@
       "ELF live capture/write-policy self-checks pass for redaction, exclusions, source ids, evidence binding, and no secret leakage.",
       "qmd remains not_encoded for capture/write-policy jobs in the full live sweep.",
       "agentmemory capture comparison is blocked by mocked/in-memory storage and lack of a durable local capture artifact.",
-      "claude-mem capture breadth is untested until a Docker-contained hook/viewer capture runner exists."
+      "claude-mem capture breadth is blocked until a Docker-contained hook/viewer capture runner exists."
     ],
     "not_allowed": [
       "Do not claim ELF broadly beats agentmemory or claude-mem on capture breadth.",
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index cb69967b..149bb854 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -448,7 +448,7 @@
       "issue": "XY-933",
       "priority": "P1",
       "state": "Live ELF self-check encoded",
-      "gap": "Capture/write-policy redaction, exclusion, source-id, evidence-binding, and no-leak scoring for ELF; durable agentmemory/claude-mem capture-hook comparison remains blocked or untested."
+      "gap": "Capture/write-policy redaction, exclusion, source-id, evidence-binding, and no-leak scoring for ELF; durable agentmemory/claude-mem capture-hook comparison remains blocked until Docker-contained hook/viewer evidence exists."
     },
     {
       "issue": "XY-927",
@@ -500,7 +500,7 @@
       "Do not claim graph/RAG parity from smoke-only evidence.",
       "Do not promote fixture-backed, live_baseline_only, smoke_only, research_gate, blocked, wrong_result, lifecycle_fail, unsupported, or not_encoded states into a generic pass/fail score.",
       "Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow ELF/qmd operator-debug slice.",
-      "Do not claim ELF broadly beats agentmemory or claude-mem on capture breadth; the current comparison is blocked or untested for their hook/viewer capture paths."
+      "Do not claim ELF broadly beats agentmemory or claude-mem on capture breadth; the current comparison is blocked for their hook/viewer capture paths."
     ]
   }
 }
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index 82ac877e..7233bf66 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -523,9 +523,9 @@
       "scenario": "personalization",
       "current_elf_evidence": "ELF fixture-backed personalization passes and ELF live_real_world personalization passes.",
       "strongest_competitor_or_reference": "mem0/OpenMemory, Letta",
-      "current_competitor_evidence": "mem0/OpenMemory personalization is not_encoded and Letta personalization is research_gate not_encoded.",
-      "current_state": "ELF and qmd have live encoded evidence; personalization-specialized competitors are not yet comparable.",
-      "next_measurement": "Encode mem0/OpenMemory and Letta scoped-preference readback jobs before making personalization superiority claims."
+      "current_competitor_evidence": "mem0/OpenMemory local OSS entity-scoped personalization now passes; OpenMemory UI/export remains blocked, hosted Platform export is non-goal, optional graph memory remains outside local OSS scoring, and Letta personalization is research_gate not_encoded.",
+      "current_state": "ELF, qmd, and mem0 local OSS have measured scoped-preference evidence, so scoped personalization is a tie on the current surface; mem0 preference-correction history remains a separate ELF loss.",
+      "next_measurement": "Add OpenMemory product app import/export and contained Letta scoped-preference readback before making broader personalization superiority claims."
     },
     {
       "scenario_id": "context_trajectory",

From fc59da9bc56186b844608bccc4e1bf65a77c98f9 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 02:47:29 +0800
Subject: [PATCH 6/7] {"schema":"decodex/commit/1","summary":"Type
 first-generation viewer blockers","authority":"XY-925"}

---
 README.md                                     |  5 +-
 .../memory_projects_manifest.json             |  2 +-
 .../tests/real_world_job_benchmark.rs         | 54 +++++++++++++++++--
 ...-11-competitor-strength-adoption-report.md |  5 +-
 ...elf-qmd-trace-replay-diagnostics-report.md |  6 ++-
 ...1-competitor-strength-adoption-report.json |  4 +-
 ...f-qmd-trace-replay-diagnostics-report.json |  4 +-
 7 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 22df99ec..3e7ec848 100644
--- a/README.md
+++ b/README.md
@@ -170,8 +170,9 @@ provider-backed ELF evidence was required.
   ELF passes trace hydration, candidate-drop visibility, selected-but-not-narrated
   evidence, replay-command availability, and repair-action clarity. qmd ties replay
   command and repair-action clarity but is `wrong_result` for trace hydration and
-  candidate-drop stage visibility. OpenMemory UI/export and claude-mem viewer flows
-  remain blocked or not encoded, so this is not a broad viewer-product claim.
+  candidate-drop stage visibility. OpenMemory UI/export remains blocked, and
+  claude-mem viewer flows remain blocked until Docker-contained hook/viewer evidence
+  exists, so this is not a broad viewer-product claim.
 - First-generation OSS continuity/source-store follow-up after XY-925: `cargo make
   real-world-first-generation-oss` emits a fixture-backed external-adapter slice for
   agentmemory, memsearch, and claude-mem with 6 jobs, 4 pass, 2 blocked, and full
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index 61fbcf7f..1189ec5f 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -595,7 +595,7 @@
           "status": "pass",
           "elf_position": "ties",
           "comparison_outcome": "tie",
-          "evidence": "ELF and qmd generated clear repair/replay steps for the narrow operator-debug jobs; OpenMemory and claude-mem UI repair paths remain blocked or not encoded.",
+          "evidence": "ELF and qmd generated clear repair/replay steps for the narrow operator-debug jobs; OpenMemory UI/export remains blocked, and claude-mem UI repair paths remain blocked until Docker-contained hook/viewer evidence exists.",
           "command": "cargo make real-world-job-operator-ux-live-adapters",
           "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
         },
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 792ffef4..2ee9d46a 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -78,6 +78,10 @@ fn workspace_root() -> Result<PathBuf> {
 	Ok(root.to_path_buf())
 }
 
+fn collapse_whitespace(text: &str) -> String {
+	text.split_whitespace().collect::<Vec<_>>().join(" ")
+}
+
 fn strength_profile_report_path() -> Result<PathBuf> {
 	Ok(workspace_root()?
 		.join("docs")
@@ -1581,11 +1585,8 @@ fn capture_write_policy_live_report_preserves_competitor_boundaries() -> Result<
 	assert!(markdown.contains("Do not claim ELF broadly beats agentmemory or claude-mem"));
 	assert!(benchmarking_index.contains("2026-06-11-capture-write-policy-live-report.md"));
 	assert!(readme.contains("Capture/Write-Policy Live Report - June 11, 2026"));
-
-	let readme_normalized = readme.split_whitespace().collect::<Vec<_>>().join(" ");
-
 	assert!(
-		readme_normalized
+		collapse_whitespace(&readme)
 			.contains("claude-mem hook/viewer capture remains blocked until Docker-contained")
 	);
 
@@ -2017,6 +2018,7 @@ fn current_benchmark_reports_preserve_live_sweep_boundaries() -> Result<()> {
 		"wrong_result, incomplete, blocked, and not_encoded states remain visible",
 		"broader live suites remain `wrong_result`, `incomplete`, or `not_encoded`",
 		"The qmd live real-world slice covers representative jobs only",
+		"blocked or not encoded",
 	] {
 		assert!(!measurement_audit.contains(stale_phrase));
 		assert!(!competitor_matrix.contains(stale_phrase));
@@ -2121,6 +2123,15 @@ fn qmd_trace_replay_diagnostics_report_preserves_claim_boundaries() -> Result<()
 	assert!(benchmarking_index.contains("qmd top-10/replay artifact"));
 	assert!(benchmarking_index.contains("ELF trace/admin surfaces"));
 	assert!(adoption_report.contains("| Retrieval quality and local debug UX | `loss` |"));
+
+	assert_trace_replay_viewer_blocker_boundaries(
+		&readme,
+		&markdown,
+		&adoption_report,
+		&report,
+		&adoption_json,
+	)?;
+
 	assert!(
 		adoption_report
 			.contains("Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF")
@@ -2265,6 +2276,41 @@ fn assert_trace_replay_diagnostics_markdown(markdown: &str) {
 	assert!(markdown.contains("Do not score rerank superiority from a qmd `--no-rerank` run"));
 }
 
+fn assert_trace_replay_viewer_blocker_boundaries(
+	readme: &str,
+	markdown: &str,
+	adoption_report: &str,
+	report: &Value,
+	adoption_json: &Value,
+) -> Result<()> {
+	let checked_surfaces = [
+		collapse_whitespace(readme),
+		collapse_whitespace(markdown),
+		collapse_whitespace(adoption_report),
+		report.to_string(),
+		adoption_json.to_string(),
+	];
+
+	for surface in checked_surfaces {
+		assert!(!surface.contains("blocked or not encoded"));
+	}
+
+	assert!(
+		collapse_whitespace(readme)
+			.contains("claude-mem viewer flows remain blocked until Docker-contained")
+	);
+	assert!(
+		collapse_whitespace(markdown)
+			.contains("claude-mem UI repair paths remain blocked until Docker-contained")
+	);
+	assert!(
+		collapse_whitespace(adoption_report)
+			.contains("claude-mem viewer workflows remain blocked until Docker-contained")
+	);
+
+	Ok(())
+}
+
 fn assert_trace_replay_adoption_json(adoption: &Value) -> Result<()> {
 	let local_debug = find_by_field(
 		array_at(adoption, "/scenario_outcomes")?,
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index 4aa963e4..5636fc71 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -48,7 +48,8 @@ The remaining caveats are material:
   ergonomics as stronger than ELF's default stress report, while expansion, fusion,
   and rerank remain untested. XY-932 adds a narrow live operator-debug slice where
   ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory
-  UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-925
+  UI/export remains blocked and claude-mem viewer workflows remain blocked until
+  Docker-contained hook/viewer evidence exists. XY-925
   now adds fixture-backed first-generation OSS prompt coverage and typed blockers for
   agentmemory durable continuity, memsearch Markdown source-store/debug jobs, and
   claude-mem progressive-disclosure, retrieval-repair, hook, and viewer/operator
@@ -97,7 +98,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | Scenario | ELF outcome | Evidence classes | Measured claim | Follow-up |
 | --- | --- | --- | --- | --- |
 | Source-of-truth rebuild and evidence-bound writes | `win` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF has the strongest measured source-of-truth and rebuild story: Postgres is authoritative, Qdrant is rebuildable, trust-source jobs pass, and production restore/rebuild proof exists. | None |
-| Work resume and coding-agent continuity | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF and qmd both pass encoded live `work_resume` jobs. XY-925 selects agentmemory's next durable local path but keeps it blocked until the SDK KV/index and observation log survive a fresh process; claude-mem and OpenViking continuity strengths remain blocked or not encoded. | XY-928 |
+| Work resume and coding-agent continuity | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `blocked`, `not_encoded` | ELF and qmd both pass encoded live `work_resume` jobs. XY-925 selects agentmemory's next durable local path but keeps it blocked until the SDK KV/index and observation log survive a fresh process; claude-mem work_resume remains `not_encoded`, and OpenViking continuity trajectory remains `blocked`. | XY-928 |
 | Project decisions and reversals | `tie` | `fixture_backed`, `live_real_world`, `research_gate`, `not_encoded` | ELF and qmd both pass encoded `project_decisions` jobs; Letta-style core/archival decision memory is not tested. | XY-927 |
 | Retrieval quality | `tie` | `fixture_backed`, `live_real_world`, `live_baseline_only` | ELF and qmd both pass encoded live retrieval and stress/same-corpus retrieval evidence. | XY-923 |
 | Retrieval quality and local debug UX | `loss` | `live_baseline_only`, `research_gate`, `wrong_result`, `not_encoded` | The XY-923 trace/replay report scores qmd stronger on immediate top-10 candidate artifacts and short CLI replay commands. ELF keeps useful service trace/admin replay surfaces, and expansion, fusion, rerank-on, and candidate-drop diagnostics remain untested. | XY-923 |
diff --git a/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md b/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md
index aa6213ae..189566c2 100644
--- a/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md
+++ b/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md
@@ -69,7 +69,7 @@ This is not a broad qmd-over-ELF claim. It is a scored local-debug artifact gap.
 | Operator-debug trace hydration | `live_real_world` | `pass` | `win` | ELF live operator-debug jobs generate trace ids, viewer URLs, admin trace-bundle URLs, and `trace_available=true`; qmd generates local replay commands but no service trace hydration surface. |
 | Operator-debug replay command availability | `live_real_world` | `pass` | `tie` | ELF emits admin trace-bundle curl commands and qmd emits local CLI query replay commands for the same operator-debugging scenarios; this scores command availability, not equivalent UI quality. |
 | Operator-debug candidate-drop visibility | `live_real_world` | `pass` | `win` | ELF exposes dropped-candidate visibility through generated operator-debug metadata without direct SQL assumptions; qmd exposes top-k replay rows but no intermediate candidate-drop stages in this slice. |
-| Operator-debug repair-action clarity | `live_real_world` | `pass` | `tie` | Both live operator-debug adapters emit concrete next steps for replay or trace-bundle inspection; OpenMemory and claude-mem UI repair paths remain blocked or not encoded. |
+| Operator-debug repair-action clarity | `live_real_world` | `pass` | `tie` | Both live operator-debug adapters emit concrete next steps for replay or trace-bundle inspection; OpenMemory UI/export remains blocked, and claude-mem UI repair paths remain blocked until Docker-contained hook/viewer evidence exists. |
 | Operator-debug selected-but-not-narrated evidence | `live_real_world` | `pass` | `win` | The operator-debug slice now scores selected-but-not-narrated evidence as a trace/answer-composition repair surface without direct database inspection. |
 | Query expansion attribution | `research_gate` | `not_encoded` | `not_tested` | No comparable artifact shows expansion variants or dynamic expansion decisions for both systems. |
 | Dense/sparse channel attribution | `research_gate` | `not_encoded` | `not_tested` | ELF uses dense plus BM25 and qmd uses structured `lex:` plus `vec:`, but the scored artifacts do not expose comparable per-channel contribution. |
@@ -139,7 +139,9 @@ Not allowed:
 - Do not score rerank superiority from a qmd `--no-rerank` run.
 - Do not collapse `not_tested`, `non_goal`, or `wrong_result` into pass evidence.
 - Do not convert the XY-932 operator-debug trace slice into a broad viewer-product win
-  over OpenMemory or claude-mem; those UI paths remain blocked or not encoded.
+  over OpenMemory or claude-mem; OpenMemory UI/export remains blocked, and
+  claude-mem UI repair paths remain blocked until Docker-contained hook/viewer
+  evidence exists.
 
 ## Follow-Up Gate
 
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 149bb854..7bb448bd 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -12,7 +12,7 @@
       "Live temporal reconciliation remains wrong_result for five of six memory_evolution jobs.",
       "Private-corpus production quality is blocked until an operator-owned manifest exists.",
       "Credentialed provider production-ops gates are blocked until explicit provider setup exists.",
-      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and Letta core-vs-archival memory plus graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures behind same-corpus evidence output and missing staged artifacts. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded. XY-925 adds fixture-backed first-generation OSS prompt coverage and typed blockers for agentmemory durable continuity, memsearch Markdown source-store/debug jobs, and claude-mem progressive-disclosure, retrieval-repair, hook, and viewer/operator surfaces without creating live external real-world suite passes. XY-933 adds an ELF live capture/write-policy self-check, but agentmemory and claude-mem hook-capture breadth remain blocked until Docker-contained hook/viewer evidence exists."
+      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and Letta core-vs-archival memory plus graph/RAG navigation remain unproven. XY-928 encodes OpenViking staged trajectory, hierarchy selection, and recursive/context expansion as blocked fixtures behind same-corpus evidence output and missing staged artifacts. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export remains blocked and claude-mem viewer workflows remain blocked until Docker-contained hook/viewer evidence exists. XY-925 adds fixture-backed first-generation OSS prompt coverage and typed blockers for agentmemory durable continuity, memsearch Markdown source-store/debug jobs, and claude-mem progressive-disclosure, retrieval-repair, hook, and viewer/operator surfaces without creating live external real-world suite passes. XY-933 adds an ELF live capture/write-policy self-check, but agentmemory and claude-mem hook-capture breadth remain blocked until Docker-contained hook/viewer evidence exists."
     ]
   },
   "evidence_class_terms": [
@@ -121,7 +121,7 @@
         "blocked",
         "not_encoded"
       ],
-      "measured_claim": "ELF and qmd both pass the encoded live work_resume jobs. XY-925 selects agentmemory's durable local path but keeps it blocked until the SDK KV/index and observation log survive a fresh process; claude-mem and OpenViking continuity strengths remain blocked or not encoded.",
+      "measured_claim": "ELF and qmd both pass the encoded live work_resume jobs. XY-925 selects agentmemory's durable local path but keeps it blocked until the SDK KV/index and observation log survive a fresh process; claude-mem work_resume remains not_encoded, and OpenViking continuity trajectory remains blocked.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md",
diff --git a/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json b/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json
index 42c22615..84a38938 100644
--- a/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json
+++ b/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json
@@ -199,7 +199,7 @@
       "elf_status": "pass",
       "qmd_status": "pass",
       "outcome": "tie",
-      "diagnostic_judgment": "Both live operator-debug adapters emit concrete next steps for replay or trace-bundle inspection; OpenMemory and claude-mem UI repair paths remain blocked or not encoded.",
+      "diagnostic_judgment": "Both live operator-debug adapters emit concrete next steps for replay or trace-bundle inspection; OpenMemory UI/export remains blocked, and claude-mem UI repair paths remain blocked until Docker-contained hook/viewer evidence exists.",
       "artifacts": [
         "tmp/real-world-job/operator-ux-live-adapters/summary.json"
       ]
@@ -364,6 +364,6 @@
     "Do not collapse not_tested, non_goal, or wrong_result into pass evidence.",
     "ELF narrowly wins the live operator-debug trace hydration and candidate-drop visibility slice against qmd; qmd still ties replay-command and repair-action clarity.",
     "Expansion, dense/sparse contribution, fusion, rerank-on quality, and broad retrieved-but-dropped diagnosis outside the operator-debug slice remain unproven.",
-    "Do not convert the XY-932 operator-debug trace slice into a broad viewer-product win over OpenMemory or claude-mem; those UI paths remain blocked or not encoded."
+    "Do not convert the XY-932 operator-debug trace slice into a broad viewer-product win over OpenMemory or claude-mem; OpenMemory UI/export remains blocked, and claude-mem UI repair paths remain blocked until Docker-contained hook/viewer evidence exists."
   ]
 }

From eeb5595e3d3a3d84bb6f5d1ab590e44441ade6f0 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 12 Jun 2026 03:13:11 +0800
Subject: [PATCH 7/7] {"schema":"decodex/commit/1","summary":"Use local
 tokenizer for context e2e harness","authority":"XY-925"}

---
 scripts/context-misranking-harness.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/context-misranking-harness.sh b/scripts/context-misranking-harness.sh
index 3290fdef..578f09a5 100755
--- a/scripts/context-misranking-harness.sh
+++ b/scripts/context-misranking-harness.sh
@@ -205,7 +205,7 @@ min_importance = 0.0
 enabled        = true
 max_tokens     = 512
 overlap_tokens = 128
-tokenizer_repo = "gpt2"
+tokenizer_repo = "config/local/tokenizer.wordlevel.json"
 
 [search.expansion]
 include_original = true