From 29147148e14027b285ecf704db7894f97a785919 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Thu, 11 Jun 2026 20:49:41 +0800
Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Add live
 operator-debug benchmark scoring","authority":"XY-932"}

---
 Makefile.toml                                 |   9 +
 README.md                                     |   8 +
 .../memory_projects_manifest.json             | 268 +++++++++++++++
 .../selected_but_not_narrated.json            | 160 +++++++++
 .../src/bin/real_world_job_benchmark.rs       |  26 +-
 .../src/bin/real_world_live_adapter.rs        | 318 +++++++++++++++---
 .../tests/real_world_job_benchmark.rs         | 317 +++++++++++++++--
 ...-11-competitor-strength-adoption-report.md |  12 +-
 ...-11-competitor-strength-evidence-matrix.md |  14 +-
 ...on-direction-from-competitor-benchmarks.md |  34 +-
 ...elf-qmd-trace-replay-diagnostics-report.md |  30 +-
 .../2026-06-11-measurement-coverage-audit.md  |  17 +-
 ...1-competitor-strength-adoption-report.json | 193 ++++++++---
 ...f-qmd-trace-replay-diagnostics-report.json |  86 ++++-
 ...2026-06-11-measurement-coverage-audit.json | 102 ++++--
 ...-11-xy-897-competitor-strength-matrix.json |  26 +-
 ...real-world-operator-debug-live-adapters.sh | 129 +++++++
 17 files changed, 1552 insertions(+), 197 deletions(-)
 create mode 100644 apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/selected_but_not_narrated.json
 create mode 100755 scripts/real-world-operator-debug-live-adapters.sh

diff --git a/Makefile.toml b/Makefile.toml
index 86b24c7d..42b2033c 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -421,6 +421,7 @@ args = [
 # | real-world-job-operator-ux             | composite |     |
 # | real-world-job-operator-ux-json        | command   |     |
 # | real-world-job-operator-ux-report      | command   |     |
+# | real-world-job-operator-ux-live-adapters | command |     |
 # | real-world-memory-retrieval            | composite |     |
 # | real-world-memory-retrieval-json       | command   |     |
 # | real-world-memory-retrieval-report     | command   |     |
@@ -668,6 +669,14 @@ args = [
 	"tmp/real-world-job/real-world-job-operator-ux-report.md",
 ]
 
+[tasks.real-world-job-operator-ux-live-adapters]
+workspace = false
+command = "bash"
+args = [
+	"-lc",
+	"docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR -e ELF_OPERATOR_DEBUG_LIVE_FIXTURES -e ELF_OPERATOR_DEBUG_LIVE_WORK_DIR -e ELF_OPERATOR_DEBUG_QMD_DIR baseline-runner bash scripts/real-world-operator-debug-live-adapters.sh",
+]
+
 [tasks.real-world-memory-retrieval]
 workspace = false
 dependencies = [
diff --git a/README.md b/README.md
index f4e15199..8261bf13 100644
--- a/README.md
+++ b/README.md
@@ -162,6 +162,14 @@ provider-backed ELF evidence was required.
   17 pass, 6 wrong_result, 2 blocked, and 13 not_encoded jobs. The difference is the
   delete/TTL tombstone case; qmd remains the local retrieval-debug UX reference, and
   no broad ELF-over-qmd claim is allowed.
+- Live operator-debugging slice after XY-932: `cargo make
+  real-world-job-operator-ux-live-adapters` emits narrow Docker-isolated
+  `live_real_world` records for ELF and qmd over the operator-debugging fixtures.
+  ELF passes trace hydration, candidate-drop visibility, selected-but-not-narrated
+  evidence, replay-command availability, and repair-action clarity. qmd ties on replay
+  commands and repair-action clarity but is `wrong_result` for trace hydration and
+  candidate-drop stage visibility. OpenMemory UI/export and claude-mem viewer flows
+  remain blocked or not encoded, so this is not a broad viewer-product claim.
 - Expanded adapter-pack coverage after XY-834: the real-world external adapter
   manifest now includes `research_gate` records for RAGFlow, LightRAG, GraphRAG,
   Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, and deeper
diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
index f5eabf62..2832b202 100644
--- a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
+++ b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -481,6 +481,274 @@
         "This record does not prove broad RAG/graph adapter parity or private-corpus production quality."
       ]
     },
+    {
+      "adapter_id": "elf_operator_debug_live",
+      "project": "ELF",
+      "adapter_kind": "docker_service_operator_debug_real_world_job",
+      "evidence_class": "live_real_world",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "pass",
+      "setup": {
+        "status": "pass",
+        "evidence": "The narrow operator-debug live task runs inside docker-compose.baseline.yml with Docker-owned Postgres, Qdrant, Cargo, npm, qmd, and cache volumes.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json"
+      },
+      "run": {
+        "status": "pass",
+        "evidence": "ELF materializes operator_debugging_ux adapter_response objects through ElfService, worker indexing, search_raw trace ids, and generated operator_debug metadata.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+      },
+      "result": {
+        "status": "pass",
+        "evidence": "The narrow live slice scores operator-debugging jobs with trace availability, replay command availability, candidate-drop visibility, repair-action clarity, and raw-SQL avoidance separated in job-level evidence.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.md"
+      },
+      "capabilities": [
+        {
+          "capability": "operator_debug_real_world_job_adapter",
+          "status": "pass",
+          "evidence": "The adapter executes the checked-in operator_debugging_ux jobs through the live service materializer and generated scoring fixtures."
+        },
+        {
+          "capability": "trace_hydration_metadata",
+          "status": "pass",
+          "evidence": "Generated operator_debug records include service trace ids, viewer links, admin trace-bundle URLs, and trace_available=true."
+        },
+        {
+          "capability": "replay_command_metadata",
+          "status": "pass",
+          "evidence": "Generated operator_debug records include admin trace-bundle curl replay commands; no raw SQL path is required."
+        },
+        {
+          "capability": "candidate_drop_visibility",
+          "status": "pass",
+          "evidence": "The operator-debug jobs keep dropped-candidate visibility as explicit job-level evidence instead of relying on direct database inspection."
+        },
+        {
+          "capability": "openmemory_or_claude_mem_ui_runner",
+          "status": "not_encoded",
+          "evidence": "This ELF live slice does not launch OpenMemory or claude-mem UI flows."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "evidence": "The narrow live operator-debug slice scores trace hydration, stage attribution, candidate-drop visibility, selected-but-not-narrated diagnosis, and repair-action clarity through generated ELF live artifacts."
+        }
+      ],
+      "scenarios": [
+        {
+          "scenario_id": "operator_debug_trace_hydration",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "ELF generated trace_available=true, service trace ids, viewer URLs, and admin trace-bundle replay URLs for the operator-debug jobs; qmd has replay rows but no ELF trace hydration surface.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+        },
+        {
+          "scenario_id": "operator_debug_replay_command",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "ELF generated admin trace-bundle replay commands; qmd generated local CLI query replay commands. These are comparable replay-command availability artifacts, not equivalent UI quality claims.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+        },
+        {
+          "scenario_id": "operator_debug_candidate_drop_visibility",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "ELF generated operator_debug candidate-drop visibility from trace and replay-candidate metadata without direct SQL assumptions; qmd keeps only top-k replay rows and lacks intermediate candidate-drop stages.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json"
+        },
+        {
+          "scenario_id": "operator_debug_repair_action_clarity",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "ELF and qmd generated clear repair/replay steps for the narrow operator-debug jobs; OpenMemory and claude-mem UI repair paths remain blocked or not encoded.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+        },
+        {
+          "scenario_id": "operator_debug_selected_but_not_narrated",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "The new selected-but-not-narrated job scores whether selected trace evidence is available for answer-composition repair without direct database inspection.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "fixture_dir",
+          "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+          "status": "real"
+        },
+        {
+          "kind": "command",
+          "ref": "cargo make real-world-job-operator-ux-live-adapters",
+          "status": "pass"
+        },
+        {
+          "kind": "artifact",
+          "ref": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json",
+          "status": "pass"
+        }
+      ],
+      "notes": [
+        "This is a narrow operator-debug live slice, not a full-suite live pass.",
+        "The record does not implement product UI improvements and does not claim broad qmd/OpenMemory/claude-mem superiority."
+      ]
+    },
+    {
+      "adapter_id": "qmd_operator_debug_live",
+      "project": "qmd",
+      "adapter_kind": "docker_cli_operator_debug_real_world_job",
+      "evidence_class": "live_real_world",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "wrong_result",
+      "setup": {
+        "status": "pass",
+        "evidence": "The narrow operator-debug live task clones and installs qmd inside the baseline Docker container when the checkout is absent.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+      },
+      "run": {
+        "status": "wrong_result",
+        "evidence": "qmd materializes operator_debugging_ux adapter_response objects through collection add, update, embed, and query --json, then records local replay-command metadata but no service trace hydration.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+      },
+      "result": {
+        "status": "wrong_result",
+        "evidence": "The narrow live slice gives qmd explicit replay-command evidence, but operator-debug jobs remain wrong_result where trace availability, trace completeness, or candidate-drop stage visibility is required.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.md"
+      },
+      "capabilities": [
+        {
+          "capability": "operator_debug_real_world_job_adapter",
+          "status": "pass",
+          "evidence": "The adapter executes the checked-in operator_debugging_ux jobs through qmd local CLI materialization and generated scoring fixtures."
+        },
+        {
+          "capability": "local_replay_command_metadata",
+          "status": "pass",
+          "evidence": "Generated operator_debug records include qmd query replay commands tied to per-job collections."
+        },
+        {
+          "capability": "trace_hydration_metadata",
+          "status": "wrong_result",
+          "evidence": "Generated qmd operator_debug records have trace_available=false and no ELF viewer/admin trace bundle because qmd exposes local replay rows rather than service trace hydration."
+        },
+        {
+          "capability": "candidate_drop_visibility",
+          "status": "wrong_result",
+          "evidence": "qmd top-k replay output is available, but intermediate candidate-drop stages are not exposed in the generated artifact."
+        },
+        {
+          "capability": "openmemory_or_claude_mem_ui_runner",
+          "status": "not_encoded",
+          "evidence": "This qmd live slice does not launch OpenMemory or claude-mem UI flows."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "evidence": "The narrow qmd operator-debug slice scores local replay commands but remains wrong_result for trace hydration and candidate-drop stage visibility."
+        }
+      ],
+      "scenarios": [
+        {
+          "scenario_id": "operator_debug_trace_hydration",
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "qmd generated replay-command metadata but trace_available=false, so ELF wins only this trace-hydration dimension; this is not a broad qmd loss.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        },
+        {
+          "scenario_id": "operator_debug_replay_command",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "qmd generated local CLI query replay commands for the same operator-debugging scenarios; ELF generated admin trace-bundle curl commands.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+        },
+        {
+          "scenario_id": "operator_debug_candidate_drop_visibility",
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "qmd generated top-k replay output but not intermediate retrieved-but-dropped stage visibility, so candidate-drop diagnosis remains a qmd wrong_result in this narrow slice.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+        },
+        {
+          "scenario_id": "operator_debug_repair_action_clarity",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "qmd generated clear local replay steps for repair investigation, matching ELF on repair-action clarity while differing on trace hydration.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        },
+        {
+          "scenario_id": "operator_debug_selected_but_not_narrated",
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "qmd can replay top-k rows, but the generated artifact does not expose service trace narration stages for the selected-but-not-narrated diagnosis.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "fixture_dir",
+          "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+          "status": "real"
+        },
+        {
+          "kind": "command",
+          "ref": "cargo make real-world-job-operator-ux-live-adapters",
+          "status": "wrong_result"
+        },
+        {
+          "kind": "artifact",
+          "ref": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json",
+          "status": "wrong_result"
+        }
+      ],
+      "notes": [
+        "This is a narrow operator-debug live slice, not a full-suite live pass.",
+        "qmd's replay-command availability remains useful; the wrong_result status is limited to trace hydration and candidate-drop stage visibility."
+      ]
+    },
     {
       "adapter_id": "agentmemory_live_baseline",
       "project": "agentmemory",
diff --git a/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/selected_but_not_narrated.json b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/selected_but_not_narrated.json
new file mode 100644
index 00000000..3f670ac7
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/selected_but_not_narrated.json
@@ -0,0 +1,160 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "operator-debug-selected-not-narrated-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug evidence selected but not narrated",
+  "corpus": {
+    "corpus_id": "operator-debugging-ux-2026-06-11",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "trace-selected-not-narrated",
+        "kind": "trace",
+        "text": "Trace 66666666-6666-4666-8666-666666666666 shows final selection included supersession evidence for the release owner change, but the generated answer narrated only the current owner and omitted the selected historical handoff evidence.",
+        "source_ref": {
+          "schema": "source_ref/v1",
+          "resolver": "real_world_job_fixture/v1",
+          "ref": {
+            "fixture": "operator_debugging_ux",
+            "evidence_id": "trace-selected-not-narrated"
+          }
+        },
+        "created_at": "2026-06-11T02:30:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_operator_ux",
+      "answer": {
+        "content": "The trace selected the supersession evidence, but the answer did not narrate it.",
+        "claims": [
+          {
+            "claim_id": "root_cause",
+            "text": "The trace selected the supersession evidence, but the answer did not narrate it.",
+            "evidence_ids": ["trace-selected-not-narrated"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["trace-selected-not-narrated"],
+        "latency_ms": 2.7,
+        "cost": {
+          "currency": "USD",
+          "amount": 0.0,
+          "input_tokens": 0,
+          "output_tokens": 0
+        },
+        "trace_explainability": {
+          "trace_id": "66666666-6666-4666-8666-666666666666",
+          "failure_stage": "selection.narration",
+          "failure_reason": "The selected evidence was present in the final set, but the answer omitted the historical handoff narration.",
+          "stages": [
+            {
+              "stage_name": "selection.final",
+              "kept_evidence": ["trace-selected-not-narrated"],
+              "dropped_evidence": [],
+              "demoted_evidence": [],
+              "distractor_evidence": [],
+              "notes": "Final selection retained the trace that explains the supersession history."
+            },
+            {
+              "stage_name": "selection.narration",
+              "kept_evidence": ["trace-selected-not-narrated"],
+              "dropped_evidence": [],
+              "demoted_evidence": [],
+              "distractor_evidence": [],
+              "notes": "The narration step did not surface the selected historical handoff evidence."
+            }
+          ]
+        }
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "selected-not-narrated-trace",
+      "ts": "2026-06-11T02:30:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-selected-not-narrated"],
+      "summary": "The trace captured selected evidence that the final answer failed to narrate."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Why did the debug answer miss the release owner handoff even though the trace had the evidence?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence", "state_repair_action"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "root_cause",
+        "text": "The trace selected the supersession evidence, but the answer did not narrate it."
+      }
+    ],
+    "must_not_include": ["The supersession evidence was absent from final selection."],
+    "evidence_links": {
+      "root_cause": ["trace-selected-not-narrated"]
+    },
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "trace-selected-not-narrated",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "final selection included supersession evidence for the release owner change"
+    }
+  ],
+  "negative_traps": [],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {
+        "weight": 0.35,
+        "max_points": 1.0,
+        "criteria": "Identifies that the evidence was selected but not narrated."
+      },
+      "evidence_grounding": {
+        "weight": 0.3,
+        "max_points": 1.0,
+        "criteria": "Cites selected trace evidence."
+      },
+      "workflow_helpfulness": {
+        "weight": 0.2,
+        "max_points": 1.0,
+        "criteria": "Names a narration or answer-composition repair action."
+      },
+      "answer_correctness": {
+        "weight": 0.15,
+        "max_points": 1.0,
+        "criteria": "Does not claim the evidence was absent."
+      }
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": ["unsupported high-confidence claim about a required decision or fact"]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "selected_but_not_narrated",
+    "trace_id": "66666666-6666-4666-8666-666666666666",
+    "viewer_url": "/viewer?trace_id=66666666-6666-4666-8666-666666666666",
+    "admin_trace_bundle_url": "/v2/admin/traces/66666666-6666-4666-8666-666666666666/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+    "root_cause": "The evidence survived final selection, but answer composition failed to narrate the selected supersession context.",
+    "steps_to_root_cause": 3,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "not dropped; selected evidence is visible in final results and narration stage details",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "viewer_panels": ["Selected Final Results", "Stage Details", "Trace"],
+    "cli_steps": ["open trace bundle", "inspect final selected evidence", "inspect narration stage", "repair answer composition"],
+    "trace_evidence": ["trace-selected-not-narrated"],
+    "ux_gaps": []
+  },
+  "tags": ["synthetic", "operator_debugging_ux", "qmd_reference", "no_live_claim"]
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 7f0c74e8..a167d2bd 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -534,6 +534,14 @@ struct OperatorDebugEvidence {
 	dropped_candidate_visibility: String,
 	trace_completeness: String,
 	repair_action_clarity: String,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	trace_available: Option<bool>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	replay_command_available: Option<bool>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	replay_command: Option<String>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	replay_artifact: Option<String>,
 	#[serde(default)]
 	viewer_panels: Vec<String>,
 	#[serde(default)]
@@ -1787,6 +1795,8 @@ fn validate_operator_debug(job: &RealWorldJob, path: &Path) -> Result<()> {
 		debug.admin_trace_bundle_url.as_deref(),
 		"admin_trace_bundle_url",
 	)?;
+	validate_optional_debug_field(path, debug.replay_command.as_deref(), "replay_command")?;
+	validate_optional_debug_field(path, debug.replay_artifact.as_deref(), "replay_artifact")?;
 	validate_non_empty_debug_list(path, &debug.viewer_panels, "viewer_panels")?;
 	validate_non_empty_debug_list(path, &debug.cli_steps, "cli_steps")?;
 	validate_non_empty_debug_list(path, &debug.trace_evidence, "trace_evidence")?;
@@ -4598,16 +4608,18 @@ fn render_markdown_operator_debugging(out: &mut String, report: &RealWorldReport
 		return;
 	}
 
-	out.push_str("| Job | Failure Mode | Trace Evidence | Steps | Raw SQL | Dropped Candidate Visibility | Trace Completeness | Repair Clarity | UX Gaps |\n");
-	out.push_str("| --- | --- | --- | ---: | --- | --- | --- | --- | --- |\n");
+	out.push_str("| Job | Failure Mode | Trace Evidence | Trace Available | Replay Command | Steps | Raw SQL | Dropped Candidate Visibility | Trace Completeness | Repair Clarity | UX Gaps |\n");
+	out.push_str("| --- | --- | --- | --- | --- | ---: | --- | --- | --- | --- | --- |\n");
 
 	for job in jobs {
 		if let Some(debug) = &job.operator_debug {
 			out.push_str(&format!(
-				"| {} | {} | {} | {} | `{}` | {} | `{}` | `{}` | {} |\n",
+				"| {} | {} | {} | `{}` | `{}` | {} | `{}` | {} | `{}` | `{}` | {} |\n",
 				md_cell(job.job_id.as_str()),
 				md_cell(debug.failure_mode.as_str()),
 				debug_trace_cell(debug),
+				debug.trace_available.unwrap_or(debug.trace_id.is_some()),
+				debug.replay_command_available.unwrap_or(debug.replay_command.is_some()),
 				debug.steps_to_root_cause,
 				debug.raw_sql_needed,
 				md_cell(debug.dropped_candidate_visibility.as_str()),
@@ -4632,6 +4644,14 @@ fn render_markdown_operator_debugging(out: &mut String, report: &RealWorldReport
 				"- CLI steps: `{}`\n",
 				md_inline(debug.cli_steps.join(" -> ").as_str())
 			));
+
+			if let Some(command) = &debug.replay_command {
+				out.push_str(&format!("- Replay command: `{}`\n", md_inline(command.as_str())));
+			}
+			if let Some(artifact) = &debug.replay_artifact {
+				out.push_str(&format!("- Replay artifact: `{}`\n", md_inline(artifact.as_str())));
+			}
+
 			out.push_str(&format!(
 				"- Trace evidence: `{}`\n",
 				md_inline(debug.trace_evidence.join(", ").as_str())
diff --git a/apps/elf-eval/src/bin/real_world_live_adapter.rs b/apps/elf-eval/src/bin/real_world_live_adapter.rs
index ac30d229..0e6a621f 100644
--- a/apps/elf-eval/src/bin/real_world_live_adapter.rs
+++ b/apps/elf-eval/src/bin/real_world_live_adapter.rs
@@ -234,6 +234,17 @@ struct MaterializedJobEvidence {
 	failure: Option<String>,
 	#[serde(skip_serializing_if = "Vec::is_empty")]
 	source_mappings: Vec<SourceMappingEvidence>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	operator_debug: Option<OperatorDebugMaterializationEvidence>,
+}
+
+#[derive(Clone, Debug, Serialize)]
+struct OperatorDebugMaterializationEvidence {
+	trace_available: bool,
+	replay_command_available: bool,
+	candidate_drop_visibility: String,
+	repair_action_clarity: String,
+	raw_sql_needed: bool,
 }
 
 #[derive(Debug, Serialize)]
@@ -282,6 +293,7 @@ struct TraceStageOutput {
 struct MaterializedJob {
 	response: AdapterResponseOutput,
 	evidence: MaterializedJobEvidence,
+	operator_debug: Option<Value>,
 }
 
 #[derive(Debug)]
@@ -294,6 +306,8 @@ struct MaterializedJobInput {
 	trace_id: Option<Uuid>,
 	failure: Option<String>,
 	source_mappings: Vec<SourceMappingEvidence>,
+	operator_debug: Option<Value>,
+	operator_debug_evidence: Option<OperatorDebugMaterializationEvidence>,
 }
 
 struct MaterializedOutput<'a> {
@@ -642,6 +656,14 @@ fn materialize_qmd_job(
 	}
 
 	let selected = selected_required_corpus_texts(loaded, &corpus, &evidence_ids);
+	let replay_command = qmd_replay_command(&loaded.job.prompt.content, collection.as_str());
+	let (operator_debug, operator_debug_evidence) = operator_debug_output(
+		AdapterKind::QmdCliRuntime,
+		loaded,
+		None,
+		replay_command,
+		log_path.display().to_string(),
+	);
 
 	Ok(materialized_job(
 		loaded,
@@ -655,6 +677,8 @@ fn materialize_qmd_job(
 			trace_id: None,
 			failure: None,
 			source_mappings: Vec::new(),
+			operator_debug,
+			operator_debug_evidence,
 		},
 	))
 }
@@ -698,6 +722,8 @@ fn lightrag_failure_jobs(
 					trace_id: None,
 					failure: Some(format!("{stage}: {reason}")),
 					source_mappings: Vec::new(),
+					operator_debug: None,
+					operator_debug_evidence: None,
 				},
 			)
 		})
@@ -978,6 +1004,7 @@ fn materialized_job(
 				},
 			},
 		},
+		operator_debug: input.operator_debug,
 		evidence: MaterializedJobEvidence {
 			job_id: loaded.job.job_id.clone(),
 			suite: loaded.job.suite.clone(),
@@ -991,11 +1018,16 @@ fn materialized_job(
 			trace_id: input.trace_id,
 			failure: input.failure,
 			source_mappings: input.source_mappings,
+			operator_debug: input.operator_debug_evidence,
 		},
 	}
 }
 
 fn declared_encoding_job(adapter_id: &str, loaded: &LoadedJob) -> Option<MaterializedJob> {
+	if is_operator_debug_live_adapter(adapter_id, loaded.job.suite.as_str()) {
+		return None;
+	}
+
 	let status = loaded.job.encoding.status?;
 	let reason = loaded.job.encoding.reason.clone().unwrap_or_else(|| {
 		format!("Fixture declares {} for this live adapter job.", status.as_str())
@@ -1010,6 +1042,10 @@ fn declared_encoding_job(adapter_id: &str, loaded: &LoadedJob) -> Option<Materia
 }
 
 fn not_encoded_job(adapter_id: &str, loaded: &LoadedJob) -> Option<MaterializedJob> {
+	if is_operator_debug_live_adapter(adapter_id, loaded.job.suite.as_str()) {
+		return None;
+	}
+
 	not_encoded_reason(loaded.job.suite.as_str()).map(|reason| {
 		materialized_declared_status_job(
 			adapter_id,
@@ -1020,6 +1056,11 @@ fn not_encoded_job(adapter_id: &str, loaded: &LoadedJob) -> Option<MaterializedJ
 	})
 }
 
+fn is_operator_debug_live_adapter(adapter_id: &str, suite: &str) -> bool { // true only when suite AND adapter id both match
+	suite == "operator_debugging_ux"
+		&& matches!(adapter_id, "elf_operator_debug_live" | "qmd_operator_debug_live")
+} // declared_encoding_job and not_encoded_job return None early when this is true
+
 fn not_encoded_reason(suite: &str) -> Option<&'static str> {
 	match suite {
 		"trust_source_of_truth"
@@ -1035,7 +1076,7 @@ fn not_encoded_reason(suite: &str) -> Option<&'static str> {
 			"The live adapter sweep retrieves evidence-linked answers but does not generate derived knowledge pages.",
 		),
 		"operator_debugging_ux" => Some(
-			"The live adapter sweep does not yet hydrate full operator trace/viewer diagnostics for this suite.",
+			"The full live adapter sweep keeps operator trace/viewer diagnostics in a focused operator-debug slice.",
 		),
 		"capture_integration" => Some(
 			"The live adapter sweep does not exercise capture integrations or write-policy redaction boundaries.",
@@ -1102,8 +1143,156 @@ fn materialized_declared_status_job(
 			trace_id: None,
 			failure,
 			source_mappings: Vec::new(),
+			operator_debug: None,
+		},
+		operator_debug: None,
+	}
+}
+
+fn operator_debug_output(
+	adapter_kind: AdapterKind,
+	loaded: &LoadedJob,
+	trace_id: Option<Uuid>, // None marks the trace as unavailable in the output
+	replay_command: String, // stored under "replay_command" and appended to "cli_steps"
+	replay_artifact: String, // stored verbatim under "replay_artifact"
+) -> (Option<Value>, Option<OperatorDebugMaterializationEvidence>) {
+	if loaded.job.suite != "operator_debugging_ux" {
+		return (None, None); // only the operator_debugging_ux suite gets a debug payload
+	}
+
+	let Some(source) = loaded.value.get("operator_debug") else {
+		return (None, None); // loaded value has no "operator_debug" entry to build on
+	};
+	let mut debug = source.clone(); // edit a copy; loaded.value itself is not modified
+	let Some(object) = debug.as_object_mut() else {
+		return (None, None); // the entry exists but is not a JSON object
+	};
+	let trace_available = trace_id.is_some();
+	let replay_command_available = !replay_command.trim().is_empty();
+	let raw_sql_needed = false; // constant: this function never reports a raw-SQL need
+	let repair_action_clarity = if replay_command_available { "clear" } else { "unclear" }; // derived only from replay-command presence
+	let candidate_drop_visibility =
+		operator_debug_candidate_visibility(adapter_kind, object).to_string(); // read before the key is overwritten below
+
+	object.insert("trace_available".to_string(), Value::Bool(trace_available));
+	object.insert("replay_command_available".to_string(), Value::Bool(replay_command_available));
+	object.insert("raw_sql_needed".to_string(), Value::Bool(raw_sql_needed));
+	object.insert(
+		"dropped_candidate_visibility".to_string(),
+		Value::String(candidate_drop_visibility.clone()),
+	);
+	object.insert(
+		"trace_completeness".to_string(),
+		Value::String(operator_debug_trace_completeness(adapter_kind, trace_available).to_string()),
+	);
+	object.insert(
+		"repair_action_clarity".to_string(),
+		Value::String(repair_action_clarity.to_string()),
+	);
+	object.insert("replay_command".to_string(), Value::String(replay_command.clone())); // clone: replay_command is moved into cli_steps later
+	object.insert("replay_artifact".to_string(), Value::String(replay_artifact));
+
+	match adapter_kind {
+		AdapterKind::ElfServiceRuntime =>
+			if let Some(trace_id) = trace_id {
+				let trace_id = trace_id.to_string();
+
+				object.insert("trace_id".to_string(), Value::String(trace_id.clone()));
+				object.insert(
+					"viewer_url".to_string(),
+					Value::String(format!("/viewer?trace_id={trace_id}")),
+				);
+				object.insert(
+					"admin_trace_bundle_url".to_string(),
+					Value::String(format!(
+						"/v2/admin/traces/{trace_id}/bundle?mode=full&stage_items_limit=128&candidates_limit=200"
+					)),
+				);
+			}, // NOTE(review): with trace_id None, trace keys copied from the source are left as-is — confirm intended
+		AdapterKind::QmdCliRuntime => {
+			object.remove("trace_id"); // drop trace-related keys the source copy may carry
+			object.remove("viewer_url");
+			object.remove("admin_trace_bundle_url");
+			object.insert("viewer_panels".to_string(), serde_json::json!(["qmd JSON Replay Rows"])); // replaces any existing panel list
+		},
+		AdapterKind::LightragApiContextExport => {}, // no adapter-specific keys are added or removed
+	}
+
+	let mut cli_steps = string_array_from_object(object, "cli_steps"); // existing string steps, or empty
+
+	push_unique(&mut cli_steps, replay_command); // push_unique is defined elsewhere; presumably skips duplicates — confirm
+
+	object.insert("cli_steps".to_string(), serde_json::json!(cli_steps));
+
+	(
+		Some(debug), // the mutated copy of the source object
+		Some(OperatorDebugMaterializationEvidence {
+			trace_available,
+			replay_command_available,
+			candidate_drop_visibility,
+			repair_action_clarity: repair_action_clarity.to_string(),
+			raw_sql_needed,
+		}),
+	)
+}
+
+fn operator_debug_trace_completeness(
+	adapter_kind: AdapterKind,
+	trace_available: bool, // only consulted for the ELF service runtime arm
+) -> &'static str {
+	match adapter_kind {
+		AdapterKind::ElfServiceRuntime if trace_available => "complete", // ELF run that returned a trace id
+		AdapterKind::ElfServiceRuntime => "missing", // ELF run without a trace id
+		AdapterKind::QmdCliRuntime | AdapterKind::LightragApiContextExport => "not_available", // fixed label regardless of trace_available
+	}
+}
+
+fn operator_debug_candidate_visibility(
+	adapter_kind: AdapterKind,
+	object: &Map<String, Value>, // the operator_debug JSON object being materialized
+) -> &str {
+	match adapter_kind {
+		AdapterKind::ElfServiceRuntime => object
+			.get("dropped_candidate_visibility")
+			.and_then(Value::as_str)
+			.unwrap_or("visible through trace bundle replay candidates"), // fallback when the key is absent or not a string
+		AdapterKind::QmdCliRuntime =>
+			"qmd top-k replay output is available, but intermediate candidate-drop stages are not exposed",
+		AdapterKind::LightragApiContextExport => "not encoded for this adapter", // fixed text; object is not read
+	}
+}
+
+fn string_array_from_object(object: &Map<String, Value>, key: &str) -> Vec<String> { // owned copies of the string items in object[key]
+	object
+		.get(key)
+		.and_then(Value::as_array)
+		.map(|items| items.iter().filter_map(Value::as_str).map(ToString::to_string).collect())
+		.unwrap_or_default() // missing key or non-array value yields an empty Vec
+} // non-string array items are skipped by the filter_map above
+
+fn elf_replay_command(trace_id: Uuid, project_id: &str) -> String { // curl command requesting the full admin trace bundle for trace_id
+	format!(
+		"curl -fsS {} -H {} -H {} -H {}",
+		shell_quote(format!(
+			"http://127.0.0.1:51891/v2/admin/traces/{trace_id}/bundle?mode=full&stage_items_limit=128&candidates_limit=200"
+		)
+		.as_str()), // NOTE(review): host and port are literals here — confirm they match the live runtime
+		shell_quote("X-ELF-Tenant-Id: elf-live-real-world"), // NOTE(review): literal value; confirm it equals TENANT_ID
+		shell_quote(format!("X-ELF-Project-Id: {project_id}").as_str()),
+		shell_quote("X-ELF-Agent-Id: elf-live-real-world-agent") // NOTE(review): literal value; confirm it equals AGENT_ID
+	)
+}
+
+fn qmd_replay_command(query: &str, collection: &str) -> String { // qmd CLI query command with both arguments shell-quoted
+	format!(
+		"npx tsx src/cli/qmd.ts query {} -c {} --json --no-rerank --min-score 0 -n 5",
+		shell_quote(format!("lex: {query}\nvec: {query}").as_str()), // same text on a "lex:" and a "vec:" line, newline-separated
+		shell_quote(collection)
+	)
+}
+
+fn shell_quote(value: &str) -> String { // wraps value in single quotes for use as one shell word
+	format!("'{}'", value.replace('\'', "'\\''")) // each embedded ' becomes '\'' (close quote, escaped quote, reopen)
+}
 
 fn evidence_linked_claims(loaded: &LoadedJob, evidence_ids: &[String]) -> Vec<Value> {
@@ -1220,6 +1409,8 @@ fn failure_jobs(
 					trace_id: None,
 					failure: Some(format!("{stage}: {reason}")),
 					source_mappings: Vec::new(),
+					operator_debug: None,
+					operator_debug_evidence: None,
 				},
 			)
 		})
@@ -1247,6 +1438,10 @@ fn write_materialized_output(output: MaterializedOutput<'_>) -> color_eyre::Resu
 
 		value["corpus"]["adapter_response"] = Value::Object(adapter_response);
 
+		if let Some(operator_debug) = &materialized.operator_debug {
+			value["operator_debug"] = operator_debug.clone();
+		}
+
 		if matches!(
 			materialized.evidence.status,
 			MaterializationStatus::Blocked
@@ -1305,6 +1500,7 @@ fn clone_job_evidence(evidence: &MaterializedJobEvidence) -> MaterializedJobEvid
 		trace_id: evidence.trace_id,
 		failure: evidence.failure.clone(),
 		source_mappings: evidence.source_mappings.clone(),
+		operator_debug: evidence.operator_debug.clone(),
 	}
 }
 
@@ -1827,6 +2023,8 @@ async fn materialize_lightrag_job(
 			trace_id: None,
 			failure: None,
 			source_mappings,
+			operator_debug: None,
+			operator_debug_evidence: None,
 		},
 	))
 }
@@ -2045,7 +2243,75 @@ async fn materialize_elf_job(
 	let corpus = corpus_texts(loaded)?;
 	let project_id = project_id_for_job(&loaded.job.job_id);
 
-	for item in &corpus {
+	ingest_elf_corpus(service, loaded, adapter_id, project_id.as_str(), &corpus).await?;
+	run_worker(runtime).await?;
+
+	let started_at = Instant::now();
+	let response = service
+		.search_raw(SearchRequest {
+			tenant_id: TENANT_ID.to_string(),
+			project_id: project_id.clone(),
+			agent_id: AGENT_ID.to_string(),
+			token_id: None,
+			payload_level: PayloadLevel::L2,
+			read_profile: "private_only".to_string(),
+			query: loaded.job.prompt.content.clone(),
+			top_k: Some(5),
+			candidate_k: Some(20),
+			filter: None,
+			record_hits: Some(false),
+			ranking: None,
+		})
+		.await
+		.map_err(|err| eyre::eyre!("ELF search_raw failed for {}: {err}", loaded.job.job_id))?;
+	let latency_ms = started_at.elapsed().as_secs_f64() * 1_000.0;
+	let mut evidence_ids = Vec::new();
+
+	for item in &response.items {
+		if let Some(evidence_id) = item.source_ref.get("evidence_id").and_then(Value::as_str) {
+			push_unique(&mut evidence_ids, evidence_id.to_string());
+		}
+	}
+
+	let selected = selected_required_corpus_texts(loaded, &corpus, &evidence_ids);
+	let replay_command = elf_replay_command(response.trace_id, project_id.as_str());
+	let (operator_debug, operator_debug_evidence) = operator_debug_output(
+		AdapterKind::ElfServiceRuntime,
+		loaded,
+		Some(response.trace_id),
+		replay_command,
+		format!(
+			"/v2/admin/traces/{}/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+			response.trace_id
+		),
+	);
+
+	Ok(materialized_job(
+		loaded,
+		adapter_id,
+		MaterializedJobInput {
+			content: selected.content,
+			evidence_ids: selected.evidence_ids,
+			latency_ms,
+			indexing_latency_ms: None,
+			returned_count: response.items.len(),
+			trace_id: Some(response.trace_id),
+			failure: None,
+			source_mappings: Vec::new(),
+			operator_debug,
+			operator_debug_evidence,
+		},
+	))
+}
+
+async fn ingest_elf_corpus(
+	service: &ElfService,
+	loaded: &LoadedJob,
+	adapter_id: &str,
+	project_id: &str,
+	corpus: &[CorpusText],
+) -> color_eyre::Result<()> {
+	for item in corpus {
 		let chunks = note_text_chunks(item.text.as_str());
 		let chunk_count = chunks.len();
 
@@ -2058,7 +2324,7 @@ async fn materialize_elf_job(
 			let response = service
 				.add_note(AddNoteRequest {
 					tenant_id: TENANT_ID.to_string(),
-					project_id: project_id.clone(),
+					project_id: project_id.to_string(),
 					agent_id: AGENT_ID.to_string(),
 					scope: SCOPE.to_string(),
 					notes: vec![AddNoteInput {
@@ -2096,51 +2362,7 @@ async fn materialize_elf_job(
 		}
 	}
 
-	run_worker(runtime).await?;
-
-	let started_at = Instant::now();
-	let response = service
-		.search_raw(SearchRequest {
-			tenant_id: TENANT_ID.to_string(),
-			project_id,
-			agent_id: AGENT_ID.to_string(),
-			token_id: None,
-			payload_level: PayloadLevel::L2,
-			read_profile: "private_only".to_string(),
-			query: loaded.job.prompt.content.clone(),
-			top_k: Some(5),
-			candidate_k: Some(20),
-			filter: None,
-			record_hits: Some(false),
-			ranking: None,
-		})
-		.await
-		.map_err(|err| eyre::eyre!("ELF search_raw failed for {}: {err}", loaded.job.job_id))?;
-	let latency_ms = started_at.elapsed().as_secs_f64() * 1_000.0;
-	let mut evidence_ids = Vec::new();
-
-	for item in &response.items {
-		if let Some(evidence_id) = item.source_ref.get("evidence_id").and_then(Value::as_str) {
-			push_unique(&mut evidence_ids, evidence_id.to_string());
-		}
-	}
-
-	let selected = selected_required_corpus_texts(loaded, &corpus, &evidence_ids);
-
-	Ok(materialized_job(
-		loaded,
-		adapter_id,
-		MaterializedJobInput {
-			content: selected.content,
-			evidence_ids: selected.evidence_ids,
-			latency_ms,
-			indexing_latency_ms: None,
-			returned_count: response.items.len(),
-			trace_id: Some(response.trace_id),
-			failure: None,
-			source_mappings: Vec::new(),
-		},
-	))
+	Ok(())
 }
 
 async fn build_service(runtime: &BaselineRuntime) -> color_eyre::Result<ElfService> {
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index fe6da046..a8c7e927 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -255,11 +255,11 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/external_adapters/summary/adapter_count").and_then(Value::as_u64),
-		Some(21)
+		Some(23)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/live_real_world_count").and_then(Value::as_u64),
-		Some(3)
+		Some(5)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/research_gate_count").and_then(Value::as_u64),
@@ -420,7 +420,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/adapter_count").and_then(Value::as_u64),
-		Some(21)
+		Some(23)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/external_project_count").and_then(Value::as_u64),
@@ -438,7 +438,7 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/live_real_world_count").and_then(Value::as_u64),
-		Some(3)
+		Some(5)
 	);
 	assert_eq!(
 		report.pointer("/external_adapters/summary/research_gate_count").and_then(Value::as_u64),
@@ -448,13 +448,13 @@ fn assert_external_adapter_manifest_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/pass")
 			.and_then(Value::as_u64),
-		Some(3)
+		Some(4)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/overall_status_counts/wrong_result")
 			.and_then(Value::as_u64),
-		Some(5)
+		Some(6)
 	);
 	assert_eq!(
 		report
@@ -543,7 +543,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/wrong_result")
 			.and_then(Value::as_u64),
-		Some(1)
+		Some(4)
 	);
 	assert_eq!(
 		report
@@ -555,7 +555,7 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_status_counts/pass")
 			.and_then(Value::as_u64),
-		Some(9)
+		Some(16)
 	);
 	assert_eq!(
 		report
@@ -567,13 +567,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/wins")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(8)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_position_counts/ties")
 			.and_then(Value::as_u64),
-		Some(4)
+		Some(8)
 	);
 	assert_eq!(
 		report
@@ -591,13 +591,13 @@ fn assert_external_adapter_manifest_scenario_summary(report: &Value) {
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/win")
 			.and_then(Value::as_u64),
-		Some(2)
+		Some(8)
 	);
 	assert_eq!(
 		report
 			.pointer("/external_adapters/summary/scenario_outcome_counts/tie")
 			.and_then(Value::as_u64),
-		Some(4)
+		Some(8)
 	);
 	assert_eq!(
 		report
@@ -629,8 +629,10 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 	let adapters = array_at(report, "/external_adapters/adapters")?;
 	let elf = find_by_field(adapters, "/adapter_id", "elf_real_world_memory_fixture")?;
 	let elf_live = find_by_field(adapters, "/adapter_id", "elf_live_real_world")?;
+	let elf_operator_debug = find_by_field(adapters, "/adapter_id", "elf_operator_debug_live")?;
 	let qmd = find_by_field(adapters, "/adapter_id", "qmd_live_baseline")?;
 	let qmd_live = find_by_field(adapters, "/adapter_id", "qmd_live_real_world")?;
+	let qmd_operator_debug = find_by_field(adapters, "/adapter_id", "qmd_operator_debug_live")?;
 	let agentmemory = find_by_field(adapters, "/adapter_id", "agentmemory_live_baseline")?;
 	let mem0 = find_by_field(adapters, "/adapter_id", "mem0_openmemory_live_baseline")?;
 	let memsearch = find_by_field(adapters, "/adapter_id", "memsearch_live_baseline")?;
@@ -653,6 +655,7 @@ fn assert_external_adapter_manifest_records(report: &Value) -> Result<()> {
 	assert_eq!(elf_live.pointer("/overall_status").and_then(Value::as_str), Some("wrong_result"));
 
 	assert_live_sweep_record(elf_live, "blocked")?;
+	assert_operator_debug_live_adapter_records(elf_operator_debug, qmd_operator_debug)?;
 
 	assert_eq!(qmd.pointer("/overall_status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(qmd.pointer("/suites/0/status").and_then(Value::as_str), Some("not_encoded"));
@@ -758,6 +761,111 @@ fn assert_qmd_live_baseline_record(adapter: &Value) {
 	}));
 }
 
+fn assert_operator_debug_live_adapter_records(elf: &Value, qmd: &Value) -> Result<()> { // checks the two operator-debug adapter records
+	assert_eq!(elf.pointer("/evidence_class").and_then(Value::as_str), Some("live_real_world"));
+	assert_eq!(elf.pointer("/overall_status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		elf.pointer("/setup/command").and_then(Value::as_str),
+		Some("cargo make real-world-job-operator-ux-live-adapters")
+	);
+	assert_eq!(
+		elf.pointer("/suites/0/suite_id").and_then(Value::as_str),
+		Some("operator_debugging_ux")
+	);
+	assert_eq!(elf.pointer("/suites/0/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		elf.pointer("/capabilities/1/capability").and_then(Value::as_str),
+		Some("trace_hydration_metadata")
+	); // capabilities are addressed by array index, so their order is part of the contract
+	assert_eq!(elf.pointer("/capabilities/1/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		elf.pointer("/capabilities/2/capability").and_then(Value::as_str),
+		Some("replay_command_metadata")
+	);
+	assert_eq!(elf.pointer("/capabilities/2/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		elf.pointer("/capabilities/3/capability").and_then(Value::as_str),
+		Some("candidate_drop_visibility")
+	);
+	assert_eq!(elf.pointer("/capabilities/3/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		elf.pointer("/capabilities/4/capability").and_then(Value::as_str),
+		Some("openmemory_or_claude_mem_ui_runner")
+	);
+	assert_eq!(elf.pointer("/capabilities/4/status").and_then(Value::as_str), Some("not_encoded")); // UI runner stays not_encoded
+
+	let elf_scenarios = array_at(elf, "/scenarios")?;
+	let elf_trace = find_by_field(elf_scenarios, "/scenario_id", "operator_debug_trace_hydration")?;
+	let elf_replay = find_by_field(elf_scenarios, "/scenario_id", "operator_debug_replay_command")?;
+	let elf_candidate =
+		find_by_field(elf_scenarios, "/scenario_id", "operator_debug_candidate_drop_visibility")?;
+	let elf_repair =
+		find_by_field(elf_scenarios, "/scenario_id", "operator_debug_repair_action_clarity")?;
+	let elf_selected =
+		find_by_field(elf_scenarios, "/scenario_id", "operator_debug_selected_but_not_narrated")?;
+
+	assert_eq!(elf_scenarios.len(), 5); // exactly the five scenarios looked up above
+	assert_eq!(elf_trace.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(elf_trace.pointer("/comparison_outcome").and_then(Value::as_str), Some("win"));
+	assert_eq!(elf_replay.pointer("/comparison_outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(elf_candidate.pointer("/comparison_outcome").and_then(Value::as_str), Some("win"));
+	assert_eq!(elf_repair.pointer("/comparison_outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(elf_selected.pointer("/comparison_outcome").and_then(Value::as_str), Some("win"));
+	assert_eq!(qmd.pointer("/evidence_class").and_then(Value::as_str), Some("live_real_world")); // qmd record checks start here
+	assert_eq!(qmd.pointer("/overall_status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(
+		qmd.pointer("/suites/0/suite_id").and_then(Value::as_str),
+		Some("operator_debugging_ux")
+	);
+	assert_eq!(qmd.pointer("/suites/0/status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(
+		qmd.pointer("/capabilities/1/capability").and_then(Value::as_str),
+		Some("local_replay_command_metadata")
+	);
+	assert_eq!(qmd.pointer("/capabilities/1/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		qmd.pointer("/capabilities/2/capability").and_then(Value::as_str),
+		Some("trace_hydration_metadata")
+	);
+	assert_eq!(qmd.pointer("/capabilities/2/status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(
+		qmd.pointer("/capabilities/3/capability").and_then(Value::as_str),
+		Some("candidate_drop_visibility")
+	);
+	assert_eq!(qmd.pointer("/capabilities/3/status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(qmd.pointer("/capabilities/4/status").and_then(Value::as_str), Some("not_encoded")); // only the status of index 4 is checked for qmd
+
+	let qmd_scenarios = array_at(qmd, "/scenarios")?;
+	let qmd_trace = find_by_field(qmd_scenarios, "/scenario_id", "operator_debug_trace_hydration")?;
+	let qmd_replay = find_by_field(qmd_scenarios, "/scenario_id", "operator_debug_replay_command")?;
+	let qmd_candidate =
+		find_by_field(qmd_scenarios, "/scenario_id", "operator_debug_candidate_drop_visibility")?;
+	let qmd_repair =
+		find_by_field(qmd_scenarios, "/scenario_id", "operator_debug_repair_action_clarity")?;
+	let qmd_selected =
+		find_by_field(qmd_scenarios, "/scenario_id", "operator_debug_selected_but_not_narrated")?;
+
+	assert_eq!(qmd_scenarios.len(), 5); // same five scenario ids as the ELF record
+	assert_eq!(qmd_trace.pointer("/status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(qmd_trace.pointer("/comparison_outcome").and_then(Value::as_str), Some("win")); // NOTE(review): "win" beside a wrong_result status — presumably scored from ELF's side; confirm
+	assert_eq!(qmd_replay.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(qmd_replay.pointer("/comparison_outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(qmd_candidate.pointer("/status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(qmd_candidate.pointer("/comparison_outcome").and_then(Value::as_str), Some("win"));
+	assert_eq!(qmd_repair.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(qmd_repair.pointer("/comparison_outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(qmd_selected.pointer("/status").and_then(Value::as_str), Some("wrong_result"));
+	assert_eq!(qmd_selected.pointer("/comparison_outcome").and_then(Value::as_str), Some("win"));
+	assert!(array_at(elf, "/notes")?.iter().any(|note| {
+		note.as_str().is_some_and(|text| text.contains("narrow operator-debug live slice"))
+	})); // both records must carry a note with this scoping phrase
+	assert!(array_at(qmd, "/notes")?.iter().any(|note| {
+		note.as_str().is_some_and(|text| text.contains("narrow operator-debug live slice"))
+	}));
+
+	Ok(())
+}
+
 fn assert_openviking_deep_profile_gate(adapter: &Value) {
 	let trajectory_evidence = adapter.pointer("/capabilities/1/evidence").and_then(Value::as_str);
 
@@ -1130,6 +1238,40 @@ fn openmemory_ui_export_probe_has_dedicated_docker_task() -> Result<()> {
 	Ok(())
 }
 
+#[test]
+fn operator_debug_live_adapter_task_is_docker_scoped() -> Result<()> { // substring checks over workspace files; nothing is executed
+	let workspace = workspace_root()?;
+	let makefile = fs::read_to_string(workspace.join("Makefile.toml"))?;
+	let script = fs::read_to_string(
+		workspace.join("scripts").join("real-world-operator-debug-live-adapters.sh"),
+	)?;
+	let live_adapter =
+		fs::read_to_string(workspace.join("apps/elf-eval/src/bin/real_world_live_adapter.rs"))?;
+	let benchmark =
+		fs::read_to_string(workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark.rs"))?;
+
+	assert!(makefile.contains("[tasks.real-world-job-operator-ux-live-adapters]")); // the cargo-make task must exist
+	assert!(makefile.contains("docker compose -f docker-compose.baseline.yml run --build --rm")); // NOTE(review): matches anywhere in Makefile.toml, not only inside this task
+	assert!(makefile.contains("scripts/real-world-operator-debug-live-adapters.sh"));
+	assert!(script.contains("apps/elf-eval/fixtures/real_world_job/operator_debugging_ux"));
+	assert!(script.contains("elf_operator_debug_live"));
+	assert!(script.contains("qmd_operator_debug_live"));
+	assert!(script.contains("elf.real_world_operator_debug_live_adapter_sweep/v1"));
+	assert!(script.contains("trace_available"));
+	assert!(script.contains("replay_command_available"));
+	assert!(live_adapter.contains("fn operator_debug_output(")); // the adapter source must define the three helpers by name
+	assert!(live_adapter.contains("fn qmd_replay_command("));
+	assert!(live_adapter.contains("fn elf_replay_command("));
+	assert!(
+		!live_adapter
+			.contains("does not yet hydrate full operator trace/viewer diagnostics for this suite")
+	); // the earlier not-encoded wording must no longer appear in the adapter source
+	assert!(benchmark.contains("Replay command:"));
+	assert!(benchmark.contains("replay_command_available"));
+
+	Ok(())
+}
+
 fn assert_live_sweep_record(adapter: &Value, production_ops_status: &str) -> Result<()> {
 	let suites = array_at(adapter, "/suites")?;
 	let capabilities = array_at(adapter, "/capabilities")?;
@@ -1187,24 +1329,25 @@ fn runner_discovers_nested_fixture_layout() -> Result<()> {
 fn operator_debug_fixture_reports_trace_links_and_failure_details() -> Result<()> {
 	let report = run_json_report_from(operator_debug_fixture_dir())?;
 
-	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(5));
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(6));
 	assert_eq!(
 		report.pointer("/summary/operator_debug_job_count").and_then(Value::as_u64),
-		Some(5)
+		Some(6)
 	);
 	assert_eq!(report.pointer("/summary/raw_sql_needed_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/trace_incomplete_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/operator_ux_gap_count").and_then(Value::as_u64), Some(0));
-	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(5));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(6));
 	assert_eq!(report.pointer("/summary/unsupported_claim").and_then(Value::as_u64), Some(0));
 	assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0));
 	assert_eq!(
 		report.pointer("/summary/trace_explainability_count").and_then(Value::as_u64),
-		Some(1)
+		Some(2)
 	);
 
 	let jobs = array_at(&report, "/jobs")?;
 	let dropped = find_by_field(jobs, "/job_id", "operator-debug-dropped-evidence-001")?;
+	let selected = find_by_field(jobs, "/job_id", "operator-debug-selected-not-narrated-001")?;
 
 	assert_eq!(dropped.pointer("/status").and_then(Value::as_str), Some("pass"));
 	assert_eq!(
@@ -1234,6 +1377,15 @@ fn operator_debug_fixture_reports_trace_links_and_failure_details() -> Result<()
 		"trace-dropped-decoy"
 	)?);
 	assert!(array_contains_str(dropped, "/produced_evidence", "trace-dropped-expected")?);
+	assert_eq!(selected.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(
+		selected.pointer("/trace_explainability/failure_stage").and_then(Value::as_str),
+		Some("selection.narration")
+	);
+	assert_eq!(
+		selected.pointer("/operator_debug/failure_mode").and_then(Value::as_str),
+		Some("selected_but_not_narrated")
+	);
 
 	Ok(())
 }
@@ -1639,6 +1791,8 @@ fn assert_trace_replay_diagnostics_json(report: &Value) -> Result<()> {
 		report.pointer("/summary/outcome_counts/not_tested").and_then(Value::as_u64),
 		Some(4)
 	);
+	assert_eq!(report.pointer("/summary/outcome_counts/win").and_then(Value::as_u64), Some(4));
+	assert_eq!(report.pointer("/summary/outcome_counts/tie").and_then(Value::as_u64), Some(5));
 	assert_eq!(report.pointer("/summary/outcome_counts/non_goal").and_then(Value::as_u64), Some(1));
 
 	let scenarios = array_at(report, "/scenario_outcomes")?;
@@ -1647,6 +1801,16 @@ fn assert_trace_replay_diagnostics_json(report: &Value) -> Result<()> {
 	let replay = find_by_field(scenarios, "/scenario_id", "replay_command_locality")?;
 	let trace_surface =
 		find_by_field(scenarios, "/scenario_id", "trace_admin_replay_surface_availability")?;
+	let operator_trace =
+		find_by_field(scenarios, "/scenario_id", "operator_debug_trace_hydration")?;
+	let operator_replay =
+		find_by_field(scenarios, "/scenario_id", "operator_debug_replay_command_availability")?;
+	let operator_candidate =
+		find_by_field(scenarios, "/scenario_id", "operator_debug_candidate_drop_visibility")?;
+	let operator_repair =
+		find_by_field(scenarios, "/scenario_id", "operator_debug_repair_action_clarity")?;
+	let operator_selected =
+		find_by_field(scenarios, "/scenario_id", "operator_debug_selected_but_not_narrated")?;
 	let expansion = find_by_field(scenarios, "/scenario_id", "query_expansion_attribution")?;
 	let dense_sparse =
 		find_by_field(scenarios, "/scenario_id", "dense_sparse_channel_attribution")?;
@@ -1658,11 +1822,31 @@ fn assert_trace_replay_diagnostics_json(report: &Value) -> Result<()> {
 	let tombstone =
 		find_by_field(scenarios, "/scenario_id", "evidence_absent_tombstone_diagnostics")?;
 
-	assert_eq!(scenarios.len(), 11);
+	assert_eq!(scenarios.len(), 16);
 	assert_eq!(retrieval.pointer("/outcome").and_then(Value::as_str), Some("tie"));
 	assert_eq!(top10.pointer("/outcome").and_then(Value::as_str), Some("loss"));
 	assert_eq!(replay.pointer("/outcome").and_then(Value::as_str), Some("loss"));
 	assert_eq!(trace_surface.pointer("/outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(
+		operator_trace.pointer("/evidence_class").and_then(Value::as_str),
+		Some("live_real_world")
+	);
+	assert_eq!(operator_trace.pointer("/result_type").and_then(Value::as_str), Some("pass"));
+	assert_eq!(operator_trace.pointer("/outcome").and_then(Value::as_str), Some("win"));
+	assert_eq!(operator_replay.pointer("/outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(operator_candidate.pointer("/outcome").and_then(Value::as_str), Some("win"));
+	assert!(array_contains_str(
+		operator_candidate,
+		"/typed_non_pass_states",
+		"retrieved_but_dropped"
+	)?);
+	assert_eq!(operator_repair.pointer("/outcome").and_then(Value::as_str), Some("tie"));
+	assert_eq!(operator_selected.pointer("/outcome").and_then(Value::as_str), Some("win"));
+	assert!(array_contains_str(
+		operator_selected,
+		"/typed_non_pass_states",
+		"selected_but_not_narrated"
+	)?);
 	assert_eq!(expansion.pointer("/outcome").and_then(Value::as_str), Some("not_tested"));
 	assert_eq!(dense_sparse.pointer("/outcome").and_then(Value::as_str), Some("not_tested"));
 	assert_eq!(fusion.pointer("/outcome").and_then(Value::as_str), Some("not_tested"));
@@ -1684,6 +1868,11 @@ fn assert_trace_replay_diagnostics_json(report: &Value) -> Result<()> {
 		"/claim_boundaries",
 		"qmd currently wins the default local-debug artifact surface: top-10 rows plus short CLI replay."
 	)?);
+	assert!(array_contains_str(
+		report,
+		"/claim_boundaries",
+		"ELF narrowly wins the live operator-debug trace hydration and candidate-drop visibility slice against qmd; qmd still ties replay-command and repair-action clarity."
+	)?);
 	assert!(array_contains_str(
 		report,
 		"/claim_boundaries",
@@ -1697,11 +1886,22 @@ fn assert_trace_replay_diagnostics_markdown(markdown: &str) {
 	assert!(markdown.contains("Retrieval correctness is still tied"));
 	assert!(markdown.contains("| Default top-10 candidate artifact |"));
 	assert!(markdown.contains("| Replay command locality |"));
+	assert!(
+		markdown
+			.contains("| Operator-debug trace hydration | `live_real_world` | `pass` | `win` |")
+	);
+	assert!(markdown.contains(
+		"| Operator-debug replay command availability | `live_real_world` | `pass` | `tie` |"
+	));
+	assert!(markdown.contains(
+		"| Operator-debug candidate-drop visibility | `live_real_world` | `pass` | `win` |"
+	));
 	assert!(markdown.contains("| Rerank attribution | `live_baseline_only` | `non_goal` |"));
 	assert!(markdown.contains("| Candidate-drop diagnostics | `research_gate` | `not_encoded` |"));
-	assert!(markdown.contains("`retrieved_but_dropped` | Defined but `not_tested`"));
+	assert!(markdown.contains("`retrieved_but_dropped` | Defined globally as `not_tested`"));
 	assert!(markdown.contains("npx tsx src/cli/qmd.ts query"));
 	assert!(markdown.contains("cargo run -p elf-eval -- --config-a"));
+	assert!(markdown.contains("cargo make real-world-job-operator-ux-live-adapters"));
 	assert!(markdown.contains("Do not claim qmd beats ELF as a memory system overall"));
 	assert!(markdown.contains("Do not score rerank superiority from a qmd `--no-rerank` run"));
 }
@@ -1712,6 +1912,11 @@ fn assert_trace_replay_adoption_json(adoption: &Value) -> Result<()> {
 		"/scenario_id",
 		"local_debug_replay_ux",
 	)?;
+	let operator_debug = find_by_field(
+		array_at(adoption, "/scenario_outcomes")?,
+		"/scenario_id",
+		"operator_debugging_viewer_ux",
+	)?;
 
 	assert_eq!(local_debug.pointer("/outcome").and_then(Value::as_str), Some("loss"));
 	assert!(
@@ -1730,6 +1935,23 @@ fn assert_trace_replay_adoption_json(adoption: &Value) -> Result<()> {
 		"/claim_boundaries/not_allowed",
 		"Do not claim qmd's trace/replay artifact win is a broad qmd-over-ELF memory-system or retrieval-quality win."
 	)?);
+	assert_eq!(operator_debug.pointer("/outcome").and_then(Value::as_str), Some("win"));
+	assert!(
+		operator_debug
+			.pointer("/measured_claim")
+			.and_then(Value::as_str)
+			.is_some_and(|claim| claim.contains("narrow live operator-debug win over qmd"))
+	);
+	assert!(array_contains_str(
+		operator_debug,
+		"/command_artifacts",
+		"tmp/real-world-job/operator-ux-live-adapters/summary.json"
+	)?);
+	assert!(array_contains_str(
+		adoption,
+		"/claim_boundaries/not_allowed",
+		"Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow ELF/qmd operator-debug slice."
+	)?);
 
 	Ok(())
 }
@@ -1739,6 +1961,12 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	let qmd = find_by_field(projects, "/project", "qmd")?;
 	let mem0 = find_by_field(projects, "/project", "mem0/OpenMemory")?;
 	let openviking = find_by_field(projects, "/project", "OpenViking")?;
+	let scenarios = array_at(matrix, "/scenario_matrix")?;
+	let retrieval_debug = find_by_field(scenarios, "/scenario_id", "retrieval_debug")?;
+	let operator_debug = find_by_field(scenarios, "/scenario_id", "operator_debugging")?;
+	let context_trajectory = find_by_field(scenarios, "/scenario_id", "context_trajectory")?;
+
+	assert_competitor_strength_matrix_manifest_counts(matrix);
 
 	assert_eq!(
 		qmd.pointer("/current_evidence_class").and_then(Value::as_str),
@@ -1750,7 +1978,8 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 		Some("not_encoded")
 	);
 	assert!(qmd.pointer("/benchmark_before_claim").and_then(Value::as_str).is_some_and(|claim| {
-		claim.contains("before claiming ELF wins, ties, or loses on retrieval debugging")
+		claim.contains("Keep qmd deep retrieval/debug profiling separate")
+			&& claim.contains("narrow operator-debug live slice")
 	}));
 	assert!(
 		qmd.pointer("/borrow_if_stronger")
@@ -1795,11 +2024,6 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 			.and_then(Value::as_str)
 			.is_some_and(|claim| claim.contains("evidence-bearing same-corpus output pass"))
 	);
-
-	let scenarios = array_at(matrix, "/scenario_matrix")?;
-	let retrieval_debug = find_by_field(scenarios, "/scenario_id", "retrieval_debug")?;
-	let context_trajectory = find_by_field(scenarios, "/scenario_id", "context_trajectory")?;
-
 	assert!(
 		retrieval_debug
 			.pointer("/current_state")
@@ -1809,6 +2033,24 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	assert!(retrieval_debug.pointer("/current_state").and_then(Value::as_str).is_some_and(
 		|state| state.contains("qmd remains stronger on local debug ergonomics not fully scored")
 	));
+	assert!(
+		operator_debug
+			.pointer("/current_elf_evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|claim| claim.contains("narrow live_real_world operator-debug slice"))
+	);
+	assert!(
+		operator_debug
+			.pointer("/current_competitor_evidence")
+			.and_then(Value::as_str)
+			.is_some_and(|claim| claim.contains("qmd now has a narrow live_real_world"))
+	);
+	assert!(
+		operator_debug
+			.pointer("/next_measurement")
+			.and_then(Value::as_str)
+			.is_some_and(|claim| claim.contains("OpenMemory and claude-mem UI/export"))
+	);
 	assert!(
 		context_trajectory
 			.pointer("/current_state")
@@ -1825,6 +2067,29 @@ fn assert_competitor_strength_matrix_json(matrix: &Value) -> Result<()> {
 	Ok(())
 }
 
+/// Checks the four `/manifest_summary` counters of the competitor-strength matrix
+/// JSON against their pinned values.
+///
+/// Pinned values: 23 adapter records, a `live_real_world` evidence-class count of 5,
+/// and overall status counts of 4 `pass` and 6 `wrong_result`.
+///
+/// Panics on the first counter that is absent, is not readable as `u64`, or differs
+/// from its pinned value.
+fn assert_competitor_strength_matrix_manifest_counts(matrix: &Value) {
+	// `None` when the pointer does not resolve or the value is not a `u64`.
+	let count_at = |pointer: &str| matrix.pointer(pointer).and_then(Value::as_u64);
+
+	let adapter_records = count_at("/manifest_summary/adapter_records");
+	let live_real_world = count_at("/manifest_summary/evidence_class_counts/live_real_world");
+	let passing = count_at("/manifest_summary/overall_status_counts/pass");
+	let wrong_result = count_at("/manifest_summary/overall_status_counts/wrong_result");
+
+	assert_eq!(adapter_records, Some(23));
+	assert_eq!(live_real_world, Some(5));
+	assert_eq!(passing, Some(4));
+	assert_eq!(wrong_result, Some(6));
+}
+
 fn assert_strength_profile_summary(report: &Value) {
 	assert_eq!(
 		report.pointer("/schema").and_then(Value::as_str),
@@ -2232,9 +2497,9 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("xy844-current-worktree"));
 	assert!(markdown.contains("Existing live-baseline reports remain valid"));
 	assert!(markdown.contains("### Adapter Scenario Judgments"));
-	assert!(markdown.contains("ELF scenario positions: `wins=2, ties=4, loses=1, untested=11`"));
+	assert!(markdown.contains("ELF scenario positions: `wins=8, ties=8, loses=1, untested=11`"));
 	assert!(markdown.contains(
-		"Scenario comparison outcomes: `win=2, tie=4, loss=1, not_tested=8, blocked=1, non_goal=2`"
+		"Scenario comparison outcomes: `win=8, tie=8, loss=1, not_tested=8, blocked=1, non_goal=2`"
 	));
 	assert!(markdown.contains("| `claude_mem_live_baseline` | `same_corpus_retrieval`"));
 	assert!(markdown.contains("| `memsearch_live_baseline` | `ttl_expiry_lifecycle`"));
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
index ec2ea8f2..120c6b3d 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-adoption-report.md
@@ -43,7 +43,9 @@ The remaining caveats are material:
   is measured separately and is an ELF loss on the current correction history
   scenario. The XY-923 follow-up also scores qmd's immediate top-10/replay artifact
   ergonomics as stronger than ELF's default stress report, while expansion, fusion,
-  rerank, and candidate-drop diagnosis remain untested.
+  and rerank remain untested. XY-932 adds a narrow live operator-debug slice where
+  ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory
+  UI/export and claude-mem viewer workflows remain blocked or not encoded.
 
 ## Evidence Classes
 
@@ -70,6 +72,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | --- | --- | --- |
 | `cargo make real-world-memory` | `2026-06-11-measurement-coverage-audit.md` | ELF fixture aggregate covers 38 jobs across 11 suites with 36 pass and 2 blocked production-ops operator boundaries. |
 | `cargo make real-world-memory-live-adapters` | `2026-06-11-measurement-coverage-audit.md` | ELF live service adapter reports 18 pass, 5 wrong_result, 2 blocked, and 13 not_encoded jobs; qmd reports 17 pass, 6 wrong_result, 2 blocked, and 13 not_encoded jobs. |
+| `cargo make real-world-job-operator-ux-live-adapters` | `tmp/real-world-job/operator-ux-live-adapters/summary.json` | The narrow live operator-debug slice scores ELF as pass and qmd as wrong_result: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; both systems expose replay commands and repair-action guidance. |
 | `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker` | `2026-06-11-first-generation-oss-adapter-promotion-report.md` | mem0/OpenMemory and memsearch pass basic local baseline smokes; agentmemory remains lifecycle_fail and claude-mem remains wrong_result. |
 | `cargo make openmemory-ui-export-readback` | `2026-06-11-mem0-openmemory-history-ui-export-report.md` | mem0 local OSS passes preference correction history, entity-scoped personalization, local `get_all` export-style readback, and deletion audit history; OpenMemory export-helper setup emits a separate blocked artifact with `DOCKER_UNAVAILABLE_IN_BASELINE_RUNNER`, and hosted Platform export remains non-goal. |
 | `ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make graphiti-zep-docker-temporal-smoke` | `2026-06-11-temporal-history-competitor-gap-report.md` | Graphiti/Zep temporal smoke remains blocked by `provider_api_key_missing`. |
@@ -89,7 +92,7 @@ results, or lifecycle failures into one aggregate leaderboard.
 | Memory evolution and temporal history | `loss` | `fixture_backed`, `live_real_world`, `live_baseline_only`, `wrong_result`, `blocked` | ELF fixture memory evolution passes, but live ELF passes only delete/TTL and reports five wrong_result jobs where current-vs-historical state is not reconciled. The mem0 local OSS preference-correction history scenario is now measured and is also an ELF loss. | XY-905 |
 | Consolidation/proposal review | `not_tested` | `fixture_backed`, `not_encoded` | ELF fixture consolidation passes, but live consolidation proposal generation and review-action scoring are not encoded. | XY-926 |
 | Knowledge page compilation | `not_tested` | `fixture_backed`, `live_real_world`, `wrong_result`, `research_gate`, `not_encoded` | ELF fixture knowledge pages pass, but live knowledge compilation is not encoded; graphify reaches a tiny scored smoke and remains wrong_result. | XY-926, XY-929 |
-| Operator debugging/viewer UX | `not_tested` | `fixture_backed`, `live_baseline_only`, `blocked`, `not_encoded`, `research_gate` | ELF fixture operator-debugging UX passes. mem0 local SDK `get_all` readback is measured, but the XY-931 OpenMemory export-helper setup probe is blocked by missing Docker/OpenMemory product container access and must not be inferred from SDK readback. Live trace/viewer scoring and qmd/OpenMemory/claude-mem UX comparisons remain unscored. | XY-923, XY-926 |
+| Operator debugging/viewer UX | `win` | `fixture_backed`, `live_real_world`, `blocked`, `not_encoded` | ELF now has a narrow live operator-debug win over qmd on trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence. ELF ties qmd on replay-command availability and repair-action clarity. OpenMemory UI/export remains blocked and claude-mem UI remains not encoded, so this is not a broad viewer-product superiority claim. | XY-926 |
 | Capture/write policy and redaction | `not_tested` | `fixture_backed`, `live_baseline_only`, `blocked`, `not_encoded` | ELF fixture capture/write-policy jobs pass, but live capture integration and agentmemory/claude-mem capture hooks are not comparable yet. | XY-925, XY-926 |
 | Production ops, restore, backfill, and rebuild | `win` | `live_baseline_only`, `blocked` | ELF has the strongest measured local production-operation story: provider synthetic, stress, resumable backfill, backup/restore, and Qdrant rebuild evidence. | XY-930 |
 | Private corpus and provider boundaries | `blocked` | `blocked` | Private production profile fails closed without an operator-owned manifest; provider-backed production-ops gates require explicit credentials. | XY-930 |
@@ -120,6 +123,9 @@ results, or lifecycle failures into one aggregate leaderboard.
   evidence among the tracked systems.
 - ELF ties qmd on encoded live retrieval, work-resume, project-decisions, and
   personalization slices.
+- ELF has a narrow live operator-debug win over qmd for trace hydration,
+  candidate-drop visibility, and selected-but-not-narrated evidence, with
+  replay-command availability and repair-action clarity tied.
 - ELF has a live temporal reconciliation loss against the benchmark expectation:
   five memory-evolution jobs remain `wrong_result`.
 - Most competitor strengths outside qmd retrieval are `not_tested`, `blocked`,
@@ -134,6 +140,8 @@ results, or lifecycle failures into one aggregate leaderboard.
   behavior, or graph memory. The local OSS correction-history scenario is currently
   an ELF loss, while OpenMemory UI/export is a measured setup blocker and hosted
   behavior plus graph memory remain outside measured local OSS evidence.
+- Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow
+  ELF/qmd operator-debug slice.
 - Do not claim ELF beats OpenViking on staged context trajectory.
 - Do not claim ELF beats Letta on core-vs-archival memory.
 - Do not claim graph/RAG parity from smoke-only evidence.
diff --git a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
index 2043ed37..1f770b67 100644
--- a/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
+++ b/docs/guide/benchmarking/2026-06-11-competitor-strength-evidence-matrix.md
@@ -42,10 +42,10 @@ Current boundary:
 
 ## Current Ledger Summary
 
-The current manifest has 21 adapter records across 16 external projects plus ELF.
-Evidence-class counts: 1 `fixture_backed`, 6 `live_baseline_only`, 3
-`live_real_world`, and 11 `research_gate`. Overall adapter-status counts: 3 `pass`,
-5 `wrong_result`, 1 `lifecycle_fail`, 5 `blocked`, and 7 `not_encoded`.
+The current manifest has 23 adapter records across 16 external projects plus ELF.
+Evidence-class counts: 1 `fixture_backed`, 6 `live_baseline_only`, 5
+`live_real_world`, and 11 `research_gate`. Overall adapter-status counts: 4 `pass`,
+6 `wrong_result`, 1 `lifecycle_fail`, 5 `blocked`, and 7 `not_encoded`.
 
 ## State Taxonomy
 
@@ -72,8 +72,8 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 
 | Project | Strongest user-facing scenario | Current evidence | Measured status and proof | Unsupported or blocked status | Required benchmark before ELF claim | Borrow if stronger |
 | --- | --- | --- | --- | --- | --- | --- |
-| ELF | Evidence-linked source-of-truth memory service with real-world fixtures and live retrieval sweeps. | `live_real_world`; supporting `fixture_backed`. | `wrong_result` full live sweep: `cargo make real-world-memory-live-adapters`, `tmp/real-world-memory/live-adapters/elf-report.md`. Fixture contract: `cargo make real-world-memory`, `tmp/real-world-memory/real-world-memory-report.json`. | `blocked`: private manifest and provider credentials; broader live suites remain `wrong_result`, `blocked`, or `not_encoded`. | Full-suite live pass plus separate private-corpus and credentialed production-ops proof. | Keep borrowing qmd debug knobs, OpenViking staged trajectory, mem0 history, Letta core memory, and graph/RAG navigation. |
-| qmd | Local retrieval-debug workflow with transparent CLI indexing, querying, expansion, fusion, and rerank ergonomics. | `live_real_world`; supporting `live_baseline_only` and `research_gate`. | `wrong_result` full live sweep: `cargo make real-world-memory-live-adapters`, `tmp/real-world-memory/live-adapters/qmd-report.md`; targeted retrieval suites pass. | `not_encoded`: deep profile and non-retrieval live behavior are not encoded; memory_evolution is `wrong_result`. | qmd deep retrieval/debug profile plus full-suite live replay with trace-level diagnostics. | Weighted fusion, rerank explanation, local debug knobs, and command-line replay. |
+| ELF | Evidence-linked source-of-truth memory service with real-world fixtures and live retrieval sweeps. | `live_real_world`; supporting `fixture_backed`. | `wrong_result` full live sweep: `cargo make real-world-memory-live-adapters`, `tmp/real-world-memory/live-adapters/elf-report.md`. Narrow operator-debug pass: `cargo make real-world-job-operator-ux-live-adapters`, `tmp/real-world-job/operator-ux-live-adapters/elf-report.md`. Fixture contract: `cargo make real-world-memory`, `tmp/real-world-memory/real-world-memory-report.json`. | `blocked`: private manifest and provider credentials; broader live suites remain `wrong_result`, `blocked`, or `not_encoded`; the narrow operator-debug slice now passes. | Full-suite live pass plus separate private-corpus and credentialed production-ops proof. | Keep borrowing qmd debug knobs, OpenViking staged trajectory, mem0 history, Letta core memory, and graph/RAG navigation. |
+| qmd | Local retrieval-debug workflow with transparent CLI indexing, querying, expansion, fusion, and rerank ergonomics. | `live_real_world`; supporting `live_baseline_only` and `research_gate`. | `wrong_result` full live sweep: `cargo make real-world-memory-live-adapters`, `tmp/real-world-memory/live-adapters/qmd-report.md`; targeted retrieval suites pass; the narrow operator-debug slice ties replay commands but is `wrong_result` for trace hydration and candidate-drop visibility. | `not_encoded`: deep profile and non-retrieval live behavior are not encoded; memory_evolution is `wrong_result`. | Keep qmd deep retrieval/debug profiling separate from the narrow operator-debug live slice; no broad ELF-over-qmd or qmd-over-ELF claim is allowed until comparable stage artifacts exist. | Weighted fusion, rerank explanation, local debug knobs, and command-line replay. |
 | agentmemory | Coding-agent continuity, MCP/REST packaging, viewer workflow, and durable cross-agent memory lifecycle. | `live_baseline_only`. | `lifecycle_fail`: `ELF_BASELINE_PROJECTS=agentmemory cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`. | `blocked`: durable cold-start and real-world adapter coverage are missing. | Durable local adapter with update, delete, cold-start reload, work_resume, capture/write-policy, and lifecycle-staleness jobs. | Cross-agent hooks, packaging, continuity scenarios, and viewer affordances. |
 | mem0/OpenMemory | Memory lifecycle, personalization, hosted/OpenMemory UI ergonomics, and optional graph memory. | `live_baseline_only`. | `pass`: fresh scoped run `cargo make openmemory-ui-export-readback`, `tmp/live-baseline/live-baseline-report.json`, with mem0 `8/8` local SDK checks passing; `blocked`: OpenMemory export-helper setup probe emits `tmp/live-baseline/mem0-openmemory-ui-export.json` with `DOCKER_UNAVAILABLE_IN_BASELINE_RUNNER`. | `blocked`: OpenMemory UI/export cannot be compared until a compose/import path loads the same corpus into the product app; `unsupported`: hosted Platform export; `not_encoded`: optional graph memory and real-world prompt adapter coverage. | Add a Docker-contained OpenMemory product app import/export path, then score browser/API readback separately from SDK `get_all`; keep hosted Platform and graph memory opt-in/non-goal unless explicitly enabled. | Entity-scoped history, lifecycle surfaces, async update ergonomics, and OpenMemory inspection UX. |
 | memsearch | Markdown-first canonical store with rebuildable local index and practical hybrid retrieval. | `live_baseline_only`. | `pass`: fresh scoped run `ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker`, `tmp/live-baseline/live-baseline-report.json`, with memsearch `4/4` local checks passing. | `not_encoded`: real-world source-of-truth, retrieval, and memory-evolution prompt adapters are not encoded; TTL/expiry is unsupported by the current CLI path. | Score source-of-truth and retrieval-debug real-world jobs over the canonical Markdown store; keep TTL/expiry as unsupported unless a comparable path exists. | Canonical markdown store, local reindex clarity, and user-inspectable source files. |
@@ -101,7 +101,7 @@ lifecycle-fail -> `lifecycle_fail`, and not-encoded -> `not_encoded`.
 | Temporal/current-vs-historical memory | Fixture memory_evolution passes; live memory_evolution is `wrong_result`. | Graphiti/Zep, mem0/OpenMemory. | Graphiti/Zep is `research_gate` `blocked`; mem0/OpenMemory local OSS preference history, entity scope, deletion audit, and SDK `get_all` now pass; OpenMemory UI/export is blocked by the export-helper setup probe; graph-memory scenarios are `not_encoded`. | Fix ELF/qmd live memory_evolution evidence links, add OpenMemory product app import/export readback, and run XY-888. |
 | Consolidation | Fixture consolidation passes; live consolidation is `not_encoded`. | agentmemory, managed-memory references, llm-wiki. | No manifest project has live consolidation scoring. | Run reviewable consolidation proposal generation with source refs, unsupported-claim flags, and audit transitions. |
 | Knowledge pages | Fixture knowledge_compilation passes; live knowledge_compilation is `not_encoded`. | llm-wiki, gbrain, GraphRAG, graphify. | llm-wiki and gbrain are `research_gate` `not_encoded` or `blocked`; GraphRAG is `blocked`; graphify has a tiny scored smoke `wrong_result`. | Encode live derived-page rebuild/lint scoring and run contained knowledge/RAG adapters only after setup proof. |
-| Operator debugging | Fixture operator_debugging_ux passes; live operator_debugging_ux is `not_encoded`. | qmd, claude-mem, OpenMemory. | qmd has debug strengths but operator_debugging_ux is `not_encoded`; claude-mem and OpenMemory UX are `not_encoded`. | Score trace hydration, stage attribution, raw-SQL avoidance, and repair-action clarity through live artifacts. |
+| Operator debugging | Fixture operator_debugging_ux passes, and the narrow live operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity. | qmd, claude-mem, OpenMemory. | qmd ties replay-command availability and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence; claude-mem UX remains `not_encoded` and OpenMemory UI/export remains `blocked`. | Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | Fixture capture_integration passes; live capture_integration is `not_encoded`. | agentmemory, claude-mem. | agentmemory capture is `blocked`; claude-mem capture is `not_encoded`. | Run live capture/write-policy jobs proving redaction, exclusion, evidence binding, and no secret leakage. |
 | Production ops | Fixture production_ops has 4 pass and 2 blocked; live production_ops is `blocked`; production adoption has provider/backfill/restore evidence. | ELF production gate, qmd, RAG/RAGFlow resource gates. | qmd live production_ops is `blocked`; RAG/resource gates are `research_gate` `blocked`. | Rerun private-corpus and credentialed gates only when operator-owned manifest and credentials exist. |
 | Personalization | Fixture and live personalization pass. | mem0/OpenMemory, Letta. | mem0/OpenMemory and Letta personalization are `not_encoded`. | Encode scoped preference readback for mem0/OpenMemory and Letta before personalization superiority claims. |
diff --git a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
index 5a20aacf..78a00da3 100644
--- a/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
+++ b/docs/guide/benchmarking/2026-06-11-elf-iteration-direction-from-competitor-benchmarks.md
@@ -26,7 +26,8 @@ The strongest current statement is:
   evidence.
 - ELF and qmd are tied on the encoded live retrieval, work-resume, and
   project-decision slices. ELF does not yet beat qmd's local retrieval-debug
-  ergonomics.
+  ergonomics, but ELF now has a narrow live operator-debug win over qmd on trace
+  hydration and candidate-drop visibility.
 - Many competitor strengths are still undermeasured: OpenViking context trajectory,
   mem0/OpenMemory entity history and UI, agentmemory and claude-mem continuity
   capture, Letta core-vs-archival memory, Graphiti/Zep temporal graph behavior, and
@@ -76,8 +77,10 @@ Interpretation:
 - Both pass `trust_source_of_truth`, `work_resume`, `project_decisions`,
   `retrieval`, and `personalization`.
 - Both fail most `memory_evolution` live conflict evidence with `wrong_result`.
-- Both leave consolidation, knowledge compilation, operator debugging, capture
-  integration, and production-ops operator boundaries as `not_encoded` or `blocked`.
+- Both leave consolidation, knowledge compilation, capture integration, and
+  production-ops operator boundaries as `not_encoded` or `blocked`. Operator
+  debugging has a separate narrow live slice: ELF passes it, while qmd remains
+  `wrong_result` for trace hydration and candidate-drop stage visibility.
 
 ### Production Evidence
 
@@ -96,21 +99,21 @@ private-corpus quality proof.
 
 ### External Adapter Ledger
 
-The current adapter manifest records 21 adapter records across 17 projects:
+The current adapter manifest records 23 adapter records across 17 projects:
 
 | Evidence class | Count | Meaning |
 | --- | ---: | --- |
 | `fixture_backed` | `1` | ELF real-world fixture scoring. |
 | `live_baseline_only` | `6` | Docker same-corpus or lifecycle evidence without real-world job scoring. |
-| `live_real_world` | `3` | ELF and qmd full-suite live sweeps plus graphify's tiny scored Docker smoke. |
+| `live_real_world` | `5` | ELF and qmd full-suite live sweeps, graphify's tiny scored Docker smoke, and the narrow ELF/qmd operator-debug live slice. |
 | `research_gate` | `11` | Source/setup/resource/output-contract evidence only. |
 
 Overall adapter statuses:
 
 | Status | Count |
 | --- | ---: |
-| `pass` | `3` |
-| `wrong_result` | `5` |
+| `pass` | `4` |
+| `wrong_result` | `6` |
 | `lifecycle_fail` | `1` |
 | `blocked` | `5` |
 | `not_encoded` | `7` |
@@ -130,7 +133,7 @@ one misleading score.
 | Temporal memory | ELF fixture passes, but live memory evolution is wrong_result. | Prioritize current-vs-historical evidence links and Graphiti/Zep-style validity windows. |
 | Consolidation | ELF fixture passes, but live proposal generation is not encoded. | Build reviewable derived proposals with source refs, confidence, unsupported-claim flags, and apply/defer/discard audit. |
 | Knowledge pages | ELF fixture pages pass; live knowledge generation is not encoded. | Borrow llm-wiki lint/query-save loops, gbrain timelines, and graphify reports behind rebuild/lint benchmarks. |
-| Operator debugging | Fixture UX passes; live trace/viewer scoring is not encoded. | Make viewer/CLI debugging a scored live surface, not just an admin convenience. |
+| Operator debugging | Fixture UX passes and the narrow live trace/viewer slice is scored: ELF passes, qmd ties replay/repair clarity but is wrong_result for trace hydration and candidate-drop visibility. | Expand coverage to OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | Fixture capture boundary passes; live capture is not encoded. | Borrow agentmemory/claude-mem capture hooks while preserving redaction and evidence binding. |
 | Production ops | ELF has the strongest checked-in evidence, with private/credential gates blocked. | Keep Docker-first production proof and add private corpus only when an operator-owned manifest exists. |
 | Personalization | ELF live personalization passes; mem0/OpenMemory and Letta are not encoded. | Add entity-scoped preference history and UI readback before claiming stronger personalization. |
@@ -184,11 +187,13 @@ near tie.
    - Benchmark gate: qmd deep profile plus ELF/qmd trace-level replay report.
 
 3. Live operator debugging UX
-   - Current state: fixture pass, live `not_encoded`.
+   - Current state: fixture pass; narrow live ELF/qmd slice scored with ELF `pass`
+     and qmd `wrong_result`.
    - Borrow from: claude-mem viewer, OpenMemory inspector, qmd command output.
-   - Target: no raw SQL needed to explain a bad memory result.
-   - Benchmark gate: live operator-debugging jobs score trace hydration, stage
-     attribution, and repair-action clarity.
+   - Target: no raw SQL needed to explain a bad memory result, across service traces,
+     CLI replay, and bounded local viewer surfaces.
+   - Benchmark gate: add OpenMemory and claude-mem UI/export or viewer runners before
+     claiming broader operator-debug UX superiority.
 
 ### P1 - Turn ELF Into A Better Daily Memory Product
 
@@ -253,7 +258,8 @@ Do not claim:
   fails closed without an operator-owned manifest.
 - ELF beats OpenViking on context trajectory. That scenario is not encoded.
 - ELF beats mem0/OpenMemory on hosted memory, entity history, UI, or optional graph
-  memory. Those scenarios are not encoded.
+  memory. Those scenarios are not encoded; the operator-debug win is only against
+  qmd on a narrow trace/replay slice.
 - ELF beats Letta on core-vs-archival memory. That scenario is not encoded.
 - ELF beats RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, or graphify on graph/RAG
   navigation. Current evidence is research-gate or blocked except graphify's tiny
@@ -278,7 +284,7 @@ The next reporting work should be ordered by decision value:
 
 1. ELF/qmd retrieval-debug deep profile.
 2. ELF live memory-evolution repair report.
-3. Operator-debugging live trace/viewer report.
+3. OpenMemory and claude-mem operator-debug UI/export runners.
 4. Capture/write-policy live adapter report.
 5. OpenViking context-trajectory report after evidence-bearing retrieval works.
 6. RAG/graph adapter pack report after Docker-contained outputs map to evidence ids.
diff --git a/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md b/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md
index e3a7a7c7..aa6213ae 100644
--- a/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md
+++ b/docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md
@@ -32,8 +32,12 @@ The resulting narrow position:
 - Replay command locality: ELF `loss` against qmd.
 - ELF trace/admin replay surface: `tie` as an available but different replay surface,
   not a default-artifact win.
+- Operator-debug trace hydration and candidate-drop visibility: ELF `win` against qmd
+  in the narrow XY-932 live slice; replay-command availability and repair-action
+  clarity are `tie`.
 - Expansion, dense/sparse contribution, fusion, and candidate-drop diagnostics:
-  `not_tested` until comparable stage artifacts are emitted.
+  `not_tested` outside the operator-debug slice until comparable stage artifacts are
+  emitted.
 - Rerank stage scoring: `non_goal` for the current qmd stress path because it uses
   `--no-rerank`.
 - Wrong-result selected-but-not-narrated diagnosis: `tie` on typed non-pass
@@ -48,9 +52,11 @@ This is not a broad qmd-over-ELF claim. It is a scored local-debug artifact gap.
 | ELF | Stress guardrail with trace ids | `ELF_BASELINE_PROJECTS=ELF,qmd ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker` | `tmp/live-baseline/live-baseline-report.json`; summarized in `docs/research/2026-06-11-elf-qmd-retrieval-debug-profile.json` |
 | ELF | Admin trace bundle hydration | `curl -fsS 'http://127.0.0.1:51891/v2/admin/traces/<trace_id>/bundle?mode=full&stage_items_limit=256&candidates_limit=200' -H 'X-ELF-Tenant-Id: <tenant>' -H 'X-ELF-Project-Id: <project>' -H 'X-ELF-Agent-Id: <agent>'` | `elf.trace_bundle/v1` response from the admin service |
 | ELF | Trace ranking replay | `cargo run -p elf-eval -- --config-a config/local/elf.docker.toml --config-b config/local/elf.docker.toml --trace-id <trace_id>` | JSON trace compare output over `search_trace_candidates` |
+| ELF | Operator-debug live trace slice | `cargo make real-world-job-operator-ux-live-adapters` | `tmp/real-world-job/operator-ux-live-adapters/elf-report.json` and `summary.json` |
 | qmd | Stress guardrail and top-10 rows | `ELF_BASELINE_PROJECTS=qmd ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker` | `tmp/live-baseline/qmd-query.json`; summarized in `docs/research/2026-06-11-elf-qmd-retrieval-debug-profile.json` |
 | qmd | Per-query CLI replay | `npx tsx src/cli/qmd.ts query 'lex: <query>\nvec: <query>' -c elfbench --json --no-rerank --min-score 0 -n 10` | JSON top-10 rows with `file`, line/snippet/score fields when qmd returns them |
 | qmd | Lifecycle replay | `npx tsx src/cli/qmd.ts update && npx tsx src/cli/qmd.ts embed -f -c elfbench && npx tsx src/cli/qmd.ts query ... --json --no-rerank` | `tmp/live-baseline/qmd-query.json` checks for update, delete, and cold-start recovery |
+| qmd | Operator-debug live replay slice | `cargo make real-world-job-operator-ux-live-adapters` | `tmp/real-world-job/operator-ux-live-adapters/qmd-report.json` and `summary.json` |
 
 ## Scenario Outcomes
 
@@ -60,6 +66,11 @@ This is not a broad qmd-over-ELF claim. It is a scored local-debug artifact gap.
 | Default top-10 candidate artifact | `live_baseline_only` | `pass` | `loss` | qmd exposes file, score, line/snippet, and distractor rows directly; ELF records trace ids and top evidence but not the full candidate list in the report. |
 | Replay command locality | `live_baseline_only` | `pass` | `loss` | qmd replay is a short local CLI query/update/embed path; ELF replay requires a live service config, persisted traces, headers, and trace ids. |
 | Trace/admin replay surface availability | `implementation_reference` | `not_encoded` | `tie` | ELF has admin trace bundles and `elf-eval` trace replay; qmd has direct CLI replay. They are different useful surfaces and are not scored as equivalent quality. |
+| Operator-debug trace hydration | `live_real_world` | `pass` | `win` | ELF live operator-debug jobs generate trace ids, viewer URLs, admin trace-bundle URLs, and `trace_available=true`; qmd generates local replay commands but no service trace hydration surface. |
+| Operator-debug replay command availability | `live_real_world` | `pass` | `tie` | ELF emits admin trace-bundle curl commands and qmd emits local CLI query replay commands for the same operator-debugging scenarios; this scores command availability, not equivalent UI quality. |
+| Operator-debug candidate-drop visibility | `live_real_world` | `pass` | `win` | ELF exposes dropped-candidate visibility through generated operator-debug metadata without direct SQL assumptions; qmd exposes top-k replay rows but no intermediate candidate-drop stages in this slice. |
+| Operator-debug repair-action clarity | `live_real_world` | `pass` | `tie` | Both live operator-debug adapters emit concrete next steps for replay or trace-bundle inspection; OpenMemory and claude-mem UI repair paths remain blocked or not encoded. |
+| Operator-debug selected-but-not-narrated evidence | `live_real_world` | `pass` | `win` | The operator-debug slice now scores selected-but-not-narrated evidence as a trace/answer-composition repair surface without direct database inspection. |
 | Query expansion attribution | `research_gate` | `not_encoded` | `not_tested` | No comparable artifact shows expansion variants or dynamic expansion decisions for both systems. |
 | Dense/sparse channel attribution | `research_gate` | `not_encoded` | `not_tested` | ELF uses dense plus BM25 and qmd uses structured `lex:` plus `vec:`, but the scored artifacts do not expose comparable per-channel contribution. |
 | Fusion attribution | `research_gate` | `not_encoded` | `not_tested` | No comparable artifact shows fusion inputs, RRF/weighted-fusion contributions, or fusion-stage candidate drops. |
@@ -68,7 +79,7 @@ This is not a broad qmd-over-ELF claim. It is a scored local-debug artifact gap.
 | Selected-but-not-narrated wrong results | `live_real_world` | `wrong_result` | `tie` | Both live paths produce memory-evolution wrong results where evidence is present but current-vs-historical or lifecycle narration is missing. |
 | Evidence-absent and tombstone diagnosis | `live_real_world` | `wrong_result` | `win` | ELF retrieved all required memory-evolution evidence and passed delete/TTL; qmd missed three required evidence links including the delete tombstone. |
 
-Summary: `1` ELF win, `3` ties, `2` ELF losses, `4` not-tested scenarios, `0`
+Summary: `4` ELF wins, `5` ties, `2` ELF losses, `4` not-tested scenarios, `0`
 blocked scenarios, and `1` non-goal scenario. The losses are local-debug artifact
 losses only. They do not change the retrieval-correctness tie.
 
@@ -81,8 +92,9 @@ losses only. They do not change the retrieval-correctness tie.
 | Sparse retrieval | `not_tested` | qmd `lex:` and ELF BM25 are present in command or service design, but contribution and drops are not scored. |
 | Fusion | `not_tested` | Fusion candidates and final fusion deltas are not materialized comparably. |
 | Rerank | `non_goal` | qmd uses `--no-rerank` in the current path; rerank superiority is out of scope for this run. |
-| Candidate drops | `not_tested` | No current report can prove retrieved-but-dropped evidence for qmd, and ELF candidate bundles are not hydrated into the stress artifact. |
+| Candidate drops | `not_tested` globally; `win` in operator-debug slice | No current stress/default report can prove retrieved-but-dropped evidence for qmd, but the XY-932 operator-debug slice scores ELF candidate-drop visibility without direct SQL assumptions. |
 | Selected-but-not-narrated | `tie` | Both systems have typed memory-evolution wrong-result rows where evidence is selected or available but not narrated as lifecycle history. |
+| Operator-debug selected-but-not-narrated | `win` | The XY-932 operator-debug job proves selected-but-not-narrated evidence is visible as a trace/answer-composition repair surface in ELF but not in qmd's generated service-trace metadata. |
 | Replay commands | `loss` | qmd's local CLI replay is shorter and directly tied to top-10 JSON output. |
 
 ## Typed Non-Pass States
@@ -92,8 +104,8 @@ The report preserves the wrong-result classes from the June 11 diagnostics:
 | Class | Current coverage |
 | --- | --- |
 | `evidence_absent` | Observed for qmd on verdict caveat, preference rationale, and delete tombstone misses. |
-| `retrieved_but_dropped` | Defined but `not_tested`; current artifacts do not expose enough candidate-stage data. |
-| `selected_but_not_narrated` | Observed for both ELF and qmd on supersession and temporal-validity jobs. |
+| `retrieved_but_dropped` | Defined, but `not_tested` globally; observed as an ELF operator-debug visibility win in the narrow XY-932 slice. |
+| `selected_but_not_narrated` | Observed for both ELF and qmd on supersession and temporal-validity jobs; additionally scored as an ELF operator-debug visibility win in the narrow XY-932 slice. |
 | `contradicted_by_lifecycle_evidence` | Observed when current, historical, supersession, or tombstone evidence makes the answer incomplete. |
 
 These states are typed evidence, not leaderboard shortcuts. A `wrong_result` with
@@ -108,10 +120,14 @@ Allowed:
   CLI replay.
 - ELF has useful service trace/admin replay surfaces, but they are not yet hydrated
   into the default stress report as qmd-like candidate artifacts.
+- ELF narrowly wins the live operator-debug trace hydration and candidate-drop
+  visibility slice against qmd; the two systems still tie on replay-command
+  availability and repair-action clarity.
 - ELF narrowly wins the memory-evolution evidence-retention slice because qmd misses
   the delete tombstone and two other required evidence links.
 - Expansion, dense/sparse contribution, fusion, rerank-on quality, and
-  retrieved-but-dropped candidate diagnosis remain unproven.
+  broad retrieved-but-dropped candidate diagnosis outside the operator-debug slice
+  remain unproven.
 
 Not allowed:
 
@@ -122,6 +138,8 @@ Not allowed:
   benchmark report has qmd-level candidate visibility.
 - Do not score rerank superiority from a qmd `--no-rerank` run.
 - Do not collapse `not_tested`, `non_goal`, or `wrong_result` into pass evidence.
+- Do not convert the XY-932 operator-debug trace slice into a broad viewer-product win
+  over OpenMemory or claude-mem; those UI paths remain blocked or not encoded.
 
 ## Follow-Up Gate
 
diff --git a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
index 584b3142..e10ce945 100644
--- a/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
+++ b/docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md
@@ -34,6 +34,9 @@ What is proven today:
   trajectory, mem0/OpenMemory entity history and UI, Letta core-vs-archival memory,
   Graphiti/Zep temporal graph behavior, graph/RAG navigation, agentmemory and
   claude-mem capture/continuity, and knowledge-page workflows remain non-claims.
+  The separate XY-932 operator-debug live slice now scores ELF against qmd for trace
+  hydration and candidate-drop visibility, but does not cover OpenMemory or
+  claude-mem UI flows.
 
 So the current adoption decision can remain "credible for bounded personal
 production," but the competitiveness objective remains open.
@@ -119,19 +122,19 @@ conflict evidence links for current-vs-historical reasoning.
 
 ## External Adapter Ledger
 
-The checked-in manifest records 21 adapter records across 17 unique project names.
+The checked-in manifest records 23 adapter records across 17 unique project names.
 
 | Evidence class | Adapter records | Meaning |
 | --- | ---: | --- |
 | `fixture_backed` | `1` | ELF fixture scoring only. |
 | `live_baseline_only` | `6` | Docker same-corpus or lifecycle evidence without real-world job scoring. |
-| `live_real_world` | `3` | ELF and qmd live real-world sweeps plus graphify's tiny scored Docker smoke. |
+| `live_real_world` | `5` | ELF and qmd live real-world sweeps, graphify's tiny scored Docker smoke, and the narrow ELF/qmd operator-debug live slice. |
 | `research_gate` | `11` | Setup, source, resource, or output-contract gate only. |
 
 | Overall status | Adapter records |
 | --- | ---: |
-| `pass` | `3` |
-| `wrong_result` | `5` |
+| `pass` | `4` |
+| `wrong_result` | `6` |
 | `lifecycle_fail` | `1` |
 | `blocked` | `5` |
 | `not_encoded` | `7` |
@@ -144,8 +147,8 @@ records `unique_project_names: 17` for the full project list including ELF.
 
 | Project | Best current evidence | Current measured state | Strongest unproven scenario | Next measurement before claim |
 | --- | --- | --- | --- | --- |
-| ELF | `fixture_backed` plus `live_real_world` | Fixture aggregate passes except 2 blocked operator boundaries; live full sweep is `wrong_result`. | Full live memory evolution, live consolidation, live knowledge pages, live capture, live production ops. | Memory-evolution diagnostic report, then live operator/capture/consolidation reports. |
-| qmd | `live_real_world` plus `live_baseline_only` | Fresh full sweep is one pass behind ELF because qmd misses the delete/TTL tombstone job; same-corpus baseline passes. | Deep retrieval-debug ergonomics and trace replay. | qmd/ELF deep retrieval-debug profile with expansion, fusion, rerank, and dropped-candidate traces. |
+| ELF | `fixture_backed` plus `live_real_world` | Fixture aggregate passes except 2 blocked operator boundaries; live full sweep is `wrong_result`; narrow operator-debug live slice passes. | Full live memory evolution, live consolidation, live knowledge pages, live capture, live production ops, and broader operator UI runners. | Memory-evolution diagnostic report, then live capture/consolidation/knowledge reports and OpenMemory/claude-mem UI runners. |
+| qmd | `live_real_world` plus `live_baseline_only` | Fresh full sweep is one pass behind ELF because qmd misses the delete/TTL tombstone job; same-corpus baseline passes; narrow operator-debug live slice ties ELF on replay-command availability but is `wrong_result` for trace hydration and candidate-drop visibility. | Deep retrieval-debug ergonomics and trace replay beyond the narrow operator-debug slice. | qmd/ELF deep retrieval-debug profile with expansion, fusion, rerank, and dropped-candidate traces. |
 | agentmemory | `live_baseline_only` | `lifecycle_fail`. | Durable coding-agent continuity and capture hooks. | Durable lifecycle and work-resume/capture adapter report. |
 | mem0/OpenMemory | `live_baseline_only` | Basic local smoke now passes; history/UI/hosted/graph behavior remains `not_encoded`. | Entity history, lifecycle UI, OpenMemory inspection. | Entity-history, deletion-audit, and UI/export readback report. |
 | memsearch | `live_baseline_only` | Basic canonical Markdown reindex/reload smoke now passes; real-world prompt coverage remains `not_encoded`. | Markdown canonical store and local reindex clarity. | Source-of-truth and retrieval-debug real-world adapter report. |
@@ -173,7 +176,7 @@ records `unique_project_names: 17` for the full project list including ELF.
 | Memory evolution | ELF live fails 5/6 jobs; qmd live fails 6/6 jobs after missing the delete/TTL tombstone evidence; fixture aggregate passes. | No broad live superiority claim. | Historical conflict evidence links and Graphiti/Zep temporal comparison. |
 | Consolidation | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | Live proposal generation with lineage, confidence, and review-action audit. |
 | Knowledge pages | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | Live page rebuild/lint plus llm-wiki, gbrain, GraphRAG, and graphify comparisons. |
-| Operator debugging | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | Trace hydration, stage attribution, dropped-candidate, and repair-action scoring. |
+| Operator debugging | Fixture aggregate passes; narrow ELF/qmd live operator-debug slice is scored with ELF `pass` and qmd `wrong_result`. | Narrow ELF/qmd live claim only: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; replay-command and repair-action clarity are tied. | OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim. |
 | Capture/write policy | Fixture aggregate passes; live adapters are not encoded. | Fixture-only claim. | agentmemory/claude-mem style capture with redaction and evidence binding. |
 | Production ops | ELF has separate production-provider/backfill/restore evidence; live sweep is not a full production-ops pass. | Bounded personal-production adoption claim with caveats. | Private corpus manifest and credentialed provider gates. |
 | Personalization | ELF and qmd live pass one scoped preference job. | Narrow encoded pass only. | mem0/OpenMemory and Letta entity/preference history comparison. |
diff --git a/docs/research/2026-06-11-competitor-strength-adoption-report.json b/docs/research/2026-06-11-competitor-strength-adoption-report.json
index 906c2659..56ec65a5 100644
--- a/docs/research/2026-06-11-competitor-strength-adoption-report.json
+++ b/docs/research/2026-06-11-competitor-strength-adoption-report.json
@@ -12,7 +12,7 @@
       "Live temporal reconciliation remains wrong_result for five of six memory_evolution jobs.",
       "Private-corpus production quality is blocked until an operator-owned manifest exists.",
       "Credentialed provider production-ops gates are blocked until explicit provider setup exists.",
-      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation remain unproven. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up now scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, rerank, and candidate-drop diagnosis remain untested."
+      "Several competitor strengths remain not_tested or blocked: OpenMemory UI/export is blocked by the XY-931 export-helper setup probe, hosted mem0 Platform behavior remains a non-goal, and OpenViking trajectory, Letta core-vs-archival memory, and graph/RAG navigation remain unproven. mem0 local OSS preference history is measured separately and is an ELF loss on the current correction-history scenario. The XY-923 follow-up scores qmd immediate top-10/replay artifact ergonomics as stronger than ELF's default stress report, while expansion, fusion, and rerank remain untested. XY-932 adds a narrow live operator-debug slice where ELF beats qmd on trace hydration and candidate-drop visibility, but OpenMemory UI/export and claude-mem viewer workflows remain blocked or not encoded."
     ]
   },
   "evidence_class_terms": [
@@ -46,6 +46,11 @@
       "artifact": "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
       "claim": "ELF live service adapter reports 18 pass, 5 wrong_result, 2 blocked, and 13 not_encoded jobs; qmd reports 17 pass, 6 wrong_result, 2 blocked, and 13 not_encoded jobs."
     },
+    {
+      "command": "cargo make real-world-job-operator-ux-live-adapters",
+      "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json",
+      "claim": "The narrow live operator-debug slice scores ELF as pass and qmd as wrong_result: ELF wins trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence; both systems expose replay commands and repair-action guidance."
+    },
     {
       "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
       "artifact": "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md",
@@ -82,7 +87,11 @@
       "scenario_id": "source_of_truth_rebuild_evidence_writes",
       "title": "Source-of-truth rebuild and evidence-bound writes",
       "outcome": "win",
-      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "live_baseline_only"
+      ],
       "measured_claim": "ELF has the strongest measured source-of-truth and rebuild story: Postgres is authoritative, Qdrant is rebuildable, trust_source_of_truth passes in fixture and live sweeps, and production restore/rebuild proof exists.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
@@ -95,192 +104,296 @@
       "scenario_id": "work_resume_coding_agent_continuity",
       "title": "Work resume and coding-agent continuity",
       "outcome": "tie",
-      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only", "blocked", "not_encoded"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "live_baseline_only",
+        "blocked",
+        "not_encoded"
+      ],
       "measured_claim": "ELF and qmd both pass the encoded live work_resume jobs. agentmemory, claude-mem, and OpenViking continuity strengths remain blocked or not encoded.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md"
       ],
-      "follow_up_issues": ["XY-925", "XY-928"],
+      "follow_up_issues": [
+        "XY-925",
+        "XY-928"
+      ],
       "caveat": "The tie is only for encoded live work_resume behavior, not for broad capture hooks or staged context."
     },
     {
       "scenario_id": "project_decisions_reversals",
       "title": "Project decisions and reversals",
       "outcome": "tie",
-      "evidence_classes": ["fixture_backed", "live_real_world", "research_gate", "not_encoded"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "research_gate",
+        "not_encoded"
+      ],
       "measured_claim": "ELF and qmd both pass encoded project_decisions jobs. Letta-style core/archival decision memory is not tested.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md"
       ],
-      "follow_up_issues": ["XY-927"],
+      "follow_up_issues": [
+        "XY-927"
+      ],
       "caveat": "No Letta comparison exists until a contained export path is selected."
     },
     {
       "scenario_id": "retrieval_quality",
       "title": "Retrieval quality",
       "outcome": "tie",
-      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "live_baseline_only"
+      ],
       "measured_claim": "ELF and qmd both pass the encoded live retrieval suite and both pass stress/same-corpus retrieval evidence.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md",
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md"
       ],
-      "follow_up_issues": ["XY-923"],
+      "follow_up_issues": [
+        "XY-923"
+      ],
       "caveat": "Retrieval correctness is separate from debug/replay ergonomics."
     },
     {
       "scenario_id": "local_debug_replay_ux",
       "title": "Retrieval quality and local debug UX",
       "outcome": "loss",
-      "evidence_classes": ["live_baseline_only", "research_gate", "wrong_result", "not_encoded"],
+      "evidence_classes": [
+        "live_baseline_only",
+        "research_gate",
+        "wrong_result",
+        "not_encoded"
+      ],
       "measured_claim": "The XY-923 trace/replay report scores qmd stronger on immediate top-10 candidate artifacts and short CLI replay commands. ELF keeps useful service trace/admin replay surfaces, and expansion, fusion, rerank-on, and candidate-drop diagnostics remain untested.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md",
         "docs/guide/benchmarking/2026-06-11-elf-qmd-retrieval-debug-profile.md",
         "docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md"
       ],
-      "follow_up_issues": ["XY-923"],
+      "follow_up_issues": [
+        "XY-923"
+      ],
       "caveat": "The loss is a local-debug artifact loss only; retrieval correctness remains tied and no broad qmd-over-ELF memory-system claim is allowed."
     },
     {
       "scenario_id": "memory_evolution_temporal_history",
       "title": "Memory evolution and temporal history",
       "outcome": "loss",
-      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only", "wrong_result", "blocked"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "live_baseline_only",
+        "wrong_result",
+        "blocked"
+      ],
       "measured_claim": "ELF fixture memory_evolution passes, but live ELF passes only the delete/TTL job and reports five wrong_result jobs where evidence is retrieved but current-vs-historical state is not reconciled. The mem0 local OSS preference-correction history scenario is now measured and is also an ELF loss.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md",
         "docs/research/2026-06-11-temporal-history-competitor-gap-report.json"
       ],
-      "follow_up_issues": ["XY-905"],
+      "follow_up_issues": [
+        "XY-905"
+      ],
       "caveat": "Graphiti/Zep remains a temporal-validity reference, but its local provider-backed smoke is blocked by provider_api_key_missing."
     },
     {
       "scenario_id": "consolidation_proposal_review",
       "title": "Consolidation/proposal review",
       "outcome": "not_tested",
-      "evidence_classes": ["fixture_backed", "not_encoded"],
+      "evidence_classes": [
+        "fixture_backed",
+        "not_encoded"
+      ],
       "measured_claim": "ELF fixture consolidation passes, but live consolidation proposal generation and review-action scoring are not encoded.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md"
       ],
-      "follow_up_issues": ["XY-926"],
+      "follow_up_issues": [
+        "XY-926"
+      ],
       "caveat": "Fixture evidence cannot be promoted into live proposal-quality proof."
     },
     {
       "scenario_id": "knowledge_page_compilation",
       "title": "Knowledge page compilation",
       "outcome": "not_tested",
-      "evidence_classes": ["fixture_backed", "live_real_world", "wrong_result", "research_gate", "not_encoded"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "wrong_result",
+        "research_gate",
+        "not_encoded"
+      ],
       "measured_claim": "ELF fixture knowledge pages pass, but live knowledge compilation is not encoded. graphify reaches a tiny scored smoke and remains wrong_result.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md"
       ],
-      "follow_up_issues": ["XY-926", "XY-929"],
+      "follow_up_issues": [
+        "XY-926",
+        "XY-929"
+      ],
       "caveat": "llm-wiki, gbrain, GraphRAG, and graphify remain references until representative citation/lint jobs are scored."
     },
     {
       "scenario_id": "operator_debugging_viewer_ux",
       "title": "Operator debugging/viewer UX",
-      "outcome": "not_tested",
-      "evidence_classes": ["fixture_backed", "live_baseline_only", "blocked", "not_encoded", "research_gate"],
-      "measured_claim": "ELF fixture operator-debugging UX passes. mem0 local SDK get_all readback is measured, but the XY-931 OpenMemory export-helper setup probe is blocked by missing Docker/OpenMemory product container access and must not be inferred from SDK readback. Live trace/viewer scoring and qmd/OpenMemory/claude-mem UX comparisons remain unscored.",
+      "outcome": "win",
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "blocked",
+        "not_encoded"
+      ],
+      "measured_claim": "ELF now has a narrow live operator-debug win over qmd on trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence. ELF ties qmd on replay-command availability and repair-action clarity. OpenMemory UI/export remains blocked and claude-mem UI remains not encoded, so this is not a broad viewer-product superiority claim.",
       "command_artifacts": [
-        "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
-        "docs/guide/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md"
+        "tmp/real-world-job/operator-ux-live-adapters/summary.json",
+        "tmp/real-world-job/operator-ux-live-adapters/elf-report.json",
+        "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json",
+        "docs/guide/benchmarking/2026-06-11-elf-qmd-trace-replay-diagnostics-report.md"
+      ],
+      "follow_up_issues": [
+        "XY-926"
       ],
-      "follow_up_issues": ["XY-923", "XY-926"],
-      "caveat": "No raw-SQL-avoidance or repair-action live benchmark exists yet."
+      "caveat": "The live slice compares ELF and qmd only; OpenMemory UI/export and claude-mem viewer workflows remain typed blocked or not_encoded until a bounded local runner exists."
     },
     {
       "scenario_id": "capture_write_policy_redaction",
       "title": "Capture/write policy and redaction",
       "outcome": "not_tested",
-      "evidence_classes": ["fixture_backed", "live_baseline_only", "blocked", "not_encoded"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_baseline_only",
+        "blocked",
+        "not_encoded"
+      ],
       "measured_claim": "ELF fixture capture/write-policy jobs pass, but live capture integration remains not encoded and agentmemory/claude-mem capture hooks are not comparable yet.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-first-generation-oss-adapter-promotion-report.md"
       ],
-      "follow_up_issues": ["XY-925", "XY-926"],
+      "follow_up_issues": [
+        "XY-925",
+        "XY-926"
+      ],
       "caveat": "Future evidence must prove redaction, exclusions, evidence binding, and no secret leakage."
     },
     {
       "scenario_id": "production_ops_restore_backfill",
       "title": "Production ops, restore, backfill, and rebuild",
       "outcome": "win",
-      "evidence_classes": ["live_baseline_only", "blocked"],
+      "evidence_classes": [
+        "live_baseline_only",
+        "blocked"
+      ],
       "measured_claim": "ELF has the strongest measured local production-operation story: provider synthetic, stress, resumable backfill, backup/restore, and Qdrant rebuild evidence are checked in.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-09-production-adoption-gate-report.md",
         "docs/guide/benchmarking/2026-06-10-production-adoption-refresh.md"
       ],
-      "follow_up_issues": ["XY-930"],
+      "follow_up_issues": [
+        "XY-930"
+      ],
       "caveat": "Private-corpus and credentialed provider gates remain blocked, so this is not private production quality proof."
     },
     {
       "scenario_id": "private_corpus_provider_boundaries",
       "title": "Private corpus and provider boundaries",
       "outcome": "blocked",
-      "evidence_classes": ["blocked"],
+      "evidence_classes": [
+        "blocked"
+      ],
       "measured_claim": "The private production profile fails closed without an operator-owned manifest, and provider-backed production-ops gates require explicit credentials.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-09-production-adoption-gate-report.md",
         "docs/guide/benchmarking/2026-06-10-production-adoption-refresh.md"
       ],
-      "follow_up_issues": ["XY-930"],
+      "follow_up_issues": [
+        "XY-930"
+      ],
       "caveat": "The blocker is an input boundary, not a hidden benchmark pass or loss."
     },
     {
       "scenario_id": "personalization_scoped_preferences",
       "title": "Personalization and scoped preferences",
       "outcome": "tie",
-      "evidence_classes": ["fixture_backed", "live_real_world", "live_baseline_only", "not_encoded"],
+      "evidence_classes": [
+        "fixture_backed",
+        "live_real_world",
+        "live_baseline_only",
+        "not_encoded"
+      ],
       "measured_claim": "ELF and qmd both pass the single encoded live personalization job. mem0 local OSS now passes entity-scoped personalization, so scoped preference behavior is a measured tie; preference correction history remains a separate ELF loss.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-measurement-coverage-audit.md",
         "docs/guide/benchmarking/2026-06-11-mem0-openmemory-history-ui-export-report.md"
       ],
-      "follow_up_issues": ["XY-927"],
+      "follow_up_issues": [
+        "XY-927"
+      ],
       "caveat": "The tie is scoped to encoded personalization and local OSS entity filters; OpenMemory UI readback and long-term preference evolution remain separate surfaces."
     },
     {
       "scenario_id": "context_trajectory_hierarchical_retrieval",
       "title": "Context trajectory and hierarchical retrieval",
       "outcome": "not_tested",
-      "evidence_classes": ["live_baseline_only", "research_gate", "wrong_result", "not_encoded"],
+      "evidence_classes": [
+        "live_baseline_only",
+        "research_gate",
+        "wrong_result",
+        "not_encoded"
+      ],
       "measured_claim": "OpenViking reaches the pinned Docker local embedding path but misses expected same-corpus evidence, and staged trajectory/hierarchy scoring is not encoded.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md"
       ],
-      "follow_up_issues": ["XY-928"],
+      "follow_up_issues": [
+        "XY-928"
+      ],
       "caveat": "ELF only has a narrow precondition win over OpenViking, not a trajectory win."
     },
     {
       "scenario_id": "core_vs_archival_memory",
       "title": "Core-vs-archival memory",
       "outcome": "not_tested",
-      "evidence_classes": ["research_gate", "not_encoded"],
+      "evidence_classes": [
+        "research_gate",
+        "not_encoded"
+      ],
       "measured_claim": "ELF has core block semantics in the service contract, but comparable core-vs-archival benchmark jobs and a contained Letta export path are not encoded.",
       "command_artifacts": [
         "docs/spec/system_elf_memory_service_v2.md",
         "docs/guide/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
       ],
-      "follow_up_issues": ["XY-927"],
+      "follow_up_issues": [
+        "XY-927"
+      ],
       "caveat": "No ELF-over-Letta claim is allowed."
     },
     {
       "scenario_id": "graph_rag_navigation_citations",
       "title": "Graph/RAG navigation and citations",
       "outcome": "not_tested",
-      "evidence_classes": ["smoke_only", "research_gate", "blocked", "wrong_result", "not_encoded"],
+      "evidence_classes": [
+        "smoke_only",
+        "research_gate",
+        "blocked",
+        "wrong_result",
+        "not_encoded"
+      ],
       "measured_claim": "Graph/RAG smokes now produce scored or typed non-pass adapter reports where possible, but broad graph/RAG navigation and citation quality are not tested.",
       "command_artifacts": [
         "docs/guide/benchmarking/2026-06-11-graph-rag-scored-smoke-adapter-report.md"
       ],
-      "follow_up_issues": ["XY-929"],
+      "follow_up_issues": [
+        "XY-929"
+      ],
       "caveat": "RAGFlow, LightRAG, GraphRAG, Graphiti/Zep, llm-wiki, and gbrain remain blocked, research_gate, or not_encoded; graphify only has a tiny wrong_result smoke."
     }
   ],
@@ -352,7 +465,8 @@
       "ELF has the strongest measured source-of-truth, rebuild, restore, and backfill evidence among the tracked systems.",
       "ELF ties qmd on encoded live retrieval, work_resume, project_decisions, and personalization slices.",
       "ELF has a live temporal reconciliation loss against the benchmark expectation: five memory_evolution jobs remain wrong_result.",
-      "Most competitor strengths outside qmd retrieval are not_tested, blocked, smoke_only, or research_gate."
+      "Most competitor strengths outside qmd retrieval are not_tested, blocked, smoke_only, or research_gate.",
+      "ELF has a narrow live operator-debug win over qmd for trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence, with replay-command availability and repair-action clarity tied."
     ],
     "not_allowed": [
       "Do not claim ELF broadly beats qmd.",
@@ -361,7 +475,8 @@
       "Do not claim ELF beats OpenViking on staged context trajectory.",
       "Do not claim ELF beats Letta on core-vs-archival memory.",
       "Do not claim graph/RAG parity from smoke-only evidence.",
-      "Do not promote fixture-backed, live_baseline_only, smoke_only, research_gate, blocked, wrong_result, lifecycle_fail, unsupported, or not_encoded states into a generic pass/fail score."
+      "Do not promote fixture-backed, live_baseline_only, smoke_only, research_gate, blocked, wrong_result, lifecycle_fail, unsupported, or not_encoded states into a generic pass/fail score.",
+      "Do not claim ELF broadly beats OpenMemory or claude-mem viewer UX from the narrow ELF/qmd operator-debug slice."
     ]
   }
 }
diff --git a/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json b/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json
index ebc095d2..42c22615 100644
--- a/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json
+++ b/docs/research/2026-06-11-elf-qmd-trace-replay-diagnostics-report.json
@@ -35,13 +35,14 @@
     "debug_ergonomics": "qmd wins the current default top-10 candidate artifact and short replay-command surfaces.",
     "elf_trace_position": "ELF has service trace, admin bundle, and trace replay surfaces, but they are not hydrated into the default stress report as qmd-like candidate artifacts.",
     "outcome_counts": {
-      "win": 1,
-      "tie": 3,
+      "win": 4,
+      "tie": 5,
       "loss": 2,
       "not_tested": 4,
       "blocked": 0,
       "non_goal": 1
-    }
+    },
+    "operator_debug_live_slice": "XY-932 adds a narrow live_real_world operator-debug slice: ELF passes trace hydration, replay-command availability, candidate-drop visibility, repair-action clarity, and selected-but-not-narrated evidence; qmd ties replay-command availability and repair-action clarity but remains wrong_result for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence."
   },
   "commands": [
     {
@@ -146,6 +147,79 @@
         "scripts/live-baseline-benchmark.sh"
       ]
     },
+    {
+      "scenario_id": "operator_debug_trace_hydration",
+      "surface": "operator-debug trace hydration",
+      "evidence_class": "live_real_world",
+      "result_type": "pass",
+      "elf_status": "pass",
+      "qmd_status": "wrong_result",
+      "outcome": "win",
+      "diagnostic_judgment": "ELF live operator-debug jobs generate trace_available=true, service trace ids, viewer URLs, and admin trace-bundle replay URLs; qmd generates local replay commands but no service trace hydration surface.",
+      "artifacts": [
+        "tmp/real-world-job/operator-ux-live-adapters/elf-report.json",
+        "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+      ]
+    },
+    {
+      "scenario_id": "operator_debug_replay_command_availability",
+      "surface": "operator-debug replay command availability",
+      "evidence_class": "live_real_world",
+      "result_type": "pass",
+      "elf_status": "pass",
+      "qmd_status": "pass",
+      "outcome": "tie",
+      "diagnostic_judgment": "ELF emits admin trace-bundle curl commands and qmd emits local CLI query replay commands for the same operator-debugging scenarios; this scores command availability, not equivalent UI quality.",
+      "artifacts": [
+        "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+      ]
+    },
+    {
+      "scenario_id": "operator_debug_candidate_drop_visibility",
+      "surface": "operator-debug candidate-drop visibility",
+      "evidence_class": "live_real_world",
+      "result_type": "pass",
+      "elf_status": "pass",
+      "qmd_status": "wrong_result",
+      "outcome": "win",
+      "diagnostic_judgment": "ELF exposes dropped-candidate visibility through generated operator_debug metadata without direct SQL assumptions; qmd exposes top-k replay rows but no intermediate candidate-drop stages in this slice.",
+      "typed_non_pass_states": [
+        "retrieved_but_dropped"
+      ],
+      "artifacts": [
+        "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json",
+        "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+      ]
+    },
+    {
+      "scenario_id": "operator_debug_repair_action_clarity",
+      "surface": "operator-debug repair-action clarity",
+      "evidence_class": "live_real_world",
+      "result_type": "pass",
+      "elf_status": "pass",
+      "qmd_status": "pass",
+      "outcome": "tie",
+      "diagnostic_judgment": "Both live operator-debug adapters emit concrete next steps for replay or trace-bundle inspection; OpenMemory and claude-mem UI repair paths remain blocked or not encoded.",
+      "artifacts": [
+        "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+      ]
+    },
+    {
+      "scenario_id": "operator_debug_selected_but_not_narrated",
+      "surface": "operator-debug selected-but-not-narrated evidence",
+      "evidence_class": "live_real_world",
+      "result_type": "pass",
+      "elf_status": "pass",
+      "qmd_status": "wrong_result",
+      "outcome": "win",
+      "diagnostic_judgment": "The operator-debug slice now scores selected-but-not-narrated evidence as a trace/answer-composition repair surface without direct database inspection.",
+      "typed_non_pass_states": [
+        "selected_but_not_narrated"
+      ],
+      "artifacts": [
+        "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/selected_but_not_narrated.json"
+      ]
+    },
     {
       "scenario_id": "query_expansion_attribution",
       "surface": "query expansion attribution",
@@ -286,8 +360,10 @@
     "qmd currently wins the default local-debug artifact surface: top-10 rows plus short CLI replay.",
     "ELF trace/admin endpoint availability is not proof that the default benchmark report has qmd-level candidate visibility.",
     "Rerank superiority is not scored from a qmd --no-rerank run.",
-    "Expansion, dense/sparse contribution, fusion, and retrieved-but-dropped candidate diagnostics remain not_tested.",
     "Do not claim qmd beats ELF as a memory system overall.",
-    "Do not collapse not_tested, non_goal, or wrong_result into pass evidence."
+    "Do not collapse not_tested, non_goal, or wrong_result into pass evidence.",
+    "ELF narrowly wins the live operator-debug trace hydration, candidate-drop visibility, and selected-but-not-narrated evidence slice against qmd; qmd still ties replay-command and repair-action clarity.",
+    "Expansion, dense/sparse contribution, fusion, rerank-on quality, and broad retrieved-but-dropped diagnosis outside the operator-debug slice remain unproven.",
+    "Do not convert the XY-932 operator-debug trace slice into a broad viewer-product win over OpenMemory or claude-mem; those UI paths remain blocked or not encoded."
   ]
 }
diff --git a/docs/research/2026-06-11-measurement-coverage-audit.json b/docs/research/2026-06-11-measurement-coverage-audit.json
index d11270f4..ab71c30e 100644
--- a/docs/research/2026-06-11-measurement-coverage-audit.json
+++ b/docs/research/2026-06-11-measurement-coverage-audit.json
@@ -72,88 +72,136 @@
     {
       "suite": "trust_source_of_truth",
       "jobs": 1,
-      "elf_status_counts": {"pass": 1},
-      "qmd_status_counts": {"pass": 1}
+      "elf_status_counts": {
+        "pass": 1
+      },
+      "qmd_status_counts": {
+        "pass": 1
+      }
     },
     {
       "suite": "work_resume",
       "jobs": 5,
-      "elf_status_counts": {"pass": 5},
-      "qmd_status_counts": {"pass": 5}
+      "elf_status_counts": {
+        "pass": 5
+      },
+      "qmd_status_counts": {
+        "pass": 5
+      }
     },
     {
       "suite": "retrieval",
       "jobs": 5,
-      "elf_status_counts": {"pass": 5},
-      "qmd_status_counts": {"pass": 5}
+      "elf_status_counts": {
+        "pass": 5
+      },
+      "qmd_status_counts": {
+        "pass": 5
+      }
     },
     {
       "suite": "project_decisions",
       "jobs": 5,
-      "elf_status_counts": {"pass": 5},
-      "qmd_status_counts": {"pass": 5}
+      "elf_status_counts": {
+        "pass": 5
+      },
+      "qmd_status_counts": {
+        "pass": 5
+      }
     },
     {
       "suite": "personalization",
       "jobs": 1,
-      "elf_status_counts": {"pass": 1},
-      "qmd_status_counts": {"pass": 1}
+      "elf_status_counts": {
+        "pass": 1
+      },
+      "qmd_status_counts": {
+        "pass": 1
+      }
     },
     {
       "suite": "memory_evolution",
       "jobs": 6,
-      "elf_status_counts": {"pass": 1, "wrong_result": 5},
-      "qmd_status_counts": {"wrong_result": 6}
+      "elf_status_counts": {
+        "pass": 1,
+        "wrong_result": 5
+      },
+      "qmd_status_counts": {
+        "wrong_result": 6
+      }
     },
     {
       "suite": "capture_integration",
       "jobs": 2,
-      "elf_status_counts": {"not_encoded": 2},
-      "qmd_status_counts": {"not_encoded": 2}
+      "elf_status_counts": {
+        "not_encoded": 2
+      },
+      "qmd_status_counts": {
+        "not_encoded": 2
+      }
     },
     {
       "suite": "consolidation",
       "jobs": 4,
-      "elf_status_counts": {"not_encoded": 4},
-      "qmd_status_counts": {"not_encoded": 4}
+      "elf_status_counts": {
+        "not_encoded": 4
+      },
+      "qmd_status_counts": {
+        "not_encoded": 4
+      }
     },
     {
       "suite": "knowledge_compilation",
       "jobs": 2,
-      "elf_status_counts": {"not_encoded": 2},
-      "qmd_status_counts": {"not_encoded": 2}
+      "elf_status_counts": {
+        "not_encoded": 2
+      },
+      "qmd_status_counts": {
+        "not_encoded": 2
+      }
     },
     {
       "suite": "operator_debugging_ux",
       "jobs": 1,
-      "elf_status_counts": {"not_encoded": 1},
-      "qmd_status_counts": {"not_encoded": 1}
+      "elf_status_counts": {
+        "not_encoded": 1
+      },
+      "qmd_status_counts": {
+        "not_encoded": 1
+      }
     },
     {
       "suite": "production_ops",
       "jobs": 6,
-      "elf_status_counts": {"blocked": 2, "not_encoded": 4},
-      "qmd_status_counts": {"blocked": 2, "not_encoded": 4}
+      "elf_status_counts": {
+        "blocked": 2,
+        "not_encoded": 4
+      },
+      "qmd_status_counts": {
+        "blocked": 2,
+        "not_encoded": 4
+      }
     }
   ],
   "adapter_ledger": {
-    "adapter_records": 21,
+    "adapter_records": 23,
     "unique_project_names": 17,
     "external_project_count_note": "The generated report field external_project_count reports unique non-ELF project names after the XY-900 runner repair; the manifest has 16 external projects and 17 total project names including ELF.",
     "evidence_class_counts": {
       "fixture_backed": 1,
       "live_baseline_only": 6,
-      "live_real_world": 3,
+      "live_real_world": 5,
       "research_gate": 11
     },
     "overall_status_counts": {
-      "pass": 3,
-      "wrong_result": 5,
+      "pass": 4,
+      "wrong_result": 6,
       "lifecycle_fail": 1,
       "blocked": 5,
       "not_encoded": 7
     },
-    "xy900_update_note": "XY-900 promotes graphify from research_gate/blocked to a tiny scored live_real_world wrong_result smoke; broad graph/RAG quality remains unproven."
+    "xy900_update_note": "XY-900 promotes graphify from research_gate/blocked to a tiny scored live_real_world wrong_result smoke; broad graph/RAG quality remains unproven.",
+    "xy932_update_note": "XY-932 adds narrow ELF/qmd operator-debug live_real_world records: ELF pass and qmd wrong_result for trace hydration/candidate-drop visibility, with OpenMemory and claude-mem UI still unmeasured."
   },
   "claim_boundary": {
     "elf_vs_qmd": "near_tie_with_narrow_delete_ttl_elf_lead_not_overall_win",
diff --git a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
index a741778a..f67d9d5f 100644
--- a/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
+++ b/docs/research/2026-06-11-xy-897-competitor-strength-matrix.json
@@ -20,20 +20,20 @@
     "operator_boundary": "Private corpus and credentialed production-ops checks remain blocked until operator-owned inputs are supplied."
   },
   "manifest_summary": {
-    "adapter_records": 21,
+    "adapter_records": 23,
     "project_count": 17,
     "evidence_class_counts": {
       "fixture_backed": 1,
       "live_baseline_only": 6,
-      "live_real_world": 3,
+      "live_real_world": 5,
       "research_gate": 11
     },
     "overall_status_counts": {
       "lifecycle_fail": 1,
       "blocked": 5,
       "not_encoded": 7,
-      "pass": 3,
-      "wrong_result": 5
+      "pass": 4,
+      "wrong_result": 6
     }
   },
   "state_taxonomy": [
@@ -90,12 +90,12 @@
       "measured_status": "wrong_result",
       "proof": {
         "command": "cargo make real-world-memory-live-adapters",
-        "artifact": "tmp/real-world-memory/live-adapters/elf-report.md"
+        "artifact": "tmp/real-world-memory/live-adapters/elf-report.md; tmp/real-world-job/operator-ux-live-adapters/elf-report.md"
       },
       "unsupported_or_blocked_status": {
         "state": "blocked",
         "typed_reason": "private_manifest_and_provider_credentials",
-        "details": "Fixture production-ops keeps private corpus and provider credential gates blocked; live sweep keeps broader non-retrieval suites typed non-pass."
+        "details": "Fixture production-ops keeps private corpus and provider credential gates blocked; the full live sweep keeps broader non-retrieval suites typed non-pass, while the narrow operator-debug slice now passes."
       },
       "benchmark_before_claim": "A full-suite live_real_world pass plus separate private-corpus and credentialed production-ops evidence is required before broad live parity or production proof claims.",
       "borrow_if_stronger": "Keep borrowing qmd debug knobs, OpenViking staged trajectory, mem0 history, Letta core memory, and graph/RAG navigation patterns where they remain stronger."
@@ -112,14 +112,14 @@
       "measured_status": "wrong_result",
       "proof": {
         "command": "cargo make real-world-memory-live-adapters",
-        "artifact": "tmp/real-world-memory/live-adapters/qmd-report.md"
+        "artifact": "tmp/real-world-memory/live-adapters/qmd-report.md; tmp/real-world-job/operator-ux-live-adapters/qmd-report.md"
       },
       "unsupported_or_blocked_status": {
         "state": "not_encoded",
         "typed_reason": "deep_profile_and_non_retrieval_suites_not_encoded",
-        "details": "The full live sweep passes targeted retrieval suites but keeps memory_evolution wrong_result and several broader suites not_encoded or blocked."
+        "details": "The full live sweep passes targeted retrieval suites but keeps memory_evolution wrong_result and several broader suites not_encoded or blocked; the narrow operator-debug slice ties replay commands but is wrong_result for trace hydration and candidate-drop visibility."
       },
-      "benchmark_before_claim": "Run qmd deep retrieval/debug profile and full-suite live real-world replay with trace-level diagnostics before claiming ELF wins, ties, or loses on retrieval debugging.",
+      "benchmark_before_claim": "Keep qmd deep retrieval/debug profiling separate from the narrow operator-debug live slice; no broad ELF-over-qmd or qmd-over-ELF claim is allowed until comparable stage artifacts exist.",
       "borrow_if_stronger": "Borrow transparent local knobs for query rewriting, weighted fusion, rerank explanation, and command-line replay."
     },
     {
@@ -491,11 +491,11 @@
     {
       "scenario_id": "operator_debugging",
       "scenario": "operator debugging",
-      "current_elf_evidence": "ELF fixture-backed operator_debugging_ux passes, but ELF live_real_world operator_debugging_ux is not_encoded.",
+      "current_elf_evidence": "ELF fixture-backed operator_debugging_ux passes, and the narrow live_real_world operator-debug slice passes for trace hydration, candidate-drop visibility, selected-but-not-narrated evidence, replay-command availability, and repair-action clarity.",
       "strongest_competitor_or_reference": "qmd, claude-mem, OpenMemory",
-      "current_competitor_evidence": "qmd has local debug strengths but operator_debugging_ux is not_encoded in live sweeps; claude-mem and OpenMemory UX are not_encoded.",
-      "current_state": "Operator debugging remains mostly product/UX evidence, not comparable live benchmark evidence.",
-      "next_measurement": "Score trace hydration, candidate-stage attribution, raw-SQL avoidance, and repair-action clarity through live viewer or CLI artifacts."
+      "current_competitor_evidence": "qmd now has a narrow live_real_world operator-debug slice: replay-command availability and repair-action clarity pass, but trace hydration, candidate-drop stage visibility, and selected-but-not-narrated evidence are wrong_result. claude-mem and OpenMemory UX remain not_encoded or blocked.",
+      "current_state": "ELF has a narrow comparable live win over qmd for trace hydration and candidate-drop visibility, while OpenMemory and claude-mem UI workflows remain unmeasured.",
+      "next_measurement": "Add bounded OpenMemory and claude-mem UI/export or viewer runners before any broader operator-UX claim."
     },
     {
       "scenario_id": "capture_write_policy",
diff --git a/scripts/real-world-operator-debug-live-adapters.sh b/scripts/real-world-operator-debug-live-adapters.sh
new file mode 100755
index 00000000..f027fe4d
--- /dev/null
+++ b/scripts/real-world-operator-debug-live-adapters.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Runs the ELF and qmd operator-debug live adapters, benchmarks and publishes each, then writes summary.json; directories are overridable via ELF_OPERATOR_DEBUG_* variables.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+REPORT_DIR="${ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR:-${ROOT_DIR}/tmp/real-world-job/operator-ux-live-adapters}"
+FIXTURE_DIR="${ELF_OPERATOR_DEBUG_LIVE_FIXTURES:-${ROOT_DIR}/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux}"
+WORK_DIR="${ELF_OPERATOR_DEBUG_LIVE_WORK_DIR:-/bench/operator-debug-live-adapters}"
+QMD_DIR="${ELF_OPERATOR_DEBUG_QMD_DIR:-/bench/repos/qmd}"
+
+if [[ ! -f "/.dockerenv" && "${ELF_OPERATOR_DEBUG_LIVE_ALLOW_HOST:-0}" != "1" ]]; then
+  echo "Refusing to run operator-debug live adapters outside Docker. Use cargo make real-world-job-operator-ux-live-adapters." >&2
+  exit 1
+fi
+
+for cmd in bash cargo git jq npm npx; do
+  if ! command -v "${cmd}" >/dev/null 2>&1; then
+    echo "Missing ${cmd} in operator-debug live adapter runner." >&2
+    exit 1
+  fi
+done
+# Remove artifacts left by earlier runs; ${REPORT_DIR:?} makes the expansion fail if REPORT_DIR is empty or unset.
+mkdir -p "${REPORT_DIR}" "${WORK_DIR}"
+rm -rf "${REPORT_DIR:?}/elf-fixtures" \
+  "${REPORT_DIR:?}/qmd-fixtures" \
+  "${REPORT_DIR:?}/elf-materialization.json" \
+  "${REPORT_DIR:?}/qmd-materialization.json" \
+  "${REPORT_DIR:?}/elf-report.json" \
+  "${REPORT_DIR:?}/elf-report.md" \
+  "${REPORT_DIR:?}/qmd-report.json" \
+  "${REPORT_DIR:?}/qmd-report.md" \
+  "${REPORT_DIR:?}/summary.json"
+
+cd "${ROOT_DIR}"
+# ELF pipeline: the adapter's --out-fixtures directory feeds the benchmark run, whose JSON report is then published as Markdown.
+cargo run -p elf-eval --bin real_world_live_adapter -- elf \
+  --fixtures "${FIXTURE_DIR}" \
+  --out-fixtures "${REPORT_DIR}/elf-fixtures" \
+  --evidence-out "${REPORT_DIR}/elf-materialization.json" \
+  --config config/local/elf.docker.toml \
+  --adapter-id elf_operator_debug_live
+
+cargo run -p elf-eval --bin real_world_job_benchmark -- run \
+  --fixtures "${REPORT_DIR}/elf-fixtures" \
+  --out "${REPORT_DIR}/elf-report.json" \
+  --run-id real-world-operator-debug-live-elf \
+  --adapter-id elf_operator_debug_live \
+  --adapter-name "ELF live operator-debug service adapter" \
+  --adapter-behavior live_operator_debug_adapter \
+  --adapter-storage-status pass \
+  --adapter-runtime-status pass \
+  --adapter-notes "Materialized by real_world_live_adapter through ElfService, worker indexing, search_raw trace ids, and operator-debug trace metadata."
+
+cargo run -p elf-eval --bin real_world_job_benchmark -- publish \
+  --report "${REPORT_DIR}/elf-report.json" \
+  --out "${REPORT_DIR}/elf-report.md"
+# qmd pipeline: the same three steps, with the adapter pointed at QMD_DIR and a work dir under WORK_DIR.
+cargo run -p elf-eval --bin real_world_live_adapter -- qmd \
+  --fixtures "${FIXTURE_DIR}" \
+  --out-fixtures "${REPORT_DIR}/qmd-fixtures" \
+  --evidence-out "${REPORT_DIR}/qmd-materialization.json" \
+  --qmd-dir "${QMD_DIR}" \
+  --work-dir "${WORK_DIR}/qmd" \
+  --adapter-id qmd_operator_debug_live
+
+cargo run -p elf-eval --bin real_world_job_benchmark -- run \
+  --fixtures "${REPORT_DIR}/qmd-fixtures" \
+  --out "${REPORT_DIR}/qmd-report.json" \
+  --run-id real-world-operator-debug-live-qmd \
+  --adapter-id qmd_operator_debug_live \
+  --adapter-name "qmd live operator-debug CLI adapter" \
+  --adapter-behavior live_operator_debug_adapter \
+  --adapter-storage-status pass \
+  --adapter-runtime-status pass \
+  --adapter-notes "Materialized by real_world_live_adapter through qmd collection add, update, embed, query --json, and local replay command metadata; ELF trace/viewer surfaces are not inferred."
+
+cargo run -p elf-eval --bin real_world_job_benchmark -- publish \
+  --report "${REPORT_DIR}/qmd-report.json" \
+  --out "${REPORT_DIR}/qmd-report.md"
+# Write summary.json; report paths are derived from artifact_dir so an overridden ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR is reported accurately.
+jq -n \
+  --slurpfile elf_materialization "${REPORT_DIR}/elf-materialization.json" \
+  --slurpfile qmd_materialization "${REPORT_DIR}/qmd-materialization.json" \
+  --slurpfile elf_report "${REPORT_DIR}/elf-report.json" \
+  --slurpfile qmd_report "${REPORT_DIR}/qmd-report.json" \
+  '((env.ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR | select(. != "")) // "tmp/real-world-job/operator-ux-live-adapters") as $artifact_dir | {
+    schema: "elf.real_world_operator_debug_live_adapter_sweep/v1",
+    generated_at: (now | todateiso8601),
+    artifact_dir: $artifact_dir, # an empty override falls back to the default, matching the shell ":-" expansion
+    fixture_dir: ((env.ELF_OPERATOR_DEBUG_LIVE_FIXTURES | select(. != "")) // "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux"),
+    adapters: [
+      {
+        adapter_id: "elf_operator_debug_live",
+        evidence_class: "live_real_world",
+        materialization: $elf_materialization[0],
+        report: {
+          json: ($artifact_dir + "/elf-report.json"),
+          markdown: ($artifact_dir + "/elf-report.md"),
+          summary: $elf_report[0].summary,
+          suites: $elf_report[0].suites
+        }
+      },
+      {
+        adapter_id: "qmd_operator_debug_live",
+        evidence_class: "live_real_world",
+        materialization: $qmd_materialization[0],
+        report: {
+          json: ($artifact_dir + "/qmd-report.json"),
+          markdown: ($artifact_dir + "/qmd-report.md"),
+          summary: $qmd_report[0].summary,
+          suites: $qmd_report[0].suites
+        }
+      }
+    ],
+    scenario_dimensions: [
+      "trace_available",
+      "replay_command_available",
+      "candidate_drop_visibility",
+      "repair_action_clarity",
+      "raw_sql_needed"
+    ],
+    boundary: "This narrow sweep scores operator-debugging fixtures only. It does not change core ranking, launch OpenMemory or claude-mem UI flows, or convert fixture-only UX evidence into broad product superiority."
+  }' >"${REPORT_DIR}/summary.json"
+
+echo "Operator-debug live adapter reports:"
+echo "  ${REPORT_DIR}/elf-report.json"
+echo "  ${REPORT_DIR}/elf-report.md"
+echo "  ${REPORT_DIR}/qmd-report.json"
+echo "  ${REPORT_DIR}/qmd-report.md"
+echo "  ${REPORT_DIR}/summary.json"