hack-ink · yvette-carlisle · Jun 11, 2026 · Jun 11, 2026
diff --git a/Makefile.toml b/Makefile.toml
@@ -421,6 +421,7 @@ args = [
 # | real-world-job-operator-ux             | composite |     |
 # | real-world-job-operator-ux-json        | command   |     |
 # | real-world-job-operator-ux-report      | command   |     |
+# | real-world-job-operator-ux-live-adapters | command |     |
 # | real-world-memory-retrieval            | composite |     |
 # | real-world-memory-retrieval-json       | command   |     |
 # | real-world-memory-retrieval-report     | command   |     |
@@ -668,6 +669,14 @@ args = [
 	"tmp/real-world-job/real-world-job-operator-ux-report.md",
 ]
 
+[tasks.real-world-job-operator-ux-live-adapters]  # runs the operator-debug live-adapters script inside the baseline Docker Compose stack
+workspace = false
+command = "bash"
+args = [  # bash -l -c <command string>; each bare `-e NAME` below forwards NAME from the caller's environment into the container
+	"-lc",
+	"docker compose -f docker-compose.baseline.yml run --build --rm -e ELF_OPERATOR_DEBUG_LIVE_REPORT_DIR -e ELF_OPERATOR_DEBUG_LIVE_FIXTURES -e ELF_OPERATOR_DEBUG_LIVE_WORK_DIR -e ELF_OPERATOR_DEBUG_QMD_DIR baseline-runner bash scripts/real-world-operator-debug-live-adapters.sh",
+]
+
 [tasks.real-world-memory-retrieval]
 workspace = false
 dependencies = [

diff --git a/README.md b/README.md
@@ -162,6 +162,14 @@ provider-backed ELF evidence was required.
   17 pass, 6 wrong_result, 2 blocked, and 13 not_encoded jobs. The difference is the
   delete/TTL tombstone case; qmd remains the local retrieval-debug UX reference, and
   no broad ELF-over-qmd claim is allowed.
+- Live operator-debugging slice after XY-932: `cargo make
+  real-world-job-operator-ux-live-adapters` emits narrow Docker-isolated
+  `live_real_world` records for ELF and qmd over the operator-debugging fixtures.
+  ELF passes trace hydration, candidate-drop visibility, selected-but-not-narrated
+  evidence, replay-command availability, and repair-action clarity. qmd ties on replay commands
+  and repair-action clarity but is `wrong_result` for trace hydration, candidate-drop stage
+  visibility, and selected-but-not-narrated diagnosis. OpenMemory UI/export and claude-mem
+  viewer flows remain blocked or not encoded, so this is not a broad viewer-product claim.
 - Expanded adapter-pack coverage after XY-834: the real-world external adapter
   manifest now includes `research_gate` records for RAGFlow, LightRAG, GraphRAG,
   Graphiti/Zep, Letta, LangGraph, nanograph, llm-wiki, gbrain, and deeper

diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json b/apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json
@@ -481,6 +481,274 @@
         "This record does not prove broad RAG/graph adapter parity or private-corpus production quality."
       ]
     },
+    {
+      "adapter_id": "elf_operator_debug_live",
+      "project": "ELF",
+      "adapter_kind": "docker_service_operator_debug_real_world_job",
+      "evidence_class": "live_real_world",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "pass",
+      "setup": {
+        "status": "pass",
+        "evidence": "The narrow operator-debug live task runs inside docker-compose.baseline.yml with Docker-owned Postgres, Qdrant, Cargo, npm, qmd, and cache volumes.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json"
+      },
+      "run": {
+        "status": "pass",
+        "evidence": "ELF materializes operator_debugging_ux adapter_response objects through ElfService, worker indexing, search_raw trace ids, and generated operator_debug metadata.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+      },
+      "result": {
+        "status": "pass",
+        "evidence": "The narrow live slice scores operator-debugging jobs with trace availability, replay command availability, candidate-drop visibility, repair-action clarity, and raw-SQL avoidance separated in job-level evidence.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.md"
+      },
+      "capabilities": [
+        {
+          "capability": "operator_debug_real_world_job_adapter",
+          "status": "pass",
+          "evidence": "The adapter executes the checked-in operator_debugging_ux jobs through the live service materializer and generated scoring fixtures."
+        },
+        {
+          "capability": "trace_hydration_metadata",
+          "status": "pass",
+          "evidence": "Generated operator_debug records include service trace ids, viewer links, admin trace-bundle URLs, and trace_available=true."
+        },
+        {
+          "capability": "replay_command_metadata",
+          "status": "pass",
+          "evidence": "Generated operator_debug records include admin trace-bundle curl replay commands; no raw SQL path is required."
+        },
+        {
+          "capability": "candidate_drop_visibility",
+          "status": "pass",
+          "evidence": "The operator-debug jobs keep dropped-candidate visibility as explicit job-level evidence instead of relying on direct database inspection."
+        },
+        {
+          "capability": "openmemory_or_claude_mem_ui_runner",
+          "status": "not_encoded",
+          "evidence": "This ELF live slice does not launch OpenMemory or claude-mem UI flows."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "evidence": "The narrow live operator-debug slice scores trace hydration, stage attribution, candidate-drop visibility, selected-but-not-narrated diagnosis, and repair-action clarity through generated ELF live artifacts."
+        }
+      ],
+      "scenarios": [
+        {
+          "scenario_id": "operator_debug_trace_hydration",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "ELF generated trace_available=true, service trace ids, viewer URLs, and admin trace-bundle replay URLs for the operator-debug jobs; qmd has replay rows but no ELF trace hydration surface.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+        },
+        {
+          "scenario_id": "operator_debug_replay_command",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "ELF generated admin trace-bundle replay commands; qmd generated local CLI query replay commands. These are comparable replay-command availability artifacts, not equivalent UI quality claims.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+        },
+        {
+          "scenario_id": "operator_debug_candidate_drop_visibility",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "ELF generated operator_debug candidate-drop visibility from trace and replay-candidate metadata without direct SQL assumptions; qmd keeps only top-k replay rows and lacks intermediate candidate-drop stages.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json"
+        },
+        {
+          "scenario_id": "operator_debug_repair_action_clarity",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "ELF and qmd generated clear repair/replay steps for the narrow operator-debug jobs; OpenMemory and claude-mem UI repair paths remain blocked or not encoded.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+        },
+        {
+          "scenario_id": "operator_debug_selected_but_not_narrated",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "The new selected-but-not-narrated job scores whether selected trace evidence is available for answer-composition repair without direct database inspection.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "fixture_dir",
+          "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+          "status": "real"
+        },
+        {
+          "kind": "command",
+          "ref": "cargo make real-world-job-operator-ux-live-adapters",
+          "status": "pass"
+        },
+        {
+          "kind": "artifact",
+          "ref": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json",
+          "status": "pass"
+        }
+      ],
+      "notes": [
+        "This is a narrow operator-debug live slice, not a full-suite live pass.",
+        "The record does not implement product UI improvements and does not claim broad qmd/OpenMemory/claude-mem superiority."
+      ]
+    },
+    {
+      "adapter_id": "qmd_operator_debug_live",
+      "project": "qmd",
+      "adapter_kind": "docker_cli_operator_debug_real_world_job",
+      "evidence_class": "live_real_world",
+      "docker_default": true,
+      "host_global_installs_required": false,
+      "overall_status": "wrong_result",
+      "setup": {
+        "status": "pass",
+        "evidence": "The narrow operator-debug live task clones and installs qmd inside the baseline Docker container when the checkout is absent.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+      },
+      "run": {
+        "status": "wrong_result",
+        "evidence": "qmd materializes operator_debugging_ux adapter_response objects through collection add, update, embed, and query --json, then records local replay-command metadata but no service trace hydration.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+      },
+      "result": {
+        "status": "wrong_result",
+        "evidence": "The narrow live slice gives qmd explicit replay-command evidence, but operator-debug jobs remain wrong_result where trace availability, trace completeness, or candidate-drop stage visibility is required.",
+        "command": "cargo make real-world-job-operator-ux-live-adapters",
+        "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.md"
+      },
+      "capabilities": [
+        {
+          "capability": "operator_debug_real_world_job_adapter",
+          "status": "pass",
+          "evidence": "The adapter executes the checked-in operator_debugging_ux jobs through qmd local CLI materialization and generated scoring fixtures."
+        },
+        {
+          "capability": "local_replay_command_metadata",
+          "status": "pass",
+          "evidence": "Generated operator_debug records include qmd query replay commands tied to per-job collections."
+        },
+        {
+          "capability": "trace_hydration_metadata",
+          "status": "wrong_result",
+          "evidence": "Generated qmd operator_debug records have trace_available=false and no ELF viewer/admin trace bundle because qmd exposes local replay rows rather than service trace hydration."
+        },
+        {
+          "capability": "candidate_drop_visibility",
+          "status": "wrong_result",
+          "evidence": "qmd top-k replay output is available, but intermediate candidate-drop stages are not exposed in the generated artifact."
+        },
+        {
+          "capability": "openmemory_or_claude_mem_ui_runner",
+          "status": "not_encoded",
+          "evidence": "This qmd live slice does not launch OpenMemory or claude-mem UI flows."
+        }
+      ],
+      "suites": [
+        {
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "evidence": "The narrow qmd operator-debug slice scores local replay commands but remains wrong_result for trace hydration, candidate-drop stage visibility, and selected-but-not-narrated diagnosis."
+        }
+      ],
+      "scenarios": [
+        {
+          "scenario_id": "operator_debug_trace_hydration",
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "qmd generated replay-command metadata but trace_available=false, so ELF wins only this trace-hydration dimension; this is not a broad qmd loss.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        },
+        {
+          "scenario_id": "operator_debug_replay_command",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "qmd generated local CLI query replay commands for the same operator-debugging scenarios; ELF generated admin trace-bundle curl commands.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+        },
+        {
+          "scenario_id": "operator_debug_candidate_drop_visibility",
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "qmd generated top-k replay output but not intermediate retrieved-but-dropped stage visibility, so candidate-drop diagnosis remains a qmd wrong_result in this narrow slice.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+        },
+        {
+          "scenario_id": "operator_debug_repair_action_clarity",
+          "suite_id": "operator_debugging_ux",
+          "status": "pass",
+          "elf_position": "ties",
+          "comparison_outcome": "tie",
+          "evidence": "qmd generated clear local replay steps for repair investigation, matching ELF on repair-action clarity while differing on trace hydration.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        },
+        {
+          "scenario_id": "operator_debug_selected_but_not_narrated",
+          "suite_id": "operator_debugging_ux",
+          "status": "wrong_result",
+          "elf_position": "wins",
+          "comparison_outcome": "win",
+          "evidence": "qmd can replay top-k rows, but the generated artifact does not expose service trace narration stages for the selected-but-not-narrated diagnosis.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        }
+      ],
+      "evidence": [
+        {
+          "kind": "fixture_dir",
+          "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+          "status": "real"
+        },
+        {
+          "kind": "command",
+          "ref": "cargo make real-world-job-operator-ux-live-adapters",
+          "status": "wrong_result"
+        },
+        {
+          "kind": "artifact",
+          "ref": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json",
+          "status": "wrong_result"
+        }
+      ],
+      "notes": [
+        "This is a narrow operator-debug live slice, not a full-suite live pass.",
+        "qmd's replay-command availability remains useful; the wrong_result status is limited to trace hydration, candidate-drop stage visibility, and the selected-but-not-narrated diagnosis."
+      ]
+    },
     {
       "adapter_id": "agentmemory_live_baseline",
       "project": "agentmemory",