diff --git a/Makefile.toml b/Makefile.toml
index 838c9a33..21568da1 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -356,14 +356,17 @@ args = [
 
 
 # Real-world job benchmark smoke
-# | task                        | type      | cwd |
-# | --------------------------- | --------- | --- |
-# | real-world-job-smoke        | composite |     |
-# | real-world-job-smoke-json   | command   |     |
-# | real-world-job-smoke-report | command   |     |
-# | real-world-memory           | composite |     |
-# | real-world-memory-json      | command   |     |
-# | real-world-memory-report    | command   |     |
+# | task                              | type      | cwd |
+# | --------------------------------- | --------- | --- |
+# | real-world-job-smoke              | composite |     |
+# | real-world-job-smoke-json         | command   |     |
+# | real-world-job-smoke-report       | command   |     |
+# | real-world-memory                 | composite |     |
+# | real-world-memory-json            | command   |     |
+# | real-world-memory-report          | command   |     |
+# | real-world-job-operator-ux        | composite |     |
+# | real-world-job-operator-ux-json   | command   |     |
+# | real-world-job-operator-ux-report | command   |     |
 
 [tasks.real-world-job-smoke]
 workspace = false
@@ -457,6 +460,55 @@ args = [
 	"tmp/real-world-memory/real-world-memory-report.md",
 ]
 
+[tasks.real-world-job-operator-ux]
+workspace = false
+dependencies = [
+	"real-world-job-operator-ux-report",
+]
+
+[tasks.real-world-job-operator-ux-json]
+workspace = false
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"run",
+	"--fixtures",
+	"apps/elf-eval/fixtures/real_world_job/operator_debugging_ux",
+	"--out",
+	"tmp/real-world-job/real-world-job-operator-ux-report.json",
+	"--run-id",
+	"real-world-job-operator-ux",
+	"--adapter-id",
+	"fixture_operator_ux",
+	"--adapter-name",
+	"ELF operator UX fixture",
+]
+
+[tasks.real-world-job-operator-ux-report]
+workspace = false
+dependencies = [
+	"real-world-job-operator-ux-json",
+]
+command = "cargo"
+args = [
+	"run",
+	"-p",
+	"elf-eval",
+	"--bin",
+	"real_world_job_benchmark",
+	"--",
+	"publish",
+	"--report",
+	"tmp/real-world-job/real-world-job-operator-ux-report.json",
+	"--out",
+	"tmp/real-world-job/real-world-job-operator-ux-report.md",
+]
+
 
 # Meta
 # | task   | type      | cwd |
diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs
index 2f6e6516..3887ba2d 100644
--- a/apps/elf-api/src/routes.rs
+++ b/apps/elf-api/src/routes.rs
@@ -2969,6 +2969,8 @@ mod tests {
 		assert!(html.contains("Providers And Ranking"));
 		assert!(html.contains("Relation Context"));
 		assert!(html.contains("directTraceId"));
+		assert!(html.contains("trace_id"));
+		assert!(html.contains("loadInitialTrace"));
 		assert!(!html.contains("method: \"PATCH\""));
 		assert!(!html.contains("method: \"PUT\""));
 		assert!(!html.contains("method: \"DELETE\""));
diff --git a/apps/elf-api/static/viewer.html b/apps/elf-api/static/viewer.html
index f25cb956..05de83af 100644
--- a/apps/elf-api/static/viewer.html
+++ b/apps/elf-api/static/viewer.html
@@ -1506,6 +1506,30 @@ <h2>Recent Traces</h2>
 			$$(".nav button").forEach((node) => node.classList.toggle("active", node.dataset.tab === tabId));
 		}
 
+		function initialTraceId() {
+			const params = new URLSearchParams(window.location.search);
+			const queryTrace = params.get("trace_id") || params.get("traceId");
+			if (queryTrace && queryTrace.trim()) {
+				return queryTrace.trim();
+			}
+			const hash = window.location.hash.replace(/^#/, "");
+			if (!hash) {
+				return "";
+			}
+			const hashParams = new URLSearchParams(hash.includes("=") ? hash : `trace_id=${hash}`);
+			const hashTrace = hashParams.get("trace_id") || hashParams.get("traceId");
+			return hashTrace ? hashTrace.trim() : "";
+		}
+
+		async function loadInitialTrace() {
+			const traceId = initialTraceId();
+			if (!traceId) {
+				return;
+			}
+			showTab("tracesView");
+			await loadTraceBundle(traceId, $("#traceBundleDetail"));
+		}
+
 		async function refreshActive() {
 			if (state.activeTab === "searchView") {
 				if (state.session) {
@@ -1537,6 +1561,7 @@ <h2>Recent Traces</h2>
 
 		loadContext();
 		bindEvents();
+		loadInitialTrace();
 	</script>
 </body>
 </html>
diff --git a/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/dropped_evidence_filter.json b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/dropped_evidence_filter.json
new file mode 100644
index 00000000..32daf4f8
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/dropped_evidence_filter.json
@@ -0,0 +1,124 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "operator-debug-dropped-evidence-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug expected evidence dropped after recall filtering",
+  "corpus": {
+    "corpus_id": "operator-debugging-ux-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "trace-dropped-expected",
+        "kind": "trace",
+        "text": "Trace 11111111-1111-4111-8111-111111111111 shows the expected note present in recall.candidates before service-side filtering and absent after the read-profile scope filter.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-dropped-expected"}},
+        "created_at": "2026-06-09T02:00:00Z"
+      },
+      {
+        "evidence_id": "trace-dropped-decoy",
+        "kind": "note",
+        "text": "Decoy note: the auth retry policy note ranked first but does not explain the missing expected deployment evidence.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-dropped-decoy"}},
+        "created_at": "2026-06-09T02:01:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_operator_ux",
+      "answer": {
+        "content": "The auth retry policy note is the root cause; no expected deployment evidence was dropped.",
+        "claims": [
+          {
+            "claim_id": "wrong_root_cause",
+            "text": "No expected evidence was dropped.",
+            "evidence_ids": ["trace-dropped-decoy"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["trace-dropped-decoy"],
+        "latency_ms": 2.4,
+        "cost": {"currency": "USD", "amount": 0.0, "input_tokens": 0, "output_tokens": 0}
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "expected-evidence-recalled",
+      "ts": "2026-06-09T02:00:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-dropped-expected"],
+      "summary": "The trace captured recall-stage visibility for the expected evidence before filtering."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Why did the memory result miss the expected deployment evidence?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence", "avoid_repeating_completed_work"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "root_cause",
+        "text": "The expected evidence was dropped after recall by the read-profile filter."
+      }
+    ],
+    "must_not_include": ["No expected deployment evidence was dropped."],
+    "evidence_links": {"root_cause": ["trace-dropped-expected"]},
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "trace-dropped-expected",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "present in recall.candidates before service-side filtering and absent after the read-profile scope filter"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "decoy-top-auth-note",
+      "type": "decoy_evidence",
+      "evidence_ids": ["trace-dropped-decoy"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {"weight": 0.35, "max_points": 1.0, "criteria": "Identifies the trace stage that dropped expected evidence."},
+      "evidence_grounding": {"weight": 0.3, "max_points": 1.0, "criteria": "Uses trace evidence rather than the decoy top note."},
+      "workflow_helpfulness": {"weight": 0.2, "max_points": 1.0, "criteria": "Names a concrete repair action."},
+      "answer_correctness": {"weight": 0.15, "max_points": 1.0, "criteria": "Reports the correct root cause."}
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": [
+      "unsupported high-confidence claim about a required decision or fact",
+      "use of a negative trap marked failure_if_used = true"
+    ]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "expected_evidence_dropped",
+    "trace_id": "11111111-1111-4111-8111-111111111111",
+    "viewer_url": "/viewer?trace_id=11111111-1111-4111-8111-111111111111",
+    "admin_trace_bundle_url": "/v2/admin/traces/11111111-1111-4111-8111-111111111111/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+    "root_cause": "The expected candidate survived recall but was removed by the read-profile scope filter before final selection.",
+    "steps_to_root_cause": 4,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "visible in Retrieval Funnel and Replay Candidates",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "viewer_panels": ["Trace", "Retrieval Funnel", "Replay Candidates", "Stage Details"],
+    "cli_steps": ["open viewer trace link", "compare recall before and after filter", "inspect replay candidates", "repair read profile or grant"],
+    "trace_evidence": ["trace-dropped-expected"],
+    "ux_gaps": []
+  },
+  "tags": ["synthetic", "operator_debugging_ux", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/provider_latency_failure.json b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/provider_latency_failure.json
new file mode 100644
index 00000000..c1562e83
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/provider_latency_failure.json
@@ -0,0 +1,107 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "operator-debug-provider-latency-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug provider latency degrading retrieval quality",
+  "corpus": {
+    "corpus_id": "operator-debugging-ux-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "trace-provider-timeout",
+        "kind": "trace",
+        "text": "Trace 33333333-3333-4333-8333-333333333333 records provider metadata with embedding provider latency near timeout and expansion fallback to the original query only.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-provider-timeout"}},
+        "created_at": "2026-06-09T02:10:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_operator_ux",
+      "answer": {
+        "content": "Provider latency caused expansion fallback to the original query only, which reduced candidate recall.",
+        "claims": [
+          {
+            "claim_id": "root_cause",
+            "text": "Provider latency caused expansion fallback to the original query only.",
+            "evidence_ids": ["trace-provider-timeout"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["trace-provider-timeout"],
+        "latency_ms": 4.8,
+        "cost": {"currency": "USD", "amount": 0.0, "input_tokens": 0, "output_tokens": 0}
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "provider-timeout-recorded",
+      "ts": "2026-06-09T02:10:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-provider-timeout"],
+      "summary": "Provider metadata and stage details recorded degraded expansion behavior."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Why did recall get worse during the slow provider window?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence", "state_blockers"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "root_cause",
+        "text": "Provider latency caused expansion fallback to the original query only."
+      }
+    ],
+    "must_not_include": ["The corpus did not contain the expected evidence."],
+    "evidence_links": {"root_cause": ["trace-provider-timeout"]},
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "trace-provider-timeout",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "embedding provider latency near timeout and expansion fallback to the original query only"
+    }
+  ],
+  "negative_traps": [],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {"weight": 0.35, "max_points": 1.0, "criteria": "Uses provider and stage metadata."},
+      "evidence_grounding": {"weight": 0.3, "max_points": 1.0, "criteria": "Cites trace provider metadata."},
+      "workflow_helpfulness": {"weight": 0.2, "max_points": 1.0, "criteria": "Suggests timeout or provider health repair."},
+      "latency_resource": {"weight": 0.15, "max_points": 1.0, "criteria": "Reports latency as part of the root cause."}
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": ["unsupported high-confidence claim about a required decision or fact"]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "provider_latency_or_failure",
+    "trace_id": "33333333-3333-4333-8333-333333333333",
+    "viewer_url": "/viewer?trace_id=33333333-3333-4333-8333-333333333333",
+    "admin_trace_bundle_url": "/v2/admin/traces/33333333-3333-4333-8333-333333333333/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+    "root_cause": "Provider latency forced fallback behavior, shrinking expanded-query recall.",
+    "steps_to_root_cause": 3,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "visible as low recall counts rather than a post-recall drop",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "viewer_panels": ["Providers And Ranking", "Stage Summary", "Stage Details"],
+    "cli_steps": ["open trace bundle", "inspect provider metadata", "compare expanded queries", "raise timeout or repair provider health"],
+    "trace_evidence": ["trace-provider-timeout"],
+    "ux_gaps": []
+  },
+  "tags": ["synthetic", "operator_debugging_ux", "agentmemory_reference", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/rebuild_changed_results.json b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/rebuild_changed_results.json
new file mode 100644
index 00000000..abd8c048
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/rebuild_changed_results.json
@@ -0,0 +1,135 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "operator-debug-rebuild-changed-results-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug result changes after Qdrant rebuild",
+  "corpus": {
+    "corpus_id": "operator-debugging-ux-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "trace-before-rebuild",
+        "kind": "trace",
+        "text": "Before rebuild, trace 44444444-4444-4444-8444-444444444440 returned an orphan Qdrant candidate that no longer had an active source-of-truth note.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-before-rebuild"}},
+        "created_at": "2026-06-09T02:15:00Z"
+      },
+      {
+        "evidence_id": "trace-after-rebuild",
+        "kind": "trace",
+        "text": "After rebuild, trace 44444444-4444-4444-8444-444444444444 shows the orphan candidate removed and the active Postgres-backed note selected.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-after-rebuild"}},
+        "created_at": "2026-06-09T02:20:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_operator_ux",
+      "answer": {
+        "content": "Rebuild changed results because a stale derived-index candidate was removed and the active Postgres-backed note became top result.",
+        "claims": [
+          {
+            "claim_id": "root_cause",
+            "text": "Qdrant rebuild removed a stale derived-index candidate and selected the active source-of-truth note.",
+            "evidence_ids": ["trace-before-rebuild", "trace-after-rebuild"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["trace-before-rebuild", "trace-after-rebuild"],
+        "latency_ms": 3.3,
+        "cost": {"currency": "USD", "amount": 0.0, "input_tokens": 0, "output_tokens": 0}
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "before-rebuild-trace",
+      "ts": "2026-06-09T02:15:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-before-rebuild"],
+      "summary": "The pre-rebuild trace included a stale derived-index candidate."
+    },
+    {
+      "event_id": "after-rebuild-trace",
+      "ts": "2026-06-09T02:20:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-after-rebuild"],
+      "summary": "The post-rebuild trace selected only source-of-truth-backed evidence."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Why did search change after rebuild?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "root_cause",
+        "text": "Qdrant rebuild removed a stale derived-index candidate and selected the active source-of-truth note."
+      }
+    ],
+    "must_not_include": ["Postgres source-of-truth changed during rebuild."],
+    "evidence_links": {"root_cause": ["trace-before-rebuild", "trace-after-rebuild"]},
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "trace-before-rebuild",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "orphan Qdrant candidate that no longer had an active source-of-truth note"
+    },
+    {
+      "evidence_id": "trace-after-rebuild",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "orphan candidate removed and the active Postgres-backed note selected"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "treat-qdrant-as-source-of-truth",
+      "type": "unsupported_prior",
+      "evidence_ids": ["trace-before-rebuild"],
+      "failure_if_used": false
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {"weight": 0.3, "max_points": 1.0, "criteria": "Compares before and after trace evidence."},
+      "evidence_grounding": {"weight": 0.3, "max_points": 1.0, "criteria": "Uses both rebuild traces."},
+      "workflow_helpfulness": {"weight": 0.25, "max_points": 1.0, "criteria": "Explains source-of-truth versus derived index repair."},
+      "answer_correctness": {"weight": 0.15, "max_points": 1.0, "criteria": "Does not claim Postgres changed."}
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": ["unsupported high-confidence claim about a required decision or fact"]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "rebuild_changed_results",
+    "trace_id": "44444444-4444-4444-8444-444444444444",
+    "viewer_url": "/viewer?trace_id=44444444-4444-4444-8444-444444444444",
+    "admin_trace_bundle_url": "/v2/admin/traces/44444444-4444-4444-8444-444444444444/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+    "root_cause": "Rebuild removed stale derived-index state and restored source-of-truth-backed ranking.",
+    "steps_to_root_cause": 5,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "visible by comparing before and after trace candidates",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "viewer_panels": ["Trace", "Replay Candidates", "Selected Final Results"],
+    "cli_steps": ["open before trace", "open after trace", "compare replay candidates", "confirm active note selected", "keep Qdrant rebuild as repair"],
+    "trace_evidence": ["trace-before-rebuild", "trace-after-rebuild"],
+    "ux_gaps": []
+  },
+  "tags": ["synthetic", "operator_debugging_ux", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/relation_context_mislead.json b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/relation_context_mislead.json
new file mode 100644
index 00000000..8bdc01e5
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/relation_context_mislead.json
@@ -0,0 +1,121 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "operator-debug-relation-context-mislead-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug relation context that misleads search",
+  "corpus": {
+    "corpus_id": "operator-debugging-ux-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "trace-relation-context",
+        "kind": "trace",
+        "text": "Trace 55555555-5555-4555-8555-555555555555 includes relation_context with deprecated predicate deployment_owner pointing to a stale owner, while the selected note text says the current owner is release engineering.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-relation-context"}},
+        "created_at": "2026-06-09T02:25:00Z"
+      },
+      {
+        "evidence_id": "stale-relation-fact",
+        "kind": "adapter_state",
+        "text": "Stale graph fact: deployment_owner points to the old infra group and should not drive the current answer.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "stale-relation-fact"}},
+        "created_at": "2026-06-08T02:25:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_operator_ux",
+      "answer": {
+        "content": "Relation context misled the search because a deprecated deployment_owner fact conflicted with the selected note text.",
+        "claims": [
+          {
+            "claim_id": "root_cause",
+            "text": "A deprecated relation_context fact conflicted with the selected note text.",
+            "evidence_ids": ["trace-relation-context"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["trace-relation-context"],
+        "latency_ms": 2.9,
+        "cost": {"currency": "USD", "amount": 0.0, "input_tokens": 0, "output_tokens": 0}
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "relation-context-trace",
+      "ts": "2026-06-09T02:25:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-relation-context"],
+      "summary": "The trace captured relation_context and selected note text for the misleading result."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Why did graph context point to the wrong owner?",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "root_cause",
+        "text": "A deprecated relation_context fact conflicted with the selected note text."
+      }
+    ],
+    "must_not_include": ["The old infra group is the current owner."],
+    "evidence_links": {"root_cause": ["trace-relation-context"]},
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "trace-relation-context",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "relation_context with deprecated predicate deployment_owner pointing to a stale owner"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "trust-stale-relation",
+      "type": "stale_fact",
+      "evidence_ids": ["stale-relation-fact"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {"weight": 0.35, "max_points": 1.0, "criteria": "Uses relation context panel evidence."},
+      "evidence_grounding": {"weight": 0.3, "max_points": 1.0, "criteria": "Cites trace relation_context evidence."},
+      "workflow_helpfulness": {"weight": 0.2, "max_points": 1.0, "criteria": "Suggests relation invalidation or predicate repair."},
+      "answer_correctness": {"weight": 0.15, "max_points": 1.0, "criteria": "Does not trust the stale owner."}
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": ["unsupported high-confidence claim about a required decision or fact", "use of a negative trap marked failure_if_used = true"]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "relation_context_misled_search",
+    "trace_id": "55555555-5555-4555-8555-555555555555",
+    "viewer_url": "/viewer?trace_id=55555555-5555-4555-8555-555555555555",
+    "admin_trace_bundle_url": "/v2/admin/traces/55555555-5555-4555-8555-555555555555/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+    "root_cause": "A deprecated graph relation remained visible in relation_context and conflicted with the selected note text.",
+    "steps_to_root_cause": 4,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "not dropped; misleading context is visible on selected result",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "viewer_panels": ["Selected Final Results", "Relation Context", "Stage Details"],
+    "cli_steps": ["open trace link", "inspect selected result relation count", "open Relation Context", "invalidate stale relation fact"],
+    "trace_evidence": ["trace-relation-context"],
+    "ux_gaps": []
+  },
+  "tags": ["synthetic", "operator_debugging_ux", "claude_mem_reference", "openmemory_reference", "no_live_claim"]
+}
diff --git a/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/rerank_bad_candidate.json b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/rerank_bad_candidate.json
new file mode 100644
index 00000000..5be298b7
--- /dev/null
+++ b/apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/rerank_bad_candidate.json
@@ -0,0 +1,121 @@
+{
+  "schema": "elf.real_world_job/v1",
+  "job_id": "operator-debug-rerank-bad-candidate-001",
+  "suite": "operator_debugging_ux",
+  "title": "Debug rerank promotion of a bad candidate",
+  "corpus": {
+    "corpus_id": "operator-debugging-ux-2026-06-09",
+    "profile": "synthetic",
+    "items": [
+      {
+        "evidence_id": "trace-rerank-promotion",
+        "kind": "trace",
+        "text": "Trace 22222222-2222-4222-8222-222222222222 shows the correct candidate at retrieval rank 2 and the decoy at retrieval rank 5, then rerank.score promotes the decoy above the correct candidate.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "trace-rerank-promotion"}},
+        "created_at": "2026-06-09T02:05:00Z"
+      },
+      {
+        "evidence_id": "rerank-decoy-note",
+        "kind": "note",
+        "text": "Decoy note: deployment retry discussion shares query terms but belongs to a different project.",
+        "source_ref": {"schema": "source_ref/v1", "resolver": "real_world_job_fixture/v1", "ref": {"fixture": "operator_debugging_ux", "evidence_id": "rerank-decoy-note"}},
+        "created_at": "2026-06-09T02:06:00Z"
+      }
+    ],
+    "adapter_response": {
+      "adapter_id": "fixture_operator_ux",
+      "answer": {
+        "content": "The wrong result came from rerank.score promoting a cross-project decoy over the correct retrieval candidate.",
+        "claims": [
+          {
+            "claim_id": "root_cause",
+            "text": "Rerank promoted a cross-project decoy above the correct retrieval candidate.",
+            "evidence_ids": ["trace-rerank-promotion"],
+            "confidence": "high"
+          }
+        ],
+        "evidence_ids": ["trace-rerank-promotion"],
+        "latency_ms": 2.1,
+        "cost": {"currency": "USD", "amount": 0.0, "input_tokens": 0, "output_tokens": 0}
+      }
+    }
+  },
+  "timeline": [
+    {
+      "event_id": "rerank-trace-captured",
+      "ts": "2026-06-09T02:05:00Z",
+      "actor": "system",
+      "action": "captured_trace",
+      "evidence_ids": ["trace-rerank-promotion"],
+      "summary": "The trace captured retrieval ranks and rerank scores for the correct and decoy candidates."
+    }
+  ],
+  "prompt": {
+    "role": "user",
+    "content": "Explain why the wrong note ranked first.",
+    "job_mode": "debug",
+    "constraints": ["cite_evidence"]
+  },
+  "expected_answer": {
+    "must_include": [
+      {
+        "claim_id": "root_cause",
+        "text": "Rerank promoted a cross-project decoy above the correct retrieval candidate."
+      }
+    ],
+    "must_not_include": ["The correct candidate was missing from retrieval."],
+    "evidence_links": {"root_cause": ["trace-rerank-promotion"]},
+    "answer_type": "debug_report",
+    "accepted_alternates": [],
+    "requires_caveat": false,
+    "requires_refusal": false
+  },
+  "required_evidence": [
+    {
+      "evidence_id": "trace-rerank-promotion",
+      "claim_id": "root_cause",
+      "requirement": "explain",
+      "quote": "rerank.score promotes the decoy above the correct candidate"
+    }
+  ],
+  "negative_traps": [
+    {
+      "trap_id": "accept-decoy-as-answer",
+      "type": "decoy_evidence",
+      "evidence_ids": ["rerank-decoy-note"],
+      "failure_if_used": true
+    }
+  ],
+  "scoring_rubric": {
+    "dimensions": {
+      "debuggability": {"weight": 0.35, "max_points": 1.0, "criteria": "Uses rerank and replay candidate evidence."},
+      "evidence_grounding": {"weight": 0.3, "max_points": 1.0, "criteria": "Cites the trace rather than the decoy note."},
+      "workflow_helpfulness": {"weight": 0.2, "max_points": 1.0, "criteria": "Suggests rerank or scope repair."},
+      "answer_correctness": {"weight": 0.15, "max_points": 1.0, "criteria": "Names rerank promotion as the cause."}
+    },
+    "pass_threshold": 0.8,
+    "hard_fail_rules": ["unsupported high-confidence claim about a required decision or fact", "use of a negative trap marked failure_if_used = true"]
+  },
+  "allowed_uncertainty": {
+    "can_answer_unknown": false,
+    "acceptable_phrases": ["The fixture does not provide that evidence."],
+    "fallback_action": "state_blocker"
+  },
+  "operator_debug": {
+    "failure_mode": "rerank_promoted_bad_candidate",
+    "trace_id": "22222222-2222-4222-8222-222222222222",
+    "viewer_url": "/viewer?trace_id=22222222-2222-4222-8222-222222222222",
+    "admin_trace_bundle_url": "/v2/admin/traces/22222222-2222-4222-8222-222222222222/bundle?mode=full&stage_items_limit=128&candidates_limit=200",
+    "root_cause": "The correct item was in the candidate set, but rerank.score elevated a cross-project decoy.",
+    "steps_to_root_cause": 3,
+    "raw_sql_needed": false,
+    "dropped_candidate_visibility": "not dropped; visible with lower final rank in Replay Candidates",
+    "trace_completeness": "complete",
+    "repair_action_clarity": "clear",
+    "viewer_panels": ["Selected Final Results", "Replay Candidates", "Providers And Ranking"],
+    "cli_steps": ["open trace bundle", "compare retrieval rank with final rank", "inspect rerank score", "tighten scope or rerank inputs"],
+    "trace_evidence": ["trace-rerank-promotion"],
+    "ux_gaps": []
+  },
+  "tags": ["synthetic", "operator_debugging_ux", "qmd_reference", "no_live_claim"]
+}
diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
index 2f92dd55..59ee9bd2 100644
--- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs
+++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs
@@ -105,6 +105,7 @@ struct RealWorldJob {
 	negative_traps: Vec<NegativeTrap>,
 	scoring_rubric: ScoringRubric,
 	allowed_uncertainty: AllowedUncertainty,
+	operator_debug: Option<OperatorDebugEvidence>,
 	#[serde(default)]
 	tags: Vec<String>,
 }
@@ -314,6 +315,39 @@ struct CostReport {
 	output_tokens: Option<u64>,
 }
 
+#[derive(Clone, Debug, Deserialize, Serialize)] // Operator-debug evidence of one job: parsed from its fixture, cloned into the job report.
+struct OperatorDebugEvidence {
+	failure_mode: String, // Stable failure label, e.g. `expected_evidence_dropped`; validation rejects a blank value.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	trace_id: Option<String>, // Trace handle; left out of serialized output when `None`.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	viewer_url: Option<String>, // Viewer path; rendered as the `[viewer](...)` link in the Markdown table.
+	#[serde(skip_serializing_if = "Option::is_none")]
+	admin_trace_bundle_url: Option<String>, // Admin trace bundle path; rendered as the `[bundle](...)` link.
+	root_cause: String, // Concise expected diagnosis; validation rejects a blank value.
+	steps_to_root_cause: u32, // Viewer or CLI steps needed to reach the diagnosis; validation rejects 0.
+	raw_sql_needed: bool, // `true` is scored as an operator-debug failure.
+	dropped_candidate_visibility: String, // Free text shown in the report table; must not be blank.
+	trace_completeness: String, // Any value other than `complete` is scored as a trace gap.
+	repair_action_clarity: String, // Any value other than `clear` is scored as an unclear repair.
+	#[serde(default)]
+	viewer_panels: Vec<String>, // Joined with ", " in the Markdown details; empty when absent from the fixture.
+	#[serde(default)]
+	cli_steps: Vec<String>, // Joined with " -> " in the Markdown details; empty when absent from the fixture.
+	#[serde(default)]
+	trace_evidence: Vec<String>, // Joined with ", " in the Markdown details; empty when absent from the fixture.
+	#[serde(default)]
+	ux_gaps: Vec<OperatorUxGap>, // Summed across jobs into `operator_ux_gap_count`; empty when absent.
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)] // One entry of `operator_debug.ux_gaps`; validation rejects any blank field.
+struct OperatorUxGap {
+	gap_id: String, // Rendered in backticks at the start of the gap's table-cell entry.
+	severity: String, // Validated as non-blank; not rendered in the Markdown table cell.
+	description: String, // Rendered after the id in the table-cell entry.
+	follow_up_issue: String, // Rendered in parentheses at the end of the entry.
+}
+
 #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
 #[serde(rename_all = "snake_case")]
 enum TypedStatus {
@@ -402,6 +436,14 @@ struct ReportSummary {
 	qdrant_rebuild_case_count: usize,
 	#[serde(default)]
 	qdrant_rebuild_pass_count: usize,
+	#[serde(default)]
+	operator_debug_job_count: usize,
+	#[serde(default)]
+	raw_sql_needed_count: usize,
+	#[serde(default)]
+	trace_incomplete_count: usize,
+	#[serde(default)]
+	operator_ux_gap_count: usize,
 }
 
 #[derive(Debug, Deserialize, Serialize)]
@@ -457,6 +499,8 @@ struct JobReport {
 	redaction_leak_count: usize,
 	#[serde(default)]
 	qdrant_rebuild_case: bool,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	operator_debug: Option<OperatorDebugEvidence>,
 }
 
 #[derive(Debug, Deserialize, Serialize)]
@@ -509,6 +553,10 @@ struct FailureCounts {
 	missing_evidence: usize,
 	trap_uses: usize,
 	unsupported_claims: usize,
+	operator_debug_missing: usize,
+	operator_debug_raw_sql: usize,
+	operator_debug_trace_gaps: usize,
+	operator_debug_repair_unclear: usize,
 }
 
 #[derive(Debug, Default)]
@@ -627,6 +675,7 @@ fn validate_job(job: &RealWorldJob, path: &Path) -> Result<()> {
 	validate_required_evidence(job, path)?;
 	validate_scoring_rubric(job, path)?;
 	validate_allowed_uncertainty(job, path)?;
+	validate_operator_debug(job, path)?;
 
 	Ok(())
 }
@@ -854,6 +903,68 @@ fn validate_allowed_uncertainty(job: &RealWorldJob, path: &Path) -> Result<()> {
 	Ok(())
 }
 
+/// Validates a fixture's `operator_debug` block, which only `operator_debugging_ux` jobs must
+/// carry. Blank required text, zero steps, a `trace_completeness` outside the spec values
+/// (`complete`, `partial`, `missing`), or a blank optional/list/gap field fails, naming `path`.
+fn validate_operator_debug(job: &RealWorldJob, path: &Path) -> Result<()> {
+	let Some(debug) = &job.operator_debug else {
+		if job.suite == "operator_debugging_ux" {
+			return Err(eyre::eyre!(
+				"{} operator_debugging_ux job must include operator_debug.",
+				path.display()
+			));
+		}
+
+		return Ok(());
+	};
+	let bundle_url = debug.admin_trace_bundle_url.as_deref();
+
+	if debug.failure_mode.trim().is_empty()
+		|| debug.root_cause.trim().is_empty()
+		|| debug.dropped_candidate_visibility.trim().is_empty()
+		|| debug.repair_action_clarity.trim().is_empty()
+		|| debug.steps_to_root_cause == 0
+	{
+		return Err(eyre::eyre!("{} has incomplete operator_debug evidence.", path.display()));
+	}
+	if !matches!(debug.trace_completeness.as_str(), "complete" | "partial" | "missing") {
+		eyre::bail!("{} has invalid operator_debug trace_completeness.", path.display());
+	}
+
+	validate_optional_debug_field(path, debug.trace_id.as_deref(), "trace_id")?;
+	validate_optional_debug_field(path, debug.viewer_url.as_deref(), "viewer_url")?;
+	validate_optional_debug_field(path, bundle_url, "admin_trace_bundle_url")?;
+	validate_non_empty_debug_list(path, &debug.viewer_panels, "viewer_panels")?;
+	validate_non_empty_debug_list(path, &debug.cli_steps, "cli_steps")?;
+	validate_non_empty_debug_list(path, &debug.trace_evidence, "trace_evidence")?;
+
+	for gap in &debug.ux_gaps {
+		let fields = [&gap.gap_id, &gap.severity, &gap.description, &gap.follow_up_issue];
+
+		if fields.iter().any(|text| text.trim().is_empty()) {
+			return Err(eyre::eyre!("{} has incomplete operator_debug ux_gaps.", path.display()));
+		}
+	}
+
+	Ok(())
+}
+
+/// Rejects a present-but-blank optional `operator_debug` field; `None` is always accepted.
+fn validate_optional_debug_field(path: &Path, value: Option<&str>, field: &str) -> Result<()> {
+	match value.map(str::trim) {
+		Some("") => Err(eyre::eyre!("{} has empty operator_debug {field}.", path.display())),
+		_ => Ok(()),
+	}
+}
+
+/// Rejects a list containing a blank entry; note that an empty list itself is accepted.
+fn validate_non_empty_debug_list(path: &Path, values: &[String], field: &str) -> Result<()> {
+	match values.iter().find(|entry| entry.trim().is_empty()) {
+		Some(_) => Err(eyre::eyre!("{} has empty operator_debug {field} entry.", path.display())),
+		None => Ok(()),
+	}
+}
+
 fn validate_required_rfc3339(value: &str, path: &Path, id: &str) -> Result<()> {
 	if OffsetDateTime::parse(value, &Rfc3339).is_err() {
 		return Err(eyre::eyre!("{} has invalid RFC3339 timestamp for {}.", path.display(), id));
@@ -933,6 +1044,7 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 	let missing_evidence = missing_required_evidence(job, &produced_evidence);
 	let trap_ids_used = trap_ids_used(job, &produced_evidence);
 	let mut unsupported_claims = unsupported_claims(job, answer);
+	let operator_counts = operator_debug_failure_counts(job);
 	let hard_fail_hits = hard_fail_hits(job, &unsupported_claims, &trap_ids_used);
 	let counts = FailureCounts {
 		missing_claims: missing_claims.len(),
@@ -940,13 +1052,21 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 		missing_evidence: missing_evidence.len(),
 		trap_uses: trap_ids_used.len(),
 		unsupported_claims: unsupported_claims.len(),
+		operator_debug_missing: operator_counts.operator_debug_missing,
+		operator_debug_raw_sql: operator_counts.operator_debug_raw_sql,
+		operator_debug_trace_gaps: operator_counts.operator_debug_trace_gaps,
+		operator_debug_repair_unclear: operator_counts.operator_debug_repair_unclear,
 	};
 	let dimension_scores = dimension_scores(job, &counts);
 	let normalized_score = normalized_score(&dimension_scores);
 	let wrong_result_count = counts.missing_claims
 		+ counts.forbidden_claims
 		+ counts.missing_evidence
-		+ counts.trap_uses;
+		+ counts.trap_uses
+		+ counts.operator_debug_missing
+		+ counts.operator_debug_raw_sql
+		+ counts.operator_debug_trace_gaps
+		+ counts.operator_debug_repair_unclear;
 	let status = job_status(
 		normalized_score,
 		job.scoring_rubric.pass_threshold,
@@ -972,6 +1092,22 @@ fn score_job(job: &RealWorldJob) -> JobScoring {
 	}
 }
 
+/// Counts the operator-debug failure signals of one job; other counters keep `Default` values.
+fn operator_debug_failure_counts(job: &RealWorldJob) -> FailureCounts {
+	match job.operator_debug.as_ref() {
+		None => FailureCounts {
+			operator_debug_missing: usize::from(job.suite == "operator_debugging_ux"),
+			..FailureCounts::default()
+		},
+		Some(evidence) => FailureCounts {
+			operator_debug_raw_sql: usize::from(evidence.raw_sql_needed),
+			operator_debug_trace_gaps: usize::from(evidence.trace_completeness != "complete"),
+			operator_debug_repair_unclear: usize::from(evidence.repair_action_clarity != "clear"),
+			..FailureCounts::default()
+		},
+	}
+}
+
 fn produced_answer(job: &RealWorldJob) -> &ProducedAnswer {
 	job.corpus
 		.adapter_response
@@ -1152,12 +1288,20 @@ fn dimension_scores(job: &RealWorldJob, counts: &FailureCounts) -> Vec<Dimension
 fn dimension_score(dimension_id: &str, max_points: f64, counts: &FailureCounts) -> f64 {
 	let failed = match dimension_id {
 		"answer_correctness" | "workflow_helpfulness" =>
-			counts.missing_claims > 0 || counts.forbidden_claims > 0,
+			counts.missing_claims > 0
+				|| counts.forbidden_claims > 0
+				|| counts.operator_debug_repair_unclear > 0,
 		"evidence_grounding" => counts.missing_evidence > 0 || counts.unsupported_claims > 0,
 		"trap_avoidance" => counts.trap_uses > 0,
 		"uncertainty_handling" => counts.unsupported_claims > 0,
 		"lifecycle_behavior" => false,
-		"debuggability" | "latency_resource" | "personalization_fit" =>
+		"debuggability" =>
+			counts.missing_claims > 0
+				|| counts.unsupported_claims > 0
+				|| counts.operator_debug_missing > 0
+				|| counts.operator_debug_raw_sql > 0
+				|| counts.operator_debug_trace_gaps > 0,
+		"latency_resource" | "personalization_fit" =>
 			counts.missing_claims > 0 || counts.unsupported_claims > 0,
 		_ => counts.missing_claims > 0 || counts.unsupported_claims > 0 || counts.trap_uses > 0,
 	};
@@ -1203,6 +1347,10 @@ fn job_reason(status: TypedStatus, counts: &FailureCounts, normalized_score: f64
 				+ counts.forbidden_claims
 				+ counts.missing_evidence
 				+ counts.trap_uses
+				+ counts.operator_debug_missing
+				+ counts.operator_debug_raw_sql
+				+ counts.operator_debug_trace_gaps
+				+ counts.operator_debug_repair_unclear
 		),
 		TypedStatus::WrongResult => format!(
 			"Job produced {} wrong-result signal(s) and normalized_score {normalized_score:.3}.",
@@ -1210,6 +1358,10 @@ fn job_reason(status: TypedStatus, counts: &FailureCounts, normalized_score: f64
 				+ counts.forbidden_claims
 				+ counts.missing_evidence
 				+ counts.trap_uses
+				+ counts.operator_debug_missing
+				+ counts.operator_debug_raw_sql
+				+ counts.operator_debug_trace_gaps
+				+ counts.operator_debug_repair_unclear
 		),
 		_ => "Job did not reach a runnable scoring state.".to_string(),
 	}
@@ -1248,6 +1400,7 @@ fn job_report(job: &RealWorldJob, scoring: JobScoring) -> JobReport {
 		scope_violation_count: metrics.scope_violation_count,
 		redaction_leak_count: metrics.redaction_leak_count,
 		qdrant_rebuild_case: metrics.qdrant_rebuild_case,
+		operator_debug: job.operator_debug.clone(),
 	}
 }
 
@@ -1472,6 +1625,22 @@ fn report_summary(jobs: &[JobReport], suites: &[SuiteReport]) -> ReportSummary {
 			.iter()
 			.filter(|job| job.qdrant_rebuild_case && job.status == TypedStatus::Pass)
 			.count(),
+		operator_debug_job_count: jobs.iter().filter(|job| job.operator_debug.is_some()).count(),
+		raw_sql_needed_count: jobs
+			.iter()
+			.filter_map(|job| job.operator_debug.as_ref())
+			.filter(|debug| debug.raw_sql_needed)
+			.count(),
+		trace_incomplete_count: jobs
+			.iter()
+			.filter_map(|job| job.operator_debug.as_ref())
+			.filter(|debug| debug.trace_completeness != "complete")
+			.count(),
+		operator_ux_gap_count: jobs
+			.iter()
+			.filter_map(|job| job.operator_debug.as_ref())
+			.map(|debug| debug.ux_gaps.len())
+			.sum(),
 		..ReportSummary::default()
 	};
 
@@ -1586,6 +1755,7 @@ fn render_markdown(report: &RealWorldReport, report_path: &Path) -> String {
 	render_markdown_header(&mut out, report, report_path.as_str());
 	render_markdown_suites(&mut out, report);
 	render_markdown_jobs(&mut out, report);
+	render_markdown_operator_debugging(&mut out, report);
 	render_markdown_unsupported_claims(&mut out, report);
 	render_markdown_semantics(&mut out, report);
 
@@ -1661,6 +1831,16 @@ fn render_markdown_header(out: &mut String, report: &RealWorldReport, report_pat
 		optional_f64(report.summary.mean_latency_ms, " ms")
 	));
 	out.push_str(&format!("- Cost: `{}`\n", cost_display(report.summary.total_cost.as_ref())));
+	out.push_str(&format!(
+		"- Operator-debug jobs: `{}`\n",
+		report.summary.operator_debug_job_count
+	));
+	out.push_str(&format!("- Raw SQL needed: `{}`\n", report.summary.raw_sql_needed_count));
+	out.push_str(&format!(
+		"- Trace-incomplete debug jobs: `{}`\n",
+		report.summary.trace_incomplete_count
+	));
+	out.push_str(&format!("- Operator UX gaps: `{}`\n", report.summary.operator_ux_gap_count));
 	out.push_str(&format!(
 		"- Private corpus redaction: `{}`\n\n",
 		md_inline(report.private_corpus_redaction.policy.as_str())
@@ -1722,6 +1902,94 @@ fn render_markdown_jobs(out: &mut String, report: &RealWorldReport) {
 	out.push('\n');
 }
 
+/// Appends the "Operator Debugging UX" section: one summary table plus per-job detail lists.
+///
+/// Only jobs that carry `operator_debug` evidence are rendered; when no job does, the section
+/// holds a single placeholder sentence instead.
+fn render_markdown_operator_debugging(out: &mut String, report: &RealWorldReport) {
+	// Pair each job with its evidence once so the table and the details share one selection.
+	let entries = report
+		.jobs
+		.iter()
+		.filter_map(|job| job.operator_debug.as_ref().map(|evidence| (job, evidence)))
+		.collect::<Vec<_>>();
+
+	out.push_str("## Operator Debugging UX\n\n");
+
+	if entries.is_empty() {
+		out.push_str("No encoded job reported operator debugging evidence.\n\n");
+
+		return;
+	}
+
+	out.push_str("| Job | Failure Mode | Trace Evidence | Steps | Raw SQL | Dropped Candidate Visibility | Trace Completeness | Repair Clarity | UX Gaps |\n");
+	out.push_str("| --- | --- | --- | ---: | --- | --- | --- | --- | --- |\n");
+
+	for &(job, evidence) in &entries {
+		out.push_str(&format!(
+			"| {} | {} | {} | {} | `{}` | {} | `{}` | `{}` | {} |\n",
+			md_cell(job.job_id.as_str()),
+			md_cell(evidence.failure_mode.as_str()),
+			debug_trace_cell(evidence),
+			evidence.steps_to_root_cause,
+			evidence.raw_sql_needed,
+			md_cell(evidence.dropped_candidate_visibility.as_str()),
+			md_inline(evidence.trace_completeness.as_str()),
+			md_inline(evidence.repair_action_clarity.as_str()),
+			ux_gap_cell(evidence.ux_gaps.as_slice())
+		));
+	}
+
+	out.push_str("\n### Operator Debug Details\n\n");
+
+	for &(job, evidence) in &entries {
+		let panels = evidence.viewer_panels.join(", ");
+		let steps = evidence.cli_steps.join(" -> ");
+		let traces = evidence.trace_evidence.join(", ");
+
+		out.push_str(&format!("#### `{}`\n\n", md_inline(job.job_id.as_str())));
+		out.push_str(&format!("- Root cause: {}\n", md_cell(evidence.root_cause.as_str())));
+		out.push_str(&format!("- Viewer panels: `{}`\n", md_inline(panels.as_str())));
+		out.push_str(&format!("- CLI steps: `{}`\n", md_inline(steps.as_str())));
+		out.push_str(&format!("- Trace evidence: `{}`\n", md_inline(traces.as_str())));
+		out.push('\n');
+	}
+}
+
+/// Builds the "Trace Evidence" table cell: the trace id, a viewer link, and a bundle link,
+/// joined by `<br>`. A missing value is replaced by a `-` placeholder.
+fn debug_trace_cell(debug: &OperatorDebugEvidence) -> String {
+	let trace = debug.trace_id.as_deref().unwrap_or("-");
+	let viewer = match debug.viewer_url.as_deref() {
+		Some(url) => format!("[viewer]({})", md_url(url)),
+		None => "viewer: -".to_string(),
+	};
+	let bundle = match debug.admin_trace_bundle_url.as_deref() {
+		Some(url) => format!("[bundle]({})", md_url(url)),
+		None => "bundle: -".to_string(),
+	};
+
+	format!("`{}`<br>{}<br>{}", md_inline(trace), viewer, bundle)
+}
+
+/// Renders the "UX Gaps" table cell: `none` in backticks when empty, otherwise one
+/// `<br>`-separated entry per gap showing its id, description, and follow-up issue.
+fn ux_gap_cell(gaps: &[OperatorUxGap]) -> String {
+	if gaps.is_empty() {
+		return "`none`".to_string();
+	}
+
+	let render = |gap: &OperatorUxGap| {
+		let id = md_inline(gap.gap_id.as_str());
+		let description = md_cell(gap.description.as_str());
+		let issue = md_inline(gap.follow_up_issue.as_str());
+
+		format!("`{}`: {} ({})", id, description, issue)
+	};
+
+	gaps.iter().map(render).collect::<Vec<_>>().join("<br>")
+}
+
 fn render_markdown_unsupported_claims(out: &mut String, report: &RealWorldReport) {
 	out.push_str("## Unsupported Claims\n\n");
 
@@ -1838,6 +2106,14 @@ fn md_cell(value: &str) -> String {
 	md_inline(value).replace('|', "\\|")
 }
 
+/// Percent-encodes the characters of `value` that would break a Markdown link target.
+///
+/// `(` and `)` would unbalance or close the `[text](url)` destination, a space would end it,
+/// and `|` would split the table cell that the link is rendered in.
+fn md_url(value: &str) -> String {
+	value.replace('(', "%28").replace(')', "%29").replace(' ', "%20").replace('|', "%7C")
+}
+
 fn round3(value: f64) -> f64 {
 	(value * 1_000.0).round() / 1_000.0
 }
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 512da9f1..8c53299c 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -23,6 +23,10 @@ fn real_world_memory_fixture_dir() -> PathBuf {
 	Path::new(env!("CARGO_MANIFEST_DIR")).join("fixtures").join("real_world_memory")
 }
 
+fn operator_debug_fixture_dir() -> PathBuf {
+	fixture_root().join("operator_debugging_ux") // The `operator_debugging_ux` directory directly under `fixture_root()`.
+}
+
 fn run_json_report_from(fixtures: PathBuf) -> Result<Value> {
 	let output = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
 		.arg("run")
@@ -99,7 +103,47 @@ fn smoke_fixture_produces_typed_json_report() -> Result<()> {
 fn runner_discovers_nested_fixture_layout() -> Result<()> {
 	let report = run_json_report_from(fixture_root())?;
 
-	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(1));
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(6));
+
+	let suites = array_at(&report, "/suites")?;
+	let operator_suite = find_by_field(suites, "/suite_id", "operator_debugging_ux")?;
+
+	assert_eq!(operator_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(5));
+
+	Ok(())
+}
+
+#[test]
+fn operator_debug_fixture_reports_trace_links_and_failure_details() -> Result<()> {
+	let report = run_json_report_from(operator_debug_fixture_dir())?;
+	// Every summary counter checked below is an unsigned integer under `/summary/<key>`.
+	let count = |key: &str| report.pointer(&format!("/summary/{key}")).and_then(Value::as_u64);
+
+	assert_eq!(count("job_count"), Some(5));
+	assert_eq!(count("operator_debug_job_count"), Some(5));
+	assert_eq!(count("raw_sql_needed_count"), Some(0));
+	assert_eq!(count("trace_incomplete_count"), Some(0));
+	assert_eq!(count("operator_ux_gap_count"), Some(0));
+	assert_eq!(count("pass"), Some(4));
+	assert_eq!(count("unsupported_claim"), Some(1));
+
+	let jobs = array_at(&report, "/jobs")?;
+	let dropped = find_by_field(jobs, "/job_id", "operator-debug-dropped-evidence-001")?;
+
+	// The dropped-evidence job must surface its `operator_debug` block in the JSON job report.
+	assert_eq!(dropped.pointer("/status").and_then(Value::as_str), Some("unsupported_claim"));
+	assert_eq!(
+		dropped.pointer("/operator_debug/raw_sql_needed").and_then(Value::as_bool),
+		Some(false)
+	);
+	assert_eq!(
+		dropped.pointer("/operator_debug/dropped_candidate_visibility").and_then(Value::as_str),
+		Some("visible in Retrieval Funnel and Replay Candidates")
+	);
+	assert_eq!(
+		dropped.pointer("/operator_debug/viewer_url").and_then(Value::as_str),
+		Some("/viewer?trace_id=11111111-1111-4111-8111-111111111111")
+	);
+
 	Ok(())
 }
@@ -135,6 +179,7 @@ fn generated_json_report_renders_markdown() -> Result<()> {
 	assert!(markdown.contains("# Real-World Job Benchmark Report"));
 	assert!(markdown.contains("work_resume"));
 	assert!(markdown.contains("issue-xy812-resume"));
+	assert!(markdown.contains("## Operator Debugging UX"));
 	assert!(markdown.contains("Existing live-baseline reports remain valid"));
 
 	Ok(())
@@ -188,3 +233,41 @@ fn real_world_memory_fixtures_report_trust_and_personalization_metrics() -> Resu
 
 	Ok(())
 }
+
+#[test]
+fn operator_debug_json_report_renders_markdown_links() -> Result<()> {
+	let report = run_json_report_from(operator_debug_fixture_dir())?;
+	// Scratch files live in a temp directory whose name embeds this process id.
+	let scratch =
+		env::temp_dir().join(format!("elf-real-world-job-operator-test-{}", process::id()));
+	let json_path = scratch.join("operator.json");
+	let md_path = scratch.join("operator.md");
+
+	fs::create_dir_all(&scratch)?;
+	fs::write(&json_path, serde_json::to_vec_pretty(&report)?)?;
+
+	// Convert the JSON report to Markdown through the binary's `publish` subcommand.
+	let published = Command::new(env!("CARGO_BIN_EXE_real_world_job_benchmark"))
+		.args(["publish", "--report"])
+		.arg(&json_path)
+		.arg("--out")
+		.arg(&md_path)
+		.output()?;
+
+	assert!(
+		published.status.success(),
+		"real_world_job publisher failed: {}",
+		String::from_utf8_lossy(&published.stderr),
+	);
+
+	let markdown = fs::read_to_string(md_path)?;
+
+	// The rendered Markdown must carry the job id, viewer link, and debugging section labels.
+	assert!(markdown.contains("operator-debug-dropped-evidence-001"));
+	assert!(markdown.contains("/viewer?trace_id=11111111-1111-4111-8111-111111111111"));
+	assert!(markdown.contains("Raw SQL"));
+	assert!(markdown.contains("Replay Candidates"));
+	assert!(markdown.contains("Root cause"));
+
+	Ok(())
+}
diff --git a/docs/guide/benchmarking/2026-06-09-operator-debugging-ux-report.md b/docs/guide/benchmarking/2026-06-09-operator-debugging-ux-report.md
new file mode 100644
index 00000000..ac2415fe
--- /dev/null
+++ b/docs/guide/benchmarking/2026-06-09-operator-debugging-ux-report.md
@@ -0,0 +1,132 @@
+# Real-World Job Benchmark Report
+
+Goal: Publish a Markdown summary for one generated real_world_job benchmark report.
+Read this when: You need a durable smoke report for real-world agent memory job fixtures.
+Inputs: `tmp/real-world-job/real-world-job-operator-ux-report.json`.
+Depends on: `apps/elf-eval/fixtures/real_world_job/`, `docs/spec/real_world_agent_memory_benchmark_v1.md`, and `Makefile.toml`.
+Verification: Compare this Markdown summary with the source JSON before committing.
+
+## Summary
+
+- Run ID: `real-world-job-operator-ux`
+- Generated at: `2026-06-09T14:52:05.906877Z`
+- Runner version: `0.2.0-9b60dee3de54705a71a683d9a36b48d94ce8e752-aarch64-apple-darwin`
+- Corpus profile: `synthetic`
+- Adapter: `fixture_operator_ux` (offline_fixture_response)
+- Jobs: `5`
+- Encoded suites: `1`
+- Not-encoded suites: `10`
+- Status summary: `4` pass, `0` wrong_result, `0` lifecycle_fail, `0` incomplete, `0` blocked, `1` unsupported_claim
+- Unsupported claim count: `1`
+- Wrong-result count: `3`
+- Mean score: `0.800`
+- Mean latency: `3.100 ms`
+- Cost: `0.000 USD`
+- Operator-debug jobs: `5`
+- Raw SQL needed: `0`
+- Trace-incomplete debug jobs: `0`
+- Operator UX gaps: `0`
+- Private corpus redaction: `no_private_corpus`
+
+## Suites
+
+| Suite | Status | Jobs | Score | Unsupported Claims | Wrong Results | Reason |
+| --- | --- | ---: | ---: | ---: | ---: | --- |
+| trust_source_of_truth | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| work_resume | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| project_decisions | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| retrieval | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| memory_evolution | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| consolidation | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| knowledge_compilation | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| operator_debugging_ux | `unsupported_claim` | 5 | `0.800` | 1 | 3 | At least one encoded job produced an unsupported claim. |
+| capture_integration | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| production_ops | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+| personalization | `not_encoded` | 0 | `-` | 0 | 0 | No checked-in real_world_job fixture is encoded for this suite. |
+
+## Jobs
+
+| Suite | Job | Status | Score | Expected Evidence | Produced Evidence | Unsupported Claims | Wrong Results | Latency | Cost |
+| --- | --- | --- | ---: | --- | --- | ---: | ---: | ---: | --- |
+| operator_debugging_ux | operator-debug-dropped-evidence-001 | `unsupported_claim` | `0.000` | `trace-dropped-expected` | `trace-dropped-decoy` | 1 | 3 | `2.400 ms` | `0.000 USD` |
+| operator_debugging_ux | operator-debug-provider-latency-001 | `pass` | `1.000` | `trace-provider-timeout` | `trace-provider-timeout` | 0 | 0 | `4.800 ms` | `0.000 USD` |
+| operator_debugging_ux | operator-debug-rebuild-changed-results-001 | `pass` | `1.000` | `trace-before-rebuild, trace-after-rebuild` | `trace-after-rebuild, trace-before-rebuild` | 0 | 0 | `3.300 ms` | `0.000 USD` |
+| operator_debugging_ux | operator-debug-relation-context-mislead-001 | `pass` | `1.000` | `trace-relation-context` | `trace-relation-context` | 0 | 0 | `2.900 ms` | `0.000 USD` |
+| operator_debugging_ux | operator-debug-rerank-bad-candidate-001 | `pass` | `1.000` | `trace-rerank-promotion` | `trace-rerank-promotion` | 0 | 0 | `2.100 ms` | `0.000 USD` |
+
+## Operator Debugging UX
+
+| Job | Failure Mode | Trace Evidence | Steps | Raw SQL | Dropped Candidate Visibility | Trace Completeness | Repair Clarity | UX Gaps |
+| --- | --- | --- | ---: | --- | --- | --- | --- | --- |
+| operator-debug-dropped-evidence-001 | expected_evidence_dropped | `11111111-1111-4111-8111-111111111111`<br>[viewer](/viewer?trace_id=11111111-1111-4111-8111-111111111111)<br>[bundle](/v2/admin/traces/11111111-1111-4111-8111-111111111111/bundle?mode=full&stage_items_limit=128&candidates_limit=200) | 4 | `false` | visible in Retrieval Funnel and Replay Candidates | `complete` | `clear` | `none` |
+| operator-debug-provider-latency-001 | provider_latency_or_failure | `33333333-3333-4333-8333-333333333333`<br>[viewer](/viewer?trace_id=33333333-3333-4333-8333-333333333333)<br>[bundle](/v2/admin/traces/33333333-3333-4333-8333-333333333333/bundle?mode=full&stage_items_limit=128&candidates_limit=200) | 3 | `false` | visible as low recall counts rather than a post-recall drop | `complete` | `clear` | `none` |
+| operator-debug-rebuild-changed-results-001 | rebuild_changed_results | `44444444-4444-4444-8444-444444444444`<br>[viewer](/viewer?trace_id=44444444-4444-4444-8444-444444444444)<br>[bundle](/v2/admin/traces/44444444-4444-4444-8444-444444444444/bundle?mode=full&stage_items_limit=128&candidates_limit=200) | 5 | `false` | visible by comparing before and after trace candidates | `complete` | `clear` | `none` |
+| operator-debug-relation-context-mislead-001 | relation_context_misled_search | `55555555-5555-4555-8555-555555555555`<br>[viewer](/viewer?trace_id=55555555-5555-4555-8555-555555555555)<br>[bundle](/v2/admin/traces/55555555-5555-4555-8555-555555555555/bundle?mode=full&stage_items_limit=128&candidates_limit=200) | 4 | `false` | not dropped; misleading context is visible on selected result | `complete` | `clear` | `none` |
+| operator-debug-rerank-bad-candidate-001 | rerank_promoted_bad_candidate | `22222222-2222-4222-8222-222222222222`<br>[viewer](/viewer?trace_id=22222222-2222-4222-8222-222222222222)<br>[bundle](/v2/admin/traces/22222222-2222-4222-8222-222222222222/bundle?mode=full&stage_items_limit=128&candidates_limit=200) | 3 | `false` | not dropped; visible with lower final rank in Replay Candidates | `complete` | `clear` | `none` |
+
+### Operator Debug Details
+
+#### `operator-debug-dropped-evidence-001`
+
+- Root cause: The expected candidate survived recall but was removed by the read-profile scope filter before final selection.
+- Viewer panels: `Trace, Retrieval Funnel, Replay Candidates, Stage Details`
+- CLI steps: `open viewer trace link -> compare recall before and after filter -> inspect replay candidates -> repair read profile or grant`
+- Trace evidence: `trace-dropped-expected`
+
+#### `operator-debug-provider-latency-001`
+
+- Root cause: Provider latency forced fallback behavior, shrinking expanded-query recall.
+- Viewer panels: `Providers And Ranking, Stage Summary, Stage Details`
+- CLI steps: `open trace bundle -> inspect provider metadata -> compare expanded queries -> raise timeout or repair provider health`
+- Trace evidence: `trace-provider-timeout`
+
+#### `operator-debug-rebuild-changed-results-001`
+
+- Root cause: Rebuild removed stale derived-index state and restored source-of-truth-backed ranking.
+- Viewer panels: `Trace, Replay Candidates, Selected Final Results`
+- CLI steps: `open before trace -> open after trace -> compare replay candidates -> confirm active note selected -> keep Qdrant rebuild as repair`
+- Trace evidence: `trace-before-rebuild, trace-after-rebuild`
+
+#### `operator-debug-relation-context-mislead-001`
+
+- Root cause: A deprecated graph relation remained visible in relation_context and conflicted with the selected note text.
+- Viewer panels: `Selected Final Results, Relation Context, Stage Details`
+- CLI steps: `open trace link -> inspect selected result relation count -> open Relation Context -> invalidate stale relation fact`
+- Trace evidence: `trace-relation-context`
+
+#### `operator-debug-rerank-bad-candidate-001`
+
+- Root cause: The correct item was in the candidate set, but rerank.score elevated a cross-project decoy.
+- Viewer panels: `Selected Final Results, Replay Candidates, Providers And Ranking`
+- CLI steps: `open trace bundle -> compare retrieval rank with final rank -> inspect rerank score -> tighten scope or rerank inputs`
+- Trace evidence: `trace-rerank-promotion`
+
+## Unsupported Claims
+
+| Suite | Job | Claim | Evidence | Reason |
+| --- | --- | --- | --- | --- |
+| operator_debugging_ux | operator-debug-dropped-evidence-001 | No expected evidence was dropped. | `trace-dropped-decoy` | claim_id is not present in expected_answer.evidence_links |
+
+## Result Semantics
+
+This report uses `docs/spec/real_world_agent_memory_benchmark_v1.md` status terms.
+It is a real-world job fixture report, not a Docker live-baseline report.
+Existing live-baseline reports remain valid for their encoded retrieval and lifecycle checks and are not reinterpreted as real-world suite wins.
+
+- `pass`: encoded jobs met their pass threshold with required evidence and no hard-fail rule.
+- `wrong_result`: a job completed but missed required answer or evidence expectations.
+- `unsupported_claim`: a job produced a substantive claim not supported by the fixture evidence links.
+- `not_encoded`: a suite has no checked-in real_world_job fixture, so no pass/fail claim is allowed.
+
+## Not-Encoded Suites
+
+- `trust_source_of_truth`
+- `work_resume`
+- `project_decisions`
+- `retrieval`
+- `memory_evolution`
+- `consolidation`
+- `knowledge_compilation`
+- `capture_integration`
+- `production_ops`
+- `personalization`
diff --git a/docs/guide/benchmarking/index.md b/docs/guide/benchmarking/index.md
index 6f1a606a..06e89da5 100644
--- a/docs/guide/benchmarking/index.md
+++ b/docs/guide/benchmarking/index.md
@@ -33,6 +33,9 @@ cleanup, use `docs/guide/single_user_production.md`.
 - `2026-06-09-production-adoption-gate-report.md`: XY-836 production adoption
   decision report with fresh provider-backed synthetic, stress, backfill, restore, and
   external adapter evidence.
+- `2026-06-09-operator-debugging-ux-report.md`: checked-in real-world job
+  operator-debugging UX report with trace/viewer links, raw-SQL avoidance, root-cause
+  step counts, dropped-candidate visibility, and repair-action clarity.
 - `real_world_agent_memory_benchmark.md`: operator overview for the v1 real-world
   agent memory benchmark contract, including suite taxonomy and typed report states.
 
diff --git a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
index 6cc18971..b354af1d 100644
--- a/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
+++ b/docs/guide/benchmarking/real_world_agent_memory_benchmark.md
@@ -150,6 +150,30 @@ count, and Qdrant rebuild case/pass counts. The fixtures include negative traps
 unsupported prior claims, stale deleted facts, cross-project preference leakage, and
 private/redacted text leakage.
 
+Operator debugging UX increment:
+
+```sh
+cargo make real-world-job-operator-ux
+```
+
+Artifacts:
+
+```text
+tmp/real-world-job/real-world-job-operator-ux-report.json
+tmp/real-world-job/real-world-job-operator-ux-report.md
+```
+
+The operator UX fixtures live under
+`apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/`. They cover dropped
+expected evidence, rerank promotion of a bad candidate, provider latency or failure,
+Qdrant rebuild result changes, and misleading relation context. Reports include direct
+viewer and admin trace bundle links, steps to root cause, whether raw SQL was needed,
+dropped-candidate visibility, trace completeness, repair-action clarity, and any
+encoded UX gaps.
+
+Checked-in evidence snapshot:
+`docs/guide/benchmarking/2026-06-09-operator-debugging-ux-report.md`.
+
 Do not generate large fixtures or update production-adoption verdicts while adding the
 contract. The current adoption gate remains an existing benchmark decision until new
 real-world job reports are implemented and published.
diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md
index fa94656f..5b65c0d0 100644
--- a/docs/spec/real_world_agent_memory_benchmark_v1.md
+++ b/docs/spec/real_world_agent_memory_benchmark_v1.md
@@ -66,6 +66,7 @@ runner execution.
   "negative_traps": [],
   "scoring_rubric": {},
   "allowed_uncertainty": {},
+  "operator_debug": {},
   "tags": []
 }
 ```
@@ -86,6 +87,7 @@ runner execution.
 | `negative_traps` | array | Distractors, stale facts, or misleading memories that must not drive the answer. |
 | `scoring_rubric` | object | Dimensions, weights, thresholds, and hard-fail rules for this job. |
 | `allowed_uncertainty` | object | Explicit uncertainty language and fallback behavior accepted for the job. |
+| `operator_debug` | object or null | Optional for most suites; required for `operator_debugging_ux` jobs. Records trace/viewer evidence and operator workflow scoring inputs. |
 | `tags` | array | Optional labels such as `private_corpus`, `synthetic`, `adapter_required`, or `no_live_claim`. |
 
 ### `corpus`
@@ -192,6 +194,38 @@ Trap types:
 
 Each trap MUST include `trap_id`, `type`, `evidence_ids`, and `failure_if_used`.
 
+### `operator_debug`
+
+`operator_debug` is required when `suite = "operator_debugging_ux"` and optional
+elsewhere. It records whether a human operator can identify the root cause through
+viewer, trace, or CLI readback without raw SQL.
+
+Required fields:
+
+- `failure_mode`: stable label such as `expected_evidence_dropped`,
+  `rerank_promoted_bad_candidate`, `provider_latency_or_failure`,
+  `rebuild_changed_results`, or `relation_context_misled_search`.
+- `trace_id`: trace handle when available.
+- `viewer_url`: read-only viewer path that opens the trace evidence when available.
+- `admin_trace_bundle_url`: direct admin trace bundle path when available.
+- `root_cause`: concise expected diagnosis.
+- `steps_to_root_cause`: number of viewer or CLI steps needed to reach the diagnosis.
+- `raw_sql_needed`: must be `false` for a pass under this suite.
+- `dropped_candidate_visibility`: whether dropped, retained, or misleading candidates
+  are visible through trace/viewer evidence.
+- `trace_completeness`: `complete`, `partial`, or `missing`.
+- `repair_action_clarity`: `clear`, `partial`, or `missing`.
+- `viewer_panels`: viewer panels used, such as `Replay Candidates`, `Stage Details`,
+  `Providers And Ranking`, or `Relation Context`.
+- `cli_steps`: equivalent CLI or endpoint steps.
+- `trace_evidence`: evidence ids used for the diagnosis.
+- `ux_gaps`: array of focused follow-up pointers when a needed panel or endpoint is
+  absent.
+
+Each `ux_gaps[]` entry MUST include `gap_id`, `severity`, `description`, and
+`follow_up_issue`. If a fixture requires a missing panel, the report MUST encode the
+gap instead of hiding it behind a wrong-result score.
+
 ### `scoring_rubric`
 
 The rubric MUST be job-specific but use the shared dimensions below.