Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,15 @@ provider-backed ELF evidence was required.
Postgres graph-lite facts to show current, historical, future, sourced, inferred,
ambiguous, stale, and superseded markers without introducing a separate graph
database or replacing source evidence.
- Recall/debug panel after XY-1022: the June 20 follow-up adds
`elf.recall_debug_panel/v1` through service, HTTP, and MCP readback. The panel
groups selected Memory Note trace rows and retained dropped replay candidates,
Source Library document candidates, Knowledge Workspace page snippets, graph
facts, and Dreaming proposals, and labels each row with its authority layer,
freshness state, source refs, stage reason, evidence class, and replay command.
Missing anchors remain explicit `not_requested` layers, so the panel improves
debug ergonomics without turning untested or blocked layers into pass claims.
- Operator-approved public-proxy addendum after XY-930: the June 19 follow-up runs
`cargo make baseline-production-private-addendum` with a simulated/public-proxy
production corpus manifest approved for this stage. The run records 12 documents,
Expand Down
71 changes: 66 additions & 5 deletions apps/elf-api/src/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,11 @@ use elf_service::{
KnowledgePageSearchResponse, KnowledgePagesListRequest, KnowledgePagesListResponse,
ListRequest, ListResponse, MemoryHistoryGetRequest, MemoryHistoryResponse, NoteFetchRequest,
NoteFetchResponse, NoteProvenanceBundleResponse, NoteProvenanceGetRequest, PayloadLevel,
PublishNoteRequest, QueryPlan, RankingRequestOverride, RebuildReport, SearchDetailsRequest,
SearchDetailsResult, SearchExplainRequest, SearchExplainResponse, SearchIndexItem,
SearchRequest, SearchResponse, SearchSessionGetRequest, SearchTimelineGroup,
SearchTimelineRequest, SearchTrajectoryResponse, SearchTrajectorySummary, ShareScope,
SpaceGrantRevokeRequest, SpaceGrantRevokeResponse, SpaceGrantUpsertRequest,
PublishNoteRequest, QueryPlan, RankingRequestOverride, RebuildReport, RecallDebugPanelRequest,
RecallDebugPanelResponse, SearchDetailsRequest, SearchDetailsResult, SearchExplainRequest,
SearchExplainResponse, SearchIndexItem, SearchRequest, SearchResponse, SearchSessionGetRequest,
SearchTimelineGroup, SearchTimelineRequest, SearchTrajectoryResponse, SearchTrajectorySummary,
ShareScope, SpaceGrantRevokeRequest, SpaceGrantRevokeResponse, SpaceGrantUpsertRequest,
SpaceGrantsListRequest, TextPositionSelector, TextQuoteSelector, TraceBundleGetRequest,
TraceBundleResponse, TraceGetRequest, TraceGetResponse, TraceRecentListRequest,
TraceRecentListResponse, TraceTrajectoryGetRequest, UnpublishNoteRequest, UpdateRequest,
Expand Down Expand Up @@ -148,6 +148,7 @@ const VIEWER_HTML: &str = include_str!("../static/viewer.html");
consolidation_proposal_get,
consolidation_proposal_review,
dreaming_review_queue,
recall_debug_panel,
knowledge_page_rebuild,
knowledge_pages_list,
knowledge_pages_search,
Expand Down Expand Up @@ -181,6 +182,7 @@ const VIEWER_HTML: &str = include_str!("../static/viewer.html");
(name = "graph", description = "Graph query and predicate administration."),
(name = "consolidation", description = "Reviewable derived consolidation proposals."),
(name = "dreaming", description = "Dreaming review queue and derived memory organization."),
(name = "recall", description = "Cross-layer recall and debug readback."),
(name = "knowledge", description = "Derived knowledge page rebuild and lint readback."),
(name = "admin", description = "Local admin and operator inspection routes."),
)
Expand Down Expand Up @@ -515,6 +517,18 @@ struct TraceBundleGetQuery {
candidates_limit: Option<u32>,
}

/// JSON request body deserialized by the `POST /v2/admin/recall-debug/panel` handler.
///
/// Every field is optional; the handler forwards each one unchanged into
/// `RecallDebugPanelRequest`. The layer/anchor pairing noted on the fields below
/// follows the XY-1022 report snapshot; the service decides how absent anchors are
/// reported (the README describes them as explicit `not_requested` layers).
#[derive(Clone, Debug, Deserialize)]
struct RecallDebugPanelBody {
/// Anchor for the `memory_notes` layer.
trace_id: Option<Uuid>,
/// Shared query text; the report lists it as the fallback anchor for the
/// `source_documents` and `knowledge_pages` layers.
query: Option<String>,
/// Anchor for the `source_documents` layer.
docs_query: Option<String>,
/// Anchor for the `knowledge_pages` layer.
knowledge_query: Option<String>,
/// Anchor for the `graph_facts` layer.
graph_subject: Option<GraphQueryEntityRef>,
/// NOTE(review): presumably narrows the graph lookup to one predicate — confirm
/// against the service implementation.
graph_predicate: Option<GraphQueryPredicateRef>,
/// Anchor for the `dreaming_proposals` layer.
include_dreaming: Option<bool>,
/// NOTE(review): presumably a per-layer row limit; the report records an effective
/// limit of 32 for source documents — confirm how the service applies it.
limit: Option<u32>,
}

#[derive(Clone, Debug, Deserialize)]
struct ShareScopeBody {
space: String,
Expand Down Expand Up @@ -753,6 +767,7 @@ pub fn admin_router(state: AppState) -> Router {
routing::post(consolidation_proposal_review),
)
.route("/v2/admin/dreaming/review-queue", routing::get(dreaming_review_queue))
.route("/v2/admin/recall-debug/panel", routing::post(recall_debug_panel))
.route("/v2/admin/knowledge/pages", routing::get(knowledge_pages_list))
.route("/v2/admin/knowledge/pages/rebuild", routing::post(knowledge_page_rebuild))
.route("/v2/admin/knowledge/pages/search", routing::post(knowledge_pages_search))
Expand Down Expand Up @@ -3118,6 +3133,52 @@ async fn dreaming_review_queue(
Ok(Json(response))
}

#[utoipa::path(
post,
path = "/v2/admin/recall-debug/panel",
tag = "recall",
request_body = Value,
responses(
(status = 200, description = "Cross-layer recall/debug panel.", body = Value),
(status = 400, description = "Invalid request.", body = ErrorBody),
(status = 401, description = "Authentication required.", body = ErrorBody),
(status = 403, description = "Admin access required.", body = ErrorBody),
(status = 500, description = "Internal error.", body = ErrorBody),
)
)]
async fn recall_debug_panel(
State(state): State<AppState>,
headers: HeaderMap,
payload: Result<Json<RecallDebugPanelBody>, JsonRejection>,
) -> Result<Json<RecallDebugPanelResponse>, ApiError> {
let ctx = RequestContext::from_headers(&headers)?;
let read_profile = required_read_profile(&headers)?;
let Json(payload) = payload.map_err(|err| {
tracing::warn!(error = %err, "Invalid request payload.");

json_error(StatusCode::BAD_REQUEST, "INVALID_REQUEST", "Invalid request payload.", None)
})?;
let response = state
.service
.recall_debug_panel(RecallDebugPanelRequest {
tenant_id: ctx.tenant_id,
project_id: ctx.project_id,
agent_id: ctx.agent_id,
read_profile,
trace_id: payload.trace_id,
query: payload.query,
docs_query: payload.docs_query,
knowledge_query: payload.knowledge_query,
graph_subject: payload.graph_subject,
graph_predicate: payload.graph_predicate,
include_dreaming: payload.include_dreaming,
limit: payload.limit,
})
.await?;

Ok(Json(response))
}

#[utoipa::path(
post,
path = "/v2/admin/knowledge/pages/rebuild",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"schema": "elf.recall_debug_panel_report/v1",
"authority": "XY-1022",
"generated_at": "2026-06-20T00:00:00Z",
"service_contract": {
"response_schema": "elf.recall_debug_panel/v1",
"service_module": "packages/elf-service/src/recall_debug.rs",
"http_endpoint": "POST /v2/admin/recall-debug/panel",
"mcp_tool": "elf_recall_debug_panel",
"spec": "docs/spec/system_recall_debug_panel_v1.md",
"read_model_only": true,
"raw_sql_needed": false
},
"layer_contract": {
"layer_count": 5,
"layers": [
{
"layer": "memory_notes",
"anchor": "trace_id",
"selection_states": ["selected", "dropped"],
"authority_layer": "memory_note",
"source_ref_surface": "memory_notes.source_ref",
"replay_surface": "elf_admin_trace_bundle_get",
"evidence_class": "pass"
},
{
"layer": "source_documents",
"anchor": "docs_query or query",
"selection_states": ["selected"],
"authority_layer": "source_library",
"source_ref_surface": "source_ref/v1 resolver elf_doc_ext/v1",
"replay_surface": "elf_docs_search_l0",
"effective_limit": 32,
"evidence_class": "pass"
},
{
"layer": "knowledge_pages",
"anchor": "knowledge_query or query",
"selection_states": ["selected"],
"authority_layer": "derived_knowledge_page",
"source_ref_surface": "source_coverage plus section source refs",
"replay_surface": "elf_recall_debug_panel",
"evidence_class": "pass"
},
{
"layer": "graph_facts",
"anchor": "graph_subject",
"selection_states": ["available"],
"authority_layer": "graph_fact",
"source_ref_surface": "evidence_note_ids and supersession ids",
"replay_surface": "elf_graph_report",
"evidence_class": "pass"
},
{
"layer": "dreaming_proposals",
"anchor": "include_dreaming",
"selection_states": ["reviewable"],
"authority_layer": "reviewable_dreaming_proposal",
"source_ref_surface": "source_refs, source_snapshot, affected_refs",
"replay_surface": "elf_dreaming_review_queue",
"evidence_class": "pass"
}
]
},
"debug_invariants": {
"not_requested_layers_preserved": true,
"requested_layer_failures_preserved_as_blocked": true,
"selected_and_dropped_memory_candidates": true,
"evidence_class_counts_preserved": true,
"authority_layer_required": true,
"freshness_state_required": true,
"stage_reason_required": true,
"source_refs_required": true,
"replay_command_or_artifact_path_required_when_available": true,
"no_source_mutation": true,
"no_graph_mutation": true,
"no_proposal_review_mutation": true
},
"command_evidence": [
{
"command": "cargo test -p elf-service recall_debug -- --nocapture",
"status": "pass",
"purpose": "Unit-check summary counters and not_requested layer behavior."
},
{
"command": "cargo test -p elf-mcp registers_all_tools -- --nocapture",
"status": "pass",
"purpose": "Guard MCP registration for elf_recall_debug_panel."
},
{
"command": "cargo test -p elf-eval --test real_world_job_benchmark recall_debug_panel_report_wires_cross_layer_debug_contract -- --nocapture",
"status": "pass",
"purpose": "Guard service/API/MCP/docs/snapshot coverage for XY-1022."
}
],
"claim_boundaries": {
"allowed": [
"ELF exposes a typed cross-layer recall/debug read model.",
"Memory trace selected rows and retained dropped replay candidates are visible through trace bundles when candidate capture/retention preserved them.",
"Source documents, knowledge pages, graph facts, and Dreaming proposals can be inspected from one panel response when their anchors are supplied.",
"not_requested layers remain explicit instead of being hidden behind aggregate pass claims."
],
"not_allowed": [
"Do not claim the panel is a mutating UI.",
"Do not claim external competitor UI parity from this read model alone.",
"Do not claim graph facts, source documents, or high-impact memories can be changed through the panel.",
"Do not treat missing anchors as pass evidence."
]
},
"next_optimization_direction": [
"Add a visual operator panel that groups rows by layer, authority, freshness, and stage reason.",
"Attach one-click replay to trace bundles, docs search, graph reports, and Dreaming queue filters.",
"Use XY-1023 to score full benchmark deltas and keep competitor debug advantages separate from ELF's typed cross-layer readback."
]
}
133 changes: 133 additions & 0 deletions apps/elf-eval/tests/real_world_job_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,10 @@ fn dreaming_review_queue_report_json_path() -> Result<PathBuf> {
report_snapshot_path("2026-06-20-dreaming-review-queue-report.json")
}

/// Resolves the XY-1022 recall/debug panel report snapshot through
/// `report_snapshot_path`.
fn recall_debug_panel_report_json_path() -> Result<PathBuf> {
    let snapshot_name = "2026-06-20-recall-debug-panel-report.json";

    report_snapshot_path(snapshot_name)
}

/// Resolves the OpenMemory UI export product-readback report snapshot through
/// `report_snapshot_path`.
fn openmemory_ui_export_product_readback_report_json_path() -> Result<PathBuf> {
    let snapshot_name = "2026-06-19-openmemory-ui-export-product-readback-report.json";

    report_snapshot_path(snapshot_name)
}
Expand Down Expand Up @@ -300,6 +304,14 @@ fn dreaming_review_queue_report_markdown_path() -> Result<PathBuf> {
.join("2026-06-20-dreaming-review-queue-report.md"))
}

/// Builds the path of the recall/debug panel markdown report under
/// `docs/evidence/benchmarking` in the workspace root.
fn recall_debug_panel_report_markdown_path() -> Result<PathBuf> {
    let benchmarking_dir = workspace_root()?.join("docs").join("evidence").join("benchmarking");
    let report_path = benchmarking_dir.join("2026-06-20-recall-debug-panel-report.md");

    Ok(report_path)
}

fn openmemory_ui_export_product_readback_report_markdown_path() -> Result<PathBuf> {
Ok(workspace_root()?
.join("docs")
Expand Down Expand Up @@ -3676,6 +3688,127 @@ fn dreaming_review_queue_report_wires_reviewable_policy_contract() -> Result<()>
Ok(())
}

/// Guards the XY-1022 recall/debug panel wiring end to end.
///
/// The test reads the JSON report snapshot, the markdown report, the benchmarking
/// index, the README, the service/API/MCP sources, and the specs, then checks that
/// the report carries the expected contract values and that each text file still
/// contains its required markers. Checks are table-driven so a failure names the
/// JSON pointer or the file and marker that broke.
#[test]
fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> {
    /// Asserts that `text` contains every entry of `markers`, naming `origin` and
    /// the missing marker in the failure message.
    fn assert_markers(origin: &str, text: &str, markers: &[&str]) {
        for marker in markers.iter().copied() {
            assert!(text.contains(marker), "{} is missing marker `{}`", origin, marker);
        }
    }

    let report = serde_json::from_str::<Value>(&fs::read_to_string(
        recall_debug_panel_report_json_path()?,
    )?)?;
    let markdown = fs::read_to_string(recall_debug_panel_report_markdown_path()?)?;
    let benchmarking_index = fs::read_to_string(benchmarking_index_path()?)?;
    let readme = fs::read_to_string(readme_path()?)?;
    let workspace = workspace_root()?;
    let service = fs::read_to_string(workspace.join("packages/elf-service/src/recall_debug.rs"))?;
    let service_lib = fs::read_to_string(workspace.join("packages/elf-service/src/lib.rs"))?;
    let routes = fs::read_to_string(workspace.join("apps/elf-api/src/routes.rs"))?;
    let mcp = fs::read_to_string(workspace.join("apps/elf-mcp/src/server.rs"))?;
    let recall_spec =
        fs::read_to_string(workspace.join("docs/spec/system_recall_debug_panel_v1.md"))?;
    let service_spec =
        fs::read_to_string(workspace.join("docs/spec/system_elf_memory_service_v2.md"))?;
    let version_registry =
        fs::read_to_string(workspace.join("docs/spec/system_version_registry.md"))?;

    // Scalar string fields of the report snapshot.
    for (pointer, expected) in [
        ("/schema", "elf.recall_debug_panel_report/v1"),
        ("/authority", "XY-1022"),
        ("/service_contract/response_schema", "elf.recall_debug_panel/v1"),
    ] {
        assert_eq!(
            report.pointer(pointer).and_then(Value::as_str),
            Some(expected),
            "unexpected string at report pointer {}",
            pointer
        );
    }

    // Boolean contract flags and debug invariants of the report snapshot.
    for (pointer, expected) in [
        ("/service_contract/read_model_only", true),
        ("/service_contract/raw_sql_needed", false),
        ("/debug_invariants/not_requested_layers_preserved", true),
        ("/debug_invariants/selected_and_dropped_memory_candidates", true),
        ("/debug_invariants/requested_layer_failures_preserved_as_blocked", true),
        ("/debug_invariants/no_source_mutation", true),
    ] {
        assert_eq!(
            report.pointer(pointer).and_then(Value::as_bool),
            Some(expected),
            "unexpected flag at report pointer {}",
            pointer
        );
    }

    assert_eq!(report.pointer("/layer_contract/layer_count").and_then(Value::as_u64), Some(5));

    // Each layer row must name its authority layer and replay surface and be marked `pass`.
    let layers = array_at(&report, "/layer_contract/layers")?;

    for (layer, authority, replay) in [
        ("memory_notes", "memory_note", "elf_admin_trace_bundle_get"),
        ("source_documents", "source_library", "elf_docs_search_l0"),
        ("knowledge_pages", "derived_knowledge_page", "elf_recall_debug_panel"),
        ("graph_facts", "graph_fact", "elf_graph_report"),
        ("dreaming_proposals", "reviewable_dreaming_proposal", "elf_dreaming_review_queue"),
    ] {
        let row = find_by_field(layers, "/layer", layer)?;

        assert_eq!(row.pointer("/authority_layer").and_then(Value::as_str), Some(authority));
        assert_eq!(row.pointer("/replay_surface").and_then(Value::as_str), Some(replay));
        assert_eq!(row.pointer("/evidence_class").and_then(Value::as_str), Some("pass"));
    }

    let memory = find_by_field(layers, "/layer", "memory_notes")?;
    let docs = find_by_field(layers, "/layer", "source_documents")?;

    assert!(array_contains_str(memory, "/selection_states", "selected")?);
    assert!(array_contains_str(memory, "/selection_states", "dropped")?);
    assert_eq!(docs.pointer("/effective_limit").and_then(Value::as_u64), Some(32));

    // Source files: service implementation, crate exports, HTTP route, and MCP tool.
    assert_markers(
        "packages/elf-service/src/recall_debug.rs",
        &service,
        &[
            "ELF_RECALL_DEBUG_PANEL_SCHEMA_V1",
            "pub async fn recall_debug_panel",
            "not_requested_layer",
            "blocked_layer",
            "public_error_class",
            "candidate_identity",
            "ORG_PROJECT_ID",
            "trace_bundle_get",
            "docs_search_l0",
            "knowledge_pages_search",
            "graph_report",
            "dreaming_review_queue",
        ],
    );
    assert_markers(
        "packages/elf-service/src/lib.rs",
        &service_lib,
        &["pub mod recall_debug", "RecallDebugPanelResponse"],
    );
    assert_markers(
        "apps/elf-api/src/routes.rs",
        &routes,
        &[
            "/v2/admin/recall-debug/panel",
            "async fn recall_debug_panel",
            "RecallDebugPanelRequest",
        ],
    );
    assert_markers(
        "apps/elf-mcp/src/server.rs",
        &mcp,
        &["elf_recall_debug_panel", "recall_debug_panel_schema", "/v2/admin/recall-debug/panel"],
    );

    // Specs and the version registry.
    assert_markers(
        "docs/spec/system_recall_debug_panel_v1.md",
        &recall_spec,
        &[
            "elf.recall_debug_panel/v1",
            "not_requested",
            "evidence_class = \"blocked\"",
            "effective `top_k` cap of 32",
            "selected`, `dropped`, `available`, or `reviewable`",
        ],
    );
    assert_markers(
        "docs/spec/system_elf_memory_service_v2.md",
        &service_spec,
        &["POST /v2/admin/recall-debug/panel", "system_recall_debug_panel_v1.md"],
    );
    assert_markers(
        "docs/spec/system_version_registry.md",
        &version_registry,
        &["elf.recall_debug_panel/v1"],
    );

    // Published evidence: markdown report, benchmarking index, and README.
    assert_markers(
        "docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md",
        &markdown,
        &[
            "Recall Debug Panel Report",
            "Missing anchors stay visible as `not_requested`",
            "retained dropped replay candidates",
            "effective cap of 32 rows",
        ],
    );
    assert_markers(
        "benchmarking index",
        &benchmarking_index,
        &["2026-06-20-recall-debug-panel-report.md"],
    );
    assert_markers(
        "README",
        &readme,
        &[
            "Recall/debug panel after XY-1022",
            "elf.recall_debug_panel/v1",
            "retained dropped replay candidates",
        ],
    );

    Ok(())
}

#[test]
fn operator_approved_public_proxy_private_addendum_preserves_boundary() -> Result<()> {
let report = serde_json::from_str::<Value>(&fs::read_to_string(
Expand Down
Loading