diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs index ff51fb3f..a22920a6 100644 --- a/apps/elf-api/src/routes.rs +++ b/apps/elf-api/src/routes.rs @@ -53,17 +53,18 @@ use elf_service::{ EventMessage, GranteeKind, GraphQueryEntityRef, GraphQueryPredicateRef, GraphQueryRequest, GraphQueryResponse, IngestionProfileSelector, KnowledgePageGetRequest, KnowledgePageLintRequest, KnowledgePageLintResponse, KnowledgePageRebuildRequest, - KnowledgePageRebuildResponse, KnowledgePageResponse, KnowledgePagesListRequest, - KnowledgePagesListResponse, ListRequest, ListResponse, NoteFetchRequest, NoteFetchResponse, - NoteProvenanceBundleResponse, NoteProvenanceGetRequest, PayloadLevel, PublishNoteRequest, - QueryPlan, RankingRequestOverride, RebuildReport, SearchDetailsRequest, SearchDetailsResult, - SearchExplainRequest, SearchExplainResponse, SearchIndexItem, SearchRequest, SearchResponse, - SearchSessionGetRequest, SearchTimelineGroup, SearchTimelineRequest, SearchTrajectoryResponse, - SearchTrajectorySummary, ShareScope, SpaceGrantRevokeRequest, SpaceGrantRevokeResponse, - SpaceGrantUpsertRequest, SpaceGrantsListRequest, TextPositionSelector, TextQuoteSelector, - TraceBundleGetRequest, TraceBundleResponse, TraceGetRequest, TraceGetResponse, - TraceRecentListRequest, TraceRecentListResponse, TraceTrajectoryGetRequest, - UnpublishNoteRequest, UpdateRequest, UpdateResponse, search::TraceBundleMode, + KnowledgePageRebuildResponse, KnowledgePageResponse, KnowledgePageSearchRequest, + KnowledgePageSearchResponse, KnowledgePagesListRequest, KnowledgePagesListResponse, + ListRequest, ListResponse, NoteFetchRequest, NoteFetchResponse, NoteProvenanceBundleResponse, + NoteProvenanceGetRequest, PayloadLevel, PublishNoteRequest, QueryPlan, RankingRequestOverride, + RebuildReport, SearchDetailsRequest, SearchDetailsResult, SearchExplainRequest, + SearchExplainResponse, SearchIndexItem, SearchRequest, SearchResponse, SearchSessionGetRequest, + SearchTimelineGroup, 
SearchTimelineRequest, SearchTrajectoryResponse, SearchTrajectorySummary, + ShareScope, SpaceGrantRevokeRequest, SpaceGrantRevokeResponse, SpaceGrantUpsertRequest, + SpaceGrantsListRequest, TextPositionSelector, TextQuoteSelector, TraceBundleGetRequest, + TraceBundleResponse, TraceGetRequest, TraceGetResponse, TraceRecentListRequest, + TraceRecentListResponse, TraceTrajectoryGetRequest, UnpublishNoteRequest, UpdateRequest, + UpdateResponse, search::TraceBundleMode, }; /// JSON OpenAPI contract route. @@ -138,6 +139,7 @@ const VIEWER_HTML: &str = include_str!("../static/viewer.html"); consolidation_proposal_review, knowledge_page_rebuild, knowledge_pages_list, + knowledge_pages_search, knowledge_page_get, knowledge_page_lint, rebuild_qdrant, @@ -393,6 +395,13 @@ struct KnowledgePagesListQuery { limit: Option, } +#[derive(Clone, Debug, Deserialize)] +struct KnowledgePagesSearchBody { + query: String, + page_kind: Option, + limit: Option, +} + #[derive(Clone, Debug, Serialize, ToSchema)] struct AdminIngestionProfileDefaultResponseV2 { profile_id: String, @@ -678,6 +687,7 @@ pub fn admin_router(state: AppState) -> Router { ) .route("/v2/admin/knowledge/pages", routing::get(knowledge_pages_list)) .route("/v2/admin/knowledge/pages/rebuild", routing::post(knowledge_page_rebuild)) + .route("/v2/admin/knowledge/pages/search", routing::post(knowledge_pages_search)) .route("/v2/admin/knowledge/pages/{page_id}", routing::get(knowledge_page_get)) .route("/v2/admin/knowledge/pages/{page_id}/lint", routing::post(knowledge_page_lint)) .route("/v2/admin/qdrant/rebuild", routing::post(rebuild_qdrant)) @@ -2795,6 +2805,45 @@ async fn knowledge_pages_list( Ok(Json(response)) } +#[utoipa::path( + post, + path = "/v2/admin/knowledge/pages/search", + tag = "knowledge", + request_body = Value, + responses( + (status = 200, description = "Knowledge page section search results.", body = Value), + (status = 400, description = "Invalid request.", body = ErrorBody), + (status = 401, 
description = "Authentication required.", body = ErrorBody), + (status = 403, description = "Admin access required.", body = ErrorBody), + (status = 422, description = "Non-English input rejected.", body = ErrorBody), + (status = 500, description = "Internal error.", body = ErrorBody), + ) +)] +async fn knowledge_pages_search( + State(state): State, + headers: HeaderMap, + payload: Result, JsonRejection>, +) -> Result, ApiError> { + let ctx = RequestContext::from_headers(&headers)?; + let Json(payload) = payload.map_err(|err| { + tracing::warn!(error = %err, "Invalid request payload."); + + json_error(StatusCode::BAD_REQUEST, "INVALID_REQUEST", "Invalid request payload.", None) + })?; + let response = state + .service + .knowledge_pages_search(KnowledgePageSearchRequest { + tenant_id: ctx.tenant_id, + project_id: ctx.project_id, + query: payload.query, + page_kind: payload.page_kind, + limit: payload.limit, + }) + .await?; + + Ok(Json(response)) +} + #[utoipa::path( get, path = "/v2/admin/knowledge/pages/{page_id}", @@ -3451,12 +3500,15 @@ mod tests { assert!(html.contains("/v2/admin/traces/recent")); assert!(html.contains("/v2/admin/traces/${encodeURIComponent(traceId)}/bundle")); assert!(html.contains("/v2/admin/notes/")); + assert!(html.contains("/v2/admin/knowledge/pages/search")); assert!(html.contains("mode: \"full\"")); assert!(html.contains("candidates_limit: 200")); assert!(html.contains("Replay Candidates")); assert!(html.contains("Selected Final Results")); assert!(html.contains("Providers And Ranking")); assert!(html.contains("Relation Context")); + assert!(html.contains("Knowledge Page Snippets")); + assert!(html.contains("Derived page: source notes")); assert!(html.contains("directTraceId")); assert!(html.contains("trace_id")); assert!(html.contains("loadInitialTrace")); diff --git a/apps/elf-api/static/viewer.html b/apps/elf-api/static/viewer.html index 752e0c6f..83e555bc 100644 --- a/apps/elf-api/static/viewer.html +++ 
b/apps/elf-api/static/viewer.html @@ -358,6 +358,12 @@ color: var(--amber); } + .chip.danger { + background: #fff1f0; + border-color: #efb4b1; + color: var(--danger); + } + .kv { border: 1px solid var(--line); border-radius: 8px; @@ -630,12 +636,25 @@

Timeline

No timeline loaded.
+
+
+

Knowledge Page Snippets

+ +
+
+
Run a search to load derived page snippets.
+
+

Note Detail

Select a note.
+
+

Knowledge Page Detail

+
Select a derived page snippet.
+

Trace Explain

Run or load a session.
@@ -747,6 +766,7 @@

Recent Traces

activeTab: "searchView", session: null, selectedNoteId: null, + selectedKnowledgePageId: null, traceBundle: null, traceMetrics: {} }; @@ -1034,6 +1054,161 @@

Recent Traces

target.replaceChildren(...session.items.map((item) => resultRow(item, item.note_id === state.selectedNoteId))); } + function trustChipVariant(trustState) { + if (trustState === "derived_error") { + return "danger"; + } + if (trustState === "derived_warning" || trustState === "derived_low_coverage") { + return "amber"; + } + return "teal"; + } + + function knowledgeResultRow(item, selected = false) { + const openButton = make("button", { type: "button", text: "Open Page" }); + openButton.addEventListener("click", (event) => { + event.stopPropagation(); + openKnowledgePage(item.page_id); + }); + const row = make("div", { + className: `row clickable ${selected ? "selected" : ""}`.trim(), + dataPageId: item.page_id + }, [ + make("div", { className: "row-head" }, [ + make("div", { className: "title", text: `${item.title} / ${item.heading}` }), + openButton + ]), + make("div", { className: "chips" }, [ + chip("derived page", "indigo"), + chip(item.page_kind, "teal"), + chip(item.trust_state || "derived", trustChipVariant(item.trust_state)), + chip(`citations ${item.citation_count ?? 0}`), + chip(`sources ${item.source_ref_count ?? 0}`) + ]), + make("div", { className: "summary", text: item.snippet || "" }), + item.repair_guidance ? 
make("div", { className: "summary", text: item.repair_guidance }) : make("span") + ]); + row.addEventListener("click", () => openKnowledgePage(item.page_id)); + return row; + } + + function renderKnowledgeResults(items) { + const target = $("#knowledgeResults"); + if (!items || items.length === 0) { + target.replaceChildren(empty("No derived page snippets matched.")); + return; + } + target.replaceChildren(...items.map((item) => knowledgeResultRow(item, item.page_id === state.selectedKnowledgePageId))); + } + + async function searchKnowledgePages(queryOverride) { + const query = (queryOverride || $("#searchQuery").value).trim(); + if (!query) { + $("#knowledgeResults").replaceChildren(empty("Query is required.")); + return; + } + try { + const data = await api("/v2/admin/knowledge/pages/search", { + method: "POST", + body: JSON.stringify({ + query, + limit: Number($("#topK").value || 12) + }) + }); + renderKnowledgeResults(data.items || []); + } catch (err) { + $("#knowledgeResults").replaceChildren(empty(err.message)); + } + } + + async function openKnowledgePage(pageId) { + if (!pageId) { + return; + } + state.selectedKnowledgePageId = pageId; + document.querySelectorAll("#knowledgeResults [data-page-id]").forEach((row) => { + row.classList.toggle("selected", row.dataset.pageId === pageId); + }); + if (state.session) { + renderSearchSession(state.session); + } + setStatus(`Loading knowledge page ${pageId}...`); + try { + const page = await api(`/v2/admin/knowledge/pages/${encodeURIComponent(pageId)}`); + renderKnowledgePageDetail($("#knowledgeDetail"), page); + setStatus(`Loaded knowledge page ${pageId}.`); + } catch (err) { + setStatus(err.message, true); + $("#knowledgeDetail").replaceChildren(empty(err.message)); + } + } + + function renderKnowledgePageDetail(target, data) { + if (!data || !data.page) { + target.replaceChildren(empty("Knowledge page unavailable.")); + return; + } + const page = data.page; + const lint = data.lint_findings || []; + 
target.replaceChildren( + kvTable([ + ["page_id", page.page_id], + ["kind / key", `${page.page_kind} / ${page.page_key}`], + ["status", page.status], + ["updated_at", dateText(page.updated_at)], + ["rebuilt_at", dateText(page.rebuilt_at)], + ["derived notice", "Derived page: source notes, events, relations, and proposals remain authoritative."] + ]), + make("div", { className: "split-stack", style: "margin-top: 12px;" }, [ + make("div", { className: "title", text: "Source coverage" }), + pre(page.source_coverage || {}), + make("div", { className: "title", text: "Sections" }), + ...(data.sections || []).map(knowledgeSectionRow), + make("div", { className: "title", text: "Lint findings" }), + lint.length ? make("div", { className: "list" }, lint.map(lintFindingRow)) : empty("No lint findings stored."), + make("div", { className: "title", text: "Normalized source refs" }), + sourceRefsTable(data.source_refs || []) + ]) + ); + } + + function knowledgeSectionRow(sectionItem) { + return section(sectionItem.heading || sectionItem.section_key, [ + make("div", { className: "chips" }, [ + chip(sectionItem.role || "section"), + chip(`citations ${sectionItem.citation_count ?? 0}`), + chip(`source refs ${sectionItem.source_ref_count ?? 0}`), + chip(sectionItem.coverage_complete ? "coverage complete" : "coverage incomplete", sectionItem.coverage_complete ? "teal" : "amber") + ]), + pre(sectionItem.content || ""), + sourceRefsTable(sectionItem.source_backlinks || []) + ]); + } + + function lintFindingRow(finding) { + return make("div", { className: "row" }, [ + make("div", { className: "row-head" }, [ + make("div", { className: "title", text: finding.finding_type }), + chip(finding.severity, finding.severity === "error" ? 
"danger" : "amber") + ]), + make("div", { className: "summary", text: finding.message || "" }), + make("div", { className: "summary", text: finding.repair_guidance || "" }), + pre(finding.details || {}) + ]); + } + + function sourceRefsTable(refs) { + if (!refs || refs.length === 0) { + return empty("No source refs."); + } + return table(["kind", "source_id", "status", "updated"], refs.map((ref) => [ + ref.source_kind, + { value: ref.source_id, wrap: true }, + ref.source_status || "none", + dateText(ref.source_updated_at) + ])); + } + async function runSearch() { const query = $("#searchQuery").value.trim(); if (!query) { @@ -1060,7 +1235,8 @@

Recent Traces

$("#loadSearchId").value = session.search_id; await Promise.all([ loadTimeline(), - loadTraceBundle(session.trace_id, $("#traceDetail")) + loadTraceBundle(session.trace_id, $("#traceDetail")), + searchKnowledgePages(query) ]); if (state.selectedNoteId) { await selectSearchNote(state.selectedNoteId); @@ -1086,7 +1262,8 @@

Recent Traces

renderSearchSession(session); await Promise.all([ loadTimeline(), - loadTraceBundle(session.trace_id, $("#traceDetail")) + loadTraceBundle(session.trace_id, $("#traceDetail")), + searchKnowledgePages() ]); if (state.selectedNoteId) { await selectSearchNote(state.selectedNoteId); @@ -1535,6 +1712,8 @@

Recent Traces

if (state.activeTab === "searchView") { if (state.session) { await loadSession(); + } else { + await searchKnowledgePages(); } } else if (state.activeTab === "notesView") { await loadNotes(); @@ -1554,6 +1733,7 @@

Recent Traces

$("#runSearchButton").addEventListener("click", runSearch); $("#loadSessionButton").addEventListener("click", loadSession); $("#loadTimelineButton").addEventListener("click", loadTimeline); + $("#searchKnowledgeButton").addEventListener("click", () => searchKnowledgePages()); $("#loadNotesButton").addEventListener("click", loadNotes); $("#loadTracesButton").addEventListener("click", loadRecentTraces); $("#loadTraceByIdButton").addEventListener("click", loadTraceById); diff --git a/apps/elf-api/tests/http.rs b/apps/elf-api/tests/http.rs index 5e34928d..6d894994 100644 --- a/apps/elf-api/tests/http.rs +++ b/apps/elf-api/tests/http.rs @@ -852,6 +852,7 @@ async fn openapi_json_route_serves_generated_contract() { assert_openapi_method(&spec, "/v2/admin/consolidation/proposals/{proposal_id}/review", "post"); assert_openapi_method(&spec, "/v2/admin/knowledge/pages/rebuild", "post"); assert_openapi_method(&spec, "/v2/admin/knowledge/pages", "get"); + assert_openapi_method(&spec, "/v2/admin/knowledge/pages/search", "post"); assert_openapi_method(&spec, "/v2/admin/knowledge/pages/{page_id}", "get"); assert_openapi_method(&spec, "/v2/admin/knowledge/pages/{page_id}/lint", "post"); } @@ -880,6 +881,7 @@ async fn scalar_docs_route_serves_api_reference_html() { assert!(html.contains("/v2/admin/events/ingestion-profiles/default")); assert!(html.contains("/v2/admin/consolidation/proposals")); assert!(html.contains("/v2/admin/knowledge/pages")); + assert!(html.contains("/v2/admin/knowledge/pages/search")); } #[tokio::test] diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md index f587b5d0..b48a0f97 100644 --- a/docs/spec/real_world_agent_memory_benchmark_v1.md +++ b/docs/spec/real_world_agent_memory_benchmark_v1.md @@ -312,7 +312,7 @@ is not a hidden unsupported claim because the page explicitly marks the gap. 
Each `lint_findings[]` entry SHOULD include: - `finding_id` -- `finding_type`: for example `stale_claim`, `unsupported_section`, or +- `finding_type`: for example `stale_claim`, `unsupported_claim`, or `contradiction`. - `severity` - `text` diff --git a/docs/spec/system_elf_memory_service_v2.md b/docs/spec/system_elf_memory_service_v2.md index 0db9c469..7ef7218b 100644 --- a/docs/spec/system_elf_memory_service_v2.md +++ b/docs/spec/system_elf_memory_service_v2.md @@ -1009,17 +1009,22 @@ Behavior: Admin derived knowledge pages: - POST /v2/admin/knowledge/pages/rebuild - GET /v2/admin/knowledge/pages +- POST /v2/admin/knowledge/pages/search - GET /v2/admin/knowledge/pages/{page_id} - POST /v2/admin/knowledge/pages/{page_id}/lint Behavior: - These endpoints expose deterministic rebuild, list/detail readback, and stale-source - lint for derived knowledge pages. + lint for derived knowledge pages. The search endpoint exposes derived page section + snippets with visible citations, source coverage, lint summary, trust state, and + repair/rebuild guidance. - Page payloads must follow `elf.knowledge_page/v1`, preserve section citations, and write normalized source refs for lint. - Pages are derived and rebuildable; rebuilding or linting a page must not mutate authoritative notes, event audits, graph facts, consolidation proposals, docs, traces, or source pointers. +- Page snippets are not authoritative note search hits and must be labeled as derived + knowledge page snippets wherever surfaced. - The detailed contract is defined in `system_knowledge_pages_v1.md`. 
POST /v2/admin/qdrant/rebuild diff --git a/docs/spec/system_knowledge_pages_v1.md b/docs/spec/system_knowledge_pages_v1.md index 17496c16..a30336f9 100644 --- a/docs/spec/system_knowledge_pages_v1.md +++ b/docs/spec/system_knowledge_pages_v1.md @@ -109,20 +109,57 @@ At minimum, lint must detect: - changed source status - changed source freshness timestamp - changed source content hash +- persisted sections with no citations and no explicit unsupported reason +- persisted sections with an explicit unsupported reason +- sections whose citations have no normalized source backlinks +- page-level low source coverage where `coverage_complete` is false or the cited + source count differs from the total source count Stale or missing source references must be stored in `knowledge_page_lint_findings` with `finding_type = "stale_source_ref"` and enough `details` to show stored versus current source state. +Unsupported sections must be stored with `finding_type = "unsupported_claim"`. +Missing citations must use `finding_type = "missing_citation"`. +Missing normalized source backlinks must use `finding_type = "missing_source_ref"`. +Incomplete page coverage must use `finding_type = "low_source_coverage"`. +Every lint finding response must include repair or rebuild guidance. Guidance is +advisory and must not mutate source memory. + Lint findings are derived diagnostics. They must not mutate authoritative source memory. +## Search and Viewer Readback + +Knowledge page search is a derived-artifact readback surface, not the authoritative +note search surface. Page snippets may be shown beside search sessions only when they +are labeled as derived knowledge page snippets and include visible citation and source +coverage metadata. 
+ +Page search results must include: + +- result type discriminator `knowledge_page_section` +- page id, page kind, page key, title, status, section id, section key, heading, role +- bounded section snippet +- section citations and normalized source backlinks +- page source coverage metadata +- lint summary and trust state that distinguishes clean, warning, error, and low + coverage results +- a derived-result notice that source notes, event audits, relation facts, and applied + proposals remain authoritative +- repair or rebuild guidance when lint or source coverage indicates stale, + unsupported, missing, or weakly covered content + +Knowledge page snippets must not be inserted into note search results as if they were +authoritative memory notes. + ## Admin API Minimal admin readback endpoints: - `POST /v2/admin/knowledge/pages/rebuild` - `GET /v2/admin/knowledge/pages` +- `POST /v2/admin/knowledge/pages/search` - `GET /v2/admin/knowledge/pages/{page_id}` - `POST /v2/admin/knowledge/pages/{page_id}/lint` diff --git a/packages/elf-service/src/knowledge.rs b/packages/elf-service/src/knowledge.rs index dab31375..cdc9b24d 100644 --- a/packages/elf-service/src/knowledge.rs +++ b/packages/elf-service/src/knowledge.rs @@ -1,6 +1,6 @@ //! Deterministic derived knowledge page rebuild and readback service APIs. 
-use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use serde::{Deserialize, Serialize}; use serde_json::{self, Map, Value}; @@ -9,15 +9,18 @@ use time::OffsetDateTime; use uuid::Uuid; use crate::{ElfService, Error, Result}; -use elf_domain::knowledge::{ - KNOWLEDGE_PAGE_CONTRACT_SCHEMA_V1, KNOWLEDGE_PAGE_REBUILD_SCHEMA_V1, - KNOWLEDGE_PAGE_SOURCE_COVERAGE_SCHEMA_V1, KnowledgePageKind, KnowledgeSourceKind, +use elf_domain::{ + english_gate, + knowledge::{ + KNOWLEDGE_PAGE_CONTRACT_SCHEMA_V1, KNOWLEDGE_PAGE_REBUILD_SCHEMA_V1, + KNOWLEDGE_PAGE_SOURCE_COVERAGE_SCHEMA_V1, KnowledgePageKind, KnowledgeSourceKind, + }, }; use elf_storage::{ knowledge::{ self, KnowledgeEventSource, KnowledgeNoteSource, KnowledgePageLintFindingInsert, - KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, KnowledgePageUpsert, - KnowledgeProposalSource, KnowledgeRelationSource, + KnowledgePageSearchRow, KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, + KnowledgePageUpsert, KnowledgeProposalSource, KnowledgeRelationSource, }, models::{ KnowledgePage, KnowledgePageLintFinding, KnowledgePageSection, KnowledgePageSourceRef, @@ -26,6 +29,7 @@ use elf_storage::{ const DEFAULT_LIST_LIMIT: i64 = 50; const MAX_LIST_LIMIT: i64 = 200; +const SEARCH_SNIPPET_CHARS: usize = 280; /// Request to rebuild one derived knowledge page from explicit source ids. #[derive(Clone, Debug, Deserialize)] @@ -108,6 +112,21 @@ pub struct KnowledgePageLintRequest { pub page_id: Uuid, } +/// Request to search derived knowledge page sections. +#[derive(Clone, Debug, Deserialize)] +pub struct KnowledgePageSearchRequest { + /// Tenant that owns the pages. + pub tenant_id: String, + /// Project that owns the pages. + pub project_id: String, + /// English-only query for page title, key, heading, or section content. + pub query: String, + /// Optional page-kind filter. + pub page_kind: Option, + /// Maximum number of section snippets to return. 
+ pub limit: Option, +} + /// Response returned after linting one knowledge page. #[derive(Clone, Debug, Serialize)] pub struct KnowledgePageLintResponse { @@ -117,6 +136,13 @@ pub struct KnowledgePageLintResponse { pub findings: Vec, } +/// Response returned by derived knowledge page section search. +#[derive(Clone, Debug, Serialize)] +pub struct KnowledgePageSearchResponse { + /// Matching derived page snippets. + pub items: Vec, +} + /// Summary DTO for one derived knowledge page. #[derive(Clone, Debug, Serialize)] pub struct KnowledgePageSummary { @@ -207,6 +233,14 @@ pub struct KnowledgePageSectionResponse { pub citations: Value, /// Reason this section is intentionally unsupported, when present. pub unsupported_reason: Option, + /// Count of section-local citations. + pub citation_count: usize, + /// Count of normalized source refs attached to this section. + pub source_ref_count: usize, + /// True when the section has both citations and normalized source backlinks. + pub coverage_complete: bool, + /// Section-local normalized source backlinks. + pub source_backlinks: Vec, /// Section content hash. pub content_hash: String, /// Creation timestamp. @@ -226,6 +260,10 @@ impl From for KnowledgePageSectionResponse { ordinal: section.ordinal, citations: section.citations, unsupported_reason: section.unsupported_reason, + citation_count: 0, + source_ref_count: 0, + coverage_complete: false, + source_backlinks: Vec::new(), content_hash: section.content_hash, created_at: section.created_at, updated_at: section.updated_at, @@ -233,6 +271,32 @@ impl From for KnowledgePageSectionResponse { } } +/// Section-local source backlink used by page readback and viewer provenance. +#[derive(Clone, Debug, Serialize)] +pub struct KnowledgePageSectionSourceBacklink { + /// Source kind. + pub source_kind: String, + /// Authoritative source identifier. + pub source_id: Uuid, + /// Captured source status. + pub source_status: Option, + /// Captured source update timestamp. 
+ pub source_updated_at: Option, + /// Captured source content hash. + pub source_content_hash: Option, +} +impl From<&KnowledgePageSourceRef> for KnowledgePageSectionSourceBacklink { + fn from(source_ref: &KnowledgePageSourceRef) -> Self { + Self { + source_kind: source_ref.source_kind.clone(), + source_id: source_ref.source_id, + source_status: source_ref.source_status.clone(), + source_updated_at: source_ref.source_updated_at, + source_content_hash: source_ref.source_content_hash.clone(), + } + } +} + /// Readback DTO for one normalized source reference. #[derive(Clone, Debug, Serialize)] pub struct KnowledgePageSourceRefResponse { @@ -298,11 +362,16 @@ pub struct KnowledgePageLintFindingResponse { pub message: String, /// Structured finding details. pub details: Value, + /// Operator guidance for repair or rebuild. + pub repair_guidance: String, /// Creation timestamp. pub created_at: OffsetDateTime, } impl From for KnowledgePageLintFindingResponse { fn from(finding: KnowledgePageLintFinding) -> Self { + let repair_guidance = + repair_guidance_for_finding_type(finding.finding_type.as_str()).to_string(); + Self { finding_id: finding.finding_id, page_id: finding.page_id, @@ -312,12 +381,79 @@ impl From for KnowledgePageLintFindingResponse { source_kind: finding.source_kind, source_id: finding.source_id, message: finding.message, + repair_guidance, details: finding.details, created_at: finding.created_at, } } } +/// Search result for one derived knowledge page section. +#[derive(Clone, Debug, Serialize)] +pub struct KnowledgePageSearchItem { + /// Result type discriminator for clients that mix pages with notes. + pub result_kind: String, + /// Derived page identifier. + pub page_id: Uuid, + /// Page kind. + pub page_kind: String, + /// Stable page key. + pub page_key: String, + /// Page title. + pub title: String, + /// Page lifecycle status. + pub status: String, + /// Section identifier. + pub section_id: Uuid, + /// Stable section key. 
+ pub section_key: String, + /// Section heading. + pub heading: String, + /// Section role. + pub role: String, + /// Bounded matching section snippet. + pub snippet: String, + /// Section citations for visible provenance. + pub citations: Value, + /// Count of section-local citations. + pub citation_count: usize, + /// Count of normalized source refs attached to this section. + pub source_ref_count: usize, + /// Section-local source refs for backlink readback. + pub source_refs: Vec, + /// Page-level source coverage metadata. + pub source_coverage: Value, + /// Page-level rebuild metadata. + pub rebuild_metadata: Value, + /// Lint summary for distinguishing clean, stale, and unsupported pages. + pub lint_summary: KnowledgePageLintSummary, + /// Trust state discriminator for viewer/search clients. + pub trust_state: String, + /// Explicit notice that the result is derived, not authoritative source truth. + pub derived_notice: String, + /// Repair or rebuild guidance when lint or coverage indicates risk. + pub repair_guidance: Option, + /// Page update timestamp. + pub updated_at: OffsetDateTime, + /// Page rebuild timestamp. + pub rebuilt_at: OffsetDateTime, +} + +/// Aggregate lint counts for page search results. +#[derive(Clone, Debug, Serialize)] +pub struct KnowledgePageLintSummary { + /// Error finding count. + pub error_count: i64, + /// Warning finding count. + pub warning_count: i64, + /// Info finding count. + pub info_count: i64, + /// True when at least one error finding exists. + pub has_errors: bool, + /// True when at least one warning finding exists. + pub has_warnings: bool, +} + #[derive(Clone, Debug)] struct SourceSnapshot { kind: KnowledgeSourceKind, @@ -540,6 +676,47 @@ impl ElfService { Ok(KnowledgePagesListResponse { pages }) } + /// Searches derived knowledge page sections and returns provenance-rich snippets. 
+ pub async fn knowledge_pages_search( + &self, + req: KnowledgePageSearchRequest, + ) -> Result { + validate_non_empty("tenant_id", req.tenant_id.as_str())?; + validate_non_empty("project_id", req.project_id.as_str())?; + validate_non_empty("query", req.query.as_str())?; + + if !english_gate::is_english_natural_language(req.query.as_str()) { + return Err(Error::NonEnglishInput { field: "$.query".to_string() }); + } + + let query = req.query.trim().to_ascii_lowercase(); + let query_pattern = format!("%{query}%"); + let page_kind = req.page_kind.map(KnowledgePageKind::as_str); + let rows = knowledge::search_knowledge_page_sections( + &self.db.pool, + req.tenant_id.as_str(), + req.project_id.as_str(), + page_kind, + query_pattern.as_str(), + bounded_limit(req.limit), + ) + .await?; + let page_ids = sorted_unique(&rows.iter().map(|row| row.page_id).collect::>()); + let source_refs = + knowledge::list_knowledge_page_source_refs_for_pages(&self.db.pool, &page_ids).await?; + let source_refs_by_section = source_refs_by_section(&source_refs); + let items = rows + .into_iter() + .map(|row| { + let refs = cloned_source_refs(source_refs_by_section.get(&row.section_id)); + + knowledge_page_search_item(row, refs, req.query.as_str()) + }) + .collect(); + + Ok(KnowledgePageSearchResponse { items }) + } + /// Lints a derived knowledge page against current source snapshots. 
pub async fn knowledge_page_lint( &self, @@ -555,7 +732,11 @@ impl ElfService { .ok_or_else(|| Error::NotFound { message: "knowledge page not found".to_string() })?; let source_refs = knowledge::list_knowledge_page_source_refs(&self.db.pool, page.page_id).await?; - let findings = self.lint_source_refs(&page, &source_refs).await?; + let sections = knowledge::list_knowledge_page_sections(&self.db.pool, page.page_id).await?; + let mut findings = self.lint_source_refs(&page, &source_refs).await?; + + findings.extend(lint_page_sections(&page, §ions, &source_refs)); + let now = OffsetDateTime::now_utc(); let mut tx = self.db.pool.begin().await?; @@ -578,16 +759,20 @@ impl ElfService { async fn knowledge_page_response(&self, page: KnowledgePage) -> Result { let page_id = page.page_id; - let sections = knowledge::list_knowledge_page_sections(&self.db.pool, page_id) - .await? + let section_rows = knowledge::list_knowledge_page_sections(&self.db.pool, page_id).await?; + let source_ref_rows = + knowledge::list_knowledge_page_source_refs(&self.db.pool, page_id).await?; + let source_refs_by_section = source_refs_by_section(&source_ref_rows); + let sections = section_rows .into_iter() - .map(KnowledgePageSectionResponse::from) - .collect(); - let source_refs = knowledge::list_knowledge_page_source_refs(&self.db.pool, page_id) - .await? - .into_iter() - .map(KnowledgePageSourceRefResponse::from) + .map(|section| { + let refs = cloned_source_refs(source_refs_by_section.get(§ion.section_id)); + + section_response(section, refs) + }) .collect(); + let source_refs = + source_ref_rows.into_iter().map(KnowledgePageSourceRefResponse::from).collect(); let lint_findings = knowledge::list_knowledge_page_lint_findings(&self.db.pool, page_id) .await? 
.into_iter() @@ -607,46 +792,56 @@ impl ElfService { req: &KnowledgePageRebuildRequest, ids: &SourceIds, ) -> Result> { + let (notes, events, relations, proposals) = self + .resolve_existing_source_rows(req.tenant_id.as_str(), req.project_id.as_str(), ids) + .await?; + + ids.require_counts(notes.len(), events.len(), relations.len(), proposals.len())?; + + Ok(source_snapshots(notes, events, relations, proposals)) + } + + async fn resolve_existing_source_rows( + &self, + tenant_id: &str, + project_id: &str, + ids: &SourceIds, + ) -> Result<( + Vec, + Vec, + Vec, + Vec, + )> { let notes = knowledge::fetch_knowledge_note_sources( &self.db.pool, - req.tenant_id.as_str(), - req.project_id.as_str(), + tenant_id, + project_id, &ids.note_ids, ) .await?; let events = knowledge::fetch_knowledge_event_sources( &self.db.pool, - req.tenant_id.as_str(), - req.project_id.as_str(), + tenant_id, + project_id, &ids.event_ids, ) .await?; let relations = knowledge::fetch_knowledge_relation_sources( &self.db.pool, - req.tenant_id.as_str(), - req.project_id.as_str(), + tenant_id, + project_id, &ids.relation_ids, ) .await?; let proposals = knowledge::fetch_knowledge_proposal_sources( &self.db.pool, - req.tenant_id.as_str(), - req.project_id.as_str(), + tenant_id, + project_id, &ids.proposal_ids, ) .await?; - ids.require_counts(notes.len(), events.len(), relations.len(), proposals.len())?; - - let mut sources = Vec::new(); - - sources.extend(notes.into_iter().map(note_source_snapshot)); - sources.extend(events.into_iter().map(event_source_snapshot)); - sources.extend(relations.into_iter().map(relation_source_snapshot)); - sources.extend(proposals.into_iter().map(proposal_source_snapshot)); - sources.sort_by_key(source_sort_key); - - Ok(sources) + Ok((notes, events, relations, proposals)) } async fn lint_source_refs( @@ -679,29 +874,176 @@ impl ElfService { page: &KnowledgePage, ids: &SourceIds, ) -> Result> { - let req = KnowledgePageRebuildRequest { - tenant_id: page.tenant_id.clone(), - 
project_id: page.project_id.clone(), - agent_id: String::new(), - page_kind: KnowledgePageKind::parse(page.page_kind.as_str()).ok_or_else(|| { - Error::InvalidRequest { - message: "stored knowledge page kind is invalid".to_string(), - } - })?, - page_key: page.page_key.clone(), - title: Some(page.title.clone()), - note_ids: ids.note_ids.clone(), - event_ids: ids.event_ids.clone(), - relation_ids: ids.relation_ids.clone(), - proposal_ids: ids.proposal_ids.clone(), - provider_metadata: empty_object(), - }; - let mut sources = self.resolve_sources(&req, ids).await?; + let _page_kind = KnowledgePageKind::parse(page.page_kind.as_str()).ok_or_else(|| { + Error::InvalidRequest { message: "stored knowledge page kind is invalid".to_string() } + })?; + let (notes, events, relations, proposals) = self + .resolve_existing_source_rows(page.tenant_id.as_str(), page.project_id.as_str(), ids) + .await?; + let mut sources = source_snapshots(notes, events, relations, proposals); Ok(sources.drain(..).map(|source| (source_key(&source), source)).collect()) } } +fn source_snapshots( + notes: Vec, + events: Vec, + relations: Vec, + proposals: Vec, +) -> Vec { + let mut sources = Vec::new(); + + sources.extend(notes.into_iter().map(note_source_snapshot)); + sources.extend(events.into_iter().map(event_source_snapshot)); + sources.extend(relations.into_iter().map(relation_source_snapshot)); + sources.extend(proposals.into_iter().map(proposal_source_snapshot)); + sources.sort_by_key(source_sort_key); + + sources +} + +fn source_refs_by_section( + source_refs: &[KnowledgePageSourceRef], +) -> HashMap> { + let mut by_section = HashMap::>::new(); + + for source_ref in source_refs { + let Some(section_id) = source_ref.section_id else { + continue; + }; + + by_section.entry(section_id).or_default().push(clone_source_ref(source_ref)); + } + + by_section +} + +fn cloned_source_refs( + source_refs: Option<&Vec>, +) -> Vec { + source_refs.map(|refs| 
refs.iter().map(clone_source_ref).collect()).unwrap_or_default() +} + +fn clone_source_ref(source_ref: &KnowledgePageSourceRef) -> KnowledgePageSourceRef { + KnowledgePageSourceRef { + ref_id: source_ref.ref_id, + page_id: source_ref.page_id, + section_id: source_ref.section_id, + source_kind: source_ref.source_kind.clone(), + source_id: source_ref.source_id, + source_status: source_ref.source_status.clone(), + source_updated_at: source_ref.source_updated_at, + source_content_hash: source_ref.source_content_hash.clone(), + source_snapshot: source_ref.source_snapshot.clone(), + citation_metadata: source_ref.citation_metadata.clone(), + created_at: source_ref.created_at, + } +} + +fn section_response( + section: KnowledgePageSection, + source_refs: Vec, +) -> KnowledgePageSectionResponse { + let citation_count = citation_count(§ion.citations); + let source_ref_count = source_refs.len(); + let source_backlinks = + source_refs.iter().map(KnowledgePageSectionSourceBacklink::from).collect(); + + KnowledgePageSectionResponse { + citation_count, + source_ref_count, + coverage_complete: citation_count > 0 && source_ref_count > 0, + source_backlinks, + ..KnowledgePageSectionResponse::from(section) + } +} + +fn knowledge_page_search_item( + row: KnowledgePageSearchRow, + source_refs: Vec, + query: &str, +) -> KnowledgePageSearchItem { + let source_ref_count = usize::try_from(row.section_source_ref_count).unwrap_or(0); + let citation_count = citation_count(&row.citations); + let lint_summary = KnowledgePageLintSummary { + error_count: row.lint_error_count, + warning_count: row.lint_warning_count, + info_count: row.lint_info_count, + has_errors: row.lint_error_count > 0, + has_warnings: row.lint_warning_count > 0, + }; + let coverage_complete = + row.source_coverage.get("coverage_complete").and_then(Value::as_bool).unwrap_or(false); + let trust_state = search_trust_state(&lint_summary, coverage_complete, &row); + let repair_guidance = search_repair_guidance(&trust_state); + + 
KnowledgePageSearchItem { + result_kind: "knowledge_page_section".to_string(), + page_id: row.page_id, + page_kind: row.page_kind, + page_key: row.page_key, + title: row.title, + status: row.status, + section_id: row.section_id, + section_key: row.section_key, + heading: row.heading, + role: row.role, + snippet: snippet_for_query(row.content.as_str(), query, SEARCH_SNIPPET_CHARS), + citations: row.citations, + citation_count, + source_ref_count, + source_refs: source_refs.into_iter().map(KnowledgePageSourceRefResponse::from).collect(), + source_coverage: row.source_coverage, + rebuild_metadata: row.rebuild_metadata, + lint_summary, + trust_state, + derived_notice: + "Derived knowledge page snippet. Verify cited source notes, events, relations, or proposals before treating it as authoritative." + .to_string(), + repair_guidance, + updated_at: row.page_updated_at, + rebuilt_at: row.rebuilt_at, + } +} + +fn search_trust_state( + lint: &KnowledgePageLintSummary, + coverage_complete: bool, + row: &KnowledgePageSearchRow, +) -> String { + if lint.has_errors { + return "derived_error".to_string(); + } + if lint.has_warnings || row.unsupported_reason.is_some() { + return "derived_warning".to_string(); + } + + if !coverage_complete || row.section_source_ref_count == 0 { + return "derived_low_coverage".to_string(); + } + + "derived_clean".to_string() +} + +fn search_repair_guidance(trust_state: &str) -> Option { + match trust_state { + "derived_error" => Some( + "Run knowledge page lint, inspect stale or missing source refs, then rebuild the page from current authoritative sources." + .to_string(), + ), + "derived_warning" => Some( + "Inspect unsupported or stale findings before using this derived snippet; rebuild after source review." + .to_string(), + ), + "derived_low_coverage" => Some( + "Rebuild with complete citations or add source-backed sections before relying on this page." 
+ .to_string(), + ), + _ => None, + } +} + fn build_sections(sources: &[SourceSnapshot]) -> Result> { let note_indexes = source_indexes(sources, KnowledgeSourceKind::Note); let event_indexes = source_indexes(sources, KnowledgeSourceKind::Event); @@ -777,17 +1119,146 @@ fn lint_unsupported_sections(sections: &[DraftSection]) -> Vec { .filter_map(|section| { section.unsupported_reason.as_ref().map(|reason| LintDraft { section_id: Some(section.section_id), - finding_type: "unsupported_section".to_string(), + finding_type: "unsupported_claim".to_string(), severity: "warning".to_string(), source_kind: None, source_id: None, - message: format!("Knowledge page section lacks citations: {reason}"), - details: serde_json::json!({ "section_key": section.section_key }), + message: format!("Knowledge page section has unsupported content: {reason}"), + details: serde_json::json!({ + "section_key": section.section_key, + "unsupported_reason": reason, + "repair_guidance": repair_guidance_for_finding_type("unsupported_claim"), + }), }) }) .collect() } +fn lint_page_sections( + page: &KnowledgePage, + sections: &[KnowledgePageSection], + source_refs: &[KnowledgePageSourceRef], +) -> Vec { + let source_refs_by_section = source_refs_by_section(source_refs); + let mut findings = Vec::new(); + + for section in sections { + findings.extend(lint_one_section(section, &source_refs_by_section)); + } + + if !coverage_complete(page.source_coverage.as_object()) { + findings.push(low_source_coverage_finding(page)); + } + + findings +} + +fn lint_one_section( + section: &KnowledgePageSection, + source_refs_by_section: &HashMap>, +) -> Vec { + let citation_count = citation_count(§ion.citations); + let source_ref_count = + source_refs_by_section.get(§ion.section_id).map(Vec::len).unwrap_or_default(); + let mut findings = Vec::new(); + + if let Some(reason) = §ion.unsupported_reason { + findings.push(section_finding( + section, + "unsupported_claim", + "warning", + "Knowledge page section contains 
unsupported content.", + serde_json::json!({ + "unsupported_reason": reason, + "citation_count": citation_count, + "source_ref_count": source_ref_count, + }), + )); + } + + if citation_count == 0 && section.unsupported_reason.is_none() { + findings.push(section_finding( + section, + "missing_citation", + "error", + "Knowledge page section has no citations.", + serde_json::json!({ "source_ref_count": source_ref_count }), + )); + } + if source_ref_count == 0 && section.unsupported_reason.is_none() { + findings.push(section_finding( + section, + "missing_source_ref", + "error", + "Knowledge page section has no normalized source backlinks.", + serde_json::json!({ "citation_count": citation_count }), + )); + } + + findings +} + +fn section_finding( + section: &KnowledgePageSection, + finding_type: &str, + severity: &str, + message: &str, + details: Value, +) -> LintDraft { + LintDraft { + section_id: Some(section.section_id), + finding_type: finding_type.to_string(), + severity: severity.to_string(), + source_kind: None, + source_id: None, + message: message.to_string(), + details: with_repair_guidance( + details, + section.section_key.as_str(), + repair_guidance_for_finding_type(finding_type), + ), + } +} + +fn low_source_coverage_finding(page: &KnowledgePage) -> LintDraft { + LintDraft { + section_id: None, + finding_type: "low_source_coverage".to_string(), + severity: "warning".to_string(), + source_kind: None, + source_id: None, + message: "Knowledge page source coverage is incomplete.".to_string(), + details: serde_json::json!({ + "source_coverage": page.source_coverage.clone(), + "repair_guidance": repair_guidance_for_finding_type("low_source_coverage"), + }), + } +} + +fn with_repair_guidance(details: Value, section_key: &str, guidance: &str) -> Value { + let mut object = details.as_object().cloned().unwrap_or_default(); + + object.insert("section_key".to_string(), Value::String(section_key.to_string())); + object.insert("repair_guidance".to_string(), 
Value::String(guidance.to_string())); + + Value::Object(object) +} + +fn coverage_complete(coverage: Option<&Map>) -> bool { + let Some(coverage) = coverage else { + return false; + }; + let source_count = coverage.get("source_count").and_then(Value::as_u64).unwrap_or(0); + let cited_count = coverage.get("cited_source_count").and_then(Value::as_u64).unwrap_or(0); + let complete = coverage.get("coverage_complete").and_then(Value::as_bool).unwrap_or(false); + + complete && source_count == cited_count +} + +fn citation_count(citations: &Value) -> usize { + citations.as_array().map(Vec::len).unwrap_or_default() +} + fn source_indexes(sources: &[SourceSnapshot], kind: KnowledgeSourceKind) -> Vec { sources .iter() @@ -1062,6 +1533,7 @@ fn missing_source_finding(source_ref: &KnowledgePageSourceRef) -> LintDraft { details: serde_json::json!({ "source_kind": source_ref.source_kind.clone(), "source_id": source_ref.source_id, + "repair_guidance": repair_guidance_for_finding_type("stale_source_ref"), }), } } @@ -1088,16 +1560,104 @@ fn stale_source_finding( "updated_at": current.updated_at, "content_hash": current.content_hash.clone(), }, + "repair_guidance": repair_guidance_for_finding_type("stale_source_ref"), }), } } +fn repair_guidance_for_finding_type(finding_type: &str) -> &'static str { + match finding_type { + "stale_source_ref" => + "Inspect the stale or missing source, then rebuild the page from current authoritative sources.", + "unsupported_claim" => + "Replace the unsupported section content with source-backed text or rebuild from cited sources.", + "missing_citation" => + "Rebuild the page section with explicit citations or mark the section unsupported with a reason.", + "missing_source_ref" => + "Rebuild the page so each section citation is normalized into knowledge_page_source_refs.", + "low_source_coverage" => + "Rebuild with all intended sources or remove uncited material before relying on this page.", + _ => "Inspect the finding and rebuild the page after 
/// Builds a display snippet of `content` centred on the first match of `query`.
///
/// Whitespace in `content` is collapsed first. When `query` is blank or does not
/// occur (ASCII case-insensitive), the result is the collapsed content truncated
/// to `max_chars` by `truncate_chars`. Otherwise the snippet starts up to 40
/// characters before the match and holds at most `max_chars` characters of
/// content, with `"..."` prepended when text was cut before the window and
/// `"..."` appended when text was cut after it.
fn snippet_for_query(content: &str, query: &str, max_chars: usize) -> String {
    // Characters of leading context kept in front of the first match.
    const LEADING_CONTEXT_CHARS: usize = 40;
    const ELLIPSIS: &str = "...";

    let normalized = normalize_whitespace(content);
    let query = query.trim();

    if query.is_empty() {
        return truncate_chars(normalized.as_str(), max_chars);
    }

    // ASCII-only lowercasing never changes byte lengths, so a byte offset found in `lower`
    // is also a valid char boundary in `normalized`.
    let lower = normalized.to_ascii_lowercase();
    let lower_query = query.to_ascii_lowercase();
    let Some(byte_idx) = lower.find(lower_query.as_str()) else {
        return truncate_chars(normalized.as_str(), max_chars);
    };
    let before_chars = normalized[..byte_idx].chars().count();
    let start = before_chars.saturating_sub(LEADING_CONTEXT_CHARS);
    let window: String = normalized.chars().skip(start).take(max_chars).collect();
    // Measure the window before any marker is attached: counting the leading "..." as
    // content would hide a cut tail of one to three characters.
    let window_chars = window.chars().count();
    let total_chars = normalized.chars().count();
    let mut snippet = String::with_capacity(window.len() + 2 * ELLIPSIS.len());

    if start > 0 {
        snippet.push_str(ELLIPSIS);
    }

    snippet.push_str(window.as_str());

    if total_chars > start + window_chars {
        snippet.push_str(ELLIPSIS);
    }

    snippet
}

/// Collapses every run of whitespace into a single space and trims both ends.
fn normalize_whitespace(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut prev_space = false;

    for ch in raw.chars() {
        if ch.is_whitespace() {
            if !prev_space {
                out.push(' ');

                prev_space = true;
            }

            continue;
        }

        out.push(ch);

        prev_space = false;
    }

    out.trim().to_string()
}

/// Shortens `raw` to at most `max_chars` characters, ending with `"..."` when cut.
///
/// The marker counts toward the limit. When `max_chars` is no larger than the
/// marker itself, the result is the marker cut down to `max_chars` characters.
fn truncate_chars(raw: &str, max_chars: usize) -> String {
    const TRUNCATION_MARKER: &str = "...";

    if raw.chars().count() <= max_chars {
        return raw.to_string();
    }

    let marker_chars = TRUNCATION_MARKER.chars().count();

    if max_chars <= marker_chars {
        return TRUNCATION_MARKER.chars().take(max_chars).collect();
    }

    let truncated_chars = max_chars - marker_chars;
    let mut out = raw.chars().take(truncated_chars).collect::<String>();

    out.push_str(TRUNCATION_MARKER);

    out
}
/// One unsupported section plus one uncited section on a low-coverage page must
/// yield all four finding types, each carrying non-empty repair guidance.
#[test]
fn lint_page_sections_detects_unsupported_missing_and_low_coverage() {
    let page = test_page();
    let sections = [
        test_section(
            Uuid::from_u128(10),
            "unsupported",
            serde_json::json!([]),
            Some("No source supports this claim.".to_string()),
        ),
        test_section(Uuid::from_u128(11), "missing", serde_json::json!([]), None),
    ];
    let findings = knowledge::lint_page_sections(&page, &sections, &[]);

    for expected in
        ["unsupported_claim", "missing_citation", "missing_source_ref", "low_source_coverage"]
    {
        assert!(findings.iter().any(|finding| finding.finding_type == expected));
    }

    for finding in &findings {
        let guidance =
            finding.details.get("repair_guidance").and_then(serde_json::Value::as_str);

        assert!(guidance.is_some_and(|text| !text.is_empty()));
    }
}
"elf".to_string(), + title: "ELF Knowledge".to_string(), + status: "active".to_string(), + source_coverage: serde_json::json!({ + "source_count": 1, + "cited_source_count": 1, + "coverage_complete": true + }), + rebuild_metadata: serde_json::json!({ "deterministic": true }), + page_updated_at: OffsetDateTime::UNIX_EPOCH, + rebuilt_at: OffsetDateTime::UNIX_EPOCH, + section_id, + section_key: "source-notes".to_string(), + heading: "Source Notes".to_string(), + role: "current_truth".to_string(), + content: "Derived knowledge pages cite source notes before they are trusted." + .to_string(), + ordinal: 0, + citations: serde_json::json!([{ "source_kind": "note", "source_id": source_ref.source_id }]), + unsupported_reason: None, + lint_error_count: 0, + lint_warning_count: 1, + lint_info_count: 0, + section_source_ref_count: 1, + }; + let item = knowledge::knowledge_page_search_item(row, vec![source_ref], "source notes"); + + assert_eq!(item.result_kind, "knowledge_page_section"); + assert_eq!(item.trust_state, "derived_warning"); + assert_eq!(item.citation_count, 1); + assert_eq!(item.source_ref_count, 1); + assert_eq!(item.source_refs.len(), 1); + assert!(item.derived_notice.contains("Derived knowledge page snippet")); + assert!(item.repair_guidance.is_some()); + assert!(item.snippet.contains("source notes")); + } + + fn test_page() -> KnowledgePage { + KnowledgePage { + page_id: Uuid::from_u128(1), + tenant_id: "tenant".to_string(), + project_id: "project".to_string(), + page_kind: "project".to_string(), + page_key: "elf".to_string(), + title: "ELF".to_string(), + contract_schema: "elf.knowledge_page/v1".to_string(), + status: "active".to_string(), + rebuild_source_hash: "source-hash".to_string(), + content_hash: "content-hash".to_string(), + source_coverage: serde_json::json!({ + "source_count": 2, + "cited_source_count": 1, + "coverage_complete": false + }), + source_snapshot: serde_json::json!({}), + rebuild_metadata: serde_json::json!({}), + created_at: 
OffsetDateTime::UNIX_EPOCH, + updated_at: OffsetDateTime::UNIX_EPOCH, + rebuilt_at: OffsetDateTime::UNIX_EPOCH, + } + } + + fn test_section( + section_id: Uuid, + section_key: &str, + citations: serde_json::Value, + unsupported_reason: Option, + ) -> KnowledgePageSection { + KnowledgePageSection { + section_id, + page_id: Uuid::from_u128(1), + section_key: section_key.to_string(), + heading: section_key.to_string(), + role: "current_truth".to_string(), + content: "Section content.".to_string(), + ordinal: 0, + citations, + unsupported_reason, + content_hash: "section-hash".to_string(), + created_at: OffsetDateTime::UNIX_EPOCH, + updated_at: OffsetDateTime::UNIX_EPOCH, + } + } + + fn test_source_ref(section_id: Uuid) -> KnowledgePageSourceRef { + KnowledgePageSourceRef { + ref_id: Uuid::from_u128(30), + page_id: Uuid::from_u128(21), + section_id: Some(section_id), + source_kind: "note".to_string(), + source_id: Uuid::from_u128(31), + source_status: Some("active".to_string()), + source_updated_at: Some(OffsetDateTime::UNIX_EPOCH), + source_content_hash: Some("source-hash".to_string()), + source_snapshot: serde_json::json!({}), + citation_metadata: serde_json::json!({}), + created_at: OffsetDateTime::UNIX_EPOCH, + } + } } diff --git a/packages/elf-service/src/lib.rs b/packages/elf-service/src/lib.rs index 7ba4f202..47833604 100644 --- a/packages/elf-service/src/lib.rs +++ b/packages/elf-service/src/lib.rs @@ -69,9 +69,11 @@ pub use self::{ }, knowledge::{ KnowledgePageGetRequest, KnowledgePageLintFindingResponse, KnowledgePageLintRequest, - KnowledgePageLintResponse, KnowledgePageRebuildRequest, KnowledgePageRebuildResponse, - KnowledgePageResponse, KnowledgePageSectionResponse, KnowledgePageSourceRefResponse, - KnowledgePageSummary, KnowledgePagesListRequest, KnowledgePagesListResponse, + KnowledgePageLintResponse, KnowledgePageLintSummary, KnowledgePageRebuildRequest, + KnowledgePageRebuildResponse, KnowledgePageResponse, KnowledgePageSearchItem, + 
KnowledgePageSearchRequest, KnowledgePageSearchResponse, KnowledgePageSectionResponse, + KnowledgePageSectionSourceBacklink, KnowledgePageSourceRefResponse, KnowledgePageSummary, + KnowledgePagesListRequest, KnowledgePagesListResponse, }, list::{ListItem, ListRequest, ListResponse}, notes::{NoteFetchRequest, NoteFetchResponse}, diff --git a/packages/elf-storage/src/knowledge.rs b/packages/elf-storage/src/knowledge.rs index cee88f0f..1e37cf7e 100644 --- a/packages/elf-storage/src/knowledge.rs +++ b/packages/elf-storage/src/knowledge.rs @@ -252,6 +252,53 @@ pub struct KnowledgeProposalSource { pub updated_at: OffsetDateTime, } +/// Searchable knowledge page section row with page and lint metadata. +#[derive(Debug, FromRow)] +pub struct KnowledgePageSearchRow { + /// Derived page identifier. + pub page_id: Uuid, + /// Page kind. + pub page_kind: String, + /// Stable page key. + pub page_key: String, + /// Page title. + pub title: String, + /// Page lifecycle status. + pub status: String, + /// Source coverage metadata. + pub source_coverage: Value, + /// Rebuild metadata. + pub rebuild_metadata: Value, + /// Page update timestamp. + pub page_updated_at: OffsetDateTime, + /// Page rebuild timestamp. + pub rebuilt_at: OffsetDateTime, + /// Section identifier. + pub section_id: Uuid, + /// Stable section key. + pub section_key: String, + /// Section heading. + pub heading: String, + /// Section role. + pub role: String, + /// Section content. + pub content: String, + /// Section display order. + pub ordinal: i32, + /// Section citations. + pub citations: Value, + /// Reason the section is unsupported, when present. + pub unsupported_reason: Option, + /// Number of error lint findings for the page. + pub lint_error_count: i64, + /// Number of warning lint findings for the page. + pub lint_warning_count: i64, + /// Number of info lint findings for the page. + pub lint_info_count: i64, + /// Number of normalized source refs for this section. 
/// Searches derived knowledge page sections by page and section text.
///
/// Returns one row per matching section, joined with its page, the page's lint
/// finding counts per severity, and the number of source refs attached to the
/// section. Missing counts are reported as `0`.
///
/// Filters applied by the query:
/// - the page belongs to `tenant_id` and `project_id`;
/// - the page status is `active` or `stale`;
/// - the page kind equals `page_kind` when one is given;
/// - `query_pattern` matches the lowercased page title, page key, section
///   heading, or section content via `LIKE`.
///
/// Rows are ranked by where the pattern matched (title, then heading, then page
/// key, then content only), then by most recently updated page, section
/// ordinal, and page id, and are capped at `limit`.
///
/// `query_pattern` is bound directly as the `LIKE` operand against `lower(...)`
/// columns, so it only matches when it is already lowercase and carries its own
/// wildcards.
// NOTE(review): presumably the caller escapes `%` and `_` in user input before
// building `query_pattern`; that code is not visible here, so confirm.
// NOTE(review): the `page_lint` and `section_refs` CTEs aggregate with no
// tenant, project, or page filter of their own. Whether the planner limits that
// work to the matched pages is not visible here; check the query plan on large
// tables and consider scoping the aggregates to the matched pages.
pub async fn search_knowledge_page_sections<'e, E>(
    executor: E,
    tenant_id: &str,
    project_id: &str,
    page_kind: Option<&str>,
    query_pattern: &str,
    limit: i64,
) -> Result<Vec<KnowledgePageSearchRow>>
where
    E: PgExecutor<'e>,
{
    let rows = sqlx::query_as::<_, KnowledgePageSearchRow>(
        "\
WITH page_lint AS (
	SELECT
		page_id,
		count(*) FILTER (WHERE severity = 'error') AS error_count,
		count(*) FILTER (WHERE severity = 'warning') AS warning_count,
		count(*) FILTER (WHERE severity = 'info') AS info_count
	FROM knowledge_page_lint_findings
	GROUP BY page_id
),
section_refs AS (
	SELECT section_id, count(*) AS source_ref_count
	FROM knowledge_page_source_refs
	GROUP BY section_id
)
SELECT
	p.page_id,
	p.page_kind,
	p.page_key,
	p.title,
	p.status,
	p.source_coverage,
	p.rebuild_metadata,
	p.updated_at AS page_updated_at,
	p.rebuilt_at,
	s.section_id,
	s.section_key,
	s.heading,
	s.role,
	s.content,
	s.ordinal,
	s.citations,
	s.unsupported_reason,
	COALESCE(page_lint.error_count, 0)::bigint AS lint_error_count,
	COALESCE(page_lint.warning_count, 0)::bigint AS lint_warning_count,
	COALESCE(page_lint.info_count, 0)::bigint AS lint_info_count,
	COALESCE(section_refs.source_ref_count, 0)::bigint AS section_source_ref_count
FROM knowledge_pages p
JOIN knowledge_page_sections s ON s.page_id = p.page_id
LEFT JOIN page_lint ON page_lint.page_id = p.page_id
LEFT JOIN section_refs ON section_refs.section_id = s.section_id
WHERE p.tenant_id = $1
	AND p.project_id = $2
	AND p.status IN ('active', 'stale')
	AND ($3::text IS NULL OR p.page_kind = $3)
	AND (
		lower(p.title) LIKE $4
		OR lower(p.page_key) LIKE $4
		OR lower(s.heading) LIKE $4
		OR lower(s.content) LIKE $4
	)
ORDER BY
	CASE
		WHEN lower(p.title) LIKE $4 THEN 4
		WHEN lower(s.heading) LIKE $4 THEN 3
		WHEN lower(p.page_key) LIKE $4 THEN 2
		ELSE 1
	END DESC,
	p.updated_at DESC,
	s.ordinal ASC,
	p.page_id DESC
LIMIT $5",
    )
    // Bind order follows the `$1`..`$5` placeholders in the statement above.
    .bind(tenant_id)
    .bind(project_id)
    .bind(page_kind)
    .bind(query_pattern)
    .bind(limit)
    .fetch_all(executor)
    .await?;

    Ok(rows)
}