diff --git a/README.md b/README.md index da46d724..3ec59b3a 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,8 @@ provider-backed ELF evidence was required. does not create a managed-memory parity claim. The new `proactive_brief` fixture scores 5 jobs, with 4 pass and 1 blocked private-corpus case; it does not create Pulse or hosted managed-memory parity. -- Dreaming competitor-strength closeout after XY-955: the June 17 retest keeps ELF +- Dreaming competitor-strength closeout after XY-955: the June 17 competitor-strength closeout + retest keeps ELF locally and partially stronger only. The aggregate fixture retest remains 53 pass and 7 typed blockers, the representative graph/RAG slice remains typed non-pass, first-generation OSS fixture coverage remains 4 pass and 2 blocked, and the fresh @@ -216,6 +217,11 @@ provider-backed ELF evidence was required. boundaries. This upgrades ELF's own knowledge-page evidence from fixture-only to service-native proof, but it does not claim llm-wiki, gbrain, GraphRAG, RAGFlow, LightRAG, or graphify parity without comparable contained adapter outputs. +- Knowledge Workspace version diffs after XY-1019: the June 20 follow-up adds + `elf.knowledge_page.version_diff/v1` readback under knowledge page rebuild metadata + and surfaces it as `page_version_diff` in benchmark artifacts. The live command now + reports `version_diff_coverage = 1.000` while preserving deterministic page content + hashes and `source_mutation_allowed = false`. - Operator-approved public-proxy addendum after XY-930: the June 19 follow-up runs `cargo make baseline-production-private-addendum` with a simulated/public-proxy production corpus manifest approved for this stage. 
The run records 12 documents, @@ -342,6 +348,8 @@ Detailed evidence and interpretation: - [Service-Native Dreaming Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md) - [OpenMemory UI/Export Product Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-openmemory-ui-export-product-readback-report.md) - [Operator-Approved Public-Proxy Production-Private Addendum - June 19, 2026](docs/evidence/benchmarking/2026-06-19-operator-approved-public-proxy-production-private-addendum.md) +- [Knowledge Workspace Version-Diff Report - June 20, 2026](docs/evidence/benchmarking/2026-06-20-knowledge-workspace-version-diff-report.md) +- [Live Knowledge-Page Rebuild/Lint Report - June 20, 2026](docs/evidence/benchmarking/2026-06-20-live-knowledge-page-rebuild-lint-report.md) - [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md) - [Single-User Production Runbook](docs/runbook/single_user_production.md) - Benchmark contract: @@ -443,11 +451,12 @@ Detailed comparison, mechanism-level analysis, and source map: - [Dreaming Product Surface Follow-Up Research](docs/research/dreaming_product_surface_followup.md) Latest real-world benchmark report: June 20, 2026. Latest external research refresh: -June 11, 2026; June 20 adds the Live Knowledge-Page Rebuild/Lint Report - June 20, 2026 -after the June 19 XY-930 operator-approved public-proxy production addendum and -service-native Dreaming readback, the qmd debug-ergonomics Dreaming retest, the -June 17 competitor-strength closeout, and the June 16 temporal reconciliation, -live consolidation self-check, proactive-brief, and scheduled-memory scoring evidence. 
+June 11, 2026; June 20 adds the Knowledge Workspace Version-Diff Report - June 20, 2026 +and the Live Knowledge-Page Rebuild/Lint Report - June 20, 2026 after the June 19 +XY-930 operator-approved public-proxy production addendum and service-native Dreaming +readback, the qmd debug-ergonomics Dreaming retest, the June 17 competitor-strength +closeout, and the June 16 temporal reconciliation, live consolidation self-check, +proactive-brief, and scheduled-memory scoring evidence. ## Documentation diff --git a/apps/elf-eval/src/bin/real_world_job_benchmark.rs b/apps/elf-eval/src/bin/real_world_job_benchmark.rs index 796e767e..7df8d086 100644 --- a/apps/elf-eval/src/bin/real_world_job_benchmark.rs +++ b/apps/elf-eval/src/bin/real_world_job_benchmark.rs @@ -460,6 +460,8 @@ struct DerivedPageArtifact { lint_findings: Vec, #[serde(skip_serializing_if = "Option::is_none")] rebuild: Option, + #[serde(skip_serializing_if = "Option::is_none")] + page_version_diff: Option, } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -1271,10 +1273,12 @@ struct KnowledgeSummary { section_count: usize, backlink_count: usize, pages_with_backlinks: usize, + pages_with_version_diff: usize, citation_coverage: f64, stale_claim_detection: f64, rebuild_determinism: f64, backlink_coverage: f64, + version_diff_coverage: f64, page_usefulness: f64, unsupported_summary_count: usize, untraced_section_count: usize, @@ -1459,6 +1463,7 @@ struct KnowledgeJobMetrics { unsupported_summary_count: usize, backlink_count: usize, pages_with_backlinks: usize, + pages_with_version_diff: usize, stale_trap_count: usize, stale_traps_detected: usize, rebuild_page_count: usize, @@ -1469,6 +1474,7 @@ struct KnowledgeJobMetrics { stale_claim_detection: f64, rebuild_determinism: f64, backlink_coverage: f64, + version_diff_coverage: f64, page_usefulness: f64, } @@ -2195,6 +2201,23 @@ fn validate_page_artifact( page.page_id )); } + if let Some(diff) = &page.page_version_diff { + if !diff.is_object() { + return 
Err(eyre::eyre!( + "{} page {} previous-version diff must be a JSON object.", + path.display(), + page.page_id + )); + } + if diff.get("schema").and_then(Value::as_str) != Some("elf.knowledge_page.version_diff/v1") + { + return Err(eyre::eyre!( + "{} page {} previous-version diff has an unexpected schema.", + path.display(), + page.page_id + )); + } + } Ok(()) } @@ -3854,6 +3877,7 @@ fn knowledge_metrics(job: &RealWorldJob, answer: &ProducedAnswer) -> Option bool { + page.page_version_diff.as_ref().is_some_and(|diff| { + diff.get("schema").and_then(Value::as_str) == Some("elf.knowledge_page.version_diff/v1") + && diff.get("available").and_then(Value::as_bool).unwrap_or(false) + }) +} + fn section_is_traced(section: &DerivedPageSection) -> bool { !section.evidence_ids.is_empty() || !section.timeline_event_ids.is_empty() } @@ -5804,6 +5838,8 @@ fn knowledge_summary(jobs: &[JobReport]) -> Option { let backlink_count = knowledge_jobs.iter().map(|metrics| metrics.backlink_count).sum::(); let pages_with_backlinks = knowledge_jobs.iter().map(|metrics| metrics.pages_with_backlinks).sum::(); + let pages_with_version_diff = + knowledge_jobs.iter().map(|metrics| metrics.pages_with_version_diff).sum::(); let page_usefulness = round3( knowledge_jobs.iter().map(|metrics| metrics.page_usefulness).sum::() / job_count as f64, @@ -5815,10 +5851,12 @@ fn knowledge_summary(jobs: &[JobReport]) -> Option { section_count, backlink_count, pages_with_backlinks, + pages_with_version_diff, citation_coverage: ratio(traced_section_count, section_count), stale_claim_detection: ratio_or_full(stale_traps_detected, stale_trap_count), rebuild_determinism: ratio(deterministic_rebuild_count, rebuild_page_count), backlink_coverage: ratio(pages_with_backlinks, page_count), + version_diff_coverage: ratio(pages_with_version_diff, page_count), page_usefulness, unsupported_summary_count: knowledge_jobs .iter() @@ -6810,6 +6848,10 @@ fn render_markdown_optional_summary_metrics(out: &mut String, summary: 
&ReportSu "- Backlinks: `{}` total, `{:.3}` page coverage\n", knowledge.backlink_count, knowledge.backlink_coverage )); + out.push_str(&format!( + "- Version diff coverage: `{:.3}`\n", + knowledge.version_diff_coverage + )); out.push_str(&format!("- Page usefulness: `{:.3}`\n", knowledge.page_usefulness)); out.push_str(&format!( "- Unsupported summary count: `{}`\n", @@ -7296,8 +7338,10 @@ fn render_markdown_knowledge(out: &mut String, report: &RealWorldReport) { } out.push_str("## Knowledge Page Metrics\n\n"); - out.push_str("| Job | Pages | Sections | Citation Coverage | Stale Claim Detection | Rebuild Determinism | Page Usefulness | Backlinks | Unsupported Summaries | Untraced Sections | Allowed Variance |\n"); - out.push_str("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |\n"); + out.push_str("| Job | Pages | Sections | Citation Coverage | Stale Claim Detection | Rebuild Determinism | Version Diff Coverage | Page Usefulness | Backlinks | Unsupported Summaries | Untraced Sections | Allowed Variance |\n"); + out.push_str( + "| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |\n", + ); for job in knowledge_jobs { let Some(knowledge) = &job.knowledge else { @@ -7305,13 +7349,14 @@ fn render_markdown_knowledge(out: &mut String, report: &RealWorldReport) { }; out.push_str(&format!( - "| {} | {} | {} | `{:.3}` | `{:.3}` | `{:.3}` | `{:.3}` | {} | {} | {} | {} |\n", + "| {} | {} | {} | `{:.3}` | `{:.3}` | `{:.3}` | `{:.3}` | `{:.3}` | {} | {} | {} | {} |\n", md_cell(job.job_id.as_str()), knowledge.page_count, knowledge.section_count, knowledge.citation_coverage, knowledge.stale_claim_detection, knowledge.rebuild_determinism, + knowledge.version_diff_coverage, knowledge.page_usefulness, knowledge.backlink_count, knowledge.unsupported_summary_count, diff --git a/apps/elf-eval/src/bin/real_world_live_adapter.rs b/apps/elf-eval/src/bin/real_world_live_adapter.rs index f40ec884..23c36985 100644 --- 
a/apps/elf-eval/src/bin/real_world_live_adapter.rs +++ b/apps/elf-eval/src/bin/real_world_live_adapter.rs @@ -350,6 +350,7 @@ struct KnowledgeMaterializationEvidence { unsupported_claim_count: usize, citation_count: usize, source_ref_count: usize, + version_diff_available: bool, } #[derive(Clone, Debug, Default, Serialize)] @@ -3455,6 +3456,7 @@ fn knowledge_page_artifact( "sections": sections, "backlinks": source_backlinks(ingested), "lint_findings": lint_findings_for_page(loaded, ingested, lint), + "page_version_diff": second.page.previous_version_diff.clone(), "rebuild": { "first_hash": first.page.content_hash.clone(), "second_hash": second.page.content_hash.clone(), @@ -3485,6 +3487,13 @@ fn knowledge_materialization_evidence( unsupported_claim_count, citation_count: page.sections.iter().map(|section| section.citation_count).sum(), source_ref_count: page.source_refs.len(), + version_diff_available: page + .page + .previous_version_diff + .as_ref() + .and_then(|diff| diff.get("available")) + .and_then(serde_json::Value::as_bool) + .unwrap_or(false), } } diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs index 97eada0e..b86fb1f1 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark.rs @@ -2348,6 +2348,16 @@ fn live_knowledge_page_rebuild_lint_has_dedicated_docker_task() -> Result<()> { fs::read_to_string(workspace.join("scripts/real-world-knowledge-live-adapter.sh"))?; let live_adapter = fs::read_to_string(workspace.join("apps/elf-eval/src/bin/real_world_live_adapter.rs"))?; + let knowledge_spec = fs::read_to_string( + workspace.join("docs").join("spec").join("system_knowledge_pages_v1.md"), + )?; + let version_diff_report = fs::read_to_string( + workspace + .join("docs") + .join("evidence") + .join("benchmarking") + .join("2026-06-20-knowledge-workspace-version-diff-report.md"), + )?; let benchmark_runbook = fs::read_to_string( workspace 
.join("docs") @@ -2380,16 +2390,29 @@ fn live_knowledge_page_rebuild_lint_has_dedicated_docker_task() -> Result<()> { assert!(live_script.contains("knowledge_page_lint")); assert!(live_script.contains("knowledge_pages_search")); assert!(live_script.contains("pages remain derived benchmark artifacts")); + assert!(live_adapter.contains("\"page_version_diff\"")); + assert!(live_adapter.contains("version_diff_available")); assert!(live_adapter.contains("fn materialize_elf_knowledge(")); assert!(live_adapter.contains("KnowledgePageRebuildRequest")); assert!(live_adapter.contains("KnowledgePageLintRequest")); assert!(live_adapter.contains("KnowledgePageSearchRequest")); + assert!( + fs::read_to_string(workspace.join("apps/elf-eval/src/bin/real_world_job_benchmark.rs"))? + .contains("version_diff_coverage") + ); + assert!(knowledge_spec.contains("elf.knowledge_page.version_diff/v1")); + assert!( + version_diff_report.contains("Knowledge Workspace Version-Diff Report - June 20, 2026") + ); + assert!(version_diff_report.contains("version_diff_coverage = 1.000")); assert!(benchmark_runbook.contains("Current live knowledge-page rebuild/lint increment")); assert!(benchmark_runbook.contains("cargo make real-world-memory-live-knowledge")); assert!(benchmark_runbook.contains("tmp/real-world-memory/live-knowledge/summary.json")); assert!(live_runbook.contains("cargo make real-world-memory-live-knowledge")); assert!(benchmarking_index.contains("2026-06-20-live-knowledge-page-rebuild-lint-report.md")); + assert!(benchmarking_index.contains("2026-06-20-knowledge-workspace-version-diff-report.md")); assert!(readme.contains("Live Knowledge-Page Rebuild/Lint Report - June 20, 2026")); + assert!(readme.contains("Knowledge Workspace Version-Diff Report - June 20, 2026")); Ok(()) } diff --git a/docs/evidence/benchmarking/2026-06-20-knowledge-workspace-version-diff-report.md b/docs/evidence/benchmarking/2026-06-20-knowledge-workspace-version-diff-report.md new file mode 100644 index 
00000000..3b19dcb5 --- /dev/null +++ b/docs/evidence/benchmarking/2026-06-20-knowledge-workspace-version-diff-report.md @@ -0,0 +1,81 @@ +--- +type: Evidence +title: "Knowledge Workspace Version-Diff Report - June 20, 2026" +description: "Checked-in benchmark evidence record: Knowledge Workspace Version-Diff Report - June 20, 2026." +resource: docs/evidence/benchmarking/2026-06-20-knowledge-workspace-version-diff-report.md +status: active +authority: current_state +owner: evidence +last_verified: 2026-06-20 +tags: + - docs + - evidence + - benchmarking +--- +# Knowledge Workspace Version-Diff Report - June 20, 2026 + +Goal: Close XY-1019's product-quality Knowledge Workspace increment by proving +derived pages expose previous-version diffs while preserving citations, lint, +rebuild determinism, search readback, and source-of-truth boundaries. +Read this when: You need to know whether ELF knowledge pages now show rebuild diffs +without turning derived pages into authoritative memory. +Inputs: `cargo make real-world-memory-live-knowledge`, +`packages/elf-service/src/knowledge.rs`, +`apps/elf-eval/src/bin/real_world_live_adapter.rs`, and +`apps/elf-eval/src/bin/real_world_job_benchmark.rs`. +Outputs: Service and benchmark evidence for `elf.knowledge_page.version_diff/v1`. + +## Executive Judgment + +ELF Knowledge Workspace pages now expose previous-version diff metadata under +`rebuild_metadata.previous_version_diff` and surface it as `page_version_diff` in +live benchmark artifacts. The diff records previous/new content and source hashes, +title/source/content change booleans, section added/removed/changed/unchanged counts, +section key lists, a summary, and `source_mutation_allowed = false`. + +This is a product-quality readback improvement for ELF's derived knowledge pages. It +does not claim broad llm-wiki, gbrain, GraphRAG, RAGFlow, LightRAG, or graphify parity. 
+External comparisons still need contained adapters with comparable page sections, +source ids, citation mappings, lint findings, previous-version diffs, and typed +statuses. + +## Command Evidence + +| Command | Result | +| --- | --- | +| `cargo test -p elf-service knowledge::tests::previous_version_diff_records_delta_without_changing_content_hash -- --nocapture` | Passed; proves diff metadata does not perturb page content hashes. | +| `cargo test -p elf-eval --test real_world_job_benchmark live_knowledge_page_rebuild_lint_has_dedicated_docker_task -- --nocapture` | Passed; proves the live adapter and benchmark report keep the version-diff contract wired. | +| `cargo make real-world-memory-live-knowledge` | Passed; Docker-contained live materialization reports `version_diff_coverage = 1.000`. | + +## Current Live Metrics + +From `tmp/real-world-memory/live-knowledge/elf-report.json`: + +| Metric | Value | +| --- | ---: | +| Knowledge jobs | 2 | +| Pages | 2 | +| Pages with version diff | 2 | +| Version diff coverage | 1.000 | +| Rebuild determinism | 1.000 | +| Stale claim detection | 1.000 | +| Backlink coverage | 1.000 | +| Page usefulness | 0.938 | + +## Contract Boundary + +| Allowed claim | Boundary | +| --- | --- | +| ELF derived pages expose previous-version diff metadata after repeated rebuilds. | The diff is readback metadata only; it must not mutate source memory. | +| Search and benchmark artifacts can show `page_version_diff`. | Page snippets remain derived artifacts and must carry citations/lint/source coverage. | +| Rebuild determinism remains stable when diff metadata is present. | The page content hash excludes previous-version diff metadata. | +| External knowledge-product comparison remains future work. | Competitors need comparable contained artifacts before any parity or win/loss claim. 
| + +## Follow-Up Queue + +| Follow-up | Reason | +| --- | --- | +| XY-1020 | Temporal graph-lite facts can now feed cited pages without making pages source truth. | +| XY-1021 | Dreaming review queue can propose page rebuilds using source-backed diffs and lint. | +| Graph/RAG contained adapters | External comparison needs comparable version-diff and citation/lint outputs. | + diff --git a/docs/evidence/benchmarking/2026-06-20-live-knowledge-page-rebuild-lint-report.md b/docs/evidence/benchmarking/2026-06-20-live-knowledge-page-rebuild-lint-report.md index 202659bd..5f8b6984 100644 --- a/docs/evidence/benchmarking/2026-06-20-live-knowledge-page-rebuild-lint-report.md +++ b/docs/evidence/benchmarking/2026-06-20-live-knowledge-page-rebuild-lint-report.md @@ -18,7 +18,7 @@ Goal: Close XY-935 by moving ELF knowledge-page rebuild/lint scoring from fixtur evidence into a Docker-contained service materialization command. Read this when: You need to know whether ELF has service-native evidence for derived knowledge pages, citation coverage, stale-source lint, unsupported sections, -rebuild metadata, backlinks, and page search. +rebuild metadata, previous-version diffs, backlinks, and page search. Inputs: `cargo make real-world-memory-knowledge`, `cargo make real-world-memory-live-knowledge`, `apps/elf-eval/fixtures/real_world_memory/knowledge/`, and @@ -37,7 +37,7 @@ This improves ELF's own knowledge-page authority from fixture-only page artifact service-backed rebuild/lint/search evidence. It does not prove parity or superiority against llm-wiki, gbrain, GraphRAG, RAGFlow, LightRAG, or graphify. Those comparisons remain valid only when a contained adapter emits comparable page sections, source ids, -citation mappings, lint findings, and typed benchmark statuses. +citation mappings, lint findings, previous-version diffs, and typed benchmark statuses. ## Command Evidence @@ -68,6 +68,7 @@ The command is intentionally Docker-scoped. 
Host execution is refused unless | Stale-source lint | Stale source updates after rebuild produce lint findings instead of silently rewriting truth. | | Unsupported sections | Unsupported summaries remain visible as unsupported, not hidden claims. | | Rebuild metadata | First and second rebuild hashes, deterministic status, and allowed variance remain explicit. | +| Previous-version diff | Repeated rebuilds expose `elf.knowledge_page.version_diff/v1` metadata without changing page content hashes. | | Backlinks and search | Page artifacts expose backlinks, and `knowledge_pages_search` returns the materialized page surface. | | Source-of-truth boundary | Knowledge pages remain derived benchmark artifacts and do not replace Memory Notes or source records. | @@ -95,6 +96,8 @@ The command is intentionally Docker-scoped. Host execution is refused unless command for the checked-in `knowledge_compilation` fixture pack. - The command exercises `knowledge_page_rebuild`, `knowledge_page_lint`, and `knowledge_pages_search` before scoring. +- The current service-native artifact includes previous-version diff metadata and + reports `version_diff_coverage = 1.000`. - ELF's own knowledge-page evidence is stronger than fixture-only proof for this narrow slice. diff --git a/docs/evidence/benchmarking/index.md b/docs/evidence/benchmarking/index.md index b8533592..e444fb6a 100644 --- a/docs/evidence/benchmarking/index.md +++ b/docs/evidence/benchmarking/index.md @@ -43,4 +43,5 @@ Routes to: Benchmarking evidence concepts under `docs/evidence/benchmarking/`. - `2026-06-19-operator-approved-public-proxy-production-private-addendum.md`: Operator-Approved Public-Proxy Production-Private Addendum - June 19, 2026; closes the current XY-930 proxy/simulated-corpus stage with 8/8 query pass, 0 wrong_result, and explicit boundaries that this is not real private-corpus or provider-backed proof. 
- `2026-06-19-qmd-debug-ergonomics-dreaming-retest-report.md`: qmd Debug-Ergonomics Dreaming Retest Report - June 19, 2026; confirms qmd's default top-k/replay edge is unchanged while ELF keeps the narrow operator-debug trace/stage visibility wins. - `2026-06-19-service-native-dreaming-readback-report.md`: Service-Native Dreaming Readback Report - June 19, 2026; materializes memory summary, proactive brief, and scheduled-memory derived outputs through `ElfService` readback with 9 pass, 0 wrong_result, and 2 typed XY-930 blockers. +- `2026-06-20-knowledge-workspace-version-diff-report.md`: Knowledge Workspace Version-Diff Report - June 20, 2026; proves ELF knowledge pages now expose previous-version diff metadata without perturbing page content hashes while preserving citation, lint, and source-of-truth boundaries. - `2026-06-20-live-knowledge-page-rebuild-lint-report.md`: Live Knowledge-Page Rebuild/Lint Report - June 20, 2026; adds a Docker-contained ELF service-native knowledge-page materialization command while preserving llm-wiki, gbrain, GraphRAG, RAGFlow, LightRAG, and graphify as separate comparison targets until they emit comparable scored page artifacts. diff --git a/docs/log.md b/docs/log.md index 88fc55aa..f136fa97 100644 --- a/docs/log.md +++ b/docs/log.md @@ -67,3 +67,7 @@ logs. fixture pack can be materialized through `ElfService` rebuild, lint, and page search before scoring while keeping external wiki/graph/RAG product comparisons separate. +- Added the Knowledge Workspace version-diff report for XY-1019. Knowledge page + rebuild metadata now exposes `elf.knowledge_page.version_diff/v1`, live benchmark + artifacts expose `page_version_diff`, and the Docker-contained live knowledge + report now publishes `version_diff_coverage`. 
diff --git a/docs/runbook/benchmarking/live_baseline_benchmark.md b/docs/runbook/benchmarking/live_baseline_benchmark.md index 83fe2f32..9b10b960 100644 --- a/docs/runbook/benchmarking/live_baseline_benchmark.md +++ b/docs/runbook/benchmarking/live_baseline_benchmark.md @@ -537,7 +537,10 @@ tmp/real-world-memory/live-knowledge/summary.json This command materializes the same knowledge fixture pack through `ElfService::knowledge_page_rebuild`, `knowledge_page_lint`, and `knowledge_pages_search` inside the baseline Docker runner before publishing the -scored report. It is an ELF service self-check, not a direct competitor win. +scored report. The report now includes `version_diff_coverage` and each generated +page artifact includes `page_version_diff` under `elf.knowledge_page.version_diff/v1`, +with `source_mutation_allowed = false`. It is an ELF service self-check, not a direct +competitor win. ## Clean Up diff --git a/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md b/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md index f9b0dc90..abec8a58 100644 --- a/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md +++ b/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md @@ -480,9 +480,12 @@ The live increment runs inside the Docker baseline runner and materializes the knowledge fixtures through `ElfService::knowledge_page_rebuild`, `knowledge_page_lint`, and `knowledge_pages_search` before scoring them with the real-world job benchmark. It proves ELF service-native rebuild/lint/search behavior -for the checked-in `knowledge_compilation` pack. It does not claim llm-wiki, gbrain, -GraphRAG, RAGFlow, LightRAG, or graphify parity unless those projects emit comparable -page sections, source ids, citation mappings, lint findings, and typed statuses. +for the checked-in `knowledge_compilation` pack. 
The current productized workspace +increment also requires `page_version_diff` artifacts under +`elf.knowledge_page.version_diff/v1` and reports `version_diff_coverage` in the +knowledge summary. It does not claim llm-wiki, gbrain, GraphRAG, RAGFlow, LightRAG, +or graphify parity unless those projects emit comparable page sections, source ids, +citation mappings, lint findings, previous-version diffs, and typed statuses. Current checked-in production-ops increment: diff --git a/docs/spec/system_knowledge_pages_v1.md b/docs/spec/system_knowledge_pages_v1.md index 146ee3ab..2068ce8d 100644 --- a/docs/spec/system_knowledge_pages_v1.md +++ b/docs/spec/system_knowledge_pages_v1.md @@ -112,6 +112,19 @@ Unreviewed consolidation proposals must not be used as source input for persiste - `deterministic` - `provider_metadata` - `allowed_variance` +- `previous_version_diff` + +`previous_version_diff` must use schema `elf.knowledge_page.version_diff/v1`. +Initial rebuilds must set `available = false` and explain that no previous version +exists. Later rebuilds must set `available = true` and include previous and new +content/source hashes, title/source/content changed booleans, added/removed/changed/ +unchanged section key lists and counts, a human-readable summary, and +`source_mutation_allowed = false`. + +Previous-version diff metadata is rebuild readback metadata, not source content. Page +content hashes must not include `previous_version_diff`; otherwise repeating the same +source rebuild would appear nondeterministic solely because the previous-version +metadata changed. 
When future provider-backed or LLM-derived page text is persisted, `rebuild_metadata.deterministic` must be false unless the provider output is fully @@ -161,6 +174,7 @@ Page search results must include: - bounded section snippet - section citations and normalized source backlinks - page source coverage metadata +- rebuild metadata, including previous-version diff metadata when present - lint summary and trust state that distinguishes clean, warning, error, and low coverage results - a derived-result notice that source notes, event audits, relation facts, and applied diff --git a/packages/elf-domain/src/knowledge.rs b/packages/elf-domain/src/knowledge.rs index ce933b42..d076ba02 100644 --- a/packages/elf-domain/src/knowledge.rs +++ b/packages/elf-domain/src/knowledge.rs @@ -8,6 +8,8 @@ pub const KNOWLEDGE_PAGE_CONTRACT_SCHEMA_V1: &str = "elf.knowledge_page/v1"; pub const KNOWLEDGE_PAGE_REBUILD_SCHEMA_V1: &str = "elf.knowledge_page.rebuild/v1"; /// Current source coverage metadata schema identifier. pub const KNOWLEDGE_PAGE_SOURCE_COVERAGE_SCHEMA_V1: &str = "elf.knowledge_page.source_coverage/v1"; +/// Current previous-version diff metadata schema identifier. +pub const KNOWLEDGE_PAGE_VERSION_DIFF_SCHEMA_V1: &str = "elf.knowledge_page.version_diff/v1"; /// Derived knowledge page category. 
#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)] diff --git a/packages/elf-service/src/knowledge.rs b/packages/elf-service/src/knowledge.rs index cdc9b24d..fce10380 100644 --- a/packages/elf-service/src/knowledge.rs +++ b/packages/elf-service/src/knowledge.rs @@ -13,7 +13,8 @@ use elf_domain::{ english_gate, knowledge::{ KNOWLEDGE_PAGE_CONTRACT_SCHEMA_V1, KNOWLEDGE_PAGE_REBUILD_SCHEMA_V1, - KNOWLEDGE_PAGE_SOURCE_COVERAGE_SCHEMA_V1, KnowledgePageKind, KnowledgeSourceKind, + KNOWLEDGE_PAGE_SOURCE_COVERAGE_SCHEMA_V1, KNOWLEDGE_PAGE_VERSION_DIFF_SCHEMA_V1, + KnowledgePageKind, KnowledgeSourceKind, }, }; use elf_storage::{ @@ -30,6 +31,7 @@ use elf_storage::{ const DEFAULT_LIST_LIMIT: i64 = 50; const MAX_LIST_LIMIT: i64 = 200; const SEARCH_SNIPPET_CHARS: usize = 280; +const PREVIOUS_VERSION_DIFF_KEY: &str = "previous_version_diff"; /// Request to rebuild one derived knowledge page from explicit source ids. #[derive(Clone, Debug, Deserialize)] @@ -170,6 +172,8 @@ pub struct KnowledgePageSummary { pub source_coverage: Value, /// Rebuild metadata. pub rebuild_metadata: Value, + /// Previous-version diff metadata, when present. + pub previous_version_diff: Option, /// Creation timestamp. pub created_at: OffsetDateTime, /// Last update timestamp. @@ -191,6 +195,7 @@ impl From for KnowledgePageSummary { rebuild_source_hash: page.rebuild_source_hash, content_hash: page.content_hash, source_coverage: page.source_coverage, + previous_version_diff: previous_version_diff_from_metadata(&page.rebuild_metadata), rebuild_metadata: page.rebuild_metadata, created_at: page.created_at, updated_at: page.updated_at, @@ -425,6 +430,8 @@ pub struct KnowledgePageSearchItem { pub source_coverage: Value, /// Page-level rebuild metadata. pub rebuild_metadata: Value, + /// Previous-version diff metadata, when present. + pub previous_version_diff: Option, /// Lint summary for distinguishing clean, stale, and unsupported pages. 
pub lint_summary: KnowledgePageLintSummary, /// Trust state discriminator for viewer/search clients. @@ -591,6 +598,19 @@ impl ElfService { let ids = SourceIds::from_request(&req)?; let title = req.title.clone().unwrap_or_else(|| generated_title(req.page_kind, &req.page_key)); + let previous_page = knowledge::get_knowledge_page_by_key( + &self.db.pool, + req.tenant_id.as_str(), + req.project_id.as_str(), + req.page_kind.as_str(), + req.page_key.as_str(), + ) + .await?; + let previous_sections = match &previous_page { + Some(page) => + knowledge::list_knowledge_page_sections(&self.db.pool, page.page_id).await?, + None => Vec::new(), + }; let sources = self.resolve_sources(&req, &ids).await?; let now = OffsetDateTime::now_utc(); let source_snapshot = source_snapshot_value(&sources); @@ -605,9 +625,21 @@ impl ElfService { let source_coverage = source_coverage_value(req.page_kind, &req.page_key, §ions, &sources); - let rebuild_metadata = rebuild_metadata(&source_hash, &req.provider_metadata); + let base_rebuild_metadata = rebuild_metadata(&source_hash, &req.provider_metadata); let content_hash = - page_content_hash(&title, §ions, &source_coverage, &rebuild_metadata)?; + page_content_hash(&title, §ions, &source_coverage, &base_rebuild_metadata)?; + let previous_version_diff = previous_version_diff_value( + previous_page.as_ref(), + &previous_sections, + title.as_str(), + source_hash.as_str(), + content_hash.as_str(), + §ions, + ); + let rebuild_metadata = rebuild_metadata_with_previous_version_diff( + base_rebuild_metadata, + previous_version_diff, + ); let page_id = Uuid::new_v4(); let mut tx = self.db.pool.begin().await?; let page = knowledge::upsert_knowledge_page( @@ -977,6 +1009,7 @@ fn knowledge_page_search_item( row.source_coverage.get("coverage_complete").and_then(Value::as_bool).unwrap_or(false); let trust_state = search_trust_state(&lint_summary, coverage_complete, &row); let repair_guidance = search_repair_guidance(&trust_state); + let previous_version_diff = 
previous_version_diff_from_metadata(&row.rebuild_metadata); KnowledgePageSearchItem { result_kind: "knowledge_page_section".to_string(), @@ -996,6 +1029,7 @@ fn knowledge_page_search_item( source_refs: source_refs.into_iter().map(KnowledgePageSourceRefResponse::from).collect(), source_coverage: row.source_coverage, rebuild_metadata: row.rebuild_metadata, + previous_version_diff, lint_summary, trust_state, derived_notice: @@ -1497,6 +1531,148 @@ fn rebuild_metadata(source_hash: &str, provider_metadata: &Value) -> Value { }) } +fn rebuild_metadata_with_previous_version_diff(mut metadata: Value, diff: Value) -> Value { + let Some(object) = metadata.as_object_mut() else { + return serde_json::json!({ PREVIOUS_VERSION_DIFF_KEY: diff }); + }; + + object.insert(PREVIOUS_VERSION_DIFF_KEY.to_string(), diff); + + metadata +} + +fn previous_version_diff_from_metadata(metadata: &Value) -> Option { + metadata + .get(PREVIOUS_VERSION_DIFF_KEY) + .filter(|diff| diff.as_object().is_some_and(|object| !object.is_empty())) + .cloned() +} + +fn previous_version_diff_value( + previous: Option<&KnowledgePage>, + previous_sections: &[KnowledgePageSection], + new_title: &str, + new_source_hash: &str, + new_content_hash: &str, + new_sections: &[DraftSection], +) -> Value { + let Some(previous) = previous else { + return serde_json::json!({ + "schema": KNOWLEDGE_PAGE_VERSION_DIFF_SCHEMA_V1, + "available": false, + "reason": "no_previous_version", + "summary": "Initial rebuild; no previous knowledge page version exists.", + "source_mutation_allowed": false, + }); + }; + let previous_by_key = previous_sections + .iter() + .map(|section| (section.section_key.as_str(), section)) + .collect::>(); + let new_by_key = new_sections + .iter() + .map(|section| (section.section_key.as_str(), section)) + .collect::>(); + let previous_keys = previous_by_key.keys().copied().collect::>(); + let new_keys = new_by_key.keys().copied().collect::>(); + let added_section_keys = 
sorted_strings(new_keys.difference(&previous_keys).copied()); + let removed_section_keys = sorted_strings(previous_keys.difference(&new_keys).copied()); + let mut changed_section_keys = Vec::new(); + let mut unchanged_section_keys = Vec::new(); + + for key in previous_keys.intersection(&new_keys).copied() { + let previous_section = previous_by_key[key]; + let new_section = new_by_key[key]; + + if previous_section.content_hash == new_section.content_hash + && previous_section.heading == new_section.heading + && previous_section.role == new_section.role + && previous_section.unsupported_reason == new_section.unsupported_reason + { + unchanged_section_keys.push(key.to_string()); + } else { + changed_section_keys.push(key.to_string()); + } + } + + let title_changed = previous.title != new_title; + let source_changed = previous.rebuild_source_hash != new_source_hash; + let content_changed = previous.content_hash != new_content_hash; + let summary = version_diff_summary( + title_changed, + source_changed, + content_changed, + added_section_keys.len(), + removed_section_keys.len(), + changed_section_keys.len(), + ); + + serde_json::json!({ + "schema": KNOWLEDGE_PAGE_VERSION_DIFF_SCHEMA_V1, + "available": true, + "previous_page_id": previous.page_id, + "previous_content_hash": previous.content_hash, + "new_content_hash": new_content_hash, + "previous_source_hash": previous.rebuild_source_hash, + "new_source_hash": new_source_hash, + "title_changed": title_changed, + "source_changed": source_changed, + "content_changed": content_changed, + "section_added_count": added_section_keys.len(), + "section_removed_count": removed_section_keys.len(), + "section_changed_count": changed_section_keys.len(), + "section_unchanged_count": unchanged_section_keys.len(), + "added_section_keys": added_section_keys, + "removed_section_keys": removed_section_keys, + "changed_section_keys": changed_section_keys, + "unchanged_section_keys": unchanged_section_keys, + "source_mutation_allowed": 
false, + "summary": summary, + }) +} + +fn sorted_strings<'a>(items: impl Iterator) -> Vec { + let mut out = items.map(ToString::to_string).collect::>(); + + out.sort(); + + out +} + +fn version_diff_summary( + title_changed: bool, + source_changed: bool, + content_changed: bool, + added: usize, + removed: usize, + changed: usize, +) -> String { + if !title_changed + && !source_changed + && !content_changed + && added == 0 + && removed == 0 + && changed == 0 + { + return "No page-level or section-level changes from the previous rebuild.".to_string(); + } + + format!( + "Previous rebuild diff: title_changed={title_changed}, source_changed={source_changed}, content_changed={content_changed}, sections added={added}, removed={removed}, changed={changed}." + ) +} + +fn content_hash_rebuild_metadata(rebuild_metadata: &Value) -> Value { + let Some(object) = rebuild_metadata.as_object() else { + return rebuild_metadata.clone(); + }; + let mut stable = object.clone(); + + stable.remove(PREVIOUS_VERSION_DIFF_KEY); + + Value::Object(stable) +} + fn section_hash_payload(section: &DraftSection) -> Value { serde_json::json!({ "section_key": section.section_key.clone(), @@ -1514,11 +1690,13 @@ fn page_content_hash( source_coverage: &Value, rebuild_metadata: &Value, ) -> Result { + let stable_rebuild_metadata = content_hash_rebuild_metadata(rebuild_metadata); + hash_json(&serde_json::json!({ "title": title, "sections": sections.iter().map(section_hash_payload).collect::>(), "source_coverage": source_coverage, - "rebuild_metadata": rebuild_metadata, + "rebuild_metadata": stable_rebuild_metadata, })) } @@ -1853,8 +2031,9 @@ async fn insert_lint_finding( #[cfg(test)] mod tests { use crate::knowledge::{ - self, KnowledgePage, KnowledgePageKind, KnowledgePageSearchRow, KnowledgePageSection, - KnowledgePageSourceRef, KnowledgeSourceKind, OffsetDateTime, SourceSnapshot, Uuid, + self, DraftSection, KnowledgePage, KnowledgePageKind, KnowledgePageSearchRow, + KnowledgePageSection, 
KnowledgePageSourceRef, KnowledgeSourceKind, OffsetDateTime, + SourceSnapshot, Uuid, }; fn test_source(kind: KnowledgeSourceKind, raw_id: u128, line: &str) -> SourceSnapshot { @@ -1935,6 +2114,55 @@ mod tests { assert_eq!(metadata["provider_metadata"]["provider_id"], "fixture"); } + #[test] + fn previous_version_diff_records_delta_without_changing_content_hash() { + let previous = test_page(); + let previous_section = + test_section(Uuid::from_u128(10), "source-notes", serde_json::json!([]), None); + let sections = vec![DraftSection { + section_id: Uuid::from_u128(12), + section_key: "source-notes".to_string(), + heading: "source-notes".to_string(), + role: "current_truth".to_string(), + content: "Updated section content.".to_string(), + ordinal: 0, + source_indexes: vec![0], + unsupported_reason: None, + content_hash: "new-section-hash".to_string(), + citations: serde_json::json!([{ "source_kind": "note" }]), + }]; + let base_metadata = + knowledge::rebuild_metadata("new-source-hash", &knowledge::empty_object()); + let coverage = serde_json::json!({ "coverage_complete": true }); + let hash_without_diff = + knowledge::page_content_hash("ELF", &sections, &coverage, &base_metadata) + .expect("stable hash should serialize"); + let diff = knowledge::previous_version_diff_value( + Some(&previous), + &[previous_section], + "ELF", + "new-source-hash", + hash_without_diff.as_str(), + &sections, + ); + let metadata_with_diff = + knowledge::rebuild_metadata_with_previous_version_diff(base_metadata, diff.clone()); + let hash_with_diff = + knowledge::page_content_hash("ELF", &sections, &coverage, &metadata_with_diff) + .expect("hash should ignore previous-version diff metadata"); + + assert_eq!(hash_without_diff, hash_with_diff); + assert_eq!(diff["schema"], "elf.knowledge_page.version_diff/v1"); + assert_eq!(diff["available"], true); + assert_eq!(diff["source_mutation_allowed"], false); + assert_eq!(diff["section_changed_count"], 1); + assert_eq!( + 
knowledge::previous_version_diff_from_metadata(&metadata_with_diff) + .expect("diff should be extractable")["section_changed_count"], + 1 + ); + } + #[test] fn stale_source_comparison_detects_changed_snapshot() { let source_id = Uuid::from_u128(42); diff --git a/packages/elf-service/tests/acceptance/knowledge_pages.rs b/packages/elf-service/tests/acceptance/knowledge_pages.rs index 81ad83f3..69761ede 100644 --- a/packages/elf-service/tests/acceptance/knowledge_pages.rs +++ b/packages/elf-service/tests/acceptance/knowledge_pages.rs @@ -333,6 +333,15 @@ async fn rebuilds_pages_with_citations_and_detects_stale_sources() { })); assert_eq!(first.page.page.source_coverage["coverage_complete"], true); assert_eq!(first.page.page.rebuild_metadata["deterministic"], true); + assert_eq!( + first + .page + .page + .previous_version_diff + .as_ref() + .expect("initial rebuild should expose no-previous diff")["available"], + false + ); let second = service .knowledge_page_rebuild(KnowledgePageRebuildRequest { @@ -355,6 +364,18 @@ async fn rebuilds_pages_with_citations_and_detects_stale_sources() { assert_eq!(first.page.page.rebuild_source_hash, second.page.page.rebuild_source_hash); assert_eq!(first.page.page.content_hash, second.page.page.content_hash); + let second_diff = second + .page + .page + .previous_version_diff + .as_ref() + .expect("second rebuild should expose previous-version diff"); + + assert_eq!(second_diff["schema"], "elf.knowledge_page.version_diff/v1"); + assert_eq!(second_diff["available"], true); + assert_eq!(second_diff["source_mutation_allowed"], false); + assert_eq!(second_diff["content_changed"], false); + sqlx::query( "\ UPDATE memory_notes diff --git a/packages/elf-storage/src/knowledge.rs b/packages/elf-storage/src/knowledge.rs index 1e37cf7e..eef76197 100644 --- a/packages/elf-storage/src/knowledge.rs +++ b/packages/elf-storage/src/knowledge.rs @@ -583,6 +583,53 @@ LIMIT 1", Ok(row) } +/// Fetches one knowledge page by stable page key. 
+pub async fn get_knowledge_page_by_key<'e, E>( + executor: E, + tenant_id: &str, + project_id: &str, + page_kind: &str, + page_key: &str, +) -> Result> +where + E: PgExecutor<'e>, +{ + let row = sqlx::query_as::<_, KnowledgePage>( + "\ +SELECT + page_id, + tenant_id, + project_id, + page_kind, + page_key, + title, + contract_schema, + status, + rebuild_source_hash, + content_hash, + source_coverage, + source_snapshot, + rebuild_metadata, + created_at, + updated_at, + rebuilt_at +FROM knowledge_pages +WHERE tenant_id = $1 + AND project_id = $2 + AND page_kind = $3 + AND page_key = $4 +LIMIT 1", + ) + .bind(tenant_id) + .bind(project_id) + .bind(page_kind) + .bind(page_key) + .fetch_optional(executor) + .await?; + + Ok(row) +} + /// Lists knowledge pages for a tenant and project. pub async fn list_knowledge_pages<'e, E>( executor: E,